1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/barrierSet.hpp"
  27 #include "gc/shared/c2/barrierSetC2.hpp"
  28 #include "memory/allocation.inline.hpp"
  29 #include "memory/resourceArea.hpp"
  30 #include "opto/addnode.hpp"
  31 #include "opto/callnode.hpp"
  32 #include "opto/castnode.hpp"
  33 #include "opto/connode.hpp"
  35 #include "opto/divnode.hpp"
  36 #include "opto/loopnode.hpp"
  37 #include "opto/matcher.hpp"
  38 #include "opto/mulnode.hpp"
  39 #include "opto/movenode.hpp"
  40 #include "opto/opaquenode.hpp"
  41 #include "opto/rootnode.hpp"
  42 #include "opto/subnode.hpp"
  43 #include "opto/subtypenode.hpp"
  44 #include "utilities/macros.hpp"
  45 
  46 //=============================================================================
  47 //------------------------------split_thru_phi---------------------------------
  48 // Split Node 'n' through merge point if there is enough win.
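     // For illustration only (simplified, assumed node shapes): given a two-path
     // Region R merging a Phi, phi = Phi(R, a, b), splitting n = AddI(phi, c)
     // through R builds
     //   new_phi = Phi(R, AddI(a, c), AddI(b, c))
     // so each cloned AddI can fold to a constant or common up with an existing
     // node on its own path; the number of such per-path wins must exceed
     // 'policy' for the split to be kept.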
  49 Node* PhaseIdealLoop::split_thru_phi(Node* n, Node* region, int policy) {
  50   if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
  51     // ConvI2L may have type information on it which is unsafe to push up,
  52     // so disable this for now
  53     return NULL;
  54   }
  55 
  56   // Splitting range check CastIIs through a loop induction Phi can
  57   // cause new Phis to be created that are left unrelated to the loop
  58   // induction Phi and prevent optimizations (vectorization)
  59   if (n->Opcode() == Op_CastII && region->is_CountedLoop() &&
  60       n->in(1) == region->as_CountedLoop()->phi()) {
  61     return NULL;
  62   }
  63 
  64   // Bail out if 'n' is a Div or Mod node whose zero check was removed earlier (i.e. control is NULL) and its divisor is an induction variable
  65   // phi p of a trip-counted (integer) loop whose inputs could be zero (include zero in their type range). p could have a more precise type
  66   // range that does not necessarily include all values of its inputs. Since each of these inputs will be a divisor of the newly cloned nodes
  67   // of 'n', we need to bail out if one of these divisors could be zero (zero in its type range).
  68   if ((n->Opcode() == Op_DivI || n->Opcode() == Op_ModI) && n->in(0) == NULL
  69       && region->is_CountedLoop() && n->in(2) == region->as_CountedLoop()->phi()) {
  70     Node* phi = region->as_CountedLoop()->phi();
  71     for (uint i = 1; i < phi->req(); i++) {
  72       if (_igvn.type(phi->in(i))->filter_speculative(TypeInt::ZERO) != Type::TOP) {
  73         // Zero could be a possible value but we already removed the zero check. Bail out to avoid a possible division by zero at a later point.
  74         return NULL;
  75       }
  76     }
  77   }
  78 
  79   int wins = 0;
  80   assert(!n->is_CFG(), "");
  81   assert(region->is_Region(), "");
  82 
  83   const Type* type = n->bottom_type();
  84   const TypeOopPtr* t_oop = _igvn.type(n)->isa_oopptr();
  85   Node* phi;
  86   if (t_oop != NULL && t_oop->is_known_instance_field()) {
  87     int iid    = t_oop->instance_id();
  88     int index  = C->get_alias_index(t_oop);
  89     int offset = t_oop->offset();
  90     phi = new PhiNode(region, type, NULL, iid, index, offset);
  91   } else {
  92     phi = PhiNode::make_blank(region, n);
  93   }
  94   uint old_unique = C->unique();
  95   for (uint i = 1; i < region->req(); i++) {
  96     Node* x;
  97     Node* the_clone = NULL;
  98     if (region->in(i) == C->top()) {
  99       x = C->top();             // Dead path?  Use a dead data op
 100     } else {
 101       x = n->clone();           // Else clone up the data op
 102       the_clone = x;            // Remember for possible deletion.
 103       // Alter data node to use pre-phi inputs
 104       if (n->in(0) == region)
 105         x->set_req( 0, region->in(i) );
 106       for (uint j = 1; j < n->req(); j++) {
 107         Node* in = n->in(j);
 108         if (in->is_Phi() && in->in(0) == region)
 109           x->set_req(j, in->in(i)); // Use pre-Phi input for the clone
 110       }
 111     }
 112     // Check for a 'win' on some paths
 113     const Type* t = x->Value(&_igvn);
 114 
 115     bool singleton = t->singleton();
 116 
 117     // A TOP singleton indicates that there are no possible values incoming
 118     // along a particular edge. In most cases, this is OK, and the Phi will
 119     // be eliminated later in an Ideal call. However, we can't allow this to
 120     // happen if the singleton occurs on loop entry, as the elimination of
 121     // the PhiNode may cause the resulting node to migrate back to a previous
 122     // loop iteration.
 123     if (singleton && t == Type::TOP) {
 124       // Is_Loop() == false does not confirm the absence of a loop (e.g., an
 125       // irreducible loop may not be indicated by an affirmative is_Loop());
 126       // therefore, the only top we can split thru a phi is on a backedge of
 127       // a loop.
 128       singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
 129     }
 130 
 131     if (singleton) {
 132       wins++;
 133       x = ((PhaseGVN&)_igvn).makecon(t);
 134     } else {
 135       // We now call Identity to try to simplify the cloned node.
 136       // Note that some Identity methods call phase->type(this).
 137       // Make sure that the type array is big enough for
 138       // our new node, even though we may throw the node away.
 139       // (Note: This tweaking with igvn only works because x is a new node.)
 140       _igvn.set_type(x, t);
 141       // If x is a TypeNode, capture any more-precise type permanently into the Node;
 142       // otherwise it will not be updated during igvn->transform() since
 143       // igvn->type(x) is set to x->Value() already.
 144       x->raise_bottom_type(t);
 145       Node* y = x->Identity(&_igvn);
 146       if (y != x) {
 147         wins++;
 148         x = y;
 149       } else {
 150         y = _igvn.hash_find(x);
 151         if (y) {
 152           wins++;
 153           x = y;
 154         } else {
 155           // Else x is a new node we are keeping
 156           // We do not need register_new_node_with_optimizer
 157           // because set_type has already been called.
 158           _igvn._worklist.push(x);
 159         }
 160       }
 161     }
 162     if (x != the_clone && the_clone != NULL)
 163       _igvn.remove_dead_node(the_clone);
 164     phi->set_req( i, x );
 165   }
 166   // Too few wins?
 167   if (wins <= policy) {
 168     _igvn.remove_dead_node(phi);
 169     return NULL;
 170   }
 171 
 172   // Record Phi
 173   register_new_node( phi, region );
 174 
 175   for (uint i2 = 1; i2 < phi->req(); i2++) {
 176     Node *x = phi->in(i2);
 177     // If we commoned up the cloned 'x' with another existing Node,
 178     // the existing Node picks up a new use.  We need to make the
 179     // existing Node occur higher up so it dominates its uses.
 180     Node *old_ctrl;
 181     IdealLoopTree *old_loop;
 182 
 183     if (x->is_Con()) {
 184       // Constant's control is always root.
 185       set_ctrl(x, C->root());
 186       continue;
 187     }
 188     // The occasional new node
 189     if (x->_idx >= old_unique) {     // Found a new, unplaced node?
 190       old_ctrl = NULL;
 191       old_loop = NULL;               // Not in any prior loop
 192     } else {
 193       old_ctrl = get_ctrl(x);
 194       old_loop = get_loop(old_ctrl); // Get prior loop
 195     }
 196     // New late point must dominate new use
 197     Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
 198     if (new_ctrl == old_ctrl) // Nothing is changed
 199       continue;
 200 
 201     IdealLoopTree *new_loop = get_loop(new_ctrl);
 202 
 203     // Don't move x into a loop if its uses are
 204     // outside of the loop. Otherwise x will be cloned
 205     // for each use outside of this loop.
 206     IdealLoopTree *use_loop = get_loop(region);
 207     if (!new_loop->is_member(use_loop) &&
 208         (old_loop == NULL || !new_loop->is_member(old_loop))) {
 209       // Take early control, later control will be recalculated
 210       // during next iteration of loop optimizations.
 211       new_ctrl = get_early_ctrl(x);
 212       new_loop = get_loop(new_ctrl);
 213     }
 214     // Set new location
 215     set_ctrl(x, new_ctrl);
 216     // If changing loop bodies, see if we need to collect into new body
 217     if (old_loop != new_loop) {
 218       if (old_loop && !old_loop->_child)
 219         old_loop->_body.yank(x);
 220       if (!new_loop->_child)
 221         new_loop->_body.push(x);  // Collect body info
 222     }
 223   }
 224 
 225   return phi;
 226 }
 227 
 228 //------------------------------dominated_by------------------------------------
 229 // Replace the dominated test with an obvious true or false.  Place it on the
 230 // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 231 // live path up to the dominating control.
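     // For illustration (simplified): if a test 'if (i < limit)' has already been
     // decided along the path through 'prevdom' (say its true projection), an
     // identical test dominated by it must go the same way. Its condition input is
     // replaced by a constant 1 or 0 picked to match 'prevdom' (inverted when
     // 'flip' is set), and IGVN then folds the now-constant If away.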
 232 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
 233   if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
 234 
 235   // prevdom is the dominating projection of the dominating test.
 236   assert(iff->is_If(), "must be");
 237   assert(iff->Opcode() == Op_If ||
 238          iff->Opcode() == Op_CountedLoopEnd ||
 239          iff->Opcode() == Op_LongCountedLoopEnd ||
 240          iff->Opcode() == Op_RangeCheck,
 241         "Check this code when new subtype is added");
 242 
 243   int pop = prevdom->Opcode();
 244   assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
 245   if (flip) {
 246     if (pop == Op_IfTrue)
 247       pop = Op_IfFalse;
 248     else
 249       pop = Op_IfTrue;
 250   }
 251   // 'con' is set to true or false to kill the dominated test.
 252   Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
 253   set_ctrl(con, C->root()); // Constant gets a new use
 254   // Hack the dominated test
 255   _igvn.replace_input_of(iff, 1, con);
 256 
 257   // If I don't have a reachable TRUE and FALSE path following the IfNode then
 258   // I can assume this path reaches an infinite loop.  In this case it's not
 259   // important to optimize the data Nodes - either the whole compilation will
 260   // be tossed or this path (and all data Nodes) will go dead.
 261   if (iff->outcnt() != 2) return;
 262 
 263   // Make control-dependent data Nodes on the live path (path that will remain
 264   // once the dominated IF is removed) become control-dependent on the
 265   // dominating projection.
 266   Node* dp = iff->as_If()->proj_out_or_null(pop == Op_IfTrue);
 267 
 268   // Loop predicates may have dependent checks which should not
 269   // be skipped. For example, a range check predicate has two checks
 270   // for lower and upper bounds.
 271   if (dp == NULL)
 272     return;
 273 
 274   ProjNode* dp_proj  = dp->as_Proj();
 275   ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
 276   if (exclude_loop_predicate &&
 277       (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
 278        unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
 279        unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
 280     // If this is a range check (IfNode::is_range_check), do not
 281     // reorder because Compile::allow_range_check_smearing might have
 282     // changed the check.
 283     return; // Let IGVN transformation change control dependence.
 284   }
 285 
 286   IdealLoopTree* old_loop = get_loop(dp);
 287 
 288   for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
 289     Node* cd = dp->fast_out(i); // Control-dependent node
 290     // Do not rewire Div and Mod nodes which could have a zero divisor to avoid skipping their zero check.
 291     if (cd->depends_only_on_test() && _igvn.no_dependent_zero_check(cd)) {
 292       assert(cd->in(0) == dp, "");
 293       _igvn.replace_input_of(cd, 0, prevdom);
 294       set_early_ctrl(cd, false);
 295       IdealLoopTree* new_loop = get_loop(get_ctrl(cd));
 296       if (old_loop != new_loop) {
 297         if (!old_loop->_child) {
 298           old_loop->_body.yank(cd);
 299         }
 300         if (!new_loop->_child) {
 301           new_loop->_body.push(cd);
 302         }
 303       }
 304       --i;
 305       --imax;
 306     }
 307   }
 308 }
 309 
 310 //------------------------------has_local_phi_input----------------------------
 311 // Return 'n's control block if 'n' has Phi inputs from its local block and no other
 312 // block-local inputs (all non-local-phi inputs come from earlier blocks); else NULL.
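     // For illustration (assumed shapes): n = AddI(Phi(R, a, b), invar) with the
     // Phi defined at n's control block qualifies and returns that block, so the
     // caller can split n through it; an additional non-Phi input defined in the
     // same block (other than the AddP special case handled below) disqualifies n.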
 313 Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
 314   Node *n_ctrl = get_ctrl(n);
 315   // See if some inputs come from a Phi in this block, or from before
 316   // this block.
 317   uint i;
 318   for( i = 1; i < n->req(); i++ ) {
 319     Node *phi = n->in(i);
 320     if( phi->is_Phi() && phi->in(0) == n_ctrl )
 321       break;
 322   }
 323   if( i >= n->req() )
 324     return NULL;                // No Phi inputs; nowhere to clone thru
 325 
 326   // Check for inputs created between 'n' and the Phi input.  These
 327   // must split as well; they have already been given the chance
 328   // (courtesy of a post-order visit) and since they did not we must
 329   // recover the 'cost' of splitting them by being very profitable
 330   // when splitting 'n'.  Since this is unlikely we simply give up.
 331   for( i = 1; i < n->req(); i++ ) {
 332     Node *m = n->in(i);
 333     if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
 334       // We allow the special case of AddP's with no local inputs.
 335       // This allows us to split-up address expressions.
 336       if (m->is_AddP() &&
 337           get_ctrl(m->in(2)) != n_ctrl &&
 338           get_ctrl(m->in(3)) != n_ctrl) {
 339         // Move the AddP up to dominating point
 340         Node* c = find_non_split_ctrl(idom(n_ctrl));
 341         if (c->is_OuterStripMinedLoop()) {
 342           c->as_Loop()->verify_strip_mined(1);
 343           c = c->in(LoopNode::EntryControl);
 344         }
 345         set_ctrl_and_loop(m, c);
 346         continue;
 347       }
 348       return NULL;
 349     }
 350     assert(n->is_Phi() || m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
 351   }
 352 
 353   return n_ctrl;
 354 }
 355 
 356 //------------------------------remix_address_expressions----------------------
 357 // Rework addressing expressions to get the most loop-invariant stuff
 358 // moved out.  We'd like to do all associative operators, but it's especially
 359 // important (common) to do address expressions.
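     // For illustration (assumed Java-level shape): inside a loop over i, an
     // address expression such as
     //   base + ((invar + i) << 2)
     // is reshaped into
     //   base + ((invar << 2) + (i << 2))
     // so the loop-invariant (invar << 2) part can be hoisted out of the loop and
     // only the loop-varying part is recomputed on each iteration.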
 360 Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
 361   if (!has_ctrl(n))  return NULL;
 362   Node *n_ctrl = get_ctrl(n);
 363   IdealLoopTree *n_loop = get_loop(n_ctrl);
 364 
 365   // See if 'n' mixes loop-varying and loop-invariant inputs and
 366   // itself is loop-varying.
 367 
 368   // Only interested in binary ops (and AddP)
 369   if( n->req() < 3 || n->req() > 4 ) return NULL;
 370 
 371   Node *n1_ctrl = get_ctrl(n->in(                    1));
 372   Node *n2_ctrl = get_ctrl(n->in(                    2));
 373   Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
 374   IdealLoopTree *n1_loop = get_loop( n1_ctrl );
 375   IdealLoopTree *n2_loop = get_loop( n2_ctrl );
 376   IdealLoopTree *n3_loop = get_loop( n3_ctrl );
 377 
 378   // Does one of my inputs spin in a tighter loop than self?
 379   if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
 380       (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
 381       (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
 382     return NULL;                // Leave well enough alone
 383 
 384   // Is at least one of my inputs loop-invariant?
 385   if( n1_loop == n_loop &&
 386       n2_loop == n_loop &&
 387       n3_loop == n_loop )
 388     return NULL;                // No loop-invariant inputs
 389 
 390 
 391   int n_op = n->Opcode();
 392 
 393   // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
 394   if( n_op == Op_LShiftI ) {
 395     // Scale is loop invariant
 396     Node *scale = n->in(2);
 397     Node *scale_ctrl = get_ctrl(scale);
 398     IdealLoopTree *scale_loop = get_loop(scale_ctrl );
 399     if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
 400       return NULL;
 401     const TypeInt *scale_t = scale->bottom_type()->isa_int();
 402     if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
 403       return NULL;              // Don't bother with byte/short masking
 404     // Add must vary with loop (else shift would be loop-invariant)
 405     Node *add = n->in(1);
 406     Node *add_ctrl = get_ctrl(add);
 407     IdealLoopTree *add_loop = get_loop(add_ctrl);
 408     //assert( n_loop == add_loop, "" );
 409     if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
 410 
 411     // Convert I-V into I+ (0-V); same for V-I
 412     if( add->Opcode() == Op_SubI &&
 413         _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
 414       Node *zero = _igvn.intcon(0);
 415       set_ctrl(zero, C->root());
 416       Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
 417       register_new_node( neg, get_ctrl(add->in(2) ) );
 418       add = new AddINode( add->in(1), neg );
 419       register_new_node( add, add_ctrl );
 420     }
 421     if( add->Opcode() != Op_AddI ) return NULL;
 422     // See if one add input is loop invariant
 423     Node *add_var = add->in(1);
 424     Node *add_var_ctrl = get_ctrl(add_var);
 425     IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 426     Node *add_invar = add->in(2);
 427     Node *add_invar_ctrl = get_ctrl(add_invar);
 428     IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
 429     if( add_var_loop == n_loop ) {
 430     } else if( add_invar_loop == n_loop ) {
 431       // Swap to find the invariant part
 432       add_invar = add_var;
 433       add_invar_ctrl = add_var_ctrl;
 434       add_invar_loop = add_var_loop;
 435       add_var = add->in(2);
 436       add_var_ctrl = get_ctrl(add_var);
 437       add_var_loop = get_loop(add_var_ctrl);
 438     } else                      // Else neither input is loop invariant
 439       return NULL;
 440     if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
 441       return NULL;              // No invariant part of the add?
 442 
 443     // Yes!  Reshape address expression!
 444     Node *inv_scale = new LShiftINode( add_invar, scale );
 445     Node *inv_scale_ctrl =
 446       dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
 447       add_invar_ctrl : scale_ctrl;
 448     register_new_node( inv_scale, inv_scale_ctrl );
 449     Node *var_scale = new LShiftINode( add_var, scale );
 450     register_new_node( var_scale, n_ctrl );
 451     Node *var_add = new AddINode( var_scale, inv_scale );
 452     register_new_node( var_add, n_ctrl );
 453     _igvn.replace_node( n, var_add );
 454     return var_add;
 455   }
 456 
 457   // Replace (I+V) with (V+I)
 458   if( n_op == Op_AddI ||
 459       n_op == Op_AddL ||
 460       n_op == Op_AddF ||
 461       n_op == Op_AddD ||
 462       n_op == Op_MulI ||
 463       n_op == Op_MulL ||
 464       n_op == Op_MulF ||
 465       n_op == Op_MulD ) {
 466     if( n2_loop == n_loop ) {
 467       assert( n1_loop != n_loop, "" );
 468       n->swap_edges(1, 2);
 469     }
 470   }
 471 
 472   // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
 473   // but not if I2 is a constant.
 474   if( n_op == Op_AddP ) {
 475     if( n2_loop == n_loop && n3_loop != n_loop ) {
 476       if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
 477         Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
 478         Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
 479         IdealLoopTree *n22loop = get_loop( n22_ctrl );
 480         IdealLoopTree *n23_loop = get_loop( n23_ctrl );
 481         if( n22loop != n_loop && n22loop->is_member(n_loop) &&
 482             n23_loop == n_loop ) {
 483           Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
 484           // Stuff new AddP in the loop preheader
 485           register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 486           Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
 487           register_new_node( add2, n_ctrl );
 488           _igvn.replace_node( n, add2 );
 489           return add2;
 490         }
 491       }
 492     }
 493 
 494     // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
 495     if (n2_loop != n_loop && n3_loop == n_loop) {
 496       if (n->in(3)->Opcode() == Op_AddX) {
 497         Node *V = n->in(3)->in(1);
 498         Node *I = n->in(3)->in(2);
 499         if (is_member(n_loop,get_ctrl(V))) {
 500         } else {
 501           Node *tmp = V; V = I; I = tmp;
 502         }
 503         if (!is_member(n_loop,get_ctrl(I))) {
 504           Node *add1 = new AddPNode(n->in(1), n->in(2), I);
 505           // Stuff new AddP in the loop preheader
 506           register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
 507           Node *add2 = new AddPNode(n->in(1), add1, V);
 508           register_new_node(add2, n_ctrl);
 509           _igvn.replace_node(n, add2);
 510           return add2;
 511         }
 512       }
 513     }
 514   }
 515 
 516   return NULL;
 517 }
 518 
 519 // Optimize ((in1[2*i] * in2[2*i]) + (in1[2*i+1] * in2[2*i+1])) into a MulAddS2I node
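     // For illustration (assumed Java-level shape): the reduction step of a short
     // dot product,
     //   sum += s1[2*i] * s2[2*i] + s1[2*i+1] * s2[2*i+1];
     // has its AddI over two (LoadS * LoadS) products replaced by one MulAddS2I
     // node, which SuperWord can later widen to MulAddVS2VI vector instructions
     // on platforms that support both match rules.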
 520 Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
 521   assert(n->Opcode() == Op_AddI, "sanity");
 522   Node * nn = NULL;
 523   Node * in1 = n->in(1);
 524   Node * in2 = n->in(2);
 525   if (in1->Opcode() == Op_MulI && in2->Opcode() == Op_MulI) {
 526     IdealLoopTree* loop_n = get_loop(get_ctrl(n));
 527     if (loop_n->is_counted() &&
 528         loop_n->_head->as_Loop()->is_valid_counted_loop(T_INT) &&
 529         Matcher::match_rule_supported(Op_MulAddVS2VI) &&
 530         Matcher::match_rule_supported(Op_MulAddS2I)) {
 531       Node* mul_in1 = in1->in(1);
 532       Node* mul_in2 = in1->in(2);
 533       Node* mul_in3 = in2->in(1);
 534       Node* mul_in4 = in2->in(2);
 535       if (mul_in1->Opcode() == Op_LoadS &&
 536           mul_in2->Opcode() == Op_LoadS &&
 537           mul_in3->Opcode() == Op_LoadS &&
 538           mul_in4->Opcode() == Op_LoadS) {
 539         IdealLoopTree* loop1 = get_loop(get_ctrl(mul_in1));
 540         IdealLoopTree* loop2 = get_loop(get_ctrl(mul_in2));
 541         IdealLoopTree* loop3 = get_loop(get_ctrl(mul_in3));
 542         IdealLoopTree* loop4 = get_loop(get_ctrl(mul_in4));
 543         IdealLoopTree* loop5 = get_loop(get_ctrl(in1));
 544         IdealLoopTree* loop6 = get_loop(get_ctrl(in2));
 545         // All nodes should be in the same counted loop.
 546         if (loop_n == loop1 && loop_n == loop2 && loop_n == loop3 &&
 547             loop_n == loop4 && loop_n == loop5 && loop_n == loop6) {
 548           Node* adr1 = mul_in1->in(MemNode::Address);
 549           Node* adr2 = mul_in2->in(MemNode::Address);
 550           Node* adr3 = mul_in3->in(MemNode::Address);
 551           Node* adr4 = mul_in4->in(MemNode::Address);
 552           if (adr1->is_AddP() && adr2->is_AddP() && adr3->is_AddP() && adr4->is_AddP()) {
 553             if ((adr1->in(AddPNode::Base) == adr3->in(AddPNode::Base)) &&
 554                 (adr2->in(AddPNode::Base) == adr4->in(AddPNode::Base))) {
 555               nn = new MulAddS2INode(mul_in1, mul_in2, mul_in3, mul_in4);
 556               register_new_node(nn, get_ctrl(n));
 557               _igvn.replace_node(n, nn);
 558               return nn;
 559             } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) &&
 560                        (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
 561               nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3);
 562               register_new_node(nn, get_ctrl(n));
 563               _igvn.replace_node(n, nn);
 564               return nn;
 565             }
 566           }
 567         }
 568       }
 569     }
 570   }
 571   return nn;
 572 }
 573 
 574 //------------------------------conditional_move-------------------------------
 575 // Attempt to replace a Phi with a conditional move.  We have some pretty
 576 // strict profitability requirements.  All Phis at the merge point must
 577 // be converted, so we can remove the control flow.  We need to limit the
 578 // number of c-moves to a small handful.  All code that was in the side-arms
 579 // of the CFG diamond is now speculatively executed.  This code has to be
 580 // "cheap enough".  We are pretty much limited to CFG diamonds that merge
 581 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
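     // For illustration (assumed shape): a diamond such as
     //   x = (a < b) ? p : q;   // Phi for x at the merging Region
     // becomes
     //   x = CMove(a < b, p, q)
     // with both arms evaluated speculatively, which is why the cost checks below
     // reject arms holding more than a couple of cheap, side-effect-free ops.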
 582 Node *PhaseIdealLoop::conditional_move( Node *region ) {
 583 
 584   assert(region->is_Region(), "sanity check");
 585   if (region->req() != 3) return NULL;
 586 
 587   // Check for CFG diamond
 588   Node *lp = region->in(1);
 589   Node *rp = region->in(2);
 590   if (!lp || !rp) return NULL;
 591   Node *lp_c = lp->in(0);
 592   if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
 593   IfNode *iff = lp_c->as_If();
 594 
 595   // Check for ops pinned in an arm of the diamond.
 596   // Can't remove the control flow in this case
 597   if (lp->outcnt() > 1) return NULL;
 598   if (rp->outcnt() > 1) return NULL;
 599 
 600   IdealLoopTree* r_loop = get_loop(region);
 601   assert(r_loop == get_loop(iff), "sanity");
 602   // Always convert to CMOVE if all results are used only outside this loop.
 603   bool used_inside_loop = (r_loop == _ltree_root);
 604 
 605   // Check profitability
 606   int cost = 0;
 607   int phis = 0;
 608   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 609     Node *out = region->fast_out(i);
 610     if (!out->is_Phi()) continue; // Ignore other control edges, etc
 611     phis++;
 612     PhiNode* phi = out->as_Phi();
 613     BasicType bt = phi->type()->basic_type();
 614     switch (bt) {
 615     case T_DOUBLE:
 616     case T_FLOAT:
 617       if (C->use_cmove()) {
 618         continue; //TODO: maybe we want to add some cost
 619       }
 620       cost += Matcher::float_cmove_cost(); // Could be very expensive
 621       break;
 622     case T_LONG: {
 623       cost += Matcher::long_cmove_cost(); // May encode as 2 CMOVs
 624     }                                     // fall through: also add the base cost below
 625     case T_INT:                 // These all CMOV fine
 626     case T_ADDRESS: {           // (RawPtr)
 627       cost++;
 628       break;
 629     }
 630     case T_NARROWOOP: // Fall through
 631     case T_OBJECT: {            // Base oops are OK, but not derived oops
 632       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 633       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 634       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 635       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 636       // have a Phi for the base here that we convert to a CMOVE all is well
 637       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 638       // the allocator will have to produce a base by creating a CMOVE of the
 639       // relevant bases.  This puts the allocator in the business of
 640       // manufacturing expensive instructions, generally a bad plan.
 641       // Just Say No to Conditionally-Moved Derived Pointers.
 642       if (tp && tp->offset() != 0)
 643         return NULL;
 644       cost++;
 645       break;
 646     }
 647     default:
 648       return NULL;              // In particular, can't do memory or I/O
 649     }
 650     // Add in cost any speculative ops
 651     for (uint j = 1; j < region->req(); j++) {
 652       Node *proj = region->in(j);
 653       Node *inp = phi->in(j);
 654       if (get_ctrl(inp) == proj) { // Found local op
 655         cost++;
 656         // Check for a chain of dependent ops; these will all become
 657         // speculative in a CMOV.
 658         for (uint k = 1; k < inp->req(); k++)
 659           if (get_ctrl(inp->in(k)) == proj)
 660             cost += ConditionalMoveLimit; // Too much speculative goo
 661       }
 662     }
 663     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 664     // This will likely Split-If, a higher-payoff operation.
 665     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 666       Node* use = phi->fast_out(k);
 667       if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
 668         cost += ConditionalMoveLimit;
 669       // Is there a use inside the loop?
 670       // Note: check only basic types since CMoveP is pinned.
 671       if (!used_inside_loop && is_java_primitive(bt)) {
 672         IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
 673         if (r_loop == u_loop || r_loop->is_member(u_loop)) {
 674           used_inside_loop = true;
 675         }
 676       }
 677     }
 678   }//for
 679   Node* bol = iff->in(1);
 680   if (bol->Opcode() == Op_Opaque4) {
 681     return NULL; // Ignore loop predicate checks (the Opaque4 ensures they will go away)
 682   }
 683   assert(bol->Opcode() == Op_Bool, "Unexpected node");
 684   int cmp_op = bol->in(1)->Opcode();
 685   if (cmp_op == Op_SubTypeCheck) { // SubTypeCheck expansion expects an IfNode
 686     return NULL;
 687   }
 688   // It is expensive to generate flags from a float compare.
 689   // Avoid duplicated float compare.
 690   if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 691 
 692   float infrequent_prob = PROB_UNLIKELY_MAG(3);
 693   // Ignore cost and block frequency if CMOVE can be moved outside the loop.
 694   if (used_inside_loop) {
 695     if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
 696 
 697     // The BlockLayoutByFrequency optimization moves infrequent branches
 698     // off the hot path. No point in CMOV'ing in such a case (110 is used
 699     // instead of 100 to account for the inexactness of the float value).
 700     if (BlockLayoutByFrequency) {
 701       infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
 702     }
 703   }
 704   // Check for highly predictable branch.  No point in CMOV'ing if
 705   // we are going to predict accurately all the time.
 706   if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
 707     //keep going
 708   } else if (iff->_prob < infrequent_prob ||
 709       iff->_prob > (1.0f - infrequent_prob))
 710     return NULL;
 711 
 712   // --------------
 713   // Now replace all Phis with CMOV's
 714   Node *cmov_ctrl = iff->in(0);
 715   uint flip = (lp->Opcode() == Op_IfTrue);
 716   Node_List wq;
 717   while (1) {
 718     PhiNode* phi = NULL;
 719     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 720       Node *out = region->fast_out(i);
 721       if (out->is_Phi()) {
 722         phi = out->as_Phi();
 723         break;
 724       }
 725     }
 726     if (phi == NULL || _igvn.type(phi) == Type::TOP) {
 727       break;
 728     }
 729     if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
 730     // Move speculative ops
 731     wq.push(phi);
 732     while (wq.size() > 0) {
 733       Node *n = wq.pop();
 734       for (uint j = 1; j < n->req(); j++) {
 735         Node* m = n->in(j);
 736         if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
 737 #ifndef PRODUCT
 738           if (PrintOpto && VerifyLoopOptimizations) {
 739             tty->print("  speculate: ");
 740             m->dump();
 741           }
 742 #endif
 743           set_ctrl(m, cmov_ctrl);
 744           wq.push(m);
 745         }
 746       }
 747     }
 748     Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
 749     register_new_node( cmov, cmov_ctrl );
 750     _igvn.replace_node( phi, cmov );
 751 #ifndef PRODUCT
 752     if (TraceLoopOpts) {
 753       tty->print("CMOV  ");
 754       r_loop->dump_head();
 755       if (Verbose) {
 756         bol->in(1)->dump(1);
 757         cmov->dump(1);
 758       }
 759     }
 760     if (VerifyLoopOptimizations) verify();
 761 #endif
 762   }
 763 
 764   // The useless CFG diamond will fold up later; see the optimization in
 765   // RegionNode::Ideal.
 766   _igvn._worklist.push(region);
 767 
 768   return iff->in(1);
 769 }
 770 
 771 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
 772   for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
 773     Node* u = m->fast_out(i);
 774     if (u->is_CFG()) {
 775       if (u->Opcode() == Op_NeverBranch) {
 776         u = ((NeverBranchNode*)u)->proj_out(0);
 777         enqueue_cfg_uses(u, wq);
 778       } else {
 779         wq.push(u);
 780       }
 781     }
 782   }
 783 }
 784 
 785 // Try moving a store out of a loop, right before the loop
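     // For illustration (assumed shape): a store executed first on every iteration,
     // with loop-invariant address and value and no other observer of its memory
     // Phi, e.g.
     //   for (...) { a.f = v; ... }   // a and v loop invariant, a.f never read here
     // writes the same value every time, so it can instead be performed once right
     // before the loop is entered.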
 786 Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
 787   // Store has to be first in the loop body
 788   IdealLoopTree *n_loop = get_loop(n_ctrl);
 789   if (n->is_Store() && n_loop != _ltree_root &&
 790       n_loop->is_loop() && n_loop->_head->is_Loop() &&
 791       n->in(0) != NULL) {
 792     Node* address = n->in(MemNode::Address);
 793     Node* value = n->in(MemNode::ValueIn);
 794     Node* mem = n->in(MemNode::Memory);
 795     IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 796     IdealLoopTree* value_loop = get_loop(get_ctrl(value));
 797 
 798     // - address and value must be loop invariant
 799     // - memory must be a memory Phi for the loop
 800     // - Store must be the only store on this memory slice in the
 801     // loop: if there's another store following this one then the value
 802     // written at iteration i by the second store could be overwritten
 803     // at iteration i+n by the first store: it's not safe to move the
 804     // first store out of the loop.
 805     // - nothing must observe the memory Phi: this guarantees no read
 806     // before the store, and we are also guaranteed that the store
 807     // post-dominates the loop head (ignoring a possible early
 808     // exit). Otherwise there would be an extra Phi involved between the
 809     // loop's Phi and the store.
 810     // - there must be no early exit from the loop before the Store
 811     // (such an exit would most of the time show up as an extra use of the
 812     // memory Phi, but sometimes it is a bottom memory Phi that takes the
 813     // store as input).
 814 
 815     if (!n_loop->is_member(address_loop) &&
 816         !n_loop->is_member(value_loop) &&
 817         mem->is_Phi() && mem->in(0) == n_loop->_head &&
 818         mem->outcnt() == 1 &&
 819         mem->in(LoopNode::LoopBackControl) == n) {
 820 
 821       assert(n_loop->_tail != NULL, "need a tail");
 822       assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");
 823 
 824       // Verify that there's no early exit of the loop before the store.
 825       bool ctrl_ok = false;
 826       {
 827         // Follow control from loop head until n, we exit the loop or
 828         // we reach the tail
 829         ResourceMark rm;
 830         Unique_Node_List wq;
 831         wq.push(n_loop->_head);
 832 
 833         for (uint next = 0; next < wq.size(); ++next) {
 834           Node *m = wq.at(next);
 835           if (m == n->in(0)) {
 836             ctrl_ok = true;
 837             continue;
 838           }
 839           assert(!has_ctrl(m), "should be CFG");
 840           if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
 841             ctrl_ok = false;
 842             break;
 843           }
 844           enqueue_cfg_uses(m, wq);
 845           if (wq.size() > 10) {
 846             ctrl_ok = false;
 847             break;
 848           }
 849         }
 850       }
 851       if (ctrl_ok) {
 852         // move the Store
 853         _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
 854         _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
 855         _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
 856         // Disconnect the phi now. An empty phi can confuse other
 857         // optimizations in this pass of loop opts.
 858         _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
 859         n_loop->_body.yank(mem);
 860 
 861         set_ctrl_and_loop(n, n->in(0));
 862 
 863         return n;
 864       }
 865     }
 866   }
 867   return NULL;
 868 }
 869 
 870 // Try moving a store out of a loop, right after the loop
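     // For illustration (assumed shape): a store with a loop-invariant address that
     // is the last write on its memory slice and is never observed inside the loop,
     // e.g.
     //   for (...) { ...; a.f = v; }   // a loop invariant, a.f never read in the loop
     // only needs its final write to survive, so it can be sunk to just after the
     // loop exit.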
 871 void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
 872   if (n->is_Store() && n->in(0) != NULL) {
 873     Node *n_ctrl = get_ctrl(n);
 874     IdealLoopTree *n_loop = get_loop(n_ctrl);
 875     // Store must be in a loop
 876     if (n_loop != _ltree_root && !n_loop->_irreducible) {
 877       Node* address = n->in(MemNode::Address);
 878       Node* value = n->in(MemNode::ValueIn);
 879       IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 880       // address must be loop invariant
 881       if (!n_loop->is_member(address_loop)) {
 882         // Store must be last on this memory slice in the loop and
 883         // nothing in the loop must observe it
 884         Node* phi = NULL;
 885         for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 886           Node* u = n->fast_out(i);
 887           if (has_ctrl(u)) { // control use?
 888             IdealLoopTree *u_loop = get_loop(get_ctrl(u));
 889             if (!n_loop->is_member(u_loop)) {
 890               continue;
 891             }
 892             if (u->is_Phi() && u->in(0) == n_loop->_head) {
 893               assert(_igvn.type(u) == Type::MEMORY, "bad phi");
 894               // multiple phis on the same slice are possible
 895               if (phi != NULL) {
 896                 return;
 897               }
 898               phi = u;
 899               continue;
 900             }
 901           }
 902           return;
 903         }
 904         if (phi != NULL) {
 905           // Nothing in the loop before the store (next iteration)
 906           // must observe the stored value
 907           bool mem_ok = true;
 908           {
 909             ResourceMark rm;
 910             Unique_Node_List wq;
 911             wq.push(phi);
 912             for (uint next = 0; next < wq.size() && mem_ok; ++next) {
 913               Node *m = wq.at(next);
 914               for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
 915                 Node* u = m->fast_out(i);
 916                 if (u->is_Store() || u->is_Phi()) {
 917                   if (u != n) {
 918                     wq.push(u);
 919                     mem_ok = (wq.size() <= 10);
 920                   }
 921                 } else {
 922                   mem_ok = false;
 923                   break;
 924                 }
 925               }
 926             }
 927           }
 928           if (mem_ok) {
 929             // Move the store out of the loop if the LCA of all
 930             // users (except for the phi) is outside the loop.
 931             Node* hook = new Node(1);
 932             hook->init_req(0, n_ctrl); // Add an input to prevent hook from being dead
 933             _igvn.rehash_node_delayed(phi);
 934             int count = phi->replace_edge(n, hook, &_igvn);
 935             assert(count > 0, "inconsistent phi");
 936 
 937             // Compute latest point this store can go
 938             Node* lca = get_late_ctrl(n, get_ctrl(n));
 939             if (lca->is_OuterStripMinedLoop()) {
 940               lca = lca->in(LoopNode::EntryControl);
 941             }
 942             if (n_loop->is_member(get_loop(lca))) {
 943               // LCA is in the loop - bail out
 944               _igvn.replace_node(hook, n);
 945               return;
 946             }
 947 #ifdef ASSERT
 948             if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
 949               assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is strip mined");
 950               n_loop->_head->as_Loop()->verify_strip_mined(1);
 951               Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
 952               IdealLoopTree* outer_loop = get_loop(outer);
 953               assert(n_loop->_parent == outer_loop, "broken loop tree");
 954               assert(get_loop(lca) == outer_loop, "safepoint in outer loop consumes all memory state");
 955             }
 956 #endif
 957             lca = place_outside_loop(lca, n_loop);
 958             assert(!n_loop->is_member(get_loop(lca)), "control must not be back in the loop");
 959             assert(get_loop(lca)->_nest < n_loop->_nest || lca->in(0)->Opcode() == Op_NeverBranch, "must not be moved into inner loop");
 960 
 961             // Move store out of the loop
 962             _igvn.replace_node(hook, n->in(MemNode::Memory));
 963             _igvn.replace_input_of(n, 0, lca);
 964             set_ctrl_and_loop(n, lca);
 965 
 966             // Disconnect the phi now. An empty phi can confuse other
 967             // optimizations in this pass of loop opts.
 968             if (phi->in(LoopNode::LoopBackControl) == phi) {
 969               _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
 970               n_loop->_body.yank(phi);
 971             }
 972           }
 973         }
 974       }
 975     }
 976   }
 977 }
 978 
 979 //------------------------------split_if_with_blocks_pre-----------------------
 980 // Do the real work in a non-recursive function.  Data nodes want to be
 981 // cloned in the pre-order so they can feed each other nicely.
 982 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 983   // Cloning these guys is unlikely to win
 984   int n_op = n->Opcode();
 985   if (n_op == Op_MergeMem) {
 986     return n;
 987   }
 988   if (n->is_Proj()) {
 989     return n;
 990   }
 991   // Do not clone-up CmpFXXX variations, as these are always
 992   // followed by a CmpI
 993   if (n->is_Cmp()) {
 994     return n;
 995   }
 996   // Attempt to use a conditional move instead of a phi/branch
 997   if (ConditionalMoveLimit > 0 && n_op == Op_Region) {
 998     Node *cmov = conditional_move( n );
 999     if (cmov) {
1000       return cmov;
1001     }
1002   }
1003   if (n->is_CFG() || n->is_LoadStore()) {
1004     return n;
1005   }
1006   if (n->is_Opaque1() ||     // Opaque nodes cannot be mod'd
1007       n_op == Op_Opaque2) {
1008     if (!C->major_progress()) {   // If chance of no more loop opts...
1009       _igvn._worklist.push(n);  // maybe we'll remove them
1010     }
1011     return n;
1012   }
1013 
1014   if (n->is_Con()) {
1015     return n;   // No cloning for Con nodes
1016   }
1017 
1018   Node *n_ctrl = get_ctrl(n);
1019   if (!n_ctrl) {
1020     return n;       // Dead node
1021   }
1022 
1023   Node* res = try_move_store_before_loop(n, n_ctrl);
1024   if (res != NULL) {
1025     return n;
1026   }
1027 
1028   // Attempt to remix address expressions for loop invariants
1029   Node *m = remix_address_expressions( n );
1030   if( m ) return m;
1031 
1032   if (n_op == Op_AddI) {
1033     Node *nn = convert_add_to_muladd( n );
1034     if ( nn ) return nn;
1035   }
1036 
1037   if (n->is_ConstraintCast()) {
1038     Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
1039     // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
1040     // Node control inputs don't necessarily agree with loop control info (due to
1041     // transformations that happened in between), thus an additional dominance
1042     // check is needed to keep loop info valid.
1043     if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
1044       _igvn.replace_node(n, dom_cast);
1045       return dom_cast;
1046     }
1047   }
1048 
1049   // Determine if the Node has inputs from some local Phi.
1050   // Returns the block to clone thru.
1051   Node *n_blk = has_local_phi_input( n );
1052   if( !n_blk ) return n;
1053 
1054   // Do not clone the trip counter through on a CountedLoop
1055   // (messes up the canonical shape).
1056   if (((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_transformed_long_inner_loop())) && n->Opcode() == Op_AddI) ||
1057       (n_blk->is_LongCountedLoop() && n->Opcode() == Op_AddL)) {
1058     return n;
1059   }
1060 
1061   // Check for having no control input; not pinned.  Allow
1062   // dominating control.
1063   if (n->in(0)) {
1064     Node *dom = idom(n_blk);
1065     if (dom_lca(n->in(0), dom) != n->in(0)) {
1066       return n;
1067     }
1068   }
1069   // Policy: when is it profitable.  You must get more wins than
1070   // policy before it is considered profitable.  Policy is usually 0,
1071   // so 1 win is considered profitable.  Big merges will require big
1072   // cloning, so get a larger policy.
1073   int policy = n_blk->req() >> 2;
1074 
1075   // If the loop is a candidate for range check elimination,
1076   // delay splitting through its phi until a later loop optimization
1077   if (n_blk->is_CountedLoop()) {
1078     IdealLoopTree *lp = get_loop(n_blk);
1079     if (lp && lp->_rce_candidate) {
1080       return n;
1081     }
1082   }
1083 
1084   if (must_throttle_split_if()) return n;
1085 
1086   // Split 'n' through the merge point if it is profitable
1087   Node *phi = split_thru_phi( n, n_blk, policy );
1088   if (!phi) return n;
1089 
1090   // Found a Phi to split thru!
1091   // Replace 'n' with the new phi
1092   _igvn.replace_node( n, phi );
1093   // Moved a load around the loop, 'en-registering' something.
1094   if (n_blk->is_Loop() && n->is_Load() &&
1095       !phi->in(LoopNode::LoopBackControl)->is_Load())
1096     C->set_major_progress();
1097 
1098   return phi;
1099 }
1100 
1101 static bool merge_point_too_heavy(Compile* C, Node* region) {
1102   // Bail out if the region and its phis have too many users.
1103   int weight = 0;
1104   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1105     weight += region->fast_out(i)->outcnt();
1106   }
1107   int nodes_left = C->max_node_limit() - C->live_nodes();
1108   if (weight * 8 > nodes_left) {
1109     if (PrintOpto) {
1110       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
1111     }
1112     return true;
1113   } else {
1114     return false;
1115   }
1116 }
1117 
1118 static bool merge_point_safe(Node* region) {
1119   // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1120   // having a PhiNode input. This sidesteps the dangerous case where the split
1121   // ConvI2LNode may become TOP if the input Value() does not
1122   // overlap the ConvI2L range, leaving a node which may not dominate its
1123   // uses.
1124   // A better fix for this problem can be found in the BugTraq entry, but
1125   // expediency for Mantis demands this hack.
1126 #ifdef _LP64
1127   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1128     Node* n = region->fast_out(i);
1129     if (n->is_Phi()) {
1130       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1131         Node* m = n->fast_out(j);
1132         if (m->Opcode() == Op_ConvI2L)
1133           return false;
1134         if (m->is_CastII()) {
1135           return false;
1136         }
1137       }
1138     }
1139   }
1140 #endif
1141   return true;
1142 }
1143 
1144 
1145 //------------------------------place_outside_loop---------------------------------
1146 // Place some computation outside of this loop on the path to the use passed as argument
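     // For illustration: starting from 'useblock', this walks up the idom chain and
     // stops as soon as the next immediate dominator is back inside 'loop' (treating
     // a NeverBranch whose control input is in the loop as inside as well); the last
     // control node still outside the loop is returned.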
1147 Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
1148   Node* head = loop->_head;
1149   assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
1150   if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
1151     loop = loop->_parent;
1152     assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
1153   }
1154 
1155   // Pick control right outside the loop
1156   for (;;) {
1157     Node* dom = idom(useblock);
1158     if (loop->is_member(get_loop(dom)) ||
1159         // NeverBranch nodes are not assigned to the loop when constructed
1160         (dom->Opcode() == Op_NeverBranch && loop->is_member(get_loop(dom->in(0))))) {
1161       break;
1162     }
1163     useblock = dom;
1164   }
1165   assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
1166   return useblock;
1167 }
1168 
1169 
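     // Check whether 'n' is an If that repeats the test of the If immediately
     // dominating its Region, with every Region input reached through one of that
     // dominating If's projections. For illustration (assumed shape):
     //   if (c) { A } else { B }
     //   if (c) { C } else { D }
     // The second test is redundant: split_if_with_blocks_post() feeds it a Phi of
     // constants (1 on paths from the true projection, 0 otherwise) and splits it,
     // so each incoming path keeps only the branch it can actually take.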
1170 bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1171   if (!n->is_If() || n->is_BaseCountedLoopEnd()) {
1172     return false;
1173   }
1174   if (!n->in(0)->is_Region()) {
1175     return false;
1176   }
1177   Node* region = n->in(0);
1178   Node* dom = idom(region);
1179   if (!dom->is_If() || dom->in(1) != n->in(1)) {
1180     return false;
1181   }
1182   IfNode* dom_if = dom->as_If();
1183   Node* proj_true = dom_if->proj_out(1);
1184   Node* proj_false = dom_if->proj_out(0);
1185 
1186   for (uint i = 1; i < region->req(); i++) {
1187     if (is_dominator(proj_true, region->in(i))) {
1188       continue;
1189     }
1190     if (is_dominator(proj_false, region->in(i))) {
1191       continue;
1192     }
1193     return false;
1194   }
1195 
1196   return true;
1197 }
1198 
1199 
1200 bool PhaseIdealLoop::can_split_if(Node* n_ctrl) {
1201   if (must_throttle_split_if()) {
1202     return false;
1203   }
1204 
1205   // Do not do 'split-if' if irreducible loops are present.
1206   if (_has_irreducible_loops) {
1207     return false;
1208   }
1209 
1210   if (merge_point_too_heavy(C, n_ctrl)) {
1211     return false;
1212   }
1213 
1214   // Do not do 'split-if' if some paths are dead.  First do dead code
1215   // elimination and then see if it's still profitable.
1216   for (uint i = 1; i < n_ctrl->req(); i++) {
1217     if (n_ctrl->in(i) == C->top()) {
1218       return false;
1219     }
1220   }
1221 
1222   // If trying to do a 'Split-If' at the loop head, it is only
1223   // profitable if the cmp folds up on BOTH paths.  Otherwise we
1224   // risk peeling a loop forever.
1225 
1226   // CNC - Disabled for now.  Requires careful handling of loop
1227   // body selection for the cloned code.  Also, make sure we check
1228   // for any input path not being in the same loop as n_ctrl.  For
1229   // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1230   // because the alternative loop entry points won't be converted
1231   // into LoopNodes.
1232   IdealLoopTree *n_loop = get_loop(n_ctrl);
1233   for (uint j = 1; j < n_ctrl->req(); j++) {
1234     if (get_loop(n_ctrl->in(j)) != n_loop) {
1235       return false;
1236     }
1237   }
1238 
1239   // Check for safety of the merge point.
1240   if (!merge_point_safe(n_ctrl)) {
1241     return false;
1242   }
1243 
1244   return true;
1245 }
1246 
1247 // Detect if the node is the loop-end test of an inner strip-mined loop
1248 // Return: NULL if it's not, otherwise the exit of the outer strip-mined loop
1249 static Node* is_inner_of_stripmined_loop(const Node* out) {
1250   Node* out_le = NULL;
1251 
1252   if (out->is_CountedLoopEnd()) {
1253       const CountedLoopNode* loop = out->as_CountedLoopEnd()->loopnode();
1254 
1255       if (loop != NULL && loop->is_strip_mined()) {
1256         out_le = loop->in(LoopNode::EntryControl)->as_OuterStripMinedLoop()->outer_loop_exit();
1257       }
1258   }
1259 
1260   return out_le;
1261 }
1262 
1263 //------------------------------split_if_with_blocks_post----------------------
1264 // Do the real work in a non-recursive function.  CFG hackery wants to be
1265 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1266 // info.
1267 void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
1268 
1269   // Cloning Cmp through Phi's involves the split-if transform.
1270   // FastLock is not used by an If
1271   if (n->is_Cmp() && !n->is_FastLock()) {
1272     Node *n_ctrl = get_ctrl(n);
1273     // Determine if the Node has inputs from some local Phi.
1274     // Returns the block to clone thru.
1275     Node *n_blk = has_local_phi_input(n);
1276     if (n_blk != n_ctrl) {
1277       return;
1278     }
1279 
1280     if (!can_split_if(n_ctrl)) {
1281       return;
1282     }
1283 
1284     if (n->outcnt() != 1) {
1285       return; // Multiple bool's from 1 compare?
1286     }
1287     Node *bol = n->unique_out();
1288     assert(bol->is_Bool(), "expect a bool here");
1289     if (bol->outcnt() != 1) {
1290       return; // Multiple branches from 1 compare?
1291     }
1292     Node *iff = bol->unique_out();
1293 
1294     // Check some safety conditions
1295     if (iff->is_If()) {        // Classic split-if?
1296       if (iff->in(0) != n_ctrl) {
1297         return; // Compare must be in same blk as if
1298       }
1299     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1300       // Can't split CMove with different control edge.
1301       if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
1302         return;
1303       }
1304       if (get_ctrl(iff->in(2)) == n_ctrl ||
1305           get_ctrl(iff->in(3)) == n_ctrl) {
1306         return;                 // Inputs not yet split-up
1307       }
1308       if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
1309         return;                 // Loop-invar test gates loop-varying CMOVE
1310       }
1311     } else {
1312       return;  // some other kind of node, such as an Allocate
1313     }
1314 
1315     // When is split-if profitable?  Every 'win' means some control flow
1316     // goes dead, so it's almost always a win.
1317     int policy = 0;
1318     // Split compare 'n' through the merge point if it is profitable
1319     Node *phi = split_thru_phi( n, n_ctrl, policy);
1320     if (!phi) {
1321       return;
1322     }
1323 
1324     // Found a Phi to split thru!
1325     // Replace 'n' with the new phi
1326     _igvn.replace_node(n, phi);
1327 
1328     // Now split the bool up thru the phi
1329     Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
1330     guarantee(bolphi != NULL, "null boolean phi node");
1331 
1332     _igvn.replace_node(bol, bolphi);
1333     assert(iff->in(1) == bolphi, "");
1334 
1335     if (bolphi->Value(&_igvn)->singleton()) {
1336       return;
1337     }
1338 
1339     // Conditional-move?  Must split up now
1340     if (!iff->is_If()) {
1341       Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
1342       _igvn.replace_node(iff, cmovphi);
1343       return;
1344     }
1345 
1346     // Now split the IF
1347     do_split_if(iff);
1348     return;
1349   }
1350 
1351   // Two identical ifs back to back can be merged
1352   if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
1353     Node *n_ctrl = n->in(0);
1354     PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
1355     IfNode* dom_if = idom(n_ctrl)->as_If();
1356     Node* proj_true = dom_if->proj_out(1);
1357     Node* proj_false = dom_if->proj_out(0);
1358     Node* con_true = _igvn.makecon(TypeInt::ONE);
1359     Node* con_false = _igvn.makecon(TypeInt::ZERO);
1360 
1361     for (uint i = 1; i < n_ctrl->req(); i++) {
1362       if (is_dominator(proj_true, n_ctrl->in(i))) {
1363         bolphi->init_req(i, con_true);
1364       } else {
1365         assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
1366         bolphi->init_req(i, con_false);
1367       }
1368     }
1369     register_new_node(bolphi, n_ctrl);
1370     _igvn.replace_input_of(n, 1, bolphi);
1371 
1372     // Now split the IF
1373     do_split_if(n);
1374     return;
1375   }
1376 
1377   // Check for an IF ready to split; one that has its
1378   // condition codes input coming from a Phi at the block start.
1379   int n_op = n->Opcode();
1380 
1381   // Check for an IF being dominated by another IF with the same test
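       // For example (illustrative shape only):
       //   if (b) {
       //     ...
       //     if (b) { ... }    // dominated test: always takes the same branch
       //   }
       // The dominated If is folded below via dominated_by().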
1382   if (n_op == Op_If ||
1383       n_op == Op_RangeCheck) {
1384     Node *bol = n->in(1);
1385     uint max = bol->outcnt();
1386     // Check for same test used more than once?
1387     if (max > 1 && bol->is_Bool()) {
1388       // Search up IDOMs to see if this IF is dominated.
1389       Node *cutoff = get_ctrl(bol);
1390 
1391       // Now search up IDOMs till cutoff, looking for a dominating test
1392       Node *prevdom = n;
1393       Node *dom = idom(prevdom);
1394       while (dom != cutoff) {
1395         if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1396           // It's invalid to move control-dependent data nodes into the inner
1397           // strip-mined loop, because that would:
1398           //  1) break validation in LoopNode::verify_strip_mined(), and
1399           //  2) move code with side effects into the strip-mined loop.
1400           // Move to the exit of the outer strip-mined loop in that case.
1401           Node* out_le = is_inner_of_stripmined_loop(dom);
1402           if (out_le != NULL) {
1403             prevdom = out_le;
1404           }
1405           // Replace the dominated test with an obvious true or false.
1406           // Place it on the IGVN worklist for later cleanup.
1407           C->set_major_progress();
1408           dominated_by(prevdom, n, false, true);
1409 #ifndef PRODUCT
1410           if( VerifyLoopOptimizations ) verify();
1411 #endif
1412           return;
1413         }
1414         prevdom = dom;
1415         dom = idom(prevdom);
1416       }
1417     }
1418   }
1419 
1420   try_sink_out_of_loop(n);
1421 
1422   try_move_store_after_loop(n);
1423 
1424   // Check for Opaque2 nodes whose loop has disappeared - whose input is in the
1425   // same loop nest as their output.  Remove 'em, they are no longer useful.
1426   if( n_op == Op_Opaque2 &&
1427       n->in(1) != NULL &&
1428       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1429     _igvn.replace_node( n, n->in(1) );
1430   }
1431 }
1432 
1433 // See if a shared loop-varying computation has no loop-varying uses.
1434 // Happens if something is only used for JVM state in uncommon trap exits,
1435 // like various versions of induction variable+offset.  Clone the
1436 // computation per usage to allow it to sink out of the loop.
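     // Illustration (a hypothetical shape, for intuition only): an expression
     // such as 'base + i' computed in the loop whose only uses are in the JVM
     // state (debug info) of uncommon-trap exits. Each such use gets a private
     // clone of the computation placed at a control point outside the loop,
     // and the original loop-varying node is removed.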
1437 void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
1438   bool is_raw_to_oop_cast = n->is_ConstraintCast() &&
1439                             n->in(1)->bottom_type()->isa_rawptr() &&
1440                             !n->bottom_type()->isa_rawptr();
1441   if (has_ctrl(n) &&
1442       !n->is_Phi() &&
1443       !n->is_Bool() &&
1444       !n->is_Proj() &&
1445       !n->is_MergeMem() &&
1446       !n->is_CMove() &&
1447       !is_raw_to_oop_cast && // don't extend live ranges of raw oops
1448       n->Opcode() != Op_Opaque4 &&
1449       !n->is_Type()) {
1450     Node *n_ctrl = get_ctrl(n);
1451     IdealLoopTree *n_loop = get_loop(n_ctrl);
1452 
1453     if (n->in(0) != NULL) {
1454       IdealLoopTree* loop_ctrl = get_loop(n->in(0));
1455       if (n_loop != loop_ctrl && n_loop->is_member(loop_ctrl)) {
1456         // n has a control input inside a loop but get_ctrl() is member of an outer loop. This could happen, for example,
1457         // for Div nodes inside a loop (control input inside loop) without a use except for an UCT (outside the loop).
1458         // Rewire control of n to right outside of the loop, regardless of whether its input(s) are later sunk or not.
1459         _igvn.replace_input_of(n, 0, place_outside_loop(n_ctrl, loop_ctrl));
1460       }
1461     }
1462     if (n_loop != _ltree_root && n->outcnt() > 1) {
1463       // Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
1464       // previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
1465       Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
1466       if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
1467           ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
1468         assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
1469         Node* outer_loop_clone = NULL;
1470         for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
1471           Node* u = n->last_out(j); // Clone private computation per use
1472           _igvn.rehash_node_delayed(u);
1473           Node* x = n->clone(); // Clone computation
1474           Node* x_ctrl = NULL;
1475           if (u->is_Phi()) {
1476             // Replace all uses of normal nodes.  Replace Phi uses
1477             // individually, so the separate Nodes can sink down
1478             // different paths.
1479             uint k = 1;
1480             while (u->in(k) != n) k++;
1481             u->set_req(k, x);
1482             // x goes next to Phi input path
1483             x_ctrl = u->in(0)->in(k);
1484             // Find control for 'x' next to use but not inside inner loops.
1485             x_ctrl = place_outside_loop(x_ctrl, n_loop);
1486             --j;
1487           } else {              // Normal use
1488             if (has_ctrl(u)) {
1489               x_ctrl = get_ctrl(u);
1490             } else {
1491               x_ctrl = u->in(0);
1492             }
1493             // Find control for 'x' next to use but not inside inner loops.
1494             x_ctrl = place_outside_loop(x_ctrl, n_loop);
1495             // Replace all uses
1496             if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
1497               // If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
1498               // anymore now that we're going to pin n as well
1499               _igvn.replace_node(u, x);
1500               --j;
1501             } else {
1502               int nb = u->replace_edge(n, x, &_igvn);
1503               j -= nb;
1504             }
1505           }
1506 
1507           if (n->is_Load()) {
1508             // For loads, add a control edge to a CFG node outside of the loop
1509             // to force them to not combine and return back inside the loop
1510             // during GVN optimization (4641526).
1511             assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");
1512 
1513             IdealLoopTree* x_loop = get_loop(x_ctrl);
1514             Node* x_head = x_loop->_head;
1515             if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
1516               // Do not add duplicate LoadNodes to the outer strip mined loop
1517               if (outer_loop_clone != NULL) {
1518                 _igvn.replace_node(x, outer_loop_clone);
1519                 continue;
1520               }
1521               outer_loop_clone = x;
1522             }
1523             x->set_req(0, x_ctrl);
1524           } else if (n->in(0) != NULL){
1525             x->set_req(0, x_ctrl);
1526           }
1527           assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1528           assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
1529           register_new_node(x, x_ctrl);
1530 
1531           // Chain of AddP nodes: (AddP base (AddP base ...)) must keep the same base after sinking, so:
1532           // 1- We don't add a CastPP here when the first one is sunk, so that if the second one is not sunk,
1533           //    their bases remain the same.
1534           // (see 2- below)
1535           assert(!x->is_AddP() || !x->in(AddPNode::Address)->is_AddP() ||
1536                  x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base) ||
1537                  !x->in(AddPNode::Address)->in(AddPNode::Base)->eqv_uncast(x->in(AddPNode::Base)), "unexpected AddP shape");
1538           if (x->in(0) == NULL && !x->is_DecodeNarrowPtr() &&
1539               !(x->is_AddP() && x->in(AddPNode::Address)->is_AddP() && x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base))) {
1540             assert(!x->is_Load(), "load should be pinned");
1541             // Use a cast node to pin clone out of loop
1542             Node* cast = NULL;
1543             for (uint k = 0; k < x->req(); k++) {
1544               Node* in = x->in(k);
1545               if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
1546                 const Type* in_t = _igvn.type(in);
1547                 cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t, ConstraintCastNode::UnconditionalDependency);
1548               }
1549               if (cast != NULL) {
1550                 register_new_node(cast, x_ctrl);
1551                 x->replace_edge(in, cast);
1552                 // Chain of AddP:
1553                 // 2- A CastPP of the base is only added now that both AddP nodes are sunk
1554                 if (x->is_AddP() && k == AddPNode::Base) {
1555                   for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
1556                     Node* u = x->fast_out(i);
1557                     if (u->is_AddP() && u->in(AddPNode::Base) == n->in(AddPNode::Base)) {
1558                       _igvn.replace_input_of(u, AddPNode::Base, cast);
1559                       assert(u->find_out_with(Op_AddP) == NULL, "more than 2 chained AddP nodes?");
1560                     }
1561                   }
1562                 }
1563                 break;
1564               }
1565             }
1566             assert(cast != NULL, "must have added a cast to pin the node");
1567           }
1568         }
1569         _igvn.remove_dead_node(n);
1570       }
1571       _dom_lca_tags_round = 0;
1572     }
1573   }
1574 }
1575 
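     // Compute the earliest legal control for 'n': walk up its (transitive)
     // data inputs, stopping at CFG nodes and at pinned nodes, and keep the
     // deepest control found. The result dominates (or is) n's current
     // control; it is used above to check whether 'n' could now be hoisted
     // and as the early control for anti-dependence analysis.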
1576 Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
1577   Node* early_ctrl = NULL;
1578   ResourceMark rm;
1579   Unique_Node_List wq;
1580   wq.push(n);
1581   for (uint i = 0; i < wq.size(); i++) {
1582     Node* m = wq.at(i);
1583     Node* c = NULL;
1584     if (m->is_CFG()) {
1585       c = m;
1586     } else if (m->pinned()) {
1587       c = m->in(0);
1588     } else {
1589       for (uint j = 0; j < m->req(); j++) {
1590         Node* in = m->in(j);
1591         if (in == NULL) {
1592           continue;
1593         }
1594         wq.push(in);
1595       }
1596     }
1597     if (c != NULL) {
1598       assert(is_dominator(c, n_ctrl), "");
1599       if (early_ctrl == NULL) {
1600         early_ctrl = c;
1601       } else if (is_dominator(early_ctrl, c)) {
1602         early_ctrl = c;
1603       }
1604     }
1605   }
1606   assert(is_dominator(early_ctrl, n_ctrl), "early control must dominate current control");
1607   return early_ctrl;
1608 }
1609 
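     // Check that every use of 'n' has its control strictly out of 'n_loop'
     // (in an outer loop or outside the loop nest entirely). Phi uses are
     // checked per incoming path. Returns false as soon as a loop-varying use,
     // an inner-loop use, or an Opaque1 loop limit is found.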
1610 bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
1611   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1612     Node* u = n->fast_out(i);
1613     if (u->Opcode() == Op_Opaque1) {
1614       return false;  // Found loop limit, bugfix for 4677003
1615     }
1616     // We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
1617     // get_late_ctrl_with_anti_dep() use their own tag
1618     _dom_lca_tags_round++;
1619     assert(_dom_lca_tags_round != 0, "shouldn't wrap around");
1620 
1621     if (u->is_Phi()) {
1622       for (uint j = 1; j < u->req(); ++j) {
1623         if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
1624           return false;
1625         }
1626       }
1627     } else {
1628       Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
1629       if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
1630         return false;
1631       }
1632     }
1633   }
1634   return true;
1635 }
1636 
1637 bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
1638   if (n->is_Load()) {
1639     ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
1640   }
1641   IdealLoopTree *u_loop = get_loop(ctrl);
1642   if (u_loop == n_loop) {
1643     return false; // Found loop-varying use
1644   }
1645   if (n_loop->is_member(u_loop)) {
1646     return false; // Found use in inner loop
1647   }
1648   return true;
1649 }
1650 
1651 //------------------------------split_if_with_blocks---------------------------
1652 // Check for aggressive application of 'split-if' optimization,
1653 // using basic block level info.
1654 void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
1655   Node* root = C->root();
1656   visited.set(root->_idx); // first, mark root as visited
1657   // Do pre-visit work for root
1658   Node* n   = split_if_with_blocks_pre(root);
1659   uint  cnt = n->outcnt();
1660   uint  i   = 0;
1661 
1662   while (true) {
1663     // Visit all children
1664     if (i < cnt) {
1665       Node* use = n->raw_out(i);
1666       ++i;
1667       if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1668         // Now do pre-visit work for this use
1669         use = split_if_with_blocks_pre(use);
1670         nstack.push(n, i); // Save parent and next use's index.
1671         n   = use;         // Process all children of current use.
1672         cnt = use->outcnt();
1673         i   = 0;
1674       }
1675     }
1676     else {
1677       // All of n's children have been processed, complete post-processing.
1678       if (cnt != 0 && !n->is_Con()) {
1679         assert(has_node(n), "no dead nodes");
1680         split_if_with_blocks_post(n);
1681       }
1682       if (must_throttle_split_if()) {
1683         nstack.clear();
1684       }
1685       if (nstack.is_empty()) {
1686         // Finished all nodes on stack.
1687         break;
1688       }
1689       // Get saved parent node and next use's index. Visit the rest of uses.
1690       n   = nstack.node();
1691       cnt = n->outcnt();
1692       i   = nstack.index();
1693       nstack.pop();
1694     }
1695   }
1696 }
1697 
1698 
1699 //=============================================================================
1700 //
1701 //                   C L O N E   A   L O O P   B O D Y
1702 //
1703 
1704 //------------------------------clone_iff--------------------------------------
1705 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1706 // "Nearly" because all Nodes have been cloned from the original in the loop,
1707 // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1708 // through the Phi recursively, and return a Bool.
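     // Schematically (a hedged sketch of the rewrite performed here):
     //   Phi(R, Bool(Cmp(a1, b1)), Bool(Cmp(a2, b2)))
     // becomes
     //   Bool(Cmp(Phi(R, a1, a2), Phi(R, b1, b2)))
     // i.e. the Bool/Cmp pair is hoisted above the merge point and the Cmp's
     // inputs are merged by new Phis instead (an Opaque4 wrapper, if present,
     // is cloned on top of the new Bool).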
1709 Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
1710 
1711   // Convert this Phi into a Phi merging Bools
1712   uint i;
1713   for (i = 1; i < phi->req(); i++) {
1714     Node *b = phi->in(i);
1715     if (b->is_Phi()) {
1716       _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
1717     } else {
1718       assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
1719     }
1720   }
1721 
1722   Node* n = phi->in(1);
1723   Node* sample_opaque = NULL;
1724   Node *sample_bool = NULL;
1725   if (n->Opcode() == Op_Opaque4) {
1726     sample_opaque = n;
1727     sample_bool = n->in(1);
1728     assert(sample_bool->is_Bool(), "wrong type");
1729   } else {
1730     sample_bool = n;
1731   }
1732   Node *sample_cmp = sample_bool->in(1);
1733 
1734   // Make Phis to merge the Cmp's inputs.
1735   PhiNode *phi1 = new PhiNode(phi->in(0), Type::TOP);
1736   PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
1737   for (i = 1; i < phi->req(); i++) {
1738     Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
1739     Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
1740     phi1->set_req(i, n1);
1741     phi2->set_req(i, n2);
1742     phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1743     phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1744   }
1745   // See if these Phis have been made before.
1746   // Register with optimizer
1747   Node *hit1 = _igvn.hash_find_insert(phi1);
1748   if (hit1) {                   // Hit, toss just made Phi
1749     _igvn.remove_dead_node(phi1); // Remove new phi
1750     assert(hit1->is_Phi(), "" );
1751     phi1 = (PhiNode*)hit1;      // Use existing phi
1752   } else {                      // Miss
1753     _igvn.register_new_node_with_optimizer(phi1);
1754   }
1755   Node *hit2 = _igvn.hash_find_insert(phi2);
1756   if (hit2) {                   // Hit, toss just made Phi
1757     _igvn.remove_dead_node(phi2); // Remove new phi
1758     assert(hit2->is_Phi(), "" );
1759     phi2 = (PhiNode*)hit2;      // Use existing phi
1760   } else {                      // Miss
1761     _igvn.register_new_node_with_optimizer(phi2);
1762   }
1763   // Register Phis with loop/block info
1764   set_ctrl(phi1, phi->in(0));
1765   set_ctrl(phi2, phi->in(0));
1766   // Make a new Cmp
1767   Node *cmp = sample_cmp->clone();
1768   cmp->set_req(1, phi1);
1769   cmp->set_req(2, phi2);
1770   _igvn.register_new_node_with_optimizer(cmp);
1771   set_ctrl(cmp, phi->in(0));
1772 
1773   // Make a new Bool
1774   Node *b = sample_bool->clone();
1775   b->set_req(1,cmp);
1776   _igvn.register_new_node_with_optimizer(b);
1777   set_ctrl(b, phi->in(0));
1778 
1779   if (sample_opaque != NULL) {
1780     Node* opaque = sample_opaque->clone();
1781     opaque->set_req(1, b);
1782     _igvn.register_new_node_with_optimizer(opaque);
1783     set_ctrl(opaque, phi->in(0));
1784     return opaque;
1785   }
1786 
1787   assert(b->is_Bool(), "");
1788   return b;
1789 }
1790 
1791 //------------------------------clone_bool-------------------------------------
1792 // Passed in a Phi merging (recursively) some nearly equivalent Cmps.
1793 // "Nearly" because all Nodes have been cloned from the original in the loop,
1794 // but the fall-in edges to the Cmp are different.  Clone the Cmp nodes
1795 // through the Phi recursively, and return a Cmp.
1796 CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1797   uint i;
1798   // Convert this Phi into a Phi merging Bools
1799   for( i = 1; i < phi->req(); i++ ) {
1800     Node *b = phi->in(i);
1801     if( b->is_Phi() ) {
1802       _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1803     } else {
1804       assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1805     }
1806   }
1807 
1808   Node *sample_cmp = phi->in(1);
1809 
1810   // Make Phis to merge the Cmp's inputs.
1811   PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1812   PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1813   for( uint j = 1; j < phi->req(); j++ ) {
1814     Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1815     Node *n1, *n2;
1816     if( cmp_top->is_Cmp() ) {
1817       n1 = cmp_top->in(1);
1818       n2 = cmp_top->in(2);
1819     } else {
1820       n1 = n2 = cmp_top;
1821     }
1822     phi1->set_req( j, n1 );
1823     phi2->set_req( j, n2 );
1824     phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1825     phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1826   }
1827 
1828   // See if these Phis have been made before.
1829   // Register with optimizer
1830   Node *hit1 = _igvn.hash_find_insert(phi1);
1831   if( hit1 ) {                  // Hit, toss just made Phi
1832     _igvn.remove_dead_node(phi1); // Remove new phi
1833     assert( hit1->is_Phi(), "" );
1834     phi1 = (PhiNode*)hit1;      // Use existing phi
1835   } else {                      // Miss
1836     _igvn.register_new_node_with_optimizer(phi1);
1837   }
1838   Node *hit2 = _igvn.hash_find_insert(phi2);
1839   if( hit2 ) {                  // Hit, toss just made Phi
1840     _igvn.remove_dead_node(phi2); // Remove new phi
1841     assert( hit2->is_Phi(), "" );
1842     phi2 = (PhiNode*)hit2;      // Use existing phi
1843   } else {                      // Miss
1844     _igvn.register_new_node_with_optimizer(phi2);
1845   }
1846   // Register Phis with loop/block info
1847   set_ctrl(phi1, phi->in(0));
1848   set_ctrl(phi2, phi->in(0));
1849   // Make a new Cmp
1850   Node *cmp = sample_cmp->clone();
1851   cmp->set_req( 1, phi1 );
1852   cmp->set_req( 2, phi2 );
1853   _igvn.register_new_node_with_optimizer(cmp);
1854   set_ctrl(cmp, phi->in(0));
1855 
1856   assert( cmp->is_Cmp(), "" );
1857   return (CmpNode*)cmp;
1858 }
1859 
1860 //------------------------------sink_use---------------------------------------
1861 // If 'use' was in the loop-exit block, it now needs to be sunk
1862 // below the post-loop merge point.
1863 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1864   if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1865     set_ctrl(use, post_loop);
1866     for (DUIterator j = use->outs(); use->has_out(j); j++)
1867       sink_use(use->out(j), post_loop);
1868   }
1869 }
1870 
1871 void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
1872                                                  IdealLoopTree* loop, IdealLoopTree* outer_loop,
1873                                                  Node_List*& split_if_set, Node_List*& split_bool_set,
1874                                                  Node_List*& split_cex_set, Node_List& worklist,
1875                                                  uint new_counter, CloneLoopMode mode) {
1876   Node* nnn = old_new[old->_idx];
1877   // Copy uses to a worklist, so I can munge the def-use info
1878   // with impunity.
1879   for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1880     worklist.push(old->fast_out(j));
1881 
1882   while( worklist.size() ) {
1883     Node *use = worklist.pop();
1884     if (!has_node(use))  continue; // Ignore dead nodes
1885     if (use->in(0) == C->top())  continue;
1886     IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1887     // Check for data-use outside of loop - at least one of OLD or USE
1888     // must not be a CFG node.
1889 #ifdef ASSERT
1890     if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
1891       Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
1892       assert(mode != IgnoreStripMined, "incorrect cloning mode");
1893       assert((mode == ControlAroundStripMined && use == sfpt) || !use->is_reachable_from_root(), "missed a node");
1894     }
1895 #endif
1896     if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {
1897 
1898       // If the Data use is an IF, that means we have an IF outside of the
1899       // loop that is switching on a condition that is set inside of the
1900       // loop.  Happens if people set a loop-exit flag; then test the flag
1901       // in the loop to break the loop, then test it again outside of the
1902       // loop to determine which way the loop exited.
1903       // Loop predicate If node connects to Bool node through Opaque1 node.
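           // For example (illustrative Java shape only):
           //   boolean found = false;
           //   for (...) {
           //     if (test(i)) { found = true; break; }
           //   }
           //   if (found) { ... }   // If outside the loop, flag set inside it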
1904       if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
1905         // Since this code is highly unlikely, we lazily build the worklist
1906         // of such Nodes to go split.
1907         if (!split_if_set) {
1908           split_if_set = new Node_List();
1909         }
1910         split_if_set->push(use);
1911       }
1912       if (use->is_Bool()) {
1913         if (!split_bool_set) {
1914           split_bool_set = new Node_List();
1915         }
1916         split_bool_set->push(use);
1917       }
1918       if (use->Opcode() == Op_CreateEx) {
1919         if (!split_cex_set) {
1920           split_cex_set = new Node_List();
1921         }
1922         split_cex_set->push(use);
1923       }
1924 
1925 
1926       // Get "block" use is in
1927       uint idx = 0;
1928       while( use->in(idx) != old ) idx++;
1929       Node *prev = use->is_CFG() ? use : get_ctrl(use);
1930       assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
1931       Node *cfg = prev->_idx >= new_counter
1932         ? prev->in(2)
1933         : idom(prev);
1934       if( use->is_Phi() )     // Phi use is in prior block
1935         cfg = prev->in(idx);  // NOT in block of Phi itself
1936       if (cfg->is_top()) {    // Use is dead?
1937         _igvn.replace_input_of(use, idx, C->top());
1938         continue;
1939       }
1940 
1941       // If use is referenced through control edge... (idx == 0)
1942       if (mode == IgnoreStripMined && idx == 0) {
1943         LoopNode *head = loop->_head->as_Loop();
1944         if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
1945           // That node is outside the inner loop, leave it outside the
1946           // outer loop as well to not confuse verification code.
1947           assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
1948           _igvn.replace_input_of(use, 0, head->outer_loop_exit());
1949           continue;
1950         }
1951       }
1952 
1953       while(!outer_loop->is_member(get_loop(cfg))) {
1954         prev = cfg;
1955         cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1956       }
1957       // If the use occurs after merging several exits from the loop, then
1958       // old value must have dominated all those exits.  Since the same old
1959       // value was used on all those exits we did not need a Phi at this
1960       // merge point.  NOW we do need a Phi here.  Each loop exit value
1961       // is now merged with the peeled body exit; each exit gets its own
1962       // private Phi and those Phis need to be merged here.
1963       Node *phi;
1964       if( prev->is_Region() ) {
1965         if( idx == 0 ) {      // Updating control edge?
1966           phi = prev;         // Just use existing control
1967         } else {              // Else need a new Phi
1968           phi = PhiNode::make( prev, old );
1969           // Now recursively fix up the new uses of old!
1970           for( uint i = 1; i < prev->req(); i++ ) {
1971             worklist.push(phi); // Onto worklist once for each 'old' input
1972           }
1973         }
1974       } else {
1975         // Get new RegionNode merging old and new loop exits
1976         prev = old_new[prev->_idx];
1977         assert( prev, "just made this in step 7" );
1978         if( idx == 0) {      // Updating control edge?
1979           phi = prev;         // Just use existing control
1980         } else {              // Else need a new Phi
1981           // Make a new Phi merging data values properly
1982           phi = PhiNode::make( prev, old );
1983           phi->set_req( 1, nnn );
1984         }
1985       }
1986       // If inserting a new Phi, check for prior hits
1987       if( idx != 0 ) {
1988         Node *hit = _igvn.hash_find_insert(phi);
1989         if( hit == NULL ) {
1990           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1991         } else {                                      // or
1992           // Remove the new phi from the graph and use the hit
1993           _igvn.remove_dead_node(phi);
1994           phi = hit;                                  // Use existing phi
1995         }
1996         set_ctrl(phi, prev);
1997       }
1998       // Make 'use' use the Phi instead of the old loop body exit value
1999       _igvn.replace_input_of(use, idx, phi);
2000       if( use->_idx >= new_counter ) { // If updating new phis
2001         // Not needed for correctness, but prevents a weak assert
2002         // in AddPNode from tripping (when we end up with different
2003         // base & derived Phis that will become the same after
2004         // IGVN does CSE).
2005         Node *hit = _igvn.hash_find_insert(use);
2006         if( hit )             // Go ahead and re-hash for hits.
2007           _igvn.replace_node( use, hit );
2008       }
2009 
2010       // If 'use' was in the loop-exit block, it now needs to be sunk
2011       // below the post-loop merge point.
2012       sink_use( use, prev );
2013     }
2014   }
2015 }
2016 
2017 static void clone_outer_loop_helper(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
2018                                     const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
2019                                     bool check_old_new) {
2020   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2021     Node* u = n->fast_out(j);
2022     assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
2023     if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
2024       Node* c = phase->get_ctrl(u);
2025       IdealLoopTree* u_loop = phase->get_loop(c);
2026       assert(!loop->is_member(u_loop), "can be in outer loop or out of both loops only");
2027       if (outer_loop->is_member(u_loop) ||
2028           // nodes pinned with control in the outer loop but not referenced from the safepoint must be moved out of
2029           // the outer loop too
2030           (u->in(0) != NULL && outer_loop->is_member(phase->get_loop(u->in(0))))) {
2031         wq.push(u);
2032       }
2033     }
2034   }
2035 }
2036 
2037 void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
2038                                       IdealLoopTree* outer_loop, int dd, Node_List &old_new,
2039                                       Node_List& extra_data_nodes) {
2040   if (head->is_strip_mined() && mode != IgnoreStripMined) {
2041     CountedLoopNode* cl = head->as_CountedLoop();
2042     Node* l = cl->outer_loop();
2043     Node* tail = cl->outer_loop_tail();
2044     IfNode* le = cl->outer_loop_end();
2045     Node* sfpt = cl->outer_safepoint();
2046     CountedLoopEndNode* cle = cl->loopexit();
2047     CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
2048     CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
2049     Node* cle_out = cle->proj_out(false);
2050 
2051     Node* new_sfpt = NULL;
2052     Node* new_cle_out = cle_out->clone();
2053     old_new.map(cle_out->_idx, new_cle_out);
2054     if (mode == CloneIncludesStripMined) {
2055       // clone outer loop body
2056       Node* new_l = l->clone();
2057       Node* new_tail = tail->clone();
2058       IfNode* new_le = le->clone()->as_If();
2059       new_sfpt = sfpt->clone();
2060 
2061       set_loop(new_l, outer_loop->_parent);
2062       set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
2063       set_loop(new_cle_out, outer_loop->_parent);
2064       set_idom(new_cle_out, new_cle, dd);
2065       set_loop(new_sfpt, outer_loop->_parent);
2066       set_idom(new_sfpt, new_cle_out, dd);
2067       set_loop(new_le, outer_loop->_parent);
2068       set_idom(new_le, new_sfpt, dd);
2069       set_loop(new_tail, outer_loop->_parent);
2070       set_idom(new_tail, new_le, dd);
2071       set_idom(new_cl, new_l, dd);
2072 
2073       old_new.map(l->_idx, new_l);
2074       old_new.map(tail->_idx, new_tail);
2075       old_new.map(le->_idx, new_le);
2076       old_new.map(sfpt->_idx, new_sfpt);
2077 
2078       new_l->set_req(LoopNode::LoopBackControl, new_tail);
2079       new_l->set_req(0, new_l);
2080       new_tail->set_req(0, new_le);
2081       new_le->set_req(0, new_sfpt);
2082       new_sfpt->set_req(0, new_cle_out);
2083       new_cle_out->set_req(0, new_cle);
2084       new_cl->set_req(LoopNode::EntryControl, new_l);
2085 
2086       _igvn.register_new_node_with_optimizer(new_l);
2087       _igvn.register_new_node_with_optimizer(new_tail);
2088       _igvn.register_new_node_with_optimizer(new_le);
2089     } else {
2090       Node *newhead = old_new[loop->_head->_idx];
2091       newhead->as_Loop()->clear_strip_mined();
2092       _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
2093       set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
2094     }
2095     // Look at data nodes that were assigned a control in the outer
2096     // loop: they are kept in the outer loop by the safepoint so start
2097     // from the safepoint node's inputs.
2098     IdealLoopTree* outer_loop = get_loop(l);
2099     Node_Stack stack(2);
2100     stack.push(sfpt, 1);
2101     uint new_counter = C->unique();
2102     while (stack.size() > 0) {
2103       Node* n = stack.node();
2104       uint i = stack.index();
2105       while (i < n->req() &&
2106              (n->in(i) == NULL ||
2107               !has_ctrl(n->in(i)) ||
2108               get_loop(get_ctrl(n->in(i))) != outer_loop ||
2109               (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
2110         i++;
2111       }
2112       if (i < n->req()) {
2113         stack.set_index(i+1);
2114         stack.push(n->in(i), 0);
2115       } else {
2116         assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
2117         Node* m = n == sfpt ? new_sfpt : n->clone();
2118         if (m != NULL) {
2119           for (uint i = 0; i < n->req(); i++) {
2120             if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
2121               m->set_req(i, old_new[m->in(i)->_idx]);
2122             }
2123           }
2124         } else {
2125           assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
2126         }
2127         if (n != sfpt) {
2128           extra_data_nodes.push(n);
2129           _igvn.register_new_node_with_optimizer(m);
2130           assert(get_ctrl(n) == cle_out, "what other control?");
2131           set_ctrl(m, new_cle_out);
2132           old_new.map(n->_idx, m);
2133         }
2134         stack.pop();
2135       }
2136     }
2137     if (mode == CloneIncludesStripMined) {
2138       _igvn.register_new_node_with_optimizer(new_sfpt);
2139       _igvn.register_new_node_with_optimizer(new_cle_out);
2140     }
2141     // Some other transformation may have pessimistically assigned some
2142     // data nodes to the outer loop. Set their control so they are out
2143     // of the outer loop.
2144     ResourceMark rm;
2145     Unique_Node_List wq;
2146     for (uint i = 0; i < extra_data_nodes.size(); i++) {
2147       Node* old = extra_data_nodes.at(i);
2148       clone_outer_loop_helper(old, loop, outer_loop, old_new, wq, this, true);
2149     }
2150 
2151     Node* inner_out = sfpt->in(0);
2152     if (inner_out->outcnt() > 1) {
2153       clone_outer_loop_helper(inner_out, loop, outer_loop, old_new, wq, this, true);
2154     }
2155 
2156     Node* new_ctrl = cl->outer_loop_exit();
2157     assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
2158     for (uint i = 0; i < wq.size(); i++) {
2159       Node* n = wq.at(i);
2160       set_ctrl(n, new_ctrl);
2161       if (n->in(0) != NULL) {
2162         _igvn.replace_input_of(n, 0, new_ctrl);
2163       }
2164       clone_outer_loop_helper(n, loop, outer_loop, old_new, wq, this, false);
2165     }
2166   } else {
2167     Node *newhead = old_new[loop->_head->_idx];
2168     set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
2169   }
2170 }
2171 
2172 //------------------------------clone_loop-------------------------------------
2173 //
2174 //                   C L O N E   A   L O O P   B O D Y
2175 //
2176 // This is the basic building block of the loop optimizations.  It clones an
2177 // entire loop body.  It makes an old_new loop body mapping; with this mapping
2178 // you can find the new-loop equivalent to an old-loop node.  All new-loop
2179 // nodes are exactly equal to their old-loop counterparts, all edges are the
2180 // same.  All exits from the old-loop now have a RegionNode that merges the
2181 // equivalent new-loop path.  This is true even for the normal "loop-exit"
2182 // condition.  All uses of loop-invariant old-loop values now come from (one
2183 // or more) Phis that merge their new-loop equivalents.
2184 //
2185 // This operation leaves the graph in an illegal state: there are two valid
2186 // control edges coming from the loop pre-header to both loop bodies.  I'll
2187 // definitely have to hack the graph after running this transform.
2188 //
2189 // From this building block I will further edit edges to perform loop peeling
2190 // or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
2191 //
2192 // Parameter side_by_side_idom:
2193 //   When side_by_side_idom is NULL, the dominator tree is constructed for
2194 //      the clone loop to dominate the original.  Used in construction of
2195 //      pre-main-post loop sequence.
2196 //   When nonnull, the clone and original are side-by-side, both are
2197 //      dominated by the side_by_side_idom node.  Used in construction of
2198 //      unswitched loops.
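     //
     // Typical use of the mapping by a caller (rough sketch only; the exact
     // arguments, in particular the dominator depth and clone mode, vary per
     // transformation):
     //   clone_loop(loop, old_new, dom_depth(loop->_head), mode);
     //   Node* new_n = old_new[old_n->_idx];   // new-loop copy of an old-loop node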
2199 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
2200                                 CloneLoopMode mode, Node* side_by_side_idom) {
2201 
2202   LoopNode* head = loop->_head->as_Loop();
2203   head->verify_strip_mined(1);
2204 
2205   if (C->do_vector_loop() && PrintOpto) {
2206     const char* mname = C->method()->name()->as_quoted_ascii();
2207     if (mname != NULL) {
2208       tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
2209     }
2210   }
2211 
2212   CloneMap& cm = C->clone_map();
2213   Dict* dict = cm.dict();
2214   if (C->do_vector_loop()) {
2215     cm.set_clone_idx(cm.max_gen()+1);
2216 #ifndef PRODUCT
2217     if (PrintOpto) {
2218       tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
2219       loop->dump_head();
2220     }
2221 #endif
2222   }
2223 
2224   // Step 1: Clone the loop body.  Make the old->new mapping.
2225   uint i;
2226   for (i = 0; i < loop->_body.size(); i++) {
2227     Node* old = loop->_body.at(i);
2228     Node* nnn = old->clone();
2229     old_new.map(old->_idx, nnn);
2230     if (old->is_reduction()) {
2231       // Reduction flag is not copied by default. Copy it here when cloning the entire loop body.
2232       nnn->add_flag(Node::Flag_is_reduction);
2233     }
2234     if (C->do_vector_loop()) {
2235       cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
2236     }
2237     _igvn.register_new_node_with_optimizer(nnn);
2238   }
2239 
2240   IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
2241 
2242   // Step 2: Fix the edges in the new body.  If the old input is outside the
2243   // loop use it.  If the old input is INside the loop, use the corresponding
2244   // new node instead.
2245   for( i = 0; i < loop->_body.size(); i++ ) {
2246     Node *old = loop->_body.at(i);
2247     Node *nnn = old_new[old->_idx];
2248     // Fix CFG/Loop controlling the new node
2249     if (has_ctrl(old)) {
2250       set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
2251     } else {
2252       set_loop(nnn, outer_loop->_parent);
2253       if (old->outcnt() > 0) {
2254         set_idom( nnn, old_new[idom(old)->_idx], dd );
2255       }
2256     }
2257     // Correct edges to the new node
2258     for( uint j = 0; j < nnn->req(); j++ ) {
2259         Node *n = nnn->in(j);
2260         if( n ) {
2261           IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
2262           if( loop->is_member( old_in_loop ) )
2263             nnn->set_req(j, old_new[n->_idx]);
2264         }
2265     }
2266     _igvn.hash_find_insert(nnn);
2267   }
2268 
2269   Node_List extra_data_nodes; // data nodes in the outer strip mined loop
2270   clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
2271 
2272   // Step 3: Now fix control uses.  Loop varying control uses have already
2273   // been fixed up (as part of all input edges in Step 2).  Loop invariant
2274   // control uses must be either an IfFalse or an IfTrue.  Make a merge
2275   // point to merge the old and new IfFalse/IfTrue nodes; make the use
2276   // refer to this.
2277   Node_List worklist;
2278   uint new_counter = C->unique();
2279   for( i = 0; i < loop->_body.size(); i++ ) {
2280     Node* old = loop->_body.at(i);
2281     if( !old->is_CFG() ) continue;
2282 
2283     // Copy uses to a worklist, so I can munge the def-use info
2284     // with impunity.
2285     for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
2286       worklist.push(old->fast_out(j));
2287 
2288     while( worklist.size() ) {  // Visit all uses
2289       Node *use = worklist.pop();
2290       if (!has_node(use))  continue; // Ignore dead nodes
2291       IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
2292       if( !loop->is_member( use_loop ) && use->is_CFG() ) {
2293         // Both OLD and USE are CFG nodes here.
2294         assert( use->is_Proj(), "" );
2295         Node* nnn = old_new[old->_idx];
2296 
2297         Node* newuse = NULL;
2298         if (head->is_strip_mined() && mode != IgnoreStripMined) {
2299           CountedLoopNode* cl = head->as_CountedLoop();
2300           CountedLoopEndNode* cle = cl->loopexit();
2301           Node* cle_out = cle->proj_out_or_null(false);
2302           if (use == cle_out) {
2303             IfNode* le = cl->outer_loop_end();
2304             use = le->proj_out(false);
2305             use_loop = get_loop(use);
2306             if (mode == CloneIncludesStripMined) {
2307               nnn = old_new[le->_idx];
2308             } else {
2309               newuse = old_new[cle_out->_idx];
2310             }
2311           }
2312         }
2313         if (newuse == NULL) {
2314           newuse = use->clone();
2315         }
2316 
2317         // Clone the loop exit control projection
2318         if (C->do_vector_loop()) {
2319           cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
2320         }
2321         newuse->set_req(0,nnn);
2322         _igvn.register_new_node_with_optimizer(newuse);
2323         set_loop(newuse, use_loop);
2324         set_idom(newuse, nnn, dom_depth(nnn) + 1 );
2325 
2326         // We need a Region to merge the exit from the peeled body and the
2327         // exit from the old loop body.
2328         RegionNode *r = new RegionNode(3);
2329         // Map the old use to the new merge point
2330         old_new.map( use->_idx, r );
2331         uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
2332         assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
2333 
2334         // The original user of 'use' uses 'r' instead.
2335         for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
2336           Node* useuse = use->last_out(l);
2337           _igvn.rehash_node_delayed(useuse);
2338           uint uses_found = 0;
2339           if( useuse->in(0) == use ) {
2340             useuse->set_req(0, r);
2341             uses_found++;
2342             if( useuse->is_CFG() ) {
2343               // This is not a dom_depth > dd_r because when new
2344               // control flow is constructed by a loop opt, a node and
2345               // its dominator can end up at the same dom_depth
2346               assert(dom_depth(useuse) >= dd_r, "");
2347               set_idom(useuse, r, dom_depth(useuse));
2348             }
2349           }
2350           for( uint k = 1; k < useuse->req(); k++ ) {
2351             if( useuse->in(k) == use ) {
2352               useuse->set_req(k, r);
2353               uses_found++;
2354               if (useuse->is_Loop() && k == LoopNode::EntryControl) {
2355                 // This is not a dom_depth > dd_r because when new
2356                 // control flow is constructed by a loop opt, a node
2357                 // and its dominator can end up at the same dom_depth
2358                 assert(dom_depth(useuse) >= dd_r , "");
2359                 set_idom(useuse, r, dom_depth(useuse));
2360               }
2361             }
2362           }
2363           l -= uses_found;    // we deleted 1 or more copies of this edge
2364         }
2365 
2366         // Now finish up 'r'
2367         r->set_req( 1, newuse );
2368         r->set_req( 2,    use );
2369         _igvn.register_new_node_with_optimizer(r);
2370         set_loop(r, use_loop);
2371         set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
2372       } // End of if a loop-exit test
2373     }
2374   }
2375 
2376   // Step 4: If loop-invariant use is not control, it must be dominated by a
2377   // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
2378   // there if needed.  Make a Phi there merging old and new used values.
2379   Node_List *split_if_set = NULL;
2380   Node_List *split_bool_set = NULL;
2381   Node_List *split_cex_set = NULL;
2382   for( i = 0; i < loop->_body.size(); i++ ) {
2383     Node* old = loop->_body.at(i);
2384     clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2385                                 split_bool_set, split_cex_set, worklist, new_counter,
2386                                 mode);
2387   }
2388 
2389   for (i = 0; i < extra_data_nodes.size(); i++) {
2390     Node* old = extra_data_nodes.at(i);
2391     clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2392                                 split_bool_set, split_cex_set, worklist, new_counter,
2393                                 mode);
2394   }
2395 
2396   // Check for IFs that need splitting/cloning.  Happens if an IF outside of
2397   // the loop uses a condition set in the loop.  The original IF probably
2398   // takes control from one or more OLD Regions (which in turn get from NEW
2399   // Regions).  In any case, there will be a set of Phis for each merge point
2400 // from the IF up to where the original BOOL def exits the loop.
2401   if (split_if_set) {
2402     while (split_if_set->size()) {
2403       Node *iff = split_if_set->pop();
2404       if (iff->in(1)->is_Phi()) {
2405         Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
2406         _igvn.replace_input_of(iff, 1, b);
2407       }
2408     }
2409   }
2410   if (split_bool_set) {
2411     while (split_bool_set->size()) {
2412       Node *b = split_bool_set->pop();
2413       Node *phi = b->in(1);
2414       assert(phi->is_Phi(), "");
2415       CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
2416       _igvn.replace_input_of(b, 1, cmp);
2417     }
2418   }
2419   if (split_cex_set) {
2420     while (split_cex_set->size()) {
2421       Node *b = split_cex_set->pop();
2422       assert(b->in(0)->is_Region(), "");
2423       assert(b->in(1)->is_Phi(), "");
2424       assert(b->in(0)->in(0) == b->in(1)->in(0), "");
2425       split_up(b, b->in(0), NULL);
2426     }
2427   }
2428 
2429 }
2430 
2431 
2432 //---------------------- stride_of_possible_iv -------------------------------------
2433 // Looks for an iff/bool/comp with one operand of the compare
2434 // being a cycle involving an add and a phi,
2435 // with an optional truncation (left-shift followed by a right-shift)
2436 // of the add. Returns zero if not an iv.
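     // The shape being matched corresponds roughly to a source loop such as
     //   for (int i = init; i < limit; i += stride) { ... }
     // whose increment shows up as a phi/AddI cycle feeding the exit compare,
     // possibly with a (<<k, >>k) truncation pair around the add:
     //   If (Bool (CmpI (Phi ... (AddI Phi stride)) limit))
     // The return value is the constant 'stride', or 0 if no such cycle is found.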
2437 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
2438   Node* trunc1 = NULL;
2439   Node* trunc2 = NULL;
2440   const TypeInteger* ttype = NULL;
2441   if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
2442     return 0;
2443   }
2444   BoolNode* bl = iff->in(1)->as_Bool();
2445   Node* cmp = bl->in(1);
2446   if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
2447     return 0;
2448   }
2449   // Must have an invariant operand
2450   if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
2451     return 0;
2452   }
2453   Node* add2 = NULL;
2454   Node* cmp1 = cmp->in(1);
2455   if (cmp1->is_Phi()) {
2456     // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
2457     Node* phi = cmp1;
2458     for (uint i = 1; i < phi->req(); i++) {
2459       Node* in = phi->in(i);
2460       Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
2461                                 &trunc1, &trunc2, &ttype, T_INT);
2462       if (add && add->in(1) == phi) {
2463         add2 = add->in(2);
2464         break;
2465       }
2466     }
2467   } else {
2468     // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
2469     Node* addtrunc = cmp1;
2470     Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
2471                                 &trunc1, &trunc2, &ttype, T_INT);
2472     if (add && add->in(1)->is_Phi()) {
2473       Node* phi = add->in(1);
2474       for (uint i = 1; i < phi->req(); i++) {
2475         if (phi->in(i) == addtrunc) {
2476           add2 = add->in(2);
2477           break;
2478         }
2479       }
2480     }
2481   }
2482   if (add2 != NULL) {
2483     const TypeInt* add2t = _igvn.type(add2)->is_int();
2484     if (add2t->is_con()) {
2485       return add2t->get_con();
2486     }
2487   }
2488   return 0;
2489 }
2490 
2491 
2492 //---------------------- stay_in_loop -------------------------------------
2493 // Return the (unique) control output node that's in the loop (if it exists).
2494 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
2495   Node* unique = NULL;
2496   if (!n) return NULL;
2497   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2498     Node* use = n->fast_out(i);
2499     if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
2500       if (unique != NULL) {
2501         return NULL;
2502       }
2503       unique = use;
2504     }
2505   }
2506   return unique;
2507 }
2508 
2509 //------------------------------ register_node -------------------------------------
2510 // Utility to register node "n" with PhaseIdealLoop
2511 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
2512   _igvn.register_new_node_with_optimizer(n);
2513   loop->_body.push(n);
2514   if (n->is_CFG()) {
2515     set_loop(n, loop);
2516     set_idom(n, pred, ddepth);
2517   } else {
2518     set_ctrl(n, pred);
2519   }
2520 }
2521 
2522 //------------------------------ proj_clone -------------------------------------
2523 // Utility to create an if-projection
2524 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
2525   ProjNode* c = p->clone()->as_Proj();
2526   c->set_req(0, iff);
2527   return c;
2528 }
2529 
2530 //------------------------------ short_circuit_if -------------------------------------
2531 // Force the iff control output to be the live_proj
2532 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2533   guarantee(live_proj != NULL, "null projection");
2534   int proj_con = live_proj->_con;
2535   assert(proj_con == 0 || proj_con == 1, "false or true projection");
2536   Node *con = _igvn.intcon(proj_con);
2537   set_ctrl(con, C->root());
2538   if (iff) {
2539     iff->set_req(1, con);
2540   }
2541   return con;
2542 }
2543 
2544 //------------------------------ insert_if_before_proj -------------------------------------
2545 // Insert a new if before an if projection (* - new node)
2546 //
2547 // before
2548 //           if(test)
2549 //           /     \
2550 //          v       v
2551 //    other-proj   proj (arg)
2552 //
2553 // after
2554 //           if(test)
2555 //           /     \
2556 //          /       v
2557 //         |      * proj-clone
2558 //         v          |
2559 //    other-proj      v
2560 //                * new_if(relop(cmp[IU](left,right)))
2561 //                  /  \
2562 //                 v    v
2563 //         * new-proj  proj
2564 //         (returned)
2565 //
2566 ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
2567   IfNode* iff = proj->in(0)->as_If();
2568   IdealLoopTree *loop = get_loop(proj);
2569   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2570   int ddepth = dom_depth(proj);
2571 
2572   _igvn.rehash_node_delayed(iff);
2573   _igvn.rehash_node_delayed(proj);
2574 
2575   proj->set_req(0, NULL);  // temporary disconnect
2576   ProjNode* proj2 = proj_clone(proj, iff);
2577   register_node(proj2, loop, iff, ddepth);
2578 
2579   Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
2580   register_node(cmp, loop, proj2, ddepth);
2581 
2582   BoolNode* bol = new BoolNode(cmp, relop);
2583   register_node(bol, loop, proj2, ddepth);
2584 
2585   int opcode = iff->Opcode();
2586   assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
2587   IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
2588     new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
2589   register_node(new_if, loop, proj2, ddepth);
2590 
2591   proj->set_req(0, new_if); // reattach
2592   set_idom(proj, new_if, ddepth);
2593 
2594   ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
2595   guarantee(new_exit != NULL, "null exit node");
2596   register_node(new_exit, get_loop(other_proj), new_if, ddepth);
2597 
2598   return new_exit;
2599 }
2600 
2601 //------------------------------ insert_region_before_proj -------------------------------------
2602 // Insert a region before an if projection (* - new node)
2603 //
2604 // before
2605 //           if(test)
2606 //          /      |
2607 //         v       |
2608 //       proj      v
2609 //               other-proj
2610 //
2611 // after
2612 //           if(test)
2613 //          /      |
2614 //         v       |
2615 // * proj-clone    v
2616 //         |     other-proj
2617 //         v
2618 // * new-region
2619 //         |
2620 //         v
2621 // *      dum_if
2622 //       /     \
2623 //      v       \
2624 // * dum-proj    v
2625 //              proj
2626 //
2627 RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2628   IfNode* iff = proj->in(0)->as_If();
2629   IdealLoopTree *loop = get_loop(proj);
2630   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2631   int ddepth = dom_depth(proj);
2632 
2633   _igvn.rehash_node_delayed(iff);
2634   _igvn.rehash_node_delayed(proj);
2635 
2636   proj->set_req(0, NULL);  // temporary disconnect
2637   ProjNode* proj2 = proj_clone(proj, iff);
2638   register_node(proj2, loop, iff, ddepth);
2639 
2640   RegionNode* reg = new RegionNode(2);
2641   reg->set_req(1, proj2);
2642   register_node(reg, loop, iff, ddepth);
2643 
2644   IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2645   register_node(dum_if, loop, reg, ddepth);
2646 
2647   proj->set_req(0, dum_if); // reattach
2648   set_idom(proj, dum_if, ddepth);
2649 
2650   ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2651   register_node(dum_proj, loop, dum_if, ddepth);
2652 
2653   return reg;
2654 }
2655 
2656 //------------------------------ insert_cmpi_loop_exit -------------------------------------
2657 // Clone a signed compare loop exit from an unsigned compare and
2658 // insert it before the unsigned cmp on the stay-in-loop path.
2659 // All new nodes are inserted in the dominator tree between the original
2660 // if and its projections.  The original if test is replaced with
2661 // a constant to force the stay-in-loop path.
2662 //
2663 // This is done to make sure that the original if and its projections
2664 // still dominate the same set of control nodes, that the ctrl() relation
2665 // from data nodes to them is preserved, and that their loop nesting is
2666 // preserved.
2667 //
2668 // before
2669 //          if(i <u limit)    unsigned compare loop exit
2670 //         /       |
2671 //        v        v
2672 //   exit-proj   stay-in-loop-proj
2673 //
2674 // after
2675 //          if(stay-in-loop-const)  original if
2676 //         /       |
2677 //        /        v
2678 //       /  if(i <  limit)    new signed test
2679 //      /  /       |
2680 //     /  /        v
2681 //    /  /  if(i <u limit)    new cloned unsigned test
2682 //   /  /   /      |
2683 //   v  v  v       |
2684 //    region       |
2685 //        |        |
2686 //      dum-if     |
2687 //     /  |        |
2688 // ether  |        |
2689 //        v        v
2690 //   exit-proj   stay-in-loop-proj
2691 //
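// For illustration (hypothetical source shape, not taken from any particular
// program): a loop whose only exit test is an unsigned compare, e.g. one
// derived from folding the range check (i >= 0 && i < limit) into
// (i <u limit), carries no signed test on the induction variable.  Splitting
// out the signed bound (i >= limit for stride > 0, i < 0 for stride < 0)
// gives partial_peel() below a signed CmpI exit to use as a cut point.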
2692 IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2693   const bool Signed   = true;
2694   const bool Unsigned = false;
2695 
2696   BoolNode* bol = if_cmpu->in(1)->as_Bool();
2697   if (bol->_test._test != BoolTest::lt) return NULL;
2698   CmpNode* cmpu = bol->in(1)->as_Cmp();
2699   if (cmpu->Opcode() != Op_CmpU) return NULL;
2700   int stride = stride_of_possible_iv(if_cmpu);
2701   if (stride == 0) return NULL;
2702 
2703   Node* lp_proj = stay_in_loop(if_cmpu, loop);
2704   guarantee(lp_proj != NULL, "null loop node");
2705 
2706   ProjNode* lp_continue = lp_proj->as_Proj();
2707   ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2708   if (!lp_exit->is_IfFalse()) {
2709     // The loop exit condition is (i <u limit) ==> (i >= 0 && i < limit).
2710     // We therefore can't add a single exit condition.
2711     return NULL;
2712   }
2713   // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
2714   // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
2715   Node* limit = NULL;
2716   if (stride > 0) {
2717     limit = cmpu->in(2);
2718   } else {
2719     limit = _igvn.makecon(TypeInt::ZERO);
2720     set_ctrl(limit, C->root());
2721   }
2722   // Create a new region on the exit path
2723   RegionNode* reg = insert_region_before_proj(lp_exit);
2724   guarantee(reg != NULL, "null region node");
2725 
2726   // Clone the if-cmpu-true-false using a signed compare
2727   BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2728   ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2729   reg->add_req(cmpi_exit);
2730 
2731   // Clone the if-cmpu-true-false
2732   BoolTest::mask rel_u = bol->_test._test;
2733   ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2734   reg->add_req(cmpu_exit);
2735 
2736   // Force original if to stay in loop.
2737   short_circuit_if(if_cmpu, lp_continue);
2738 
2739   return cmpi_exit->in(0)->as_If();
2740 }
2741 
2742 //------------------------------ remove_cmpi_loop_exit -------------------------------------
2743 // Remove a previously inserted signed compare loop exit.
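// The signed if is not physically deleted here; its condition is replaced by
// a constant that always selects the stay-in-loop projection, letting IGVN
// fold the dead exit away.  partial_peel() below uses this to undo
// insert_cmpi_loop_exit() when peeling is abandoned.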
2744 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2745   Node* lp_proj = stay_in_loop(if_cmp, loop);
2746   assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2747          stay_in_loop(lp_proj, loop)->is_If() &&
2748          stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2749   Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2750   set_ctrl(con, C->root());
2751   if_cmp->set_req(1, con);
2752 }
2753 
2754 //------------------------------ scheduled_nodelist -------------------------------------
2755 // Create a post order schedule of nodes that are in the
2756 // "member" set.  The list is returned in "sched".
2757 // The first node in "sched" is the loop head, followed by
2758 // nodes which have no inputs in the "member" set, and then
2759 // followed by the nodes that have an immediate input dependence
2760 // on a node in "sched".
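// The implementation below is an iterative depth-first walk over the member
// nodes' outputs using a Node_Stack; a node is appended to "sched" only after
// all of its member outputs have been visited (post order).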
2761 void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2762 
2763   assert(member.test(loop->_head->_idx), "loop head must be in member set");
2764   VectorSet visited;
2765   Node_Stack nstack(loop->_body.size());
2766 
2767   Node* n  = loop->_head;  // top of stack is cached in "n"
2768   uint idx = 0;
2769   visited.set(n->_idx);
2770 
2771   // Initially push all with no inputs from within member set
2772   for(uint i = 0; i < loop->_body.size(); i++ ) {
2773     Node *elt = loop->_body.at(i);
2774     if (member.test(elt->_idx)) {
2775       bool found = false;
2776       for (uint j = 0; j < elt->req(); j++) {
2777         Node* def = elt->in(j);
2778         if (def && member.test(def->_idx) && def != elt) {
2779           found = true;
2780           break;
2781         }
2782       }
2783       if (!found && elt != loop->_head) {
2784         nstack.push(n, idx);
2785         n = elt;
2786         assert(!visited.test(n->_idx), "not seen yet");
2787         visited.set(n->_idx);
2788       }
2789     }
2790   }
2791 
2792   // traverse out's that are in the member set
2793   while (true) {
2794     if (idx < n->outcnt()) {
2795       Node* use = n->raw_out(idx);
2796       idx++;
2797       if (!visited.test_set(use->_idx)) {
2798         if (member.test(use->_idx)) {
2799           nstack.push(n, idx);
2800           n = use;
2801           idx = 0;
2802         }
2803       }
2804     } else {
2805       // All outputs processed
2806       sched.push(n);
2807       if (nstack.is_empty()) break;
2808       n   = nstack.node();
2809       idx = nstack.index();
2810       nstack.pop();
2811     }
2812   }
2813 }
2814 
2815 
2816 //------------------------------ has_use_in_set -------------------------------------
2817 // Has a use in the vector set
2818 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2819   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2820     Node* use = n->fast_out(j);
2821     if (vset.test(use->_idx)) {
2822       return true;
2823     }
2824   }
2825   return false;
2826 }
2827 
2828 
2829 //------------------------------ has_use_internal_to_set -------------------------------------
2830 // Has a use internal to the vector set (i.e. not in a phi at the loop head)
2831 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2832   Node* head  = loop->_head;
2833   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2834     Node* use = n->fast_out(j);
2835     if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2836       return true;
2837     }
2838   }
2839   return false;
2840 }
2841 
2842 
2843 //------------------------------ clone_for_use_outside_loop -------------------------------------
2844 // Clone "n" for uses that are outside of the loop
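// Used below when partial_peel() sinks a node from the peel region into the
// not_peel region: each use outside the loop gets its own clone of "n", so
// the sunk original only has to satisfy uses that stay inside the loop.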
2845 int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2846   int cloned = 0;
2847   assert(worklist.size() == 0, "should be empty");
2848   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2849     Node* use = n->fast_out(j);
2850     if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2851       worklist.push(use);
2852     }
2853   }
2854 
2855   if (C->check_node_count(worklist.size() + NodeLimitFudgeFactor,
2856                           "Too many clones required in clone_for_use_outside_loop in partial peeling")) {
2857     return -1;
2858   }
2859 
2860   while( worklist.size() ) {
2861     Node *use = worklist.pop();
2862     if (!has_node(use) || use->in(0) == C->top()) continue;
2863     uint j;
2864     for (j = 0; j < use->req(); j++) {
2865       if (use->in(j) == n) break;
2866     }
2867     assert(j < use->req(), "must be there");
2868 
2869     // clone "n" and insert it between the inputs of "n" and the use outside the loop
2870     Node* n_clone = n->clone();
2871     _igvn.replace_input_of(use, j, n_clone);
2872     cloned++;
2873     Node* use_c;
2874     if (!use->is_Phi()) {
2875       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2876     } else {
2877       // Use in a phi is considered a use in the associated predecessor block
2878       use_c = use->in(0)->in(j);
2879     }
2880     set_ctrl(n_clone, use_c);
2881     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2882     get_loop(use_c)->_body.push(n_clone);
2883     _igvn.register_new_node_with_optimizer(n_clone);
2884 #ifndef PRODUCT
2885     if (TracePartialPeeling) {
2886       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2887     }
2888 #endif
2889   }
2890   return cloned;
2891 }
2892 
2893 
2894 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
2895 // Clone "n" for special uses that are in the not_peeled region.
2896 // If these def-uses occur in separate blocks, the code generator
2897 // marks the method as not compilable.  For example, if a "BoolNode"
2898 // is in a different basic block than the "IfNode" that uses it, then
2899 // the compilation is aborted in the code generator.
2900 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2901                                                         VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2902   if (n->is_Phi() || n->is_Load()) {
2903     return;
2904   }
2905   assert(worklist.size() == 0, "should be empty");
2906   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2907     Node* use = n->fast_out(j);
2908     if ( not_peel.test(use->_idx) &&
2909          (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2910          use->in(1) == n)  {
2911       worklist.push(use);
2912     }
2913   }
2914   if (worklist.size() > 0) {
2915     // clone "n" and insert it between inputs of "n" and the use
2916     Node* n_clone = n->clone();
2917     loop->_body.push(n_clone);
2918     _igvn.register_new_node_with_optimizer(n_clone);
2919     set_ctrl(n_clone, get_ctrl(n));
2920     sink_list.push(n_clone);
2921     not_peel.set(n_clone->_idx);
2922 #ifndef PRODUCT
2923     if (TracePartialPeeling) {
2924       tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2925     }
2926 #endif
2927     while( worklist.size() ) {
2928       Node *use = worklist.pop();
2929       _igvn.rehash_node_delayed(use);
2930       for (uint j = 1; j < use->req(); j++) {
2931         if (use->in(j) == n) {
2932           use->set_req(j, n_clone);
2933         }
2934       }
2935     }
2936   }
2937 }
2938 
2939 
2940 //------------------------------ insert_phi_for_loop -------------------------------------
2941 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2942 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2943   Node *phi = PhiNode::make(lp, back_edge_val);
2944   phi->set_req(LoopNode::EntryControl, lp_entry_val);
2945   // Use existing phi if it already exists
2946   Node *hit = _igvn.hash_find_insert(phi);
2947   if( hit == NULL ) {
2948     _igvn.register_new_node_with_optimizer(phi);
2949     set_ctrl(phi, lp);
2950   } else {
2951     // Remove the new phi from the graph and use the hit
2952     _igvn.remove_dead_node(phi);
2953     phi = hit;
2954   }
2955   _igvn.replace_input_of(use, idx, phi);
2956 }
2957 
2958 #ifdef ASSERT
2959 //------------------------------ is_valid_loop_partition -------------------------------------
2960 // Validate the loop partition sets: peel and not_peel
2961 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2962                                               VectorSet& not_peel ) {
2963   uint i;
2964   // Check that peel_list entries are in the peel set
2965   for (i = 0; i < peel_list.size(); i++) {
2966     if (!peel.test(peel_list.at(i)->_idx)) {
2967       return false;
2968     }
2969   }
2970   // Check that loop members are in either the peel set or the not_peel set
2971   for (i = 0; i < loop->_body.size(); i++ ) {
2972     Node *def  = loop->_body.at(i);
2973     uint di = def->_idx;
2974     // Check that peel set elements are in peel_list
2975     if (peel.test(di)) {
2976       if (not_peel.test(di)) {
2977         return false;
2978       }
2979       // Must be in peel_list also
2980       bool found = false;
2981       for (uint j = 0; j < peel_list.size(); j++) {
2982         if (peel_list.at(j)->_idx == di) {
2983           found = true;
2984           break;
2985         }
2986       }
2987       if (!found) {
2988         return false;
2989       }
2990     } else if (not_peel.test(di)) {
2991       if (peel.test(di)) {
2992         return false;
2993       }
2994     } else {
2995       return false;
2996     }
2997   }
2998   return true;
2999 }
3000 
3001 //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
3002 // Ensure a use outside of loop is of the right form
3003 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
3004   Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
3005   return (use->is_Phi() &&
3006           use_c->is_Region() && use_c->req() == 3 &&
3007           (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
3008            use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
3009            use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
3010           loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
3011 }
3012 
3013 //------------------------------ is_valid_clone_loop_form -------------------------------------
3014 // Ensure that all uses outside of loop are of the right form
3015 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
3016                                                uint orig_exit_idx, uint clone_exit_idx) {
3017   uint len = peel_list.size();
3018   for (uint i = 0; i < len; i++) {
3019     Node *def = peel_list.at(i);
3020 
3021     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3022       Node *use = def->fast_out(j);
3023       Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
3024       if (!loop->is_member(get_loop(use_c))) {
3025         // use is not in the loop, check for correct structure
3026         if (use->in(0) == def) {
3027           // Okay
3028         } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
3029           return false;
3030         }
3031       }
3032     }
3033   }
3034   return true;
3035 }
3036 #endif
3037 
3038 //------------------------------ partial_peel -------------------------------------
3039 // Partially peel (aka loop rotation) the top portion of a loop (called
3040 // the peel section below) by cloning it and placing one copy just before
3041 // the new loop head and the other copy at the bottom of the new loop.
3042 //
3043 //    before                       after                where it came from
3044 //
3045 //    stmt1                        stmt1
3046 //  loop:                          stmt2                     clone
3047 //    stmt2                        if condA goto exitA       clone
3048 //    if condA goto exitA        new_loop:                   new
3049 //    stmt3                        stmt3                     clone
3050 //    if !condB goto loop          if condB goto exitB       clone
3051 //  exitB:                         stmt2                     orig
3052 //    stmt4                        if !condA goto new_loop   orig
3053 //  exitA:                         goto exitA
3054 //                               exitB:
3055 //                                 stmt4
3056 //                               exitA:
3057 //
3058 // Step 1: find the cut point: an exit test on a probable
3059 //         induction variable.
3060 // Step 2: schedule (with cloning) operations in the peel
3061 //         section that can be executed after the cut into
3062 //         the section that is not peeled.  This may need
3063 //         to clone operations into exit blocks.  For
3064 //         instance, a reference to A[i] in the not-peel
3065 //         section and a reference to B[i] in an exit block
3066 //         may cause a left-shift of i by 2 to be placed
3067 //         in the peel block.  This step will clone the left
3068 //         shift into the exit block and sink the left shift
3069 //         from the peel to the not-peel section.
3070 // Step 3: clone the loop, retarget the control, and insert
3071 //         phis for values that are live across the new loop
3072 //         head.  This is very dependent on the graph structure
3073 //         from clone_loop.  It creates region nodes for
3074 //         exit control and associated phi nodes for values
3075 //         that flow out of the loop through that exit.  The region
3076 //         node is dominated by the clone's control projection.
3077 //         So the clone's peel section is placed before the
3078 //         new loop head, and the clone's not-peel section
3079 //         forms the top part of the new loop.  The original
3080 //         peel section forms the tail of the new loop.
3081 // Step 4: update the dominator tree and recompute the
3082 //         dominator depth.
3083 //
3084 //                   orig
3085 //
3086 //                   stmt1
3087 //                     |
3088 //                     v
3089 //               loop predicate
3090 //                     |
3091 //                     v
3092 //                   loop<----+
3093 //                     |      |
3094 //                   stmt2    |
3095 //                     |      |
3096 //                     v      |
3097 //                    ifA     |
3098 //                   / |      |
3099 //                  v  v      |
3100 //               false true   ^  <-- last_peel
3101 //               /     |      |
3102 //              /   ===|==cut |
3103 //             /     stmt3    |  <-- first_not_peel
3104 //            /        |      |
3105 //            |        v      |
3106 //            v       ifB     |
3107 //          exitA:   / \      |
3108 //                  /   \     |
3109 //                 v     v    |
3110 //               false true   |
3111 //               /       \    |
3112 //              /         ----+
3113 //             |
3114 //             v
3115 //           exitB:
3116 //           stmt4
3117 //
3118 //
3119 //            after clone loop
3120 //
3121 //                   stmt1
3122 //                     |
3123 //                     v
3124 //               loop predicate
3125 //                 /       \
3126 //        clone   /         \   orig
3127 //               /           \
3128 //              /             \
3129 //             v               v
3130 //   +---->loop                loop<----+
3131 //   |      |                    |      |
3132 //   |    stmt2                stmt2    |
3133 //   |      |                    |      |
3134 //   |      v                    v      |
3135 //   |      ifA                 ifA     |
3136 //   |      | \                / |      |
3137 //   |      v  v              v  v      |
3138 //   ^    true  false      false true   ^  <-- last_peel
3139 //   |      |   ^   \       /    |      |
3140 //   | cut==|==  \   \     /  ===|==cut |
3141 //   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
3142 //   |      |    dom   | |       |      |
3143 //   |      v      \  1v v2      v      |
3144 //   |      ifB     regionA     ifB     |
3145 //   |      / \        |       / \      |
3146 //   |     /   \       v      /   \     |
3147 //   |    v     v    exitA:  v     v    |
3148 //   |    true  false      false true   |
3149 //   |    /     ^   \      /       \    |
3150 //   +----       \   \    /         ----+
3151 //               dom  \  /
3152 //                 \  1v v2
3153 //                  regionB
3154 //                     |
3155 //                     v
3156 //                   exitB:
3157 //                   stmt4
3158 //
3159 //
3160 //           after partial peel
3161 //
3162 //                  stmt1
3163 //                     |
3164 //                     v
3165 //               loop predicate
3166 //                 /
3167 //        clone   /             orig
3168 //               /          TOP
3169 //              /             \
3170 //             v               v
3171 //    TOP->loop                loop----+
3172 //          |                    |      |
3173 //        stmt2                stmt2    |
3174 //          |                    |      |
3175 //          v                    v      |
3176 //          ifA                 ifA     |
3177 //          | \                / |      |
3178 //          v  v              v  v      |
3179 //        true  false      false true   |     <-- last_peel
3180 //          |   ^   \       /    +------|---+
3181 //  +->newloop   \   \     /  === ==cut |   |
3182 //  |     stmt3   \   \   /     TOP     |   |
3183 //  |       |    dom   | |      stmt3   |   | <-- first_not_peel
3184 //  |       v      \  1v v2      v      |   |
3185 //  |       ifB     regionA     ifB     ^   v
3186 //  |       / \        |       / \      |   |
3187 //  |      /   \       v      /   \     |   |
3188 //  |     v     v    exitA:  v     v    |   |
3189 //  |     true  false      false true   |   |
3190 //  |     /     ^   \      /       \    |   |
3191 //  |    |       \   \    /         v   |   |
3192 //  |    |       dom  \  /         TOP  |   |
3193 //  |    |         \  1v v2             |   |
3194 //  ^    v          regionB             |   |
3195 //  |    |             |                |   |
3196 //  |    |             v                ^   v
3197 //  |    |           exitB:             |   |
3198 //  |    |           stmt4              |   |
3199 //  |    +------------>-----------------+   |
3200 //  |                                       |
3201 //  +-----------------<---------------------+
3202 //
3203 //
3204 //              final graph
3205 //
3206 //                  stmt1
3207 //                    |
3208 //                    v
3209 //               loop predicate
3210 //                    |
3211 //                    v
3212 //                  stmt2 clone
3213 //                    |
3214 //                    v
3215 //         ........> ifA clone
3216 //         :        / |
3217 //        dom      /  |
3218 //         :      v   v
3219 //         :  false   true
3220 //         :  |       |
3221 //         :  |       v
3222 //         :  |    newloop<-----+
3223 //         :  |        |        |
3224 //         :  |     stmt3 clone |
3225 //         :  |        |        |
3226 //         :  |        v        |
3227 //         :  |       ifB       |
3228 //         :  |      / \        |
3229 //         :  |     v   v       |
3230 //         :  |  false true     |
3231 //         :  |   |     |       |
3232 //         :  |   v    stmt2    |
3233 //         :  | exitB:  |       |
3234 //         :  | stmt4   v       |
3235 //         :  |       ifA orig  |
3236 //         :  |      /  \       |
3237 //         :  |     /    \      |
3238 //         :  |    v     v      |
3239 //         :  |  false  true    |
3240 //         :  |  /        \     |
3241 //         :  v  v         -----+
3242 //          RegionA
3243 //             |
3244 //             v
3245 //           exitA
3246 //
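// As a concrete (hypothetical) source-level flavor of the pseudo code above:
//
//   while (true) {
//     i++;                      // stmt2
//     if (i >= limit) break;    // condA, the peel point (exitA)
//     sum += a[i];              // stmt3
//     if (done) break;          // condB (exitB)
//   }
//
// One copy of stmt2/condA is placed ahead of the new loop head, and the
// surviving loop is exit-tested at the bottom on the induction variable, a
// shape later counted-loop construction can work with.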
3247 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3248 
3249   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3250   if (!loop->_head->is_Loop()) {
3251     return false;
3252   }
3253   LoopNode *head = loop->_head->as_Loop();
3254 
3255   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3256     return false;
3257   }
3258 
3259   // Check for complex exit control
3260   for (uint ii = 0; ii < loop->_body.size(); ii++) {
3261     Node *n = loop->_body.at(ii);
3262     int opc = n->Opcode();
3263     if (n->is_Call()        ||
3264         opc == Op_Catch     ||
3265         opc == Op_CatchProj ||
3266         opc == Op_Jump      ||
3267         opc == Op_JumpProj) {
3268 #ifndef PRODUCT
3269       if (TracePartialPeeling) {
3270         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3271       }
3272 #endif
3273       return false;
3274     }
3275   }
3276 
3277   int dd = dom_depth(head);
3278 
3279   // Step 1: find cut point
3280 
3281   // Walk up dominators to loop head looking for first loop exit
3282   // which is executed on every path thru loop.
3283   IfNode *peel_if = NULL;
3284   IfNode *peel_if_cmpu = NULL;
3285 
3286   Node *iff = loop->tail();
3287   while (iff != head) {
3288     if (iff->is_If()) {
3289       Node *ctrl = get_ctrl(iff->in(1));
3290       if (ctrl->is_top()) return false; // Dead test on live IF.
3291       // If loop-varying exit-test, check for induction variable
3292       if (loop->is_member(get_loop(ctrl)) &&
3293           loop->is_loop_exit(iff) &&
3294           is_possible_iv_test(iff)) {
3295         Node* cmp = iff->in(1)->in(1);
3296         if (cmp->Opcode() == Op_CmpI) {
3297           peel_if = iff->as_If();
3298         } else {
3299           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3300           peel_if_cmpu = iff->as_If();
3301         }
3302       }
3303     }
3304     iff = idom(iff);
3305   }
3306 
3307   // Prefer signed compare over unsigned compare.
3308   IfNode* new_peel_if = NULL;
3309   if (peel_if == NULL) {
3310     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3311       return false;   // No peel point found
3312     }
3313     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3314     if (new_peel_if == NULL) {
3315       return false;   // No peel point found
3316     }
3317     peel_if = new_peel_if;
3318   }
3319   Node* last_peel        = stay_in_loop(peel_if, loop);
3320   Node* first_not_peeled = stay_in_loop(last_peel, loop);
3321   if (first_not_peeled == NULL || first_not_peeled == head) {
3322     return false;
3323   }
3324 
3325 #ifndef PRODUCT
3326   if (TraceLoopOpts) {
3327     tty->print("PartialPeel  ");
3328     loop->dump_head();
3329   }
3330 
3331   if (TracePartialPeeling) {
3332     tty->print_cr("before partial peel one iteration");
3333     Node_List wl;
3334     Node* t = head->in(2);
3335     while (true) {
3336       wl.push(t);
3337       if (t == head) break;
3338       t = idom(t);
3339     }
3340     while (wl.size() > 0) {
3341       Node* tt = wl.pop();
3342       tt->dump();
3343       if (tt == last_peel) tty->print_cr("-- cut --");
3344     }
3345   }
3346 #endif
3347   VectorSet peel;
3348   VectorSet not_peel;
3349   Node_List peel_list;
3350   Node_List worklist;
3351   Node_List sink_list;
3352 
3353   uint estimate = loop->est_loop_clone_sz(1);
3354   if (exceeding_node_budget(estimate)) {
3355     return false;
3356   }
3357 
3358   // The cfg nodes to peel are those that are executable from
3359   // the head through last_peel.
3360   assert(worklist.size() == 0, "should be empty");
3361   worklist.push(head);
3362   peel.set(head->_idx);
3363   while (worklist.size() > 0) {
3364     Node *n = worklist.pop();
3365     if (n != last_peel) {
3366       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3367         Node* use = n->fast_out(j);
3368         if (use->is_CFG() &&
3369             loop->is_member(get_loop(use)) &&
3370             !peel.test_set(use->_idx)) {
3371           worklist.push(use);
3372         }
3373       }
3374     }
3375   }
3376 
3377   // The non-cfg nodes to peel are those that are control
3378   // dependent on the cfg nodes.
3379   for (uint i = 0; i < loop->_body.size(); i++) {
3380     Node *n = loop->_body.at(i);
3381     Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
3382     if (peel.test(n_c->_idx)) {
3383       peel.set(n->_idx);
3384     } else {
3385       not_peel.set(n->_idx);
3386     }
3387   }
3388 
3389   // Step 2: move operations from the peeled section down into the
3390   //         not-peeled section
3391 
3392   // Get a post order schedule of nodes in the peel region
3393   // The result is returned in the right-most operand (peel_list).
3394   scheduled_nodelist(loop, peel, peel_list);
3395 
3396   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3397 
3398   // Count the existing phis for the later check against too many new phis
3399   uint old_phi_cnt = 0;
3400   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
3401     Node* use = head->fast_out(j);
3402     if (use->is_Phi()) old_phi_cnt++;
3403   }
3404 
3405 #ifndef PRODUCT
3406   if (TracePartialPeeling) {
3407     tty->print_cr("\npeeled list");
3408   }
3409 #endif
3410 
3411   // Evacuate nodes in peel region into the not_peeled region if possible
3412   bool too_many_clones = false;
3413   uint new_phi_cnt = 0;
3414   uint cloned_for_outside_use = 0;
3415   for (uint i = 0; i < peel_list.size();) {
3416     Node* n = peel_list.at(i);
3417 #ifndef PRODUCT
3418     if (TracePartialPeeling) n->dump();
3419 #endif
3420     bool incr = true;
3421     if (!n->is_CFG()) {
3422       if (has_use_in_set(n, not_peel)) {
3423         // If not used internal to the peeled region,
3424         // move "n" from peeled to not_peeled region.
3425         if (!has_use_internal_to_set(n, peel, loop)) {
3426           // if not pinned and not a load (which may be anti-dependent on a store)
3427           // and not a CMove (Matcher expects only bool->cmove).
3428           if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) {
3429             int new_clones = clone_for_use_outside_loop(loop, n, worklist);
3430             if (new_clones == -1) {
3431               too_many_clones = true;
3432               break;
3433             }
3434             cloned_for_outside_use += new_clones;
3435             sink_list.push(n);
3436             peel.remove(n->_idx);
3437             not_peel.set(n->_idx);
3438             peel_list.remove(i);
3439             incr = false;
3440 #ifndef PRODUCT
3441             if (TracePartialPeeling) {
3442               tty->print_cr("sink to not_peeled region: %d newbb: %d",
3443                             n->_idx, get_ctrl(n)->_idx);
3444             }
3445 #endif
3446           }
3447         } else {
3448           // Otherwise check for special def-use cases that span
3449           // the peel/not_peel boundary such as bool->if
3450           clone_for_special_use_inside_loop(loop, n, not_peel, sink_list, worklist);
3451           new_phi_cnt++;
3452         }
3453       }
3454     }
3455     if (incr) i++;
3456   }
3457 
3458   estimate += cloned_for_outside_use + new_phi_cnt;
3459   bool exceed_node_budget = !may_require_nodes(estimate);
3460   bool exceed_phi_limit = new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta;
3461 
3462   if (too_many_clones || exceed_node_budget || exceed_phi_limit) {
3463 #ifndef PRODUCT
3464     if (TracePartialPeeling && exceed_phi_limit) {
3465       tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
3466                     new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
3467     }
3468 #endif
3469     if (new_peel_if != NULL) {
3470       remove_cmpi_loop_exit(new_peel_if, loop);
3471     }
3472     // Inhibit more partial peeling on this loop
3473     assert(!head->is_partial_peel_loop(), "not partial peeled");
3474     head->mark_partial_peel_failed();
3475     if (cloned_for_outside_use > 0) {
3476       // Terminate this round of loop opts because
3477       // the graph outside this loop was changed.
3478       C->set_major_progress();
3479       return true;
3480     }
3481     return false;
3482   }
3483 
3484   // Step 3: clone loop, retarget control, and insert new phis
3485 
3486   // Create a new loop head for new phis and to hang
3487   // the nodes being moved (sunk) from the peel region.
3488   LoopNode* new_head = new LoopNode(last_peel, last_peel);
3489   new_head->set_unswitch_count(head->unswitch_count()); // Preserve
3490   _igvn.register_new_node_with_optimizer(new_head);
3491   assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
3492   _igvn.replace_input_of(first_not_peeled, 0, new_head);
3493   set_loop(new_head, loop);
3494   loop->_body.push(new_head);
3495   not_peel.set(new_head->_idx);
3496   set_idom(new_head, last_peel, dom_depth(first_not_peeled));
3497   set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
3498 
3499   while (sink_list.size() > 0) {
3500     Node* n = sink_list.pop();
3501     set_ctrl(n, new_head);
3502   }
3503 
3504   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3505 
3506   clone_loop(loop, old_new, dd, IgnoreStripMined);
3507 
3508   const uint clone_exit_idx = 1;
3509   const uint orig_exit_idx  = 2;
3510   assert(is_valid_clone_loop_form(loop, peel_list, orig_exit_idx, clone_exit_idx), "bad clone loop");
3511 
3512   Node* head_clone             = old_new[head->_idx];
3513   LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
3514   Node* orig_tail_clone        = head_clone->in(2);
3515 
3516   // Add phi if "def" node is in peel set and "use" is not
3517 
3518   for (uint i = 0; i < peel_list.size(); i++) {
3519     Node *def  = peel_list.at(i);
3520     if (!def->is_CFG()) {
3521       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3522         Node *use = def->fast_out(j);
3523         if (has_node(use) && use->in(0) != C->top() &&
3524             (!peel.test(use->_idx) ||
3525              (use->is_Phi() && use->in(0) == head)) ) {
3526           worklist.push(use);
3527         }
3528       }
3529       while( worklist.size() ) {
3530         Node *use = worklist.pop();
3531         for (uint j = 1; j < use->req(); j++) {
3532           Node* n = use->in(j);
3533           if (n == def) {
3534 
3535             // "def" is in peel set, "use" is not in peel set
3536             // or "use" is in the entry boundary (a phi) of the peel set
3537 
3538             Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
3539 
3540             if ( loop->is_member(get_loop( use_c )) ) {
3541               // use is in loop
3542               if (old_new[use->_idx] != NULL) { // null for dead code
3543                 Node* use_clone = old_new[use->_idx];
3544                 _igvn.replace_input_of(use, j, C->top());
3545                 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
3546               }
3547             } else {
3548               assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
3549               // use is not in the loop, check if the live range includes the cut
3550               Node* lp_if = use_c->in(orig_exit_idx)->in(0);
3551               if (not_peel.test(lp_if->_idx)) {
3552                 assert(j == orig_exit_idx, "use from original loop");
3553                 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3554               }
3555             }
3556           }
3557         }
3558       }
3559     }
3560   }
3561 
3562   // Step 3b: retarget control
3563 
3564   // Redirect control to the new loop head if a cloned node in
3565   // the not_peeled region has control that points into the peeled region.
3566   // This is necessary because the cloned peeled region will be outside
3567   // the loop.
3568   //                            from    to
3569   //          cloned-peeled    <---+
3570   //    new_head_clone:            |    <--+
3571   //          cloned-not_peeled  in(0)    in(0)
3572   //          orig-peeled
3573 
3574   for (uint i = 0; i < loop->_body.size(); i++) {
3575     Node *n = loop->_body.at(i);
3576     if (!n->is_CFG()           && n->in(0) != NULL        &&
3577         not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3578       Node* n_clone = old_new[n->_idx];
3579       _igvn.replace_input_of(n_clone, 0, new_head_clone);
3580     }
3581   }
3582 
3583   // Backedge of the surviving new_head (the clone) is original last_peel
3584   _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3585 
3586   // Cut first node in original not_peel set
3587   _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
3588   new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
3589   new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
3590 
3591   // Copy head_clone back-branch info to original head
3592   // and remove original head's loop entry and
3593   // clone head's back-branch
3594   _igvn.rehash_node_delayed(head); // Multiple edge updates
3595   head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
3596   head->set_req(LoopNode::LoopBackControl, C->top());
3597   _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3598 
3599   // Similarly modify the phis
3600   for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3601     Node* use = head->fast_out(k);
3602     if (use->is_Phi() && use->outcnt() > 0) {
3603       Node* use_clone = old_new[use->_idx];
3604       _igvn.rehash_node_delayed(use); // Multiple edge updates
3605       use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
3606       use->set_req(LoopNode::LoopBackControl, C->top());
3607       _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3608     }
3609   }
3610 
3611   // Step 4: update dominator tree and dominator depth
3612 
3613   set_idom(head, orig_tail_clone, dd);
3614   recompute_dom_depth();
3615 
3616   // Inhibit more partial peeling on this loop
3617   new_head_clone->set_partial_peel_loop();
3618   C->set_major_progress();
3619   loop->record_for_igvn();
3620 
3621 #ifndef PRODUCT
3622   if (TracePartialPeeling) {
3623     tty->print_cr("\nafter partial peel one iteration");
3624     Node_List wl;
3625     Node* t = last_peel;
3626     while (true) {
3627       wl.push(t);
3628       if (t == head_clone) break;
3629       t = idom(t);
3630     }
3631     while (wl.size() > 0) {
3632       Node* tt = wl.pop();
3633       if (tt == head) tty->print_cr("orig head");
3634       else if (tt == new_head_clone) tty->print_cr("new head");
3635       else if (tt == head_clone) tty->print_cr("clone head");
3636       tt->dump();
3637     }
3638   }
3639 #endif
3640   return true;
3641 }
3642 
3643 //------------------------------reorg_offsets----------------------------------
3644 // Reorganize offset computations to lower register pressure.  Mostly
3645 // prevent loop-fallout uses of the pre-incremented trip counter (which are
3646 // then alive with the post-incremented trip counter forcing an extra
3647 // register move)
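// Sketch of the rewrite performed below (names follow the code below; the
// shape is illustrative):
//
//   use(phi)                              // fall-out use of the pre-incremented
//                                         // trip counter
//     ==>
//   post = AddI(Opaque2(incr), -stride)   // equals phi's value on the exit path
//   use(post)
//
// so only the post-incremented trip counter has to stay live past the exit.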
3648 void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
3649   // Perform it only for canonical counted loops.
3650   // Loop's shape could be messed up by iteration_split_impl.
3651   if (!loop->_head->is_CountedLoop())
3652     return;
3653   if (!loop->_head->as_Loop()->is_valid_counted_loop(T_INT))
3654     return;
3655 
3656   CountedLoopNode *cl = loop->_head->as_CountedLoop();
3657   CountedLoopEndNode *cle = cl->loopexit();
3658   Node *exit = cle->proj_out(false);
3659   Node *phi = cl->phi();
3660 
3661   // Check for the special case when using the pre-incremented trip-counter on
3662   // the fall-out  path (forces the pre-incremented  and post-incremented trip
3663   // counter to be live  at the same time).  Fix this by  adjusting to use the
3664   // post-increment trip counter.
3665 
3666   bool progress = true;
3667   while (progress) {
3668     progress = false;
3669     for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3670       Node* use = phi->fast_out(i);   // User of trip-counter
3671       if (!has_ctrl(use))  continue;
3672       Node *u_ctrl = get_ctrl(use);
3673       if (use->is_Phi()) {
3674         u_ctrl = NULL;
3675         for (uint j = 1; j < use->req(); j++)
3676           if (use->in(j) == phi)
3677             u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3678       }
3679       IdealLoopTree *u_loop = get_loop(u_ctrl);
3680       // Look for loop-invariant use
3681       if (u_loop == loop) continue;
3682       if (loop->is_member(u_loop)) continue;
3683       // Check that use is live out the bottom.  Assuming the trip-counter
3684       // update is right at the bottom, uses of the loop middle are ok.
3685       if (dom_lca(exit, u_ctrl) != exit) continue;
3686       // Hit!  Refactor use to use the post-incremented tripcounter.
3687       // Compute a post-increment tripcounter.
3688       Node* c = exit;
3689       if (cl->is_strip_mined()) {
3690         IdealLoopTree* outer_loop = get_loop(cl->outer_loop());
3691         if (!outer_loop->is_member(u_loop)) {
3692           c = cl->outer_loop_exit();
3693         }
3694       }
3695       Node *opaq = new Opaque2Node(C, cle->incr());
3696       register_new_node(opaq, c);
3697       Node *neg_stride = _igvn.intcon(-cle->stride_con());
3698       set_ctrl(neg_stride, C->root());
3699       Node *post = new AddINode(opaq, neg_stride);
3700       register_new_node(post, c);
3701       _igvn.rehash_node_delayed(use);
3702       for (uint j = 1; j < use->req(); j++) {
3703         if (use->in(j) == phi)
3704           use->set_req(j, post);
3705       }
3706       // Since DU info changed, rerun loop
3707       progress = true;
3708       break;
3709     }
3710   }
3711 
3712 }