1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/barrierSet.hpp"
  27 #include "gc/shared/c2/barrierSetC2.hpp"
  28 #include "memory/allocation.inline.hpp"
  29 #include "memory/resourceArea.hpp"
  30 #include "opto/addnode.hpp"
  31 #include "opto/callnode.hpp"
  32 #include "opto/castnode.hpp"
  33 #include "opto/connode.hpp"
  35 #include "opto/divnode.hpp"
  36 #include "opto/loopnode.hpp"
  37 #include "opto/matcher.hpp"
  38 #include "opto/mulnode.hpp"
  39 #include "opto/movenode.hpp"
  40 #include "opto/opaquenode.hpp"
  41 #include "opto/rootnode.hpp"
  42 #include "opto/subnode.hpp"
  43 #include "utilities/macros.hpp"
  44 #if INCLUDE_ZGC
  45 #include "gc/z/c2/zBarrierSetC2.hpp"
  46 #endif
  47 #if INCLUDE_SHENANDOAHGC
  48 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  49 #endif
  50 
  51 //=============================================================================
  52 //------------------------------split_thru_phi---------------------------------
// Split Node 'n' through merge point if there is enough of a win.
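// The rewrite clones 'n' once per Region/Loop input, so that
//   n(Phi(region, a, b), k)   becomes   Phi(region, n(a, k), n(b, k)),
// counting as 'wins' those clones that fold to a constant or common up
// with an existing node on their path.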
  54 Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
  55   if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    // ConvI2L may have type information on it which is unsafe to push up,
  57     // so disable this for now
  58     return NULL;
  59   }
  60 
  61   // Splitting range check CastIIs through a loop induction Phi can
  62   // cause new Phis to be created that are left unrelated to the loop
  63   // induction Phi and prevent optimizations (vectorization)
  64   if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
  65       region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
  66     return NULL;
  67   }
  68 
  69   int wins = 0;
  70   assert(!n->is_CFG(), "");
  71   assert(region->is_Region(), "");
  72 
  73   const Type* type = n->bottom_type();
  74   const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
  75   Node *phi;
  76   if (t_oop != NULL && t_oop->is_known_instance_field()) {
  77     int iid    = t_oop->instance_id();
  78     int index  = C->get_alias_index(t_oop);
  79     int offset = t_oop->offset();
  80     phi = new PhiNode(region, type, NULL, iid, index, offset);
  81   } else {
  82     phi = PhiNode::make_blank(region, n);
  83   }
  84   uint old_unique = C->unique();
  85   for (uint i = 1; i < region->req(); i++) {
  86     Node *x;
  87     Node* the_clone = NULL;
  88     if (region->in(i) == C->top()) {
  89       x = C->top();             // Dead path?  Use a dead data op
  90     } else {
  91       x = n->clone();           // Else clone up the data op
  92       the_clone = x;            // Remember for possible deletion.
  93       // Alter data node to use pre-phi inputs
  94       if (n->in(0) == region)
  95         x->set_req( 0, region->in(i) );
  96       for (uint j = 1; j < n->req(); j++) {
  97         Node *in = n->in(j);
  98         if (in->is_Phi() && in->in(0) == region)
  99           x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
 100       }
 101     }
 102     // Check for a 'win' on some paths
 103     const Type *t = x->Value(&_igvn);
 104 
 105     bool singleton = t->singleton();
 106 
 107     // A TOP singleton indicates that there are no possible values incoming
 108     // along a particular edge. In most cases, this is OK, and the Phi will
 109     // be eliminated later in an Ideal call. However, we can't allow this to
 110     // happen if the singleton occurs on loop entry, as the elimination of
 111     // the PhiNode may cause the resulting node to migrate back to a previous
 112     // loop iteration.
 113     if (singleton && t == Type::TOP) {
 114       // Is_Loop() == false does not confirm the absence of a loop (e.g., an
 115       // irreducible loop may not be indicated by an affirmative is_Loop());
 116       // therefore, the only top we can split thru a phi is on a backedge of
 117       // a loop.
 118       singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
 119     }
 120 
 121     if (singleton) {
 122       wins++;
 123       x = ((PhaseGVN&)_igvn).makecon(t);
 124     } else {
 125       // We now call Identity to try to simplify the cloned node.
 126       // Note that some Identity methods call phase->type(this).
 127       // Make sure that the type array is big enough for
 128       // our new node, even though we may throw the node away.
 129       // (Note: This tweaking with igvn only works because x is a new node.)
 130       _igvn.set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into the Node;
      // otherwise it will not be updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
 134       x->raise_bottom_type(t);
 135       if (x->Opcode() != Op_ShenandoahWriteBarrier) {
 136         Node *y = x->Identity(&_igvn);
 137         if (y != x) {
 138           wins++;
 139           x = y;
 140         } else {
 141           y = _igvn.hash_find(x);
 142           if (y) {
 143             wins++;
 144             x = y;
 145           } else {
 146             // Else x is a new node we are keeping
 147             // We do not need register_new_node_with_optimizer
 148             // because set_type has already been called.
 149             _igvn._worklist.push(x);
 150           }
 151         }
 152       } else {
 153         _igvn._worklist.push(x);
 154       }
 155     }
 156     if (x != the_clone && the_clone != NULL)
 157       _igvn.remove_dead_node(the_clone);
 158     phi->set_req( i, x );
 159   }
 160   // Too few wins?
 161   if (wins <= policy) {
 162     _igvn.remove_dead_node(phi);
 163     return NULL;
 164   }
 165 
 166   // Record Phi
 167   register_new_node( phi, region );
 168 
 169   for (uint i2 = 1; i2 < phi->req(); i2++) {
 170     Node *x = phi->in(i2);
 171     // If we commoned up the cloned 'x' with another existing Node,
 172     // the existing Node picks up a new use.  We need to make the
 173     // existing Node occur higher up so it dominates its uses.
 174     Node *old_ctrl;
 175     IdealLoopTree *old_loop;
 176 
 177     if (x->is_Con()) {
 178       // Constant's control is always root.
 179       set_ctrl(x, C->root());
 180       continue;
 181     }
 182     // The occasional new node
 183     if (x->_idx >= old_unique) {     // Found a new, unplaced node?
 184       old_ctrl = NULL;
 185       old_loop = NULL;               // Not in any prior loop
 186     } else {
 187       old_ctrl = get_ctrl(x);
 188       old_loop = get_loop(old_ctrl); // Get prior loop
 189     }
 190     // New late point must dominate new use
 191     Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
 192     if (new_ctrl == old_ctrl) // Nothing is changed
 193       continue;
 194 
 195     IdealLoopTree *new_loop = get_loop(new_ctrl);
 196 
 197     // Don't move x into a loop if its uses are
 198     // outside of loop. Otherwise x will be cloned
 199     // for each use outside of this loop.
 200     IdealLoopTree *use_loop = get_loop(region);
 201     if (!new_loop->is_member(use_loop) &&
 202         (old_loop == NULL || !new_loop->is_member(old_loop))) {
 203       // Take early control, later control will be recalculated
 204       // during next iteration of loop optimizations.
 205       new_ctrl = get_early_ctrl(x);
 206       new_loop = get_loop(new_ctrl);
 207     }
 208     // Set new location
 209     set_ctrl(x, new_ctrl);
 210     // If changing loop bodies, see if we need to collect into new body
 211     if (old_loop != new_loop) {
 212       if (old_loop && !old_loop->_child)
 213         old_loop->_body.yank(x);
 214       if (!new_loop->_child)
 215         new_loop->_body.push(x);  // Collect body info
 216     }
 217   }
 218 
 219   return phi;
 220 }
 221 
 222 /**
 * When splitting a Shenandoah write barrier through a phi, we
 * cannot replace the write-barrier input of the ShenandoahWBMemProj
 225  * with the phi. We must also split the ShenandoahWBMemProj through the
 226  * phi and generate a new memory phi for it.
 227  */
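// Sketch of the resulting shape: each write-barrier clone feeding the new
// value phi gets its own ShenandoahWBMemProj, and a new memory Phi on the
// same region merges those projections; that memory Phi then replaces all
// ShenandoahWBMemProj uses of the original barrier 'n'.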
 228 void PhaseIdealLoop::split_mem_thru_phi(Node* n, Node* r, Node* phi) {
 229 #if INCLUDE_SHENANDOAHGC
 230   if (n->Opcode() == Op_ShenandoahWriteBarrier) {
 231     if (n->has_out_with(Op_ShenandoahWBMemProj)) {
 232       Node* old_mem_phi = n->in(ShenandoahBarrierNode::Memory);
 233       assert(r->is_Region(), "need region to control phi");
 234       assert(phi->is_Phi(), "expect phi");
 235       Node* memphi = PhiNode::make(r, old_mem_phi, Type::MEMORY, C->alias_type(n->adr_type())->adr_type());
 236       for (uint i = 1; i < r->req(); i++) {
 237         Node* wb = phi->in(i);
 238         if (wb->Opcode() == Op_ShenandoahWriteBarrier) {
 239           // assert(! wb->has_out_with(Op_ShenandoahWBMemProj), "new clone does not have mem proj");
 240           Node* new_proj = new ShenandoahWBMemProjNode(wb);
 241           register_new_node(new_proj, r->in(i));
 242           memphi->set_req(i, new_proj);
 243         } else {
 244           if (old_mem_phi->is_Phi() && old_mem_phi->in(0) == r) {
 245             memphi->set_req(i, old_mem_phi->in(i));
 246           }
 247         }
 248       }
 249       register_new_node(memphi, r);
 250       Node* old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj);
 251       while (old_mem_out != NULL) {
 252         assert(old_mem_out != NULL, "expect memory projection");
 253         _igvn.replace_node(old_mem_out, memphi);
 254         old_mem_out = n->find_out_with(Op_ShenandoahWBMemProj);
 255       }
 256     }
 257     assert(! n->has_out_with(Op_ShenandoahWBMemProj), "no more memory outs");
 258   }
 259 #endif
 260 }
 261 
 262 //------------------------------dominated_by------------------------------------
 263 // Replace the dominated test with an obvious true or false.  Place it on the
 264 // IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
 265 // live path up to the dominating control.
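// Note that the dominated If is not deleted here: its boolean input is
// replaced by a constant (1 or 0), so IGVN later folds the test and the
// arm that became unreachable.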
 266 void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
 267   if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
 268 
 269   // prevdom is the dominating projection of the dominating test.
 270   assert( iff->is_If(), "" );
 271   assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
 272   int pop = prevdom->Opcode();
 273   assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
 274   if (flip) {
 275     if (pop == Op_IfTrue)
 276       pop = Op_IfFalse;
 277     else
 278       pop = Op_IfTrue;
 279   }
 280   // 'con' is set to true or false to kill the dominated test.
 281   Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
 282   set_ctrl(con, C->root()); // Constant gets a new use
 283   // Hack the dominated test
 284   _igvn.replace_input_of(iff, 1, con);
 285 
  // If I don't have a reachable TRUE and FALSE path following the IfNode then
 287   // I can assume this path reaches an infinite loop.  In this case it's not
 288   // important to optimize the data Nodes - either the whole compilation will
 289   // be tossed or this path (and all data Nodes) will go dead.
 290   if (iff->outcnt() != 2) return;
 291 
 292   // Make control-dependent data Nodes on the live path (path that will remain
 293   // once the dominated IF is removed) become control-dependent on the
 294   // dominating projection.
 295   Node* dp = iff->as_If()->proj_out_or_null(pop == Op_IfTrue);
 296 
  // Loop predicates may have dependent checks which should not
  // be skipped. For example, a range check predicate has two checks
  // for lower and upper bounds.
 300   if (dp == NULL)
 301     return;
 302 
 303   ProjNode* dp_proj  = dp->as_Proj();
 304   ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
 305   if (exclude_loop_predicate &&
 306       (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
 307        unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
 308        unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
 309     // If this is a range check (IfNode::is_range_check), do not
 310     // reorder because Compile::allow_range_check_smearing might have
 311     // changed the check.
 312     return; // Let IGVN transformation change control dependence.
 313   }
 314 
 315   IdealLoopTree *old_loop = get_loop(dp);
 316 
 317   for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
 318     Node* cd = dp->fast_out(i); // Control-dependent node
 319     if (cd->depends_only_on_test()) {
 320       assert(cd->in(0) == dp, "");
 321       _igvn.replace_input_of(cd, 0, prevdom);
 322       set_early_ctrl(cd);
 323       IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
 324       if (old_loop != new_loop) {
 325         if (!old_loop->_child) old_loop->_body.yank(cd);
 326         if (!new_loop->_child) new_loop->_body.push(cd);
 327       }
 328       --i;
 329       --imax;
 330     }
 331   }
 332 }
 333 
 334 //------------------------------has_local_phi_input----------------------------
// Return 'n's control block if 'n' has Phi inputs from that block and no
// other block-local inputs (all non-local-phi inputs come from earlier
// blocks); otherwise return NULL.
 337 Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
 338   Node *n_ctrl = get_ctrl(n);
 339   // See if some inputs come from a Phi in this block, or from before
 340   // this block.
 341   uint i;
 342   for( i = 1; i < n->req(); i++ ) {
 343     Node *phi = n->in(i);
 344     if( phi->is_Phi() && phi->in(0) == n_ctrl )
 345       break;
 346   }
 347   if( i >= n->req() )
 348     return NULL;                // No Phi inputs; nowhere to clone thru
 349 
 350   // Check for inputs created between 'n' and the Phi input.  These
 351   // must split as well; they have already been given the chance
 352   // (courtesy of a post-order visit) and since they did not we must
 353   // recover the 'cost' of splitting them by being very profitable
 354   // when splitting 'n'.  Since this is unlikely we simply give up.
 355   for( i = 1; i < n->req(); i++ ) {
 356     Node *m = n->in(i);
 357     if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
 358       // We allow the special case of AddP's with no local inputs.
 359       // This allows us to split-up address expressions.
 360       if (m->is_AddP() &&
 361           get_ctrl(m->in(2)) != n_ctrl &&
 362           get_ctrl(m->in(3)) != n_ctrl) {
 363         // Move the AddP up to dominating point
 364         Node* c = find_non_split_ctrl(idom(n_ctrl));
 365         if (c->is_OuterStripMinedLoop()) {
 366           c->as_Loop()->verify_strip_mined(1);
 367           c = c->in(LoopNode::EntryControl);
 368         }
 369         set_ctrl_and_loop(m, c);
 370         continue;
 371       }
 372       return NULL;
 373     }
 374     assert(m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
 375   }
 376 
 377   return n_ctrl;
 378 }
 379 
 380 //------------------------------remix_address_expressions----------------------
 381 // Rework addressing expressions to get the most loop-invariant stuff
 382 // moved out.  We'd like to do all associative operators, but it's especially
 383 // important (common) to do address expressions.
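// The rewrites performed below (I = loop invariant, V = loop varying):
//   (V + I) << s        ==>  (V << s) + (I << s)
//   I + V, I * V        ==>  V + I, V * I        (swap edges)
//   (I1 +p V) +p I2     ==>  (I1 +p I2) +p V     (I2 not a constant)
//   I1 +p (I2 + V)      ==>  (I1 +p I2) +p V
// so the invariant sub-expression can be computed in the loop preheader.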
 384 Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
 385   if (!has_ctrl(n))  return NULL;
 386   Node *n_ctrl = get_ctrl(n);
 387   IdealLoopTree *n_loop = get_loop(n_ctrl);
 388 
 389   // See if 'n' mixes loop-varying and loop-invariant inputs and
 390   // itself is loop-varying.
 391 
 392   // Only interested in binary ops (and AddP)
 393   if( n->req() < 3 || n->req() > 4 ) return NULL;
 394 
 395   Node *n1_ctrl = get_ctrl(n->in(                    1));
 396   Node *n2_ctrl = get_ctrl(n->in(                    2));
 397   Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
 398   IdealLoopTree *n1_loop = get_loop( n1_ctrl );
 399   IdealLoopTree *n2_loop = get_loop( n2_ctrl );
 400   IdealLoopTree *n3_loop = get_loop( n3_ctrl );
 401 
 402   // Does one of my inputs spin in a tighter loop than self?
 403   if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
 404       (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
 405       (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
 406     return NULL;                // Leave well enough alone
 407 
 408   // Is at least one of my inputs loop-invariant?
 409   if( n1_loop == n_loop &&
 410       n2_loop == n_loop &&
 411       n3_loop == n_loop )
 412     return NULL;                // No loop-invariant inputs
 413 
 414 
 415   int n_op = n->Opcode();
 416 
  // Replace expressions like ((V+I) << 2) with ((V<<2) + (I<<2)).
 418   if( n_op == Op_LShiftI ) {
 419     // Scale is loop invariant
 420     Node *scale = n->in(2);
 421     Node *scale_ctrl = get_ctrl(scale);
 422     IdealLoopTree *scale_loop = get_loop(scale_ctrl );
 423     if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
 424       return NULL;
 425     const TypeInt *scale_t = scale->bottom_type()->isa_int();
 426     if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
      return NULL;              // Don't bother with byte/short masking
 428     // Add must vary with loop (else shift would be loop-invariant)
 429     Node *add = n->in(1);
 430     Node *add_ctrl = get_ctrl(add);
 431     IdealLoopTree *add_loop = get_loop(add_ctrl);
 432     //assert( n_loop == add_loop, "" );
 433     if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
 434 
 435     // Convert I-V into I+ (0-V); same for V-I
 436     if( add->Opcode() == Op_SubI &&
 437         _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
 438       Node *zero = _igvn.intcon(0);
 439       set_ctrl(zero, C->root());
 440       Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
 441       register_new_node( neg, get_ctrl(add->in(2) ) );
 442       add = new AddINode( add->in(1), neg );
 443       register_new_node( add, add_ctrl );
 444     }
 445     if( add->Opcode() != Op_AddI ) return NULL;
 446     // See if one add input is loop invariant
 447     Node *add_var = add->in(1);
 448     Node *add_var_ctrl = get_ctrl(add_var);
 449     IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
 450     Node *add_invar = add->in(2);
 451     Node *add_invar_ctrl = get_ctrl(add_invar);
 452     IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
 453     if( add_var_loop == n_loop ) {
 454     } else if( add_invar_loop == n_loop ) {
 455       // Swap to find the invariant part
 456       add_invar = add_var;
 457       add_invar_ctrl = add_var_ctrl;
 458       add_invar_loop = add_var_loop;
 459       add_var = add->in(2);
      add_var_ctrl = get_ctrl(add_var);
      add_var_loop = get_loop(add_var_ctrl);
 462     } else                      // Else neither input is loop invariant
 463       return NULL;
 464     if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
 465       return NULL;              // No invariant part of the add?
 466 
 467     // Yes!  Reshape address expression!
 468     Node *inv_scale = new LShiftINode( add_invar, scale );
 469     Node *inv_scale_ctrl =
 470       dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
 471       add_invar_ctrl : scale_ctrl;
 472     register_new_node( inv_scale, inv_scale_ctrl );
 473     Node *var_scale = new LShiftINode( add_var, scale );
 474     register_new_node( var_scale, n_ctrl );
 475     Node *var_add = new AddINode( var_scale, inv_scale );
 476     register_new_node( var_add, n_ctrl );
 477     _igvn.replace_node( n, var_add );
 478     return var_add;
 479   }
 480 
 481   // Replace (I+V) with (V+I)
 482   if( n_op == Op_AddI ||
 483       n_op == Op_AddL ||
 484       n_op == Op_AddF ||
 485       n_op == Op_AddD ||
 486       n_op == Op_MulI ||
 487       n_op == Op_MulL ||
 488       n_op == Op_MulF ||
 489       n_op == Op_MulD ) {
 490     if( n2_loop == n_loop ) {
 491       assert( n1_loop != n_loop, "" );
 492       n->swap_edges(1, 2);
 493     }
 494   }
 495 
 496   // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
 497   // but not if I2 is a constant.
 498   if( n_op == Op_AddP ) {
 499     if( n2_loop == n_loop && n3_loop != n_loop ) {
 500       if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
 501         Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
 502         Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
 503         IdealLoopTree *n22loop = get_loop( n22_ctrl );
 504         IdealLoopTree *n23_loop = get_loop( n23_ctrl );
 505         if( n22loop != n_loop && n22loop->is_member(n_loop) &&
 506             n23_loop == n_loop ) {
 507           Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
 508           // Stuff new AddP in the loop preheader
 509           register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
 510           Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
 511           register_new_node( add2, n_ctrl );
 512           _igvn.replace_node( n, add2 );
 513           return add2;
 514         }
 515       }
 516     }
 517 
 518     // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
 519     if (n2_loop != n_loop && n3_loop == n_loop) {
 520       if (n->in(3)->Opcode() == Op_AddX) {
 521         Node *V = n->in(3)->in(1);
 522         Node *I = n->in(3)->in(2);
 523         if (is_member(n_loop,get_ctrl(V))) {
 524         } else {
 525           Node *tmp = V; V = I; I = tmp;
 526         }
 527         if (!is_member(n_loop,get_ctrl(I))) {
 528           Node *add1 = new AddPNode(n->in(1), n->in(2), I);
 529           // Stuff new AddP in the loop preheader
 530           register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
 531           Node *add2 = new AddPNode(n->in(1), add1, V);
 532           register_new_node(add2, n_ctrl);
 533           _igvn.replace_node(n, add2);
 534           return add2;
 535         }
 536       }
 537     }
 538   }
 539 
 540   return NULL;
 541 }
 542 
 543 //------------------------------conditional_move-------------------------------
 544 // Attempt to replace a Phi with a conditional move.  We have some pretty
 545 // strict profitability requirements.  All Phis at the merge point must
 546 // be converted, so we can remove the control flow.  We need to limit the
 547 // number of c-moves to a small handful.  All code that was in the side-arms
 548 // of the CFG diamond is now speculatively executed.  This code has to be
 549 // "cheap enough".  We are pretty much limited to CFG diamonds that merge
 550 // 1 or 2 items with a total of 1 or 2 ops executed speculatively.
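// The basic rewrite: for a diamond  if (cond) { ... } else { ... }  merging
// at 'region', every Phi on the region becomes a CMove on the If's Bool,
// selecting between the Phi's two data inputs; the emptied If/Region
// diamond is then left for IGVN (RegionNode::Ideal) to fold away.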
 551 Node *PhaseIdealLoop::conditional_move( Node *region ) {
 552 
 553   assert(region->is_Region(), "sanity check");
 554   if (region->req() != 3) return NULL;
 555 
 556   // Check for CFG diamond
 557   Node *lp = region->in(1);
 558   Node *rp = region->in(2);
 559   if (!lp || !rp) return NULL;
 560   Node *lp_c = lp->in(0);
 561   if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
 562   IfNode *iff = lp_c->as_If();
 563 
 564   // Check for ops pinned in an arm of the diamond.
 565   // Can't remove the control flow in this case
 566   if (lp->outcnt() > 1) return NULL;
 567   if (rp->outcnt() > 1) return NULL;
 568 
 569   IdealLoopTree* r_loop = get_loop(region);
 570   assert(r_loop == get_loop(iff), "sanity");
 571   // Always convert to CMOVE if all results are used only outside this loop.
 572   bool used_inside_loop = (r_loop == _ltree_root);
 573 
 574   // Check profitability
 575   int cost = 0;
 576   int phis = 0;
 577   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 578     Node *out = region->fast_out(i);
 579     if (!out->is_Phi()) continue; // Ignore other control edges, etc
 580     phis++;
 581     PhiNode* phi = out->as_Phi();
 582     BasicType bt = phi->type()->basic_type();
 583     switch (bt) {
 584     case T_DOUBLE:
 585     case T_FLOAT:
 586       if (C->use_cmove()) {
 587         continue; //TODO: maybe we want to add some cost
 588       }
 589       cost += Matcher::float_cmove_cost(); // Could be very expensive
 590       break;
    case T_LONG: {
      cost += Matcher::long_cmove_cost(); // May encode as 2 CMOVs
    }
    // Fall through
    case T_INT:                 // These all CMOV fine
 595     case T_ADDRESS: {           // (RawPtr)
 596       cost++;
 597       break;
 598     }
 599     case T_NARROWOOP: // Fall through
 600     case T_OBJECT: {            // Base oops are OK, but not derived oops
 601       const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
 602       // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
 603       // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
 604       // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
 605       // have a Phi for the base here that we convert to a CMOVE all is well
 606       // and good.  But if the base is dead, we'll not make a CMOVE.  Later
 607       // the allocator will have to produce a base by creating a CMOVE of the
 608       // relevant bases.  This puts the allocator in the business of
 609       // manufacturing expensive instructions, generally a bad plan.
 610       // Just Say No to Conditionally-Moved Derived Pointers.
 611       if (tp && tp->offset() != 0)
 612         return NULL;
 613       cost++;
 614       break;
 615     }
 616     default:
 617       return NULL;              // In particular, can't do memory or I/O
 618     }
    // Add in the cost of any speculative ops
 620     for (uint j = 1; j < region->req(); j++) {
 621       Node *proj = region->in(j);
 622       Node *inp = phi->in(j);
 623       if (get_ctrl(inp) == proj) { // Found local op
 624         cost++;
 625         // Check for a chain of dependent ops; these will all become
 626         // speculative in a CMOV.
 627         for (uint k = 1; k < inp->req(); k++)
 628           if (get_ctrl(inp->in(k)) == proj)
 629             cost += ConditionalMoveLimit; // Too much speculative goo
 630       }
 631     }
 632     // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
 633     // This will likely Split-If, a higher-payoff operation.
 634     for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
 635       Node* use = phi->fast_out(k);
 636       if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
 637         cost += ConditionalMoveLimit;
 638       // Is there a use inside the loop?
 639       // Note: check only basic types since CMoveP is pinned.
 640       if (!used_inside_loop && is_java_primitive(bt)) {
 641         IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
 642         if (r_loop == u_loop || r_loop->is_member(u_loop)) {
 643           used_inside_loop = true;
 644         }
 645       }
 646     }
 647   }//for
 648   Node* bol = iff->in(1);
 649   assert(bol->Opcode() == Op_Bool, "");
 650   int cmp_op = bol->in(1)->Opcode();
 651   // It is expensive to generate flags from a float compare.
 652   // Avoid duplicated float compare.
 653   if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
 654 
 655   float infrequent_prob = PROB_UNLIKELY_MAG(3);
 656   // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
 657   if (used_inside_loop) {
 658     if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
 659 
    // The BlockLayoutByFrequency optimization moves an infrequent branch
    // off the hot path, so there is no point in CMOV'ing in such a case
    // (110 is used instead of 100 to account for the inexactness of the
    // float value).
 663     if (BlockLayoutByFrequency) {
 664       infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
 665     }
 666   }
 667   // Check for highly predictable branch.  No point in CMOV'ing if
 668   // we are going to predict accurately all the time.
 669   if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
 670     //keep going
 671   } else if (iff->_prob < infrequent_prob ||
 672       iff->_prob > (1.0f - infrequent_prob))
 673     return NULL;
 674 
 675   // --------------
 676   // Now replace all Phis with CMOV's
 677   Node *cmov_ctrl = iff->in(0);
 678   uint flip = (lp->Opcode() == Op_IfTrue);
 679   Node_List wq;
 680   while (1) {
 681     PhiNode* phi = NULL;
 682     for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
 683       Node *out = region->fast_out(i);
 684       if (out->is_Phi()) {
 685         phi = out->as_Phi();
 686         break;
 687       }
 688     }
 689     if (phi == NULL)  break;
 690     if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
 691     // Move speculative ops
 692     wq.push(phi);
 693     while (wq.size() > 0) {
 694       Node *n = wq.pop();
 695       for (uint j = 1; j < n->req(); j++) {
 696         Node* m = n->in(j);
 697         if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
 698 #ifndef PRODUCT
 699           if (PrintOpto && VerifyLoopOptimizations) {
 700             tty->print("  speculate: ");
 701             m->dump();
 702           }
 703 #endif
 704           set_ctrl(m, cmov_ctrl);
 705           wq.push(m);
 706         }
 707       }
 708     }
 709     Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
 710     register_new_node( cmov, cmov_ctrl );
 711     _igvn.replace_node( phi, cmov );
 712 #ifndef PRODUCT
 713     if (TraceLoopOpts) {
 714       tty->print("CMOV  ");
 715       r_loop->dump_head();
 716       if (Verbose) {
 717         bol->in(1)->dump(1);
 718         cmov->dump(1);
 719       }
 720     }
 721     if (VerifyLoopOptimizations) verify();
 722 #endif
 723   }
 724 
 725   // The useless CFG diamond will fold up later; see the optimization in
 726   // RegionNode::Ideal.
 727   _igvn._worklist.push(region);
 728 
 729   return iff->in(1);
 730 }
 731 
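// Push the CFG users of 'm' onto the worklist, looking through a
// NeverBranch (the never-taken test C2 uses to give infinite loops an
// exit) via its control projection.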
 732 static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
 733   for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
 734     Node* u = m->fast_out(i);
 735     if (u->is_CFG()) {
 736       if (u->Opcode() == Op_NeverBranch) {
 737         u = ((NeverBranchNode*)u)->proj_out(0);
 738         enqueue_cfg_uses(u, wq);
 739       } else {
 740         wq.push(u);
 741       }
 742     }
 743   }
 744 }
 745 
 746 // Try moving a store out of a loop, right before the loop
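// The candidate is a store executed on every iteration that writes a loop
// invariant value to a loop invariant address, is the only store on its
// memory slice in the loop and whose memory Phi is otherwise unobserved;
// hoisting it performs the write once at loop entry instead of once per
// iteration.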
 747 Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
 748   // Store has to be first in the loop body
 749   IdealLoopTree *n_loop = get_loop(n_ctrl);
 750   if (n->is_Store() && n_loop != _ltree_root &&
 751       n_loop->is_loop() && n_loop->_head->is_Loop() &&
 752       n->in(0) != NULL) {
 753     Node* address = n->in(MemNode::Address);
 754     Node* value = n->in(MemNode::ValueIn);
 755     Node* mem = n->in(MemNode::Memory);
 756     IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 757     IdealLoopTree* value_loop = get_loop(get_ctrl(value));
 758 
 759     // - address and value must be loop invariant
 760     // - memory must be a memory Phi for the loop
 761     // - Store must be the only store on this memory slice in the
    // loop: if there's another store following this one then the value
 763     // written at iteration i by the second store could be overwritten
 764     // at iteration i+n by the first store: it's not safe to move the
 765     // first store out of the loop
    // - nothing must observe the memory Phi: it guarantees no read
    // before the store, and we are also guaranteed that the store
    // post-dominates the loop head (ignoring a possible early
    // exit). Otherwise there would be an extra Phi involved between
    // the loop's Phi and the store.
 771     // - there must be no early exit from the loop before the Store
 772     // (such an exit most of the time would be an extra use of the
 773     // memory Phi but sometimes is a bottom memory Phi that takes the
 774     // store as input).
 775 
 776     if (!n_loop->is_member(address_loop) &&
 777         !n_loop->is_member(value_loop) &&
 778         mem->is_Phi() && mem->in(0) == n_loop->_head &&
 779         mem->outcnt() == 1 &&
 780         mem->in(LoopNode::LoopBackControl) == n) {
 781 
 782       assert(n_loop->_tail != NULL, "need a tail");
 783       assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");
 784 
 785       // Verify that there's no early exit of the loop before the store.
 786       bool ctrl_ok = false;
 787       {
        // Follow control from the loop head until we reach n, exit the
        // loop, or reach the tail
 790         ResourceMark rm;
 791         Unique_Node_List wq;
 792         wq.push(n_loop->_head);
 793 
 794         for (uint next = 0; next < wq.size(); ++next) {
 795           Node *m = wq.at(next);
 796           if (m == n->in(0)) {
 797             ctrl_ok = true;
 798             continue;
 799           }
 800           assert(!has_ctrl(m), "should be CFG");
 801           if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
 802             ctrl_ok = false;
 803             break;
 804           }
 805           enqueue_cfg_uses(m, wq);
 806           if (wq.size() > 10) {
 807             ctrl_ok = false;
 808             break;
 809           }
 810         }
 811       }
 812       if (ctrl_ok) {
 813         // move the Store
 814         _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
 815         _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
 816         _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
 817         // Disconnect the phi now. An empty phi can confuse other
 818         // optimizations in this pass of loop opts.
 819         _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
 820         n_loop->_body.yank(mem);
 821 
 822         set_ctrl_and_loop(n, n->in(0));
 823 
 824         return n;
 825       }
 826     }
 827   }
 828   return NULL;
 829 }
 830 
 831 // Try moving a store out of a loop, right after the loop
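// Symmetric to the hoisting case above: when the address is loop invariant,
// the store is the last write on its memory slice in the loop and nothing
// in the loop observes the stored value, the store is sunk to the LCA of
// its other uses, provided that point is outside the loop.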
 832 void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
 833   if (n->is_Store() && n->in(0) != NULL) {
 834     Node *n_ctrl = get_ctrl(n);
 835     IdealLoopTree *n_loop = get_loop(n_ctrl);
 836     // Store must be in a loop
 837     if (n_loop != _ltree_root && !n_loop->_irreducible) {
 838       Node* address = n->in(MemNode::Address);
 839       Node* value = n->in(MemNode::ValueIn);
 840       IdealLoopTree* address_loop = get_loop(get_ctrl(address));
 841       // address must be loop invariant
 842       if (!n_loop->is_member(address_loop)) {
 843         // Store must be last on this memory slice in the loop and
 844         // nothing in the loop must observe it
 845         Node* phi = NULL;
 846         for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 847           Node* u = n->fast_out(i);
 848           if (has_ctrl(u)) { // control use?
 849             IdealLoopTree *u_loop = get_loop(get_ctrl(u));
 850             if (!n_loop->is_member(u_loop)) {
 851               continue;
 852             }
 853             if (u->is_Phi() && u->in(0) == n_loop->_head) {
 854               assert(_igvn.type(u) == Type::MEMORY, "bad phi");
 855               // multiple phis on the same slice are possible
 856               if (phi != NULL) {
 857                 return;
 858               }
 859               phi = u;
 860               continue;
 861             }
 862           }
 863           return;
 864         }
 865         if (phi != NULL) {
 866           // Nothing in the loop before the store (next iteration)
 867           // must observe the stored value
 868           bool mem_ok = true;
 869           {
 870             ResourceMark rm;
 871             Unique_Node_List wq;
 872             wq.push(phi);
 873             for (uint next = 0; next < wq.size() && mem_ok; ++next) {
 874               Node *m = wq.at(next);
 875               for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
 876                 Node* u = m->fast_out(i);
 877                 if (u->is_Store() || u->is_Phi()) {
 878                   if (u != n) {
 879                     wq.push(u);
 880                     mem_ok = (wq.size() <= 10);
 881                   }
 882                 } else {
 883                   mem_ok = false;
 884                   break;
 885                 }
 886               }
 887             }
 888           }
 889           if (mem_ok) {
 890             // Move the store out of the loop if the LCA of all
 891             // users (except for the phi) is outside the loop.
 892             Node* hook = new Node(1);
 893             _igvn.rehash_node_delayed(phi);
 894             int count = phi->replace_edge(n, hook);
 895             assert(count > 0, "inconsistent phi");
 896 
 897             // Compute latest point this store can go
 898             Node* lca = get_late_ctrl(n, get_ctrl(n));
 899             if (n_loop->is_member(get_loop(lca))) {
 900               // LCA is in the loop - bail out
 901               _igvn.replace_node(hook, n);
 902               return;
 903             }
 904 #ifdef ASSERT
 905             if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
              assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is strip mined");
 907               n_loop->_head->as_Loop()->verify_strip_mined(1);
 908               Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
 909               IdealLoopTree* outer_loop = get_loop(outer);
 910               assert(n_loop->_parent == outer_loop, "broken loop tree");
              assert(get_loop(lca) == outer_loop, "safepoint in outer loop consumes all memory state");
 912             }
 913 #endif
 914 
 915             // Move store out of the loop
 916             _igvn.replace_node(hook, n->in(MemNode::Memory));
 917             _igvn.replace_input_of(n, 0, lca);
 918             set_ctrl_and_loop(n, lca);
 919 
 920             // Disconnect the phi now. An empty phi can confuse other
            // optimizations in this pass of loop opts.
 922             if (phi->in(LoopNode::LoopBackControl) == phi) {
 923               _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
 924               n_loop->_body.yank(phi);
 925             }
 926           }
 927         }
 928       }
 929     }
 930   }
 931 }
 932 
 933 //------------------------------split_if_with_blocks_pre-----------------------
 934 // Do the real work in a non-recursive function.  Data nodes want to be
 935 // cloned in the pre-order so they can feed each other nicely.
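// Returns 'n' itself when nothing is done, the replacement node when an
// address remix or dominating cast applies, or the new Phi when 'n' is
// split through a merge point.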
 936 Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
 937   // Cloning these guys is unlikely to win
 938   int n_op = n->Opcode();
 939   if( n_op == Op_MergeMem ) return n;
 940   if( n->is_Proj() ) return n;
 941   // Do not clone-up CmpFXXX variations, as these are always
 942   // followed by a CmpI
 943   if( n->is_Cmp() ) return n;
 944   // Attempt to use a conditional move instead of a phi/branch
 945   if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
 946     Node *cmov = conditional_move( n );
 947     if( cmov ) return cmov;
 948   }
 949   if( n->is_CFG() || n->is_LoadStore() )
 950     return n;
 951   if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
 952       n_op == Op_Opaque2 ) {
 953     if( !C->major_progress() )   // If chance of no more loop opts...
 954       _igvn._worklist.push(n);  // maybe we'll remove them
 955     return n;
 956   }
 957 
 958   if( n->is_Con() ) return n;   // No cloning for Con nodes
 959 
 960   Node *n_ctrl = get_ctrl(n);
 961   if( !n_ctrl ) return n;       // Dead node
 962 
 963   Node* res = try_move_store_before_loop(n, n_ctrl);
 964   if (res != NULL) {
 965     return n;
 966   }
 967 
 968 #if INCLUDE_SHENANDOAHGC
 969   if (n->Opcode() == Op_ShenandoahReadBarrier) {
 970     ((ShenandoahReadBarrierNode*)n)->try_move(n_ctrl, this);
 971   }
 972 #endif
 973 
 974   // Attempt to remix address expressions for loop invariants
 975   Node *m = remix_address_expressions( n );
 976   if( m ) return m;
 977 
 978   if (n->is_ConstraintCast()) {
 979     Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
 980     // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
 981     // Node control inputs don't necessarily agree with loop control info (due to
 982     // transformations happened in between), thus additional dominance check is needed
 983     // to keep loop info valid.
 984     if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
 985       _igvn.replace_node(n, dom_cast);
 986       return dom_cast;
 987     }
 988   }
 989 
 990   // Determine if the Node has inputs from some local Phi.
 991   // Returns the block to clone thru.
 992   Node *n_blk = has_local_phi_input( n );
 993   if( !n_blk ) return n;
 994 
 995   // Do not clone the trip counter through on a CountedLoop
 996   // (messes up the canonical shape).
 997   if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
 998 
 999   // Check for having no control input; not pinned.  Allow
1000   // dominating control.
1001   if (n->in(0)) {
1002     Node *dom = idom(n_blk);
1003     if (dom_lca(n->in(0), dom) != n->in(0)) {
1004       return n;
1005     }
1006   }
1007   // Policy: when is it profitable.  You must get more wins than
1008   // policy before it is considered profitable.  Policy is usually 0,
1009   // so 1 win is considered profitable.  Big merges will require big
1010   // cloning, so get a larger policy.
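  // For example, a plain two-way merge has req() == 3, so policy is
  // 3 >> 2 == 0 and a single win suffices; an eight-way merge has
  // req() == 9, so policy is 9 >> 2 == 2 and at least three of the cloned
  // copies must simplify before splitting pays off.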
1011   int policy = n_blk->req() >> 2;
1012 
1013   // If the loop is a candidate for range check elimination,
  // delay splitting through its phi until a later loop optimization
1015   if (n_blk->is_CountedLoop()) {
1016     IdealLoopTree *lp = get_loop(n_blk);
1017     if (lp && lp->_rce_candidate) {
1018       return n;
1019     }
1020   }
1021 
1022   // Use same limit as split_if_with_blocks_post
1023   if( C->live_nodes() > 35000 ) return n; // Method too big
1024 
1025   // Split 'n' through the merge point if it is profitable
1026   Node *phi = split_thru_phi( n, n_blk, policy );
1027   if (!phi) return n;
1028 
1029   // Found a Phi to split thru!
1030   // Replace 'n' with the new phi
1031   split_mem_thru_phi(n, n_blk, phi);
1032   _igvn.replace_node( n, phi );
1033   // Moved a load around the loop, 'en-registering' something.
1034   if (n_blk->is_Loop() && n->is_Load() &&
1035       !phi->in(LoopNode::LoopBackControl)->is_Load())
1036     C->set_major_progress();
1037 
1038   // Moved a barrier around the loop, 'en-registering' something.
1039   if (n_blk->is_Loop() && n->is_ShenandoahBarrier() &&
1040       !phi->in(LoopNode::LoopBackControl)->is_ShenandoahBarrier())
1041     C->set_major_progress();
1042 
1043   return phi;
1044 }
1045 
1046 static bool merge_point_too_heavy(Compile* C, Node* region) {
1047   // Bail out if the region and its phis have too many users.
1048   int weight = 0;
1049   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1050     weight += region->fast_out(i)->outcnt();
1051   }
1052   int nodes_left = C->max_node_limit() - C->live_nodes();
1053   if (weight * 8 > nodes_left) {
1054     if (PrintOpto) {
1055       tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
1056     }
1057     return true;
1058   } else {
1059     return false;
1060   }
1061 }
1062 
1063 static bool merge_point_safe_helper(Node* m) {
1064   if (m->is_FastLock()) {
1065     return false;
1066   }
1067 #ifdef _LP64
1068   if (m->Opcode() == Op_ConvI2L) {
1069     return false;
1070   }
1071   if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
1072     return false;
1073   }
1074 #endif
1075   if (m->is_ShenandoahBarrier()) {
1076     for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
1077       Node* n = m->fast_out(i);
1078       if (!merge_point_safe_helper(n)) {
1079         return false;
1080       }
1081     }
1082   }
1083   return true;
1084 }
1085 
1086 static bool merge_point_safe(Node* region) {
1087   // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
1088   // having a PhiNode input. This sidesteps the dangerous case where the split
1089   // ConvI2LNode may become TOP if the input Value() does not
1090   // overlap the ConvI2L range, leaving a node which may not dominate its
1091   // uses.
1092   // A better fix for this problem can be found in the BugTraq entry, but
1093   // expediency for Mantis demands this hack.
1094   // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
1095   // split_if_with_blocks from splitting a block because we could not move around
1096   // the FastLockNode.
1097   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1098     Node* n = region->fast_out(i);
1099     if (n->is_Phi()) {
1100       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1101         Node* m = n->fast_out(j);
1102         if (!merge_point_safe_helper(m)) {
1103           return false;
1104         }
1105       }
1106     }
1107   }
1108   return true;
1109 }
1110 
1111 
1112 //------------------------------place_near_use---------------------------------
// Place some computation next to its use, but not inside inner loops.
// For inner loop uses, move it to the preheader area.
1115 Node *PhaseIdealLoop::place_near_use(Node *useblock) const {
1116   IdealLoopTree *u_loop = get_loop( useblock );
1117   if (u_loop->_irreducible) {
1118     return useblock;
1119   }
1120   if (u_loop->_child) {
1121     if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) {
1122       return u_loop->_head->in(LoopNode::EntryControl);
1123     }
1124     return useblock;
1125   }
1126   return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
1127 }
1128 
1129 
1130 bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1131   if (!n->is_If() || n->is_CountedLoopEnd()) {
1132     return false;
1133   }
1134   Node* region = n->in(0);
1135 
1136   if (!region->is_Region()) {
1137     return false;
1138   }
1139   Node* dom = idom(region);
1140   if (!dom->is_If()) {
1141     return false;
1142   }
1143 
1144 #if INCLUDE_SHENANDOAHGC
1145   bool shenandoah_heap_stable = ShenandoahWriteBarrierNode::is_heap_stable_test(n);
1146   if (shenandoah_heap_stable) {
1147     if (!ShenandoahWriteBarrierNode::is_heap_stable_test(dom)) {
1148       return false;
1149     }
1150   } else
1151 #endif
1152   {
1153     if (dom->in(1) != n->in(1)) {
1154       return false;
1155     }
1156   }
1157   IfNode* dom_if = dom->as_If();
1158   Node* proj_true = dom_if->proj_out(1);
1159   Node* proj_false = dom_if->proj_out(0);
1160 
1161   for (uint i = 1; i < region->req(); i++) {
1162     if (is_dominator(proj_true, region->in(i))) {
1163       continue;
1164     }
1165     if (is_dominator(proj_false, region->in(i))) {
1166       continue;
1167     }
1168     return false;
1169   }
1170 
1171   return true;
1172 }
1173 
1174 bool PhaseIdealLoop::can_split_if(Node *n_ctrl) {
1175   assert(n_ctrl->is_Region(), "broken");
1176   if (C->live_nodes() > 35000) {
1177     return false; // Method too big
1178   }
1179 
1180   // Do not do 'split-if' if irreducible loops are present.
1181   if (_has_irreducible_loops) {
1182     return false;
1183   }
1184 
1185   if (merge_point_too_heavy(C, n_ctrl)) {
1186     return false;
1187   }
1188 
1189   // Do not do 'split-if' if some paths are dead.  First do dead code
1190   // elimination and then see if its still profitable.
1191   for (uint i = 1; i < n_ctrl->req(); i++) {
1192     if (n_ctrl->in(i) == C->top()) {
1193       return false;
1194     }
1195   }
1196 
1197   // If trying to do a 'Split-If' at the loop head, it is only
1198   // profitable if the cmp folds up on BOTH paths.  Otherwise we
1199   // risk peeling a loop forever.
1200 
1201   // CNC - Disabled for now.  Requires careful handling of loop
1202   // body selection for the cloned code.  Also, make sure we check
1203   // for any input path not being in the same loop as n_ctrl.  For
1204   // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1205   // because the alternative loop entry points won't be converted
1206   // into LoopNodes.
1207   IdealLoopTree *n_loop = get_loop(n_ctrl);
1208   for (uint j = 1; j < n_ctrl->req(); j++) {
1209     if (get_loop(n_ctrl->in(j)) != n_loop) {
1210       return false;
1211     }
1212   }
1213 
1214   // Check for safety of the merge point.
1215   if (!merge_point_safe(n_ctrl)) {
1216     return false;
1217   }
1218 
1219   return true;
1220 }
1221 
1222 //------------------------------split_if_with_blocks_post----------------------
1223 // Do the real work in a non-recursive function.  CFG hackery wants to be
1224 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1225 // info.
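// Three transformations are attempted here: the classic split-if of a
// Cmp/Bool/If chain through a Region, merging two identical back-to-back
// Ifs, and folding an If dominated by an identical test; the tail of the
// function also clones computations whose only uses are in outer loops so
// they can sink out of their loop.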
1226 void PhaseIdealLoop::split_if_with_blocks_post(Node *n, bool last_round) {
1227 
1228   // Cloning Cmp through Phi's involves the split-if transform.
1229   // FastLock is not used by an If
1230   if (n->is_Cmp() && !n->is_FastLock() && !last_round) {
1231     Node *n_ctrl = get_ctrl(n);
1232     // Determine if the Node has inputs from some local Phi.
1233     // Returns the block to clone thru.
1234     Node *n_blk = has_local_phi_input(n);
1235     if (n_blk != n_ctrl) {
1236       return;
1237     }
1238 
1239     if (!can_split_if(n_ctrl)) {
1240       return;
1241     }
1242 
1243     if (n->outcnt() != 1) {
1244       return; // Multiple bool's from 1 compare?
1245     }
1246     Node *bol = n->unique_out();
1247     assert(bol->is_Bool(), "expect a bool here");
1248     if (bol->outcnt() != 1) {
      return; // Multiple branches from 1 compare?
1250     }
1251     Node *iff = bol->unique_out();
1252 
1253     // Check some safety conditions
1254     if (iff->is_If()) {        // Classic split-if?
1255       if (iff->in(0) != n_ctrl) {
1256         return; // Compare must be in same blk as if
1257       }
1258     } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1259       // Can't split CMove with different control edge.
1260       if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
1261         return;
1262       }
1263       if (get_ctrl(iff->in(2)) == n_ctrl ||
1264           get_ctrl(iff->in(3)) == n_ctrl) {
1265         return;                 // Inputs not yet split-up
1266       }
1267       if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
1268         return;                 // Loop-invar test gates loop-varying CMOVE
1269       }
1270     } else {
1271       return;  // some other kind of node, such as an Allocate
1272     }
1273 
    // When is split-if profitable?  Every 'win' means some control flow
1275     // goes dead, so it's almost always a win.
1276     int policy = 0;
1277     // Split compare 'n' through the merge point if it is profitable
1278     Node *phi = split_thru_phi( n, n_ctrl, policy);
1279     if (!phi) {
1280       return;
1281     }
1282 
1283     // Found a Phi to split thru!
1284     // Replace 'n' with the new phi
1285     _igvn.replace_node(n, phi);
1286 
1287     // Now split the bool up thru the phi
1288     Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
1289     guarantee(bolphi != NULL, "null boolean phi node");
1290 
1291     _igvn.replace_node(bol, bolphi);
1292     assert(iff->in(1) == bolphi, "");
1293 
1294     if (bolphi->Value(&_igvn)->singleton()) {
1295       return;
1296     }
1297 
1298     // Conditional-move?  Must split up now
1299     if (!iff->is_If()) {
1300       Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
1301       _igvn.replace_node(iff, cmovphi);
1302       return;
1303     }
1304 
1305     // Now split the IF
1306     do_split_if(iff);
1307     return;
1308   }
1309 
1310   // Two identical ifs back to back can be merged
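  // The second If's Bool is replaced by a Phi of constants: 1 on region
  // inputs reached through the dominating If's true projection, 0 on those
  // reached through its false projection, which lets do_split_if() split
  // the now phi-fed If.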
1311   if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
1312     Node *n_ctrl = n->in(0);
1313     PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
1314     IfNode* dom_if = idom(n_ctrl)->as_If();
1315     Node* proj_true = dom_if->proj_out(1);
1316     Node* proj_false = dom_if->proj_out(0);
1317     Node* con_true = _igvn.makecon(TypeInt::ONE);
1318     Node* con_false = _igvn.makecon(TypeInt::ZERO);
1319 
1320     for (uint i = 1; i < n_ctrl->req(); i++) {
1321       if (is_dominator(proj_true, n_ctrl->in(i))) {
1322         bolphi->init_req(i, con_true);
1323       } else {
1324         assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
1325         bolphi->init_req(i, con_false);
1326       }
1327     }
1328     register_new_node(bolphi, n_ctrl);
1329     _igvn.replace_input_of(n, 1, bolphi);
1330 
1331     // Now split the IF
1332     do_split_if(n);
1333     return;
1334   }
1335 
1336   // Check for an IF ready to split; one that has its
1337   // condition codes input coming from a Phi at the block start.
1338   int n_op = n->Opcode();
1339 
  // Check for an IF being dominated by another IF with the same test
1341   if (n_op == Op_If ||
1342       n_op == Op_RangeCheck) {
1343     Node *bol = n->in(1);
1344     uint max = bol->outcnt();
1345     // Check for same test used more than once?
1346     if (max > 1 && bol->is_Bool()) {
1347       // Search up IDOMs to see if this IF is dominated.
1348       Node *cutoff = get_ctrl(bol);
1349 
1350       // Now search up IDOMs till cutoff, looking for a dominating test
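      // A hit is an If on the idom chain that tests the same Bool and that
      // we walked into through one of its projections (prevdom->in(0) == dom).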
1351       Node *prevdom = n;
1352       Node *dom = idom(prevdom);
1353       while (dom != cutoff) {
1354         if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1355           // Replace the dominated test with an obvious true or false.
1356           // Place it on the IGVN worklist for later cleanup.
1357           C->set_major_progress();
1358           dominated_by(prevdom, n, false, true);
1359 #ifndef PRODUCT
1360           if( VerifyLoopOptimizations ) verify();
1361 #endif
1362           return;
1363         }
1364         prevdom = dom;
1365         dom = idom(prevdom);
1366       }
1367     }
1368   }
1369 
1370   // See if a shared loop-varying computation has no loop-varying uses.
1371   // Happens if something is only used for JVM state in uncommon trap exits,
1372   // like various versions of induction variable+offset.  Clone the
1373   // computation per usage to allow it to sink out of the loop.
1374   if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
1375     Node *n_ctrl = get_ctrl(n);
1376     IdealLoopTree *n_loop = get_loop(n_ctrl);
1377     if( n_loop != _ltree_root ) {
1378       DUIterator_Fast imax, i = n->fast_outs(imax);
1379       for (; i < imax; i++) {
1380         Node* u = n->fast_out(i);
1381         if( !has_ctrl(u) )     break; // Found control user
1382         IdealLoopTree *u_loop = get_loop(get_ctrl(u));
1383         if( u_loop == n_loop ) break; // Found loop-varying use
1384         if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
1385         if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
1386       }
1387       bool did_break = (i < imax);  // Did we break out of the previous loop?
1388       if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
1389         Node *late_load_ctrl = NULL;
1390         if (n->is_Load()) {
1391           // If n is a load, get and save the result from get_late_ctrl(),
1392           // to be later used in calculating the control for n's clones.
1393           clear_dom_lca_tags();
1394           late_load_ctrl = get_late_ctrl(n, n_ctrl);
1395         }
1396         // If n is a load, and the late control is the same as the current
1397         // control, then the cloning of n is a pointless exercise, because
1398         // GVN will ensure that we end up where we started.
1399         if (!n->is_Load() || late_load_ctrl != n_ctrl) {
1400           for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
1401             Node *u = n->last_out(j); // Clone private computation per use
1402             _igvn.rehash_node_delayed(u);
1403             Node *x = n->clone(); // Clone computation
1404             Node *x_ctrl = NULL;
1405             if( u->is_Phi() ) {
1406               // Replace all uses of normal nodes.  Replace Phi uses
1407               // individually, so the separate Nodes can sink down
1408               // different paths.
1409               uint k = 1;
1410               while( u->in(k) != n ) k++;
1411               u->set_req( k, x );
1412               // x goes next to Phi input path
1413               x_ctrl = u->in(0)->in(k);
1414               --j;
1415             } else {              // Normal use
1416               // Replace all uses
1417               for( uint k = 0; k < u->req(); k++ ) {
1418                 if( u->in(k) == n ) {
1419                   u->set_req( k, x );
1420                   --j;
1421                 }
1422               }
1423               x_ctrl = get_ctrl(u);
1424             }
1425 
1426             // Find control for 'x' next to use but not inside inner loops.
1427             // For inner loop uses get the preheader area.
1428             x_ctrl = place_near_use(x_ctrl);
1429 
1430             if (n->is_Load() || n->Opcode() == Op_ShenandoahReadBarrier) {
1431               // For loads, add a control edge to a CFG node outside of the loop
1432               // to force them to not combine and return back inside the loop
1433               // during GVN optimization (4641526).
1434               //
1435               // Because we are setting the actual control input, factor in
1436               // the result from get_late_ctrl() so we respect any
1437               // anti-dependences. (6233005).
1438               if (n->is_Load()) {
1439                 x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1440               }
1441 
1442               // Don't allow the control input to be a CFG splitting node.
1443               // Such nodes should only have ProjNodes as outs, e.g. IfNode
1444               // should only have IfTrueNode and IfFalseNode (4985384).
1445               x_ctrl = find_non_split_ctrl(x_ctrl);
1446               assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1447 
1448               x->set_req(0, x_ctrl);
1449             }
1450             register_new_node(x, x_ctrl);
1451 
1452             // Some institutional knowledge is needed here: 'x' is
1453             // yanked because if the optimizer runs GVN on it all the
1454             // cloned x's will common up and undo this optimization and
1455             // be forced back in the loop.  This is annoying because it
1456             // makes +VerifyOpto report false-positives on progress.  I
1457             // tried setting control edges on the x's to force them to
1458             // not combine, but the matching gets worried when it tries
1459             // to fold a StoreP and an AddP together (as part of an
1460             // address expression) and the AddP and StoreP have
1461             // different controls.
1462             if (!x->is_Load() && !x->is_DecodeNarrowPtr() && !x->is_ShenandoahBarrier() && !x->is_MergeMem()) _igvn._worklist.yank(x);
1463           }
1464           _igvn.remove_dead_node(n);
1465         }
1466       }
1467     }
1468   }
1469 
1470   try_move_store_after_loop(n);
1471 
1472   // Check for Opaque2 nodes whose loop has disappeared - whose input is in the
1473   // same loop nest as their output.  Remove them, they are no longer useful.
1474   if( n_op == Op_Opaque2 &&
1475       n->in(1) != NULL &&
1476       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1477     _igvn.replace_node( n, n->in(1) );
1478   }
1479 
1480 #if INCLUDE_ZGC
1481   if (UseZGC) {
1482     ZBarrierSetC2::loop_optimize_gc_barrier(this, n, last_round);
1483   }
1484 #endif
1485 }
1486 
1487 //------------------------------split_if_with_blocks---------------------------
1488 // Check for aggressive application of 'split-if' optimization,
1489 // using basic block level info.
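     // The traversal below is an iterative depth-first walk over def->use edges
     // starting at the root: split_if_with_blocks_pre() runs when a node is first
     // reached, and split_if_with_blocks_post() runs once all of its uses have
     // been visited.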
1490 void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack, bool last_round) {
1491   Node *n = C->root();
1492   visited.set(n->_idx); // first, mark node as visited
1493   // Do pre-visit work for root
1494   n = split_if_with_blocks_pre( n );
1495   uint cnt = n->outcnt();
1496   uint i   = 0;
1497   while (true) {
1498     // Visit all children
1499     if (i < cnt) {
1500       Node* use = n->raw_out(i);
1501       ++i;
1502       if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1503         // Now do pre-visit work for this use
1504         use = split_if_with_blocks_pre( use );
1505         nstack.push(n, i); // Save parent and next use's index.
1506         n   = use;         // Process all children of current use.
1507         cnt = use->outcnt();
1508         i   = 0;
1509       }
1510     }
1511     else {
1512       // All of n's children have been processed, complete post-processing.
1513       if (cnt != 0 && !n->is_Con()) {
1514         assert(has_node(n), "no dead nodes");
1515         split_if_with_blocks_post( n, last_round );
1516       }
1517       if (nstack.is_empty()) {
1518         // Finished all nodes on stack.
1519         break;
1520       }
1521       // Get saved parent node and next use's index. Visit the rest of uses.
1522       n   = nstack.node();
1523       cnt = n->outcnt();
1524       i   = nstack.index();
1525       nstack.pop();
1526     }
1527   }
1528 }
1529 
1530 
1531 //=============================================================================
1532 //
1533 //                   C L O N E   A   L O O P   B O D Y
1534 //
1535 
1536 //------------------------------clone_iff--------------------------------------
1537 // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1538 // "Nearly" because all Nodes have been cloned from the original in the loop,
1539 // but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1540 // through the Phi recursively, and return a Bool (or Opaque4).
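     // Conceptually (a sketch, ignoring the optional Opaque4 wrapper):
     //   Phi(Bool(Cmp(a1,b1)), Bool(Cmp(a2,b2)), ...)
     // is rewritten as
     //   Bool(Cmp(Phi(a1,a2,...), Phi(b1,b2,...)))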
1541 Node* PhaseIdealLoop::clone_iff(PhiNode *phi, IdealLoopTree *loop) {
1542 
1543   // Convert this Phi into a Phi merging Bools
1544   uint i;
1545   for (i = 1; i < phi->req(); i++) {
1546     Node *b = phi->in(i);
1547     if (b->is_Phi()) {
1548       _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi(), loop));
1549     } else {
1550       assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
1551     }
1552   }
1553 
1554   Node* n = phi->in(1);
1555   Node* sample_opaque = NULL;
1556   Node *sample_bool = NULL;
1557   if (n->Opcode() == Op_Opaque4) {
1558     sample_opaque = n;
1559     sample_bool = n->in(1);
1560     assert(sample_bool->is_Bool(), "wrong type");
1561   } else {
1562     sample_bool = n;
1563   }
1564   Node *sample_cmp = sample_bool->in(1);
1565 
1566   // Make Phis to merge the Cmp's inputs.
1567   PhiNode *phi1 = new PhiNode(phi->in(0), Type::TOP);
1568   PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
1569   for (i = 1; i < phi->req(); i++) {
1570     Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
1571     Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
1572     phi1->set_req(i, n1);
1573     phi2->set_req(i, n2);
1574     phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1575     phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1576   }
1577   // See if these Phis have been made before.
1578   // Register with optimizer
1579   Node *hit1 = _igvn.hash_find_insert(phi1);
1580   if (hit1) {                   // Hit, toss just made Phi
1581     _igvn.remove_dead_node(phi1); // Remove new phi
1582     assert(hit1->is_Phi(), "" );
1583     phi1 = (PhiNode*)hit1;      // Use existing phi
1584   } else {                      // Miss
1585     _igvn.register_new_node_with_optimizer(phi1);
1586   }
1587   Node *hit2 = _igvn.hash_find_insert(phi2);
1588   if (hit2) {                   // Hit, toss just made Phi
1589     _igvn.remove_dead_node(phi2); // Remove new phi
1590     assert(hit2->is_Phi(), "" );
1591     phi2 = (PhiNode*)hit2;      // Use existing phi
1592   } else {                      // Miss
1593     _igvn.register_new_node_with_optimizer(phi2);
1594   }
1595   // Register Phis with loop/block info
1596   set_ctrl(phi1, phi->in(0));
1597   set_ctrl(phi2, phi->in(0));
1598   // Make a new Cmp
1599   Node *cmp = sample_cmp->clone();
1600   cmp->set_req(1, phi1);
1601   cmp->set_req(2, phi2);
1602   _igvn.register_new_node_with_optimizer(cmp);
1603   set_ctrl(cmp, phi->in(0));
1604 
1605   // Make a new Bool
1606   Node *b = sample_bool->clone();
1607   b->set_req(1,cmp);
1608   _igvn.register_new_node_with_optimizer(b);
1609   set_ctrl(b, phi->in(0));
1610 
1611   if (sample_opaque != NULL) {
1612     Node* opaque = sample_opaque->clone();
1613     opaque->set_req(1, b);
1614     _igvn.register_new_node_with_optimizer(opaque);
1615     set_ctrl(opaque, phi->in(0));
1616     return opaque;
1617   }
1618 
1619   assert(b->is_Bool(), "");
1620   return b;
1621 }
1622 
1623 //------------------------------clone_bool-------------------------------------
1624 // Passed in a Phi merging (recursively) some nearly equivalent Cmps.
1625 // "Nearly" because all Nodes have been cloned from the original in the loop,
1626 // but the fall-in edges to the Cmp are different.  Clone the Cmps
1627 // through the Phi recursively, and return a Cmp.
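     // Conceptually (a sketch):
     //   Phi(Cmp(a1,b1), Cmp(a2,b2), ...)  ==>  Cmp(Phi(a1,a2,...), Phi(b1,b2,...))
     // A TOP input to the merging Phi feeds TOP into both operand Phis.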
1628 CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1629   uint i;
1630   // Convert this Phi into a Phi merging Bools
1631   for( i = 1; i < phi->req(); i++ ) {
1632     Node *b = phi->in(i);
1633     if( b->is_Phi() ) {
1634       _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1635     } else {
1636       assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1637     }
1638   }
1639 
1640   Node *sample_cmp = phi->in(1);
1641 
1642   // Make Phis to merge the Cmp's inputs.
1643   PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1644   PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1645   for( uint j = 1; j < phi->req(); j++ ) {
1646     Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1647     Node *n1, *n2;
1648     if( cmp_top->is_Cmp() ) {
1649       n1 = cmp_top->in(1);
1650       n2 = cmp_top->in(2);
1651     } else {
1652       n1 = n2 = cmp_top;
1653     }
1654     phi1->set_req( j, n1 );
1655     phi2->set_req( j, n2 );
1656     phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1657     phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1658   }
1659 
1660   // See if these Phis have been made before.
1661   // Register with optimizer
1662   Node *hit1 = _igvn.hash_find_insert(phi1);
1663   if( hit1 ) {                  // Hit, toss just made Phi
1664     _igvn.remove_dead_node(phi1); // Remove new phi
1665     assert( hit1->is_Phi(), "" );
1666     phi1 = (PhiNode*)hit1;      // Use existing phi
1667   } else {                      // Miss
1668     _igvn.register_new_node_with_optimizer(phi1);
1669   }
1670   Node *hit2 = _igvn.hash_find_insert(phi2);
1671   if( hit2 ) {                  // Hit, toss just made Phi
1672     _igvn.remove_dead_node(phi2); // Remove new phi
1673     assert( hit2->is_Phi(), "" );
1674     phi2 = (PhiNode*)hit2;      // Use existing phi
1675   } else {                      // Miss
1676     _igvn.register_new_node_with_optimizer(phi2);
1677   }
1678   // Register Phis with loop/block info
1679   set_ctrl(phi1, phi->in(0));
1680   set_ctrl(phi2, phi->in(0));
1681   // Make a new Cmp
1682   Node *cmp = sample_cmp->clone();
1683   cmp->set_req( 1, phi1 );
1684   cmp->set_req( 2, phi2 );
1685   _igvn.register_new_node_with_optimizer(cmp);
1686   set_ctrl(cmp, phi->in(0));
1687 
1688   assert( cmp->is_Cmp(), "" );
1689   return (CmpNode*)cmp;
1690 }
1691 
1692 //------------------------------sink_use---------------------------------------
1693 // If 'use' was in the loop-exit block, it now needs to be sunk
1694 // below the post-loop merge point.
1695 void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1696   if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1697     set_ctrl(use, post_loop);
1698     for (DUIterator j = use->outs(); use->has_out(j); j++)
1699       sink_use(use->out(j), post_loop);
1700   }
1701 }
1702 
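     // Fix up data uses of 'old' (a node in the just-cloned loop body) that lie
     // outside both the original loop and its outer strip-mined loop.  Each such
     // use is redirected to a Phi merging the old-loop and new-loop exit values;
     // If, Bool and CreateEx uses are also queued for later splitting.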
1703 void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
1704                                                  IdealLoopTree* loop, IdealLoopTree* outer_loop,
1705                                                  Node_List*& split_if_set, Node_List*& split_bool_set,
1706                                                  Node_List*& split_cex_set, Node_List& worklist,
1707                                                  uint new_counter, CloneLoopMode mode) {
1708   Node* nnn = old_new[old->_idx];
1709   // Copy uses to a worklist, so I can munge the def-use info
1710   // with impunity.
1711   for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1712     worklist.push(old->fast_out(j));
1713 
1714   while( worklist.size() ) {
1715     Node *use = worklist.pop();
1716     if (!has_node(use))  continue; // Ignore dead nodes
1717     if (use->in(0) == C->top())  continue;
1718     IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1719     // Check for data-use outside of loop - at least one of OLD or USE
1720     // must not be a CFG node.
1721 #ifdef ASSERT
1722     if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
1723       Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
1724       assert(mode == ControlAroundStripMined && use == sfpt, "missed a node");
1725     }
1726 #endif
1727     if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {
1728 
1729       // If the Data use is an IF, that means we have an IF outside of the
1730       // loop that is switching on a condition that is set inside of the
1731       // loop.  Happens if people set a loop-exit flag; then test the flag
1732       // in the loop to break the loop, then test it again outside of the
1733       // loop to determine which way the loop exited.
1734       // Loop predicate If node connects to Bool node through Opaque1 node.
1735       if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
1736         // Since this code is highly unlikely, we lazily build the worklist
1737         // of such Nodes to go split.
1738         if (!split_if_set) {
1739           ResourceArea *area = Thread::current()->resource_area();
1740           split_if_set = new Node_List(area);
1741         }
1742         split_if_set->push(use);
1743       }
1744       if (use->is_Bool()) {
1745         if (!split_bool_set) {
1746           ResourceArea *area = Thread::current()->resource_area();
1747           split_bool_set = new Node_List(area);
1748         }
1749         split_bool_set->push(use);
1750       }
1751       if (use->Opcode() == Op_CreateEx) {
1752         if (!split_cex_set) {
1753           ResourceArea *area = Thread::current()->resource_area();
1754           split_cex_set = new Node_List(area);
1755         }
1756         split_cex_set->push(use);
1757       }
1758 
1759 
1760       // Get "block" use is in
1761       uint idx = 0;
1762       while( use->in(idx) != old ) idx++;
1763       Node *prev = use->is_CFG() ? use : get_ctrl(use);
1764       assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
1765       Node *cfg = prev->_idx >= new_counter
1766         ? prev->in(2)
1767         : idom(prev);
1768       if( use->is_Phi() )     // Phi use is in prior block
1769         cfg = prev->in(idx);  // NOT in block of Phi itself
1770       if (cfg->is_top()) {    // Use is dead?
1771         _igvn.replace_input_of(use, idx, C->top());
1772         continue;
1773       }
1774 
1775       // If use is referenced through control edge... (idx == 0)
1776       if (mode == IgnoreStripMined && idx == 0) {
1777         LoopNode *head = loop->_head->as_Loop();
1778         if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
1779           // That node is outside the inner loop, leave it outside the
1780           // outer loop as well to not confuse verification code.
1781           assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
1782           _igvn.replace_input_of(use, 0, head->outer_loop_exit());
1783           continue;
1784         }
1785       }
1786 
1787       while(!outer_loop->is_member(get_loop(cfg))) {
1788         prev = cfg;
1789         cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1790       }
1791       // If the use occurs after merging several exits from the loop, then
1792       // old value must have dominated all those exits.  Since the same old
1793       // value was used on all those exits we did not need a Phi at this
1794       // merge point.  NOW we do need a Phi here.  Each loop exit value
1795       // is now merged with the peeled body exit; each exit gets its own
1796       // private Phi and those Phis need to be merged here.
1797       Node *phi;
1798       if( prev->is_Region() ) {
1799         if (idx == 0 && use->Opcode() != Op_ShenandoahWBMemProj) {      // Updating control edge?
1800           phi = prev;         // Just use existing control
1801         } else {              // Else need a new Phi
1802           phi = PhiNode::make( prev, old );
1803           // Now recursively fix up the new uses of old!
1804           uint first = use->Opcode() != Op_ShenandoahWBMemProj ? 1 : 0;
1805           for (uint i = first; i < prev->req(); i++) {
1806             worklist.push(phi); // Onto worklist once for each 'old' input
1807           }
1808         }
1809       } else {
1810         // Get new RegionNode merging old and new loop exits
1811         prev = old_new[prev->_idx];
1812         assert( prev, "just made this in step 7" );
1813         if( idx == 0) {      // Updating control edge?
1814           phi = prev;         // Just use existing control
1815         } else {              // Else need a new Phi
1816           // Make a new Phi merging data values properly
1817           phi = PhiNode::make( prev, old );
1818           phi->set_req( 1, nnn );
1819         }
1820       }
1821       // If inserting a new Phi, check for prior hits
1822       if (idx != 0 && use->Opcode() != Op_ShenandoahWBMemProj) {
1823         Node *hit = _igvn.hash_find_insert(phi);
1824         if( hit == NULL ) {
1825           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1826         } else {                                      // or
1827           // Remove the new phi from the graph and use the hit
1828           _igvn.remove_dead_node(phi);
1829           phi = hit;                                  // Use existing phi
1830         }
1831         set_ctrl(phi, prev);
1832       }
1833       // Make 'use' use the Phi instead of the old loop body exit value
1834       _igvn.replace_input_of(use, idx, phi);
1835       if( use->_idx >= new_counter ) { // If updating new phis
1836         // Not needed for correctness, but prevents a weak assert
1837         // in AddPNode from tripping (when we end up with different
1838         // base & derived Phis that will become the same after
1839         // IGVN does CSE).
1840         Node *hit = _igvn.hash_find_insert(use);
1841         if( hit )             // Go ahead and re-hash for hits.
1842           _igvn.replace_node( use, hit );
1843       }
1844 
1845       // If 'use' was in the loop-exit block, it now needs to be sunk
1846       // below the post-loop merge point.
1847       sink_use( use, prev );
1848     }
1849   }
1850 }
1851 
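     // Collect into 'wq' the non-CFG uses of 'n' (skipping nodes that were already
     // cloned when 'check_old_new' is set) whose control places them in the outer
     // strip-mined loop; their control is later moved outside the loop nest.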
1852 static void clone_outer_loop_helper(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
1853                                     const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
1854                                     bool check_old_new) {
1855   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1856     Node* u = n->fast_out(j);
1857     assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
1858     if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
1859       Node* c = phase->get_ctrl(u);
1860       IdealLoopTree* u_loop = phase->get_loop(c);
1861       assert(!loop->is_member(u_loop), "can be in outer loop or out of both loops only");
1862       if (outer_loop->is_member(u_loop)) {
1863         wq.push(u);
1864       }
1865     }
1866   }
1867 }
1868 
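     // Handle the outer strip-mined loop while cloning a loop body.  With
     // mode == CloneIncludesStripMined the outer loop skeleton (outer LoopNode,
     // safepoint, tail and loop end) is cloned as well; otherwise the cloned
     // inner head drops its strip-mined status and its entry control bypasses the
     // outer loop head.  Data nodes kept in the outer loop by the safepoint are
     // cloned too and recorded in 'extra_data_nodes'.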
1869 void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
1870                                       IdealLoopTree* outer_loop, int dd, Node_List &old_new,
1871                                       Node_List& extra_data_nodes) {
1872   if (head->is_strip_mined() && mode != IgnoreStripMined) {
1873     CountedLoopNode* cl = head->as_CountedLoop();
1874     Node* l = cl->outer_loop();
1875     Node* tail = cl->outer_loop_tail();
1876     IfNode* le = cl->outer_loop_end();
1877     Node* sfpt = cl->outer_safepoint();
1878     CountedLoopEndNode* cle = cl->loopexit();
1879     CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
1880     CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
1881     Node* cle_out = cle->proj_out(false);
1882 
1883     Node* new_sfpt = NULL;
1884     Node* new_cle_out = cle_out->clone();
1885     old_new.map(cle_out->_idx, new_cle_out);
1886     if (mode == CloneIncludesStripMined) {
1887       // clone outer loop body
1888       Node* new_l = l->clone();
1889       Node* new_tail = tail->clone();
1890       IfNode* new_le = le->clone()->as_If();
1891       new_sfpt = sfpt->clone();
1892 
1893       set_loop(new_l, outer_loop->_parent);
1894       set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
1895       set_loop(new_cle_out, outer_loop->_parent);
1896       set_idom(new_cle_out, new_cle, dd);
1897       set_loop(new_sfpt, outer_loop->_parent);
1898       set_idom(new_sfpt, new_cle_out, dd);
1899       set_loop(new_le, outer_loop->_parent);
1900       set_idom(new_le, new_sfpt, dd);
1901       set_loop(new_tail, outer_loop->_parent);
1902       set_idom(new_tail, new_le, dd);
1903       set_idom(new_cl, new_l, dd);
1904 
1905       old_new.map(l->_idx, new_l);
1906       old_new.map(tail->_idx, new_tail);
1907       old_new.map(le->_idx, new_le);
1908       old_new.map(sfpt->_idx, new_sfpt);
1909 
1910       new_l->set_req(LoopNode::LoopBackControl, new_tail);
1911       new_l->set_req(0, new_l);
1912       new_tail->set_req(0, new_le);
1913       new_le->set_req(0, new_sfpt);
1914       new_sfpt->set_req(0, new_cle_out);
1915       new_cle_out->set_req(0, new_cle);
1916       new_cl->set_req(LoopNode::EntryControl, new_l);
1917 
1918       _igvn.register_new_node_with_optimizer(new_l);
1919       _igvn.register_new_node_with_optimizer(new_tail);
1920       _igvn.register_new_node_with_optimizer(new_le);
1921     } else {
1922       Node *newhead = old_new[loop->_head->_idx];
1923       newhead->as_Loop()->clear_strip_mined();
1924       _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
1925       set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1926     }
1927     // Look at data nodes that were assigned a control in the outer
1928     // loop: they are kept in the outer loop by the safepoint, so start
1929     // from the safepoint node's inputs.
1930     IdealLoopTree* outer_loop = get_loop(l);
1931     Node_Stack stack(2);
1932     stack.push(sfpt, 1);
1933     uint new_counter = C->unique();
1934     while (stack.size() > 0) {
1935       Node* n = stack.node();
1936       uint i = stack.index();
1937       while (i < n->req() &&
1938              (n->in(i) == NULL ||
1939               !has_ctrl(n->in(i)) ||
1940               get_loop(get_ctrl(n->in(i))) != outer_loop ||
1941               (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
1942         i++;
1943       }
1944       if (i < n->req()) {
1945         stack.set_index(i+1);
1946         stack.push(n->in(i), 0);
1947       } else {
1948         assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
1949         Node* m = n == sfpt ? new_sfpt : n->clone();
1950         if (m != NULL) {
1951           for (uint i = 0; i < n->req(); i++) {
1952             if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
1953               m->set_req(i, old_new[m->in(i)->_idx]);
1954             }
1955           }
1956         } else {
1957           assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
1958         }
1959         if (n != sfpt) {
1960           extra_data_nodes.push(n);
1961           _igvn.register_new_node_with_optimizer(m);
1962           assert(get_ctrl(n) == cle_out, "what other control?");
1963           set_ctrl(m, new_cle_out);
1964           old_new.map(n->_idx, m);
1965         }
1966         stack.pop();
1967       }
1968     }
1969     if (mode == CloneIncludesStripMined) {
1970       _igvn.register_new_node_with_optimizer(new_sfpt);
1971       _igvn.register_new_node_with_optimizer(new_cle_out);
1972     }
1973     // Some other transformation may have pessimistically assigned some
1974     // data nodes to the outer loop. Set their control so they are out
1975     // of the outer loop.
1976     ResourceMark rm;
1977     Unique_Node_List wq;
1978     for (uint i = 0; i < extra_data_nodes.size(); i++) {
1979       Node* old = extra_data_nodes.at(i);
1980       clone_outer_loop_helper(old, loop, outer_loop, old_new, wq, this, true);
1981     }
1982     Node* new_ctrl = cl->outer_loop_exit();
1983     assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
1984     for (uint i = 0; i < wq.size(); i++) {
1985       Node* n = wq.at(i);
1986       set_ctrl(n, new_ctrl);
1987       clone_outer_loop_helper(n, loop, outer_loop, old_new, wq, this, false);
1988     }
1989   } else {
1990     Node *newhead = old_new[loop->_head->_idx];
1991     set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1992   }
1993 }
1994 
1995 //------------------------------clone_loop-------------------------------------
1996 //
1997 //                   C L O N E   A   L O O P   B O D Y
1998 //
1999 // This is the basic building block of the loop optimizations.  It clones an
2000 // entire loop body.  It makes an old_new loop body mapping; with this mapping
2001 // you can find the new-loop equivalent to an old-loop node.  All new-loop
2002 // nodes are exactly equal to their old-loop counterparts, all edges are the
2003 // same.  All exits from the old-loop now have a RegionNode that merges the
2004 // equivalent new-loop path.  This is true even for the normal "loop-exit"
2005 // condition.  All uses of loop-invariant old-loop values now come from (one
2006 // or more) Phis that merge their new-loop equivalents.
2007 //
2008 // This operation leaves the graph in an illegal state: there are two valid
2009 // control edges coming from the loop pre-header to both loop bodies.  I'll
2010 // definitely have to hack the graph after running this transform.
2011 //
2012 // From this building block I will further edit edges to perform loop peeling
2013 // or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
2014 //
2015 // Parameter side_by_side_idom:
2016 //   When side_by_side_idom is NULL, the dominator tree is constructed for
2017 //      the clone loop to dominate the original.  Used in construction of
2018 //      pre-main-post loop sequence.
2019 //   When nonnull, the clone and original are side-by-side, both are
2020 //      dominated by the side_by_side_idom node.  Used in construction of
2021 //      unswitched loops.
2022 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
2023                                 CloneLoopMode mode, Node* side_by_side_idom) {
2024 
2025   LoopNode* head = loop->_head->as_Loop();
2026   head->verify_strip_mined(1);
2027 
2028   if (C->do_vector_loop() && PrintOpto) {
2029     const char* mname = C->method()->name()->as_quoted_ascii();
2030     if (mname != NULL) {
2031       tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
2032     }
2033   }
2034 
2035   CloneMap& cm = C->clone_map();
2036   Dict* dict = cm.dict();
2037   if (C->do_vector_loop()) {
2038     cm.set_clone_idx(cm.max_gen()+1);
2039 #ifndef PRODUCT
2040     if (PrintOpto) {
2041       tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
2042       loop->dump_head();
2043     }
2044 #endif
2045   }
2046 
2047   // Step 1: Clone the loop body.  Make the old->new mapping.
2048   uint i;
2049   for( i = 0; i < loop->_body.size(); i++ ) {
2050     Node *old = loop->_body.at(i);
2051     Node *nnn = old->clone();
2052     old_new.map( old->_idx, nnn );
2053     if (C->do_vector_loop()) {
2054       cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
2055     }
2056     _igvn.register_new_node_with_optimizer(nnn);
2057   }
2058 
2059   IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
2060 
2061   // Step 2: Fix the edges in the new body.  If the old input is outside the
2062   // loop use it.  If the old input is INside the loop, use the corresponding
2063   // new node instead.
2064   for( i = 0; i < loop->_body.size(); i++ ) {
2065     Node *old = loop->_body.at(i);
2066     Node *nnn = old_new[old->_idx];
2067     // Fix CFG/Loop controlling the new node
2068     if (has_ctrl(old)) {
2069       set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
2070     } else {
2071       set_loop(nnn, outer_loop->_parent);
2072       if (old->outcnt() > 0) {
2073         set_idom( nnn, old_new[idom(old)->_idx], dd );
2074       }
2075     }
2076     // Correct edges to the new node
2077     for( uint j = 0; j < nnn->req(); j++ ) {
2078         Node *n = nnn->in(j);
2079         if( n ) {
2080           IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
2081           if( loop->is_member( old_in_loop ) )
2082             nnn->set_req(j, old_new[n->_idx]);
2083         }
2084     }
2085     _igvn.hash_find_insert(nnn);
2086   }
2087 
2088   ResourceArea *area = Thread::current()->resource_area();
2089   Node_List extra_data_nodes(area); // data nodes in the outer strip mined loop
2090   clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
2091 
2092   // Step 3: Now fix control uses.  Loop varying control uses have already
2093   // been fixed up (as part of all input edges in Step 2).  Loop invariant
2094   // control uses must be either an IfFalse or an IfTrue.  Make a merge
2095   // point to merge the old and new IfFalse/IfTrue nodes; make the use
2096   // refer to this.
2097   Node_List worklist(area);
2098   uint new_counter = C->unique();
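       // Nodes created from here on (the exit-merging Regions below and their
       // Phis) have _idx >= new_counter; this is how later code tells freshly
       // made merge points from pre-existing CFG nodes.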
2099   for( i = 0; i < loop->_body.size(); i++ ) {
2100     Node* old = loop->_body.at(i);
2101     if( !old->is_CFG() ) continue;
2102 
2103     // Copy uses to a worklist, so I can munge the def-use info
2104     // with impunity.
2105     for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
2106       worklist.push(old->fast_out(j));
2107 
2108     while( worklist.size() ) {  // Visit all uses
2109       Node *use = worklist.pop();
2110       if (!has_node(use))  continue; // Ignore dead nodes
2111       IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
2112       if( !loop->is_member( use_loop ) && use->is_CFG() ) {
2113         // Both OLD and USE are CFG nodes here.
2114         assert( use->is_Proj(), "" );
2115         Node* nnn = old_new[old->_idx];
2116 
2117         Node* newuse = NULL;
2118         if (head->is_strip_mined() && mode != IgnoreStripMined) {
2119           CountedLoopNode* cl = head->as_CountedLoop();
2120           CountedLoopEndNode* cle = cl->loopexit();
2121           Node* cle_out = cle->proj_out_or_null(false);
2122           if (use == cle_out) {
2123             IfNode* le = cl->outer_loop_end();
2124             use = le->proj_out(false);
2125             use_loop = get_loop(use);
2126             if (mode == CloneIncludesStripMined) {
2127               nnn = old_new[le->_idx];
2128             } else {
2129               newuse = old_new[cle_out->_idx];
2130             }
2131           }
2132         }
2133         if (newuse == NULL) {
2134           newuse = use->clone();
2135         }
2136 
2137         // Clone the loop exit control projection
2138         if (C->do_vector_loop()) {
2139           cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
2140         }
2141         newuse->set_req(0,nnn);
2142         _igvn.register_new_node_with_optimizer(newuse);
2143         set_loop(newuse, use_loop);
2144         set_idom(newuse, nnn, dom_depth(nnn) + 1 );
2145 
2146         // We need a Region to merge the exit from the peeled body and the
2147         // exit from the old loop body.
2148         RegionNode *r = new RegionNode(3);
2149         // Map the old use to the new merge point
2150         old_new.map( use->_idx, r );
2151         uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
2152         assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
2153 
2154         // The original user of 'use' uses 'r' instead.
2155         for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
2156           Node* useuse = use->last_out(l);
2157           _igvn.rehash_node_delayed(useuse);
2158           uint uses_found = 0;
2159           if( useuse->in(0) == use ) {
2160             useuse->set_req(0, r);
2161             uses_found++;
2162             if( useuse->is_CFG() ) {
2163               assert( dom_depth(useuse) > dd_r, "" );
2164               set_idom(useuse, r, dom_depth(useuse));
2165             }
2166           }
2167           for( uint k = 1; k < useuse->req(); k++ ) {
2168             if( useuse->in(k) == use ) {
2169               useuse->set_req(k, r);
2170               uses_found++;
2171               if (useuse->is_Loop() && k == LoopNode::EntryControl) {
2172                 assert(dom_depth(useuse) > dd_r , "");
2173                 set_idom(useuse, r, dom_depth(useuse));
2174               }
2175             }
2176           }
2177           l -= uses_found;    // we deleted 1 or more copies of this edge
2178         }
2179 
2180         // Now finish up 'r'
2181         r->set_req( 1, newuse );
2182         r->set_req( 2,    use );
2183         _igvn.register_new_node_with_optimizer(r);
2184         set_loop(r, use_loop);
2185         set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
2186       } // End of if a loop-exit test
2187     }
2188   }
2189 
2190   // Step 4: If loop-invariant use is not control, it must be dominated by a
2191   // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
2192   // there if needed.  Make a Phi there merging old and new used values.
2193   Node_List *split_if_set = NULL;
2194   Node_List *split_bool_set = NULL;
2195   Node_List *split_cex_set = NULL;
2196   for( i = 0; i < loop->_body.size(); i++ ) {
2197     Node* old = loop->_body.at(i);
2198     clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2199                                 split_bool_set, split_cex_set, worklist, new_counter,
2200                                 mode);
2201   }
2202 
2203   for (i = 0; i < extra_data_nodes.size(); i++) {
2204     Node* old = extra_data_nodes.at(i);
2205     clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
2206                                 split_bool_set, split_cex_set, worklist, new_counter,
2207                                 mode);
2208   }
2209 
2210   // Check for IFs that need splitting/cloning.  Happens if an IF outside of
2211   // the loop uses a condition set in the loop.  The original IF probably
2212 // takes control from one or more OLD Regions (which in turn get control
2213 // from NEW Regions).  In any case, there will be a set of Phis for each
2214 // merge point from the IF up to where the original BOOL def exits the loop.
2215   if (split_if_set) {
2216     while (split_if_set->size()) {
2217       Node *iff = split_if_set->pop();
2218       if (iff->in(1)->is_Phi()) {
2219         Node *b = clone_iff(iff->in(1)->as_Phi(), loop);
2220         _igvn.replace_input_of(iff, 1, b);
2221       }
2222     }
2223   }
2224   if (split_bool_set) {
2225     while (split_bool_set->size()) {
2226       Node *b = split_bool_set->pop();
2227       Node *phi = b->in(1);
2228       assert(phi->is_Phi(), "");
2229       CmpNode *cmp = clone_bool((PhiNode*)phi, loop);
2230       _igvn.replace_input_of(b, 1, cmp);
2231     }
2232   }
2233   if (split_cex_set) {
2234     while (split_cex_set->size()) {
2235       Node *b = split_cex_set->pop();
2236       assert(b->in(0)->is_Region(), "");
2237       assert(b->in(1)->is_Phi(), "");
2238       assert(b->in(0)->in(0) == b->in(1)->in(0), "");
2239       split_up(b, b->in(0), NULL);
2240     }
2241   }
2242 
2243 }
2244 
2245 
2246 //---------------------- stride_of_possible_iv -------------------------------------
2247 // Looks for an iff/bool/comp with one operand of the compare
2248 // being a cycle involving an add and a phi,
2249 // with an optional truncation (left-shift followed by a right-shift)
2250 // of the add. Returns zero if not an iv.
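     // For example (a sketch): for an induction cycle "phi = Phi(init, phi + k)"
     // with constant k, the compare operand is either the Phi itself or the
     // (possibly truncated) AddI feeding it, and k is the stride returned.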
2251 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
2252   Node* trunc1 = NULL;
2253   Node* trunc2 = NULL;
2254   const TypeInt* ttype = NULL;
2255   if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
2256     return 0;
2257   }
2258   BoolNode* bl = iff->in(1)->as_Bool();
2259   Node* cmp = bl->in(1);
2260   if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
2261     return 0;
2262   }
2263   // Must have an invariant operand
2264   if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
2265     return 0;
2266   }
2267   Node* add2 = NULL;
2268   Node* cmp1 = cmp->in(1);
2269   if (cmp1->is_Phi()) {
2270     // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
2271     Node* phi = cmp1;
2272     for (uint i = 1; i < phi->req(); i++) {
2273       Node* in = phi->in(i);
2274       Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
2275                                 &trunc1, &trunc2, &ttype);
2276       if (add && add->in(1) == phi) {
2277         add2 = add->in(2);
2278         break;
2279       }
2280     }
2281   } else {
2282     // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
2283     Node* addtrunc = cmp1;
2284     Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
2285                                 &trunc1, &trunc2, &ttype);
2286     if (add && add->in(1)->is_Phi()) {
2287       Node* phi = add->in(1);
2288       for (uint i = 1; i < phi->req(); i++) {
2289         if (phi->in(i) == addtrunc) {
2290           add2 = add->in(2);
2291           break;
2292         }
2293       }
2294     }
2295   }
2296   if (add2 != NULL) {
2297     const TypeInt* add2t = _igvn.type(add2)->is_int();
2298     if (add2t->is_con()) {
2299       return add2t->get_con();
2300     }
2301   }
2302   return 0;
2303 }
2304 
2305 
2306 //---------------------- stay_in_loop -------------------------------------
2307 // Return the (unique) control output node that's in the loop (if it exists.)
2308 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
2309   Node* unique = NULL;
2310   if (!n) return NULL;
2311   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2312     Node* use = n->fast_out(i);
2313     if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
2314       if (unique != NULL) {
2315         return NULL;
2316       }
2317       unique = use;
2318     }
2319   }
2320   return unique;
2321 }
2322 
2323 //------------------------------ register_node -------------------------------------
2324 // Utility to register node "n" with PhaseIdealLoop
2325 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
2326   _igvn.register_new_node_with_optimizer(n);
2327   loop->_body.push(n);
2328   if (n->is_CFG()) {
2329     set_loop(n, loop);
2330     set_idom(n, pred, ddepth);
2331   } else {
2332     set_ctrl(n, pred);
2333   }
2334 }
2335 
2336 //------------------------------ proj_clone -------------------------------------
2337 // Utility to create an if-projection
2338 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
2339   ProjNode* c = p->clone()->as_Proj();
2340   c->set_req(0, iff);
2341   return c;
2342 }
2343 
2344 //------------------------------ short_circuit_if -------------------------------------
2345 // Force the iff control output to be the live_proj
2346 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2347   guarantee(live_proj != NULL, "null projection");
2348   int proj_con = live_proj->_con;
2349   assert(proj_con == 0 || proj_con == 1, "false or true projection");
2350   Node *con = _igvn.intcon(proj_con);
2351   set_ctrl(con, C->root());
2352   if (iff) {
2353     iff->set_req(1, con);
2354   }
2355   return con;
2356 }
2357 
2358 //------------------------------ insert_if_before_proj -------------------------------------
2359 // Insert a new if before an if projection (* - new node)
2360 //
2361 // before
2362 //           if(test)
2363 //           /     \
2364 //          v       v
2365 //    other-proj   proj (arg)
2366 //
2367 // after
2368 //           if(test)
2369 //           /     \
2370 //          /       v
2371 //         |      * proj-clone
2372 //         v          |
2373 //    other-proj      v
2374 //                * new_if(relop(cmp[IU](left,right)))
2375 //                  /  \
2376 //                 v    v
2377 //         * new-proj  proj
2378 //         (returned)
2379 //
2380 ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
2381   IfNode* iff = proj->in(0)->as_If();
2382   IdealLoopTree *loop = get_loop(proj);
2383   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2384   int ddepth = dom_depth(proj);
2385 
2386   _igvn.rehash_node_delayed(iff);
2387   _igvn.rehash_node_delayed(proj);
2388 
2389   proj->set_req(0, NULL);  // temporary disconnect
2390   ProjNode* proj2 = proj_clone(proj, iff);
2391   register_node(proj2, loop, iff, ddepth);
2392 
2393   Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
2394   register_node(cmp, loop, proj2, ddepth);
2395 
2396   BoolNode* bol = new BoolNode(cmp, relop);
2397   register_node(bol, loop, proj2, ddepth);
2398 
2399   int opcode = iff->Opcode();
2400   assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
2401   IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
2402     new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
2403   register_node(new_if, loop, proj2, ddepth);
2404 
2405   proj->set_req(0, new_if); // reattach
2406   set_idom(proj, new_if, ddepth);
2407 
2408   ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
2409   guarantee(new_exit != NULL, "null exit node");
2410   register_node(new_exit, get_loop(other_proj), new_if, ddepth);
2411 
2412   return new_exit;
2413 }
2414 
2415 //------------------------------ insert_region_before_proj -------------------------------------
2416 // Insert a region before an if projection (* - new node)
2417 //
2418 // before
2419 //           if(test)
2420 //          /      |
2421 //         v       |
2422 //       proj      v
2423 //               other-proj
2424 //
2425 // after
2426 //           if(test)
2427 //          /      |
2428 //         v       |
2429 // * proj-clone    v
2430 //         |     other-proj
2431 //         v
2432 // * new-region
2433 //         |
2434 //         v
2435 // *      dum_if
2436 //       /     \
2437 //      v       \
2438 // * dum-proj    v
2439 //              proj
2440 //
2441 RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2442   IfNode* iff = proj->in(0)->as_If();
2443   IdealLoopTree *loop = get_loop(proj);
2444   ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2445   int ddepth = dom_depth(proj);
2446 
2447   _igvn.rehash_node_delayed(iff);
2448   _igvn.rehash_node_delayed(proj);
2449 
2450   proj->set_req(0, NULL);  // temporary disconnect
2451   ProjNode* proj2 = proj_clone(proj, iff);
2452   register_node(proj2, loop, iff, ddepth);
2453 
2454   RegionNode* reg = new RegionNode(2);
2455   reg->set_req(1, proj2);
2456   register_node(reg, loop, iff, ddepth);
2457 
2458   IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2459   register_node(dum_if, loop, reg, ddepth);
2460 
2461   proj->set_req(0, dum_if); // reattach
2462   set_idom(proj, dum_if, ddepth);
2463 
2464   ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2465   register_node(dum_proj, loop, dum_if, ddepth);
2466 
2467   return reg;
2468 }
2469 
2470 //------------------------------ insert_cmpi_loop_exit -------------------------------------
2471 // Clone a signed compare loop exit from an unsigned compare and
2472 // insert it before the unsigned cmp on the stay-in-loop path.
2473 // All new nodes are inserted in the dominator tree between the original
2474 // if and its projections.  The original if test is replaced with
2475 // a constant to force the stay-in-loop path.
2476 //
2477 // This is done to make sure that the original if and its projections
2478 // still dominate the same set of control nodes, that the ctrl() relation
2479 // from data nodes to them is preserved, and that their loop nesting is
2480 // preserved.
2481 //
2482 // before
2483 //          if(i <u limit)    unsigned compare loop exit
2484 //         /       |
2485 //        v        v
2486 //   exit-proj   stay-in-loop-proj
2487 //
2488 // after
2489 //          if(stay-in-loop-const)  original if
2490 //         /       |
2491 //        /        v
2492 //       /  if(i <  limit)    new signed test
2493 //      /  /       |
2494 //     /  /        v
2495 //    /  /  if(i <u limit)    new cloned unsigned test
2496 //   /  /   /      |
2497 //   v  v  v       |
2498 //    region       |
2499 //        |        |
2500 //      dum-if     |
2501 //     /  |        |
2502 // ether  |        |
2503 //        v        v
2504 //   exit-proj   stay-in-loop-proj
2505 //
2506 IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2507   const bool Signed   = true;
2508   const bool Unsigned = false;
2509 
2510   BoolNode* bol = if_cmpu->in(1)->as_Bool();
2511   if (bol->_test._test != BoolTest::lt) return NULL;
2512   CmpNode* cmpu = bol->in(1)->as_Cmp();
2513   if (cmpu->Opcode() != Op_CmpU) return NULL;
2514   int stride = stride_of_possible_iv(if_cmpu);
2515   if (stride == 0) return NULL;
2516 
2517   Node* lp_proj = stay_in_loop(if_cmpu, loop);
2518   guarantee(lp_proj != NULL, "null loop node");
2519 
2520   ProjNode* lp_continue = lp_proj->as_Proj();
2521   ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2522 
2523   Node* limit = NULL;
2524   if (stride > 0) {
2525     limit = cmpu->in(2);
2526   } else {
2527     limit = _igvn.makecon(TypeInt::ZERO);
2528     set_ctrl(limit, C->root());
2529   }
2530   // Create a new region on the exit path
2531   RegionNode* reg = insert_region_before_proj(lp_exit);
2532   guarantee(reg != NULL, "null region node");
2533 
2534   // Clone the if-cmpu-true-false using a signed compare
2535   BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2536   ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2537   reg->add_req(cmpi_exit);
2538 
2539   // Clone the if-cmpu-true-false
2540   BoolTest::mask rel_u = bol->_test._test;
2541   ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2542   reg->add_req(cmpu_exit);
2543 
2544   // Force original if to stay in loop.
2545   short_circuit_if(if_cmpu, lp_continue);
2546 
2547   return cmpi_exit->in(0)->as_If();
2548 }
2549 
2550 //------------------------------ remove_cmpi_loop_exit -------------------------------------
2551 // Remove a previously inserted signed compare loop exit.
2552 void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2553   Node* lp_proj = stay_in_loop(if_cmp, loop);
2554   assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2555          stay_in_loop(lp_proj, loop)->is_If() &&
2556          stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2557   Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2558   set_ctrl(con, C->root());
2559   if_cmp->set_req(1, con);
2560 }
2561 
2562 //------------------------------ scheduled_nodelist -------------------------------------
2563 // Create a post order schedule of nodes that are in the
2564 // "member" set.  The list is returned in "sched".
2565 // The first node in "sched" is the loop head, followed by
2566 // nodes which have no inputs in the "member" set, and then
2567 // followed by the nodes that have an immediate input dependence
2568 // on a node in "sched".
2569 void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2570 
2571   assert(member.test(loop->_head->_idx), "loop head must be in member set");
2572   Arena *a = Thread::current()->resource_area();
2573   VectorSet visited(a);
2574   Node_Stack nstack(a, loop->_body.size());
2575 
2576   Node* n  = loop->_head;  // top of stack is cached in "n"
2577   uint idx = 0;
2578   visited.set(n->_idx);
2579 
2580   // Initially push all with no inputs from within member set
2581   for(uint i = 0; i < loop->_body.size(); i++ ) {
2582     Node *elt = loop->_body.at(i);
2583     if (member.test(elt->_idx)) {
2584       bool found = false;
2585       for (uint j = 0; j < elt->req(); j++) {
2586         Node* def = elt->in(j);
2587         if (def && member.test(def->_idx) && def != elt) {
2588           found = true;
2589           break;
2590         }
2591       }
2592       if (!found && elt != loop->_head) {
2593         nstack.push(n, idx);
2594         n = elt;
2595         assert(!visited.test(n->_idx), "not seen yet");
2596         visited.set(n->_idx);
2597       }
2598     }
2599   }
2600 
2601   // traverse outs that are in the member set
2602   while (true) {
2603     if (idx < n->outcnt()) {
2604       Node* use = n->raw_out(idx);
2605       idx++;
2606       if (!visited.test_set(use->_idx)) {
2607         if (member.test(use->_idx)) {
2608           nstack.push(n, idx);
2609           n = use;
2610           idx = 0;
2611         }
2612       }
2613     } else {
2614       // All outputs processed
2615       sched.push(n);
2616       if (nstack.is_empty()) break;
2617       n   = nstack.node();
2618       idx = nstack.index();
2619       nstack.pop();
2620     }
2621   }
2622 }
2623 
2624 
2625 //------------------------------ has_use_in_set -------------------------------------
2626 // Has a use in the vector set
2627 bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2628   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2629     Node* use = n->fast_out(j);
2630     if (vset.test(use->_idx)) {
2631       return true;
2632     }
2633   }
2634   return false;
2635 }
2636 
2637 
2638 //------------------------------ has_use_internal_to_set -------------------------------------
2639 // Has use internal to the vector set (ie. not in a phi at the loop head)
2640 bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2641   Node* head  = loop->_head;
2642   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2643     Node* use = n->fast_out(j);
2644     if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2645       return true;
2646     }
2647   }
2648   return false;
2649 }
2650 
2651 
2652 //------------------------------ clone_for_use_outside_loop -------------------------------------
2653 // clone "n" for uses that are outside of the loop
2654 int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2655   int cloned = 0;
2656   assert(worklist.size() == 0, "should be empty");
2657   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2658     Node* use = n->fast_out(j);
2659     if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2660       worklist.push(use);
2661     }
2662   }
2663   while( worklist.size() ) {
2664     Node *use = worklist.pop();
2665     if (!has_node(use) || use->in(0) == C->top()) continue;
2666     uint j;
2667     for (j = 0; j < use->req(); j++) {
2668       if (use->in(j) == n) break;
2669     }
2670     assert(j < use->req(), "must be there");
2671 
2672     // clone "n" and insert it between the inputs of "n" and the use outside the loop
2673     Node* n_clone = n->clone();
2674     _igvn.replace_input_of(use, j, n_clone);
2675     cloned++;
2676     Node* use_c;
2677     if (!use->is_Phi()) {
2678       use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2679     } else {
2680       // Use in a phi is considered a use in the associated predecessor block
2681       use_c = use->in(0)->in(j);
2682     }
2683     set_ctrl(n_clone, use_c);
2684     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2685     get_loop(use_c)->_body.push(n_clone);
2686     _igvn.register_new_node_with_optimizer(n_clone);
2687 #if !defined(PRODUCT)
2688     if (TracePartialPeeling) {
2689       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2690     }
2691 #endif
2692   }
2693   return cloned;
2694 }
2695 
2696 
2697 //------------------------------ clone_for_special_use_inside_loop -------------------------------------
2698 // clone "n" for special uses that are in the not_peeled region.
2699 // If these def-uses occur in separate blocks, the code generator
2700 // marks the method as not compilable.  For example, if a "BoolNode"
2701 // is in a different basic block than the "IfNode" that uses it, then
2702 // the compilation is aborted in the code generator.
2703 void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2704                                                         VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2705   if (n->is_Phi() || n->is_Load()) {
2706     return;
2707   }
2708   assert(worklist.size() == 0, "should be empty");
2709   for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2710     Node* use = n->fast_out(j);
2711     if ( not_peel.test(use->_idx) &&
2712          (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2713          use->in(1) == n)  {
2714       worklist.push(use);
2715     }
2716   }
2717   if (worklist.size() > 0) {
2718     // clone "n" and insert it between inputs of "n" and the use
2719     Node* n_clone = n->clone();
2720     loop->_body.push(n_clone);
2721     _igvn.register_new_node_with_optimizer(n_clone);
2722     set_ctrl(n_clone, get_ctrl(n));
2723     sink_list.push(n_clone);
2724     not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2725 #if !defined(PRODUCT)
2726     if (TracePartialPeeling) {
2727       tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2728     }
2729 #endif
2730     while( worklist.size() ) {
2731       Node *use = worklist.pop();
2732       _igvn.rehash_node_delayed(use);
2733       for (uint j = 1; j < use->req(); j++) {
2734         if (use->in(j) == n) {
2735           use->set_req(j, n_clone);
2736         }
2737       }
2738     }
2739   }
2740 }
2741 
2742 
2743 //------------------------------ insert_phi_for_loop -------------------------------------
2744 // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
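     // The resulting phi has the shape Phi(lp, lp_entry_val, back_edge_val),
     // i.e. in(LoopNode::EntryControl) == lp_entry_val and
     // in(LoopNode::LoopBackControl) == back_edge_val.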
2745 void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2746   Node *phi = PhiNode::make(lp, back_edge_val);
2747   phi->set_req(LoopNode::EntryControl, lp_entry_val);
2748   // Reuse an identical phi if one is already in the graph
2749   Node *hit = _igvn.hash_find_insert(phi);
2750   if( hit == NULL ) {
2751     _igvn.register_new_node_with_optimizer(phi);
2752     set_ctrl(phi, lp);
2753   } else {
2754     // Remove the new phi from the graph and use the hit
2755     _igvn.remove_dead_node(phi);
2756     phi = hit;
2757   }
2758   _igvn.replace_input_of(use, idx, phi);
2759 }
2760 
2761 #ifdef ASSERT
2762 //------------------------------ is_valid_loop_partition -------------------------------------
2763 // Validate the loop partition sets: peel and not_peel
2764 bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2765                                               VectorSet& not_peel ) {
2766   uint i;
2767   // Check that peel_list entries are in the peel set
2768   for (i = 0; i < peel_list.size(); i++) {
2769     if (!peel.test(peel_list.at(i)->_idx)) {
2770       return false;
2771     }
2772   }
2773   // Check that all loop members are in exactly one of the peel or not_peel sets
2774   for (i = 0; i < loop->_body.size(); i++ ) {
2775     Node *def  = loop->_body.at(i);
2776     uint di = def->_idx;
2777     // Check that peel set elements are in peel_list
2778     if (peel.test(di)) {
2779       if (not_peel.test(di)) {
2780         return false;
2781       }
2782       // Must be in peel_list also
2783       bool found = false;
2784       for (uint j = 0; j < peel_list.size(); j++) {
2785         if (peel_list.at(j)->_idx == di) {
2786           found = true;
2787           break;
2788         }
2789       }
2790       if (!found) {
2791         return false;
2792       }
2793     } else if (not_peel.test(di)) {
2794       if (peel.test(di)) {
2795         return false;
2796       }
2797     } else {
2798       return false;
2799     }
2800   }
2801   return true;
2802 }
2803 
2804 //------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2805 // Ensure a use outside of loop is of the right form
2806 bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2807   Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2808   return (use->is_Phi() &&
2809           use_c->is_Region() && use_c->req() == 3 &&
2810           (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2811            use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2812            use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2813           loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2814 }
2815 
2816 //------------------------------ is_valid_clone_loop_form -------------------------------------
2817 // Ensure that all uses outside of loop are of the right form
2818 bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2819                                                uint orig_exit_idx, uint clone_exit_idx) {
2820   uint len = peel_list.size();
2821   for (uint i = 0; i < len; i++) {
2822     Node *def = peel_list.at(i);
2823 
2824     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2825       Node *use = def->fast_out(j);
2826       Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2827       if (!loop->is_member(get_loop(use_c))) {
2828         // use is not in the loop, check for correct structure
2829         if (use->in(0) == def) {
2830           // Okay
2831         } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2832           return false;
2833         }
2834       }
2835     }
2836   }
2837   return true;
2838 }
2839 #endif
2840 
2841 //------------------------------ partial_peel -------------------------------------
2842 // Partially peel (aka loop rotation) the top portion of a loop (called
2843 // the peel section below) by cloning it and placing one copy just before
2844 // the new loop head and the other copy at the bottom of the new loop.
2845 //
2846 //    before                       after                where it came from
2847 //
2848 //    stmt1                        stmt1
2849 //  loop:                          stmt2                     clone
2850 //    stmt2                        if condA goto exitA       clone
2851 //    if condA goto exitA        new_loop:                   new
2852 //    stmt3                        stmt3                     clone
2853 //    if !condB goto loop          if condB goto exitB       clone
2854 //  exitB:                         stmt2                     orig
2855 //    stmt4                        if !condA goto new_loop   orig
2856 //  exitA:                         goto exitA
2857 //                               exitB:
2858 //                                 stmt4
2859 //                               exitA:
2860 //
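     // As a rough source-level sketch (not taken from any particular program),
     // the rotation above corresponds to turning
     //
     //   stmt1;
     //   while (true) {
     //     stmt2;
     //     if (condA) break;             // exit A
     //     stmt3;
     //     if (condB) { stmt4; break; }  // exit B, then falls into exit A
     //   }
     //
     // into
     //
     //   stmt1;
     //   stmt2;
     //   if (!condA) {
     //     do {
     //       stmt3;
     //       if (condB) { stmt4; break; }  // exit B
     //       stmt2;
     //     } while (!condA);               // condA exits to A
     //   }
     //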
2861 // Step 1: find the cut point: an exit test on probable
2862 //         induction variable.
2863 // Step 2: schedule (with cloning) operations in the peel
2864 //         section that can be executed after the cut into
2865 //         the section that is not peeled.  This may need
2866 //         to clone operations into exit blocks.  For
2867 //         instance, a reference to A[i] in the not-peel
2868 //         section and a reference to B[i] in an exit block
2869 //         may cause a left-shift of i by 2 to be placed
2870 //         in the peel block.  This step will clone the left
2871 //         shift into the exit block and sink the left shift
2872 //         from the peel to the not-peel section.
2873 // Step 3: clone the loop, retarget the control, and insert
2874 //         phis for values that are live across the new loop
2875 //         head.  This is very dependent on the graph structure
2876 //         from clone_loop.  It creates region nodes for
2877 //         exit control and associated phi nodes for values that
2878 //         flow out of the loop through that exit.  The region
2879 //         node is dominated by the clone's control projection.
2880 //         So the clone's peel section is placed before the
2881 //         new loop head, and the clone's not-peel section
2882 //         forms the top part of the new loop.  The original
2883 //         peel section forms the tail of the new loop.
2884 // Step 4: update the dominator tree and recompute the
2885 //         dominator depth.
2886 //
2887 //                   orig
2888 //
2889 //                   stmt1
2890 //                     |
2891 //                     v
2892 //               loop predicate
2893 //                     |
2894 //                     v
2895 //                   loop<----+
2896 //                     |      |
2897 //                   stmt2    |
2898 //                     |      |
2899 //                     v      |
2900 //                    ifA     |
2901 //                   / |      |
2902 //                  v  v      |
2903 //               false true   ^  <-- last_peel
2904 //               /     |      |
2905 //              /   ===|==cut |
2906 //             /     stmt3    |  <-- first_not_peel
2907 //            /        |      |
2908 //            |        v      |
2909 //            v       ifB     |
2910 //          exitA:   / \      |
2911 //                  /   \     |
2912 //                 v     v    |
2913 //               false true   |
2914 //               /       \    |
2915 //              /         ----+
2916 //             |
2917 //             v
2918 //           exitB:
2919 //           stmt4
2920 //
2921 //
2922 //            after clone loop
2923 //
2924 //                   stmt1
2925 //                     |
2926 //                     v
2927 //               loop predicate
2928 //                 /       \
2929 //        clone   /         \   orig
2930 //               /           \
2931 //              /             \
2932 //             v               v
2933 //   +---->loop                loop<----+
2934 //   |      |                    |      |
2935 //   |    stmt2                stmt2    |
2936 //   |      |                    |      |
2937 //   |      v                    v      |
2938 //   |      ifA                 ifA     |
2939 //   |      | \                / |      |
2940 //   |      v  v              v  v      |
2941 //   ^    true  false      false true   ^  <-- last_peel
2942 //   |      |   ^   \       /    |      |
2943 //   | cut==|==  \   \     /  ===|==cut |
2944 //   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2945 //   |      |    dom   | |       |      |
2946 //   |      v      \  1v v2      v      |
2947 //   |      ifB     regionA     ifB     |
2948 //   |      / \        |       / \      |
2949 //   |     /   \       v      /   \     |
2950 //   |    v     v    exitA:  v     v    |
2951 //   |    true  false      false true   |
2952 //   |    /     ^   \      /       \    |
2953 //   +----       \   \    /         ----+
2954 //               dom  \  /
2955 //                 \  1v v2
2956 //                  regionB
2957 //                     |
2958 //                     v
2959 //                   exitB:
2960 //                   stmt4
2961 //
2962 //
2963 //           after partial peel
2964 //
2965 //                  stmt1
2966 //                     |
2967 //                     v
2968 //               loop predicate
2969 //                 /
2970 //        clone   /             orig
2971 //               /          TOP
2972 //              /             \
2973 //             v               v
2974 //    TOP->loop                loop----+
2975 //          |                    |      |
2976 //        stmt2                stmt2    |
2977 //          |                    |      |
2978 //          v                    v      |
2979 //          ifA                 ifA     |
2980 //          | \                / |      |
2981 //          v  v              v  v      |
2982 //        true  false      false true   |     <-- last_peel
2983 //          |   ^   \       /    +------|---+
2984 //  +->newloop   \   \     /  === ==cut |   |
2985 //  |     stmt3   \   \   /     TOP     |   |
2986 //  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2987 //  |       v      \  1v v2      v      |   |
2988 //  |       ifB     regionA     ifB     ^   v
2989 //  |       / \        |       / \      |   |
2990 //  |      /   \       v      /   \     |   |
2991 //  |     v     v    exitA:  v     v    |   |
2992 //  |     true  false      false true   |   |
2993 //  |     /     ^   \      /       \    |   |
2994 //  |    |       \   \    /         v   |   |
2995 //  |    |       dom  \  /         TOP  |   |
2996 //  |    |         \  1v v2             |   |
2997 //  ^    v          regionB             |   |
2998 //  |    |             |                |   |
2999 //  |    |             v                ^   v
3000 //  |    |           exitB:             |   |
3001 //  |    |           stmt4              |   |
3002 //  |    +------------>-----------------+   |
3003 //  |                                       |
3004 //  +-----------------<---------------------+
3005 //
3006 //
3007 //              final graph
3008 //
3009 //                  stmt1
3010 //                    |
3011 //                    v
3012 //               loop predicate
3013 //                    |
3014 //                    v
3015 //                  stmt2 clone
3016 //                    |
3017 //                    v
3018 //         ........> ifA clone
3019 //         :        / |
3020 //        dom      /  |
3021 //         :      v   v
3022 //         :  false   true
3023 //         :  |       |
3024 //         :  |       v
3025 //         :  |    newloop<-----+
3026 //         :  |        |        |
3027 //         :  |     stmt3 clone |
3028 //         :  |        |        |
3029 //         :  |        v        |
3030 //         :  |       ifB       |
3031 //         :  |      / \        |
3032 //         :  |     v   v       |
3033 //         :  |  false true     |
3034 //         :  |   |     |       |
3035 //         :  |   v    stmt2    |
3036 //         :  | exitB:  |       |
3037 //         :  | stmt4   v       |
3038 //         :  |       ifA orig  |
3039 //         :  |      /  \       |
3040 //         :  |     /    \      |
3041 //         :  |    v     v      |
3042 //         :  |  false  true    |
3043 //         :  |  /        \     |
3044 //         :  v  v         -----+
3045 //          RegionA
3046 //             |
3047 //             v
3048 //           exitA
3049 //
3050 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3051 
3052   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3053   if (!loop->_head->is_Loop()) {
3054     return false;
       }
3055 
3056   LoopNode *head  = loop->_head->as_Loop();
3057 
3058   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3059     return false;
3060   }
3061 
3062   // Check for complex exit control
3063   for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
3064     Node *n = loop->_body.at(ii);
3065     int opc = n->Opcode();
3066     if (n->is_Call()        ||
3067         opc == Op_Catch     ||
3068         opc == Op_CatchProj ||
3069         opc == Op_Jump      ||
3070         opc == Op_JumpProj) {
3071 #if !defined(PRODUCT)
3072       if (TracePartialPeeling) {
3073         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3074       }
3075 #endif
3076       return false;
3077     }
3078   }
3079 
3080   int dd = dom_depth(head);
3081 
3082   // Step 1: find cut point
3083 
3084   // Walk up dominators to loop head looking for first loop exit
3085   // which is executed on every path thru loop.
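       // Every If found on the idom chain from the tail up to the head dominates
       // the backedge, i.e. it is executed on every path that reaches the
       // backedge, so it is a candidate cut point.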
3086   IfNode *peel_if = NULL;
3087   IfNode *peel_if_cmpu = NULL;
3088 
3089   Node *iff = loop->tail();
3090   while( iff != head ) {
3091     if( iff->is_If() ) {
3092       Node *ctrl = get_ctrl(iff->in(1));
3093       if (ctrl->is_top()) return false; // Dead test on live IF.
3094       // If loop-varying exit-test, check for induction variable
3095       if( loop->is_member(get_loop(ctrl)) &&
3096           loop->is_loop_exit(iff) &&
3097           is_possible_iv_test(iff)) {
3098         Node* cmp = iff->in(1)->in(1);
3099         if (cmp->Opcode() == Op_CmpI) {
3100           peel_if = iff->as_If();
3101         } else {
3102           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3103           peel_if_cmpu = iff->as_If();
3104         }
3105       }
3106     }
3107     iff = idom(iff);
3108   }
3109   // Prefer signed compare over unsigned compare.
3110   IfNode* new_peel_if = NULL;
3111   if (peel_if == NULL) {
3112     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3113       return false;   // No peel point found
3114     }
3115     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3116     if (new_peel_if == NULL) {
3117       return false;   // No peel point found
3118     }
3119     peel_if = new_peel_if;
3120   }
3121   Node* last_peel        = stay_in_loop(peel_if, loop);
3122   Node* first_not_peeled = stay_in_loop(last_peel, loop);
3123   if (first_not_peeled == NULL || first_not_peeled == head) {
3124     return false;
3125   }
3126 
3127 #if !defined(PRODUCT)
3128   if (TraceLoopOpts) {
3129     tty->print("PartialPeel  ");
3130     loop->dump_head();
3131   }
3132 
3133   if (TracePartialPeeling) {
3134     tty->print_cr("before partial peel one iteration");
3135     Node_List wl;
3136     Node* t = head->in(2);
3137     while (true) {
3138       wl.push(t);
3139       if (t == head) break;
3140       t = idom(t);
3141     }
3142     while (wl.size() > 0) {
3143       Node* tt = wl.pop();
3144       tt->dump();
3145       if (tt == last_peel) tty->print_cr("-- cut --");
3146     }
3147   }
3148 #endif
3149   ResourceArea *area = Thread::current()->resource_area();
3150   VectorSet peel(area);
3151   VectorSet not_peel(area);
3152   Node_List peel_list(area);
3153   Node_List worklist(area);
3154   Node_List sink_list(area);
3155 
3156   // The set of cfg nodes to peel consists of those that are executable from
3157   // the head through last_peel.
3158   assert(worklist.size() == 0, "should be empty");
3159   worklist.push(head);
3160   peel.set(head->_idx);
3161   while (worklist.size() > 0) {
3162     Node *n = worklist.pop();
3163     if (n != last_peel) {
3164       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3165         Node* use = n->fast_out(j);
3166         if (use->is_CFG() &&
3167             loop->is_member(get_loop(use)) &&
3168             !peel.test_set(use->_idx)) {
3169           worklist.push(use);
3170         }
3171       }
3172     }
3173   }
3174 
3175   // The set of non-cfg nodes to peel consists of those that are control
3176   // dependent on the cfg nodes.
3177   uint i;
3178   for(i = 0; i < loop->_body.size(); i++ ) {
3179     Node *n = loop->_body.at(i);
3180     Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
3181     if (peel.test(n_c->_idx)) {
3182       peel.set(n->_idx);
3183     } else {
3184       not_peel.set(n->_idx);
3185     }
3186   }
3187 
3188   // Step 2: move operations from the peeled section down into the
3189   //         not-peeled section
3190 
3191   // Get a post order schedule of nodes in the peel region
3192   // The result is returned in the right-most operand (peel_list).
3193   scheduled_nodelist(loop, peel, peel_list );
3194 
3195   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3196 
3197   // For future check for too many new phis
3198   uint old_phi_cnt = 0;
3199   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
3200     Node* use = head->fast_out(j);
3201     if (use->is_Phi()) old_phi_cnt++;
3202   }
3203 
3204 #if !defined(PRODUCT)
3205   if (TracePartialPeeling) {
3206     tty->print_cr("\npeeled list");
3207   }
3208 #endif
3209 
3210   // Evacuate nodes in peel region into the not_peeled region if possible
3211   uint new_phi_cnt = 0;
3212   uint cloned_for_outside_use = 0;
3213   for (i = 0; i < peel_list.size();) {
3214     Node* n = peel_list.at(i);
3215 #if !defined(PRODUCT)
3216     if (TracePartialPeeling) n->dump();
3217 #endif
3218     bool incr = true;
3219     if ( !n->is_CFG() ) {
3220 
3221       if ( has_use_in_set(n, not_peel) ) {
3222 
3223         // If not used internal to the peeled region,
3224         // move "n" from peeled to not_peeled region.
3225 
3226         if ( !has_use_internal_to_set(n, peel, loop) ) {
3227 
3228           // If not pinned and not a load (which may be anti-dependent on a store)
3229           // and not a CMove (Matcher expects only bool->cmove).
3230           if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove() && n->Opcode() != Op_ShenandoahWBMemProj) {
3231             cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
3232             sink_list.push(n);
3233             peel     >>= n->_idx; // delete n from peel set.
3234             not_peel <<= n->_idx; // add n to not_peel set.
3235             peel_list.remove(i);
3236             incr = false;
3237 #if !defined(PRODUCT)
3238             if (TracePartialPeeling) {
3239               tty->print_cr("sink to not_peeled region: %d newbb: %d",
3240                             n->_idx, get_ctrl(n)->_idx);
3241             }
3242 #endif
3243           }
3244         } else {
3245           // Otherwise check for special def-use cases that span
3246           // the peel/not_peel boundary such as bool->if
3247           clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
3248           new_phi_cnt++;
3249         }
3250       }
3251     }
3252     if (incr) i++;
3253   }
3254 
3255   if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
3256 #if !defined(PRODUCT)
3257     if (TracePartialPeeling) {
3258       tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
3259                     new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
3260     }
3261 #endif
3262     if (new_peel_if != NULL) {
3263       remove_cmpi_loop_exit(new_peel_if, loop);
3264     }
3265     // Inhibit more partial peeling on this loop
3266     assert(!head->is_partial_peel_loop(), "not partial peeled");
3267     head->mark_partial_peel_failed();
3268     if (cloned_for_outside_use > 0) {
3269       // Terminate this round of loop opts because
3270       // the graph outside this loop was changed.
3271       C->set_major_progress();
3272       return true;
3273     }
3274     return false;
3275   }
3276 
3277   // Step 3: clone loop, retarget control, and insert new phis
3278 
3279   // Create new loop head for new phis and to hang
3280   // the nodes being moved (sinked) from the peel region.
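       // new_head is created with last_peel as both its entry and (temporary)
       // backedge control and is spliced in between last_peel and
       // first_not_peeled.  Only its clone (new_head_clone) survives as the head
       // of the rotated loop; this copy is disconnected (both control inputs set
       // to top) further below.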
3281   LoopNode* new_head = new LoopNode(last_peel, last_peel);
3282   new_head->set_unswitch_count(head->unswitch_count()); // Preserve
3283   _igvn.register_new_node_with_optimizer(new_head);
3284   assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
3285   _igvn.replace_input_of(first_not_peeled, 0, new_head);
3286   set_loop(new_head, loop);
3287   loop->_body.push(new_head);
3288   not_peel.set(new_head->_idx);
3289   set_idom(new_head, last_peel, dom_depth(first_not_peeled));
3290   set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
3291 
3292   while (sink_list.size() > 0) {
3293     Node* n = sink_list.pop();
3294     set_ctrl(n, new_head);
3295   }
3296 
3297   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
3298 
3299   clone_loop(loop, old_new, dd, IgnoreStripMined);
3300 
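       // After clone_loop, values that escape the loop are merged under 2-input
       // Region/Phi pairs outside the loop; as the index names below indicate,
       // input 1 of such a Region comes from the cloned loop's exit projection
       // and input 2 from the original loop's exit projection.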
3301   const uint clone_exit_idx = 1;
3302   const uint orig_exit_idx  = 2;
3303   assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
3304 
3305   Node* head_clone             = old_new[head->_idx];
3306   LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
3307   Node* orig_tail_clone        = head_clone->in(2);
3308 
3309   // Add phi if "def" node is in peel set and "use" is not
3310 
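       // Two cases are handled below.  If the use is still inside the loop, the
       // cloned use gets a phi at the surviving loop head (new_head_clone) that
       // merges the cloned def on the entry path with the original def on the
       // backedge path.  If the use is an exit phi outside the loop, the same
       // kind of phi is inserted on its clone-side input whenever the def's
       // live range crosses the cut.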
3311   for(i = 0; i < peel_list.size(); i++ ) {
3312     Node *def  = peel_list.at(i);
3313     if (!def->is_CFG()) {
3314       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
3315         Node *use = def->fast_out(j);
3316         if (has_node(use) && use->in(0) != C->top() &&
3317             (!peel.test(use->_idx) ||
3318              (use->is_Phi() && use->in(0) == head)) ) {
3319           worklist.push(use);
3320         }
3321       }
3322       while( worklist.size() ) {
3323         Node *use = worklist.pop();
3324         for (uint j = 1; j < use->req(); j++) {
3325           Node* n = use->in(j);
3326           if (n == def) {
3327 
3328             // "def" is in peel set, "use" is not in peel set
3329             // or "use" is in the entry boundary (a phi) of the peel set
3330 
3331             Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
3332 
3333             if ( loop->is_member(get_loop( use_c )) ) {
3334               // use is in loop
3335               if (old_new[use->_idx] != NULL) { // null for dead code
3336                 Node* use_clone = old_new[use->_idx];
3337                 _igvn.replace_input_of(use, j, C->top());
3338                 insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
3339               }
3340             } else {
3341               assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
3342               // use is not in the loop, check if the live range includes the cut
3343               Node* lp_if = use_c->in(orig_exit_idx)->in(0);
3344               if (not_peel.test(lp_if->_idx)) {
3345                 assert(j == orig_exit_idx, "use from original loop");
3346                 insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3347               }
3348             }
3349           }
3350         }
3351       }
3352     }
3353   }
3354 
3355   // Step 3b: retarget control
3356 
3357   // Redirect control to the new loop head if a cloned node in
3358   // the not_peeled region has control that points into the peeled region.
3359   // This is necessary because the cloned peeled region will be outside
3360   // the loop.
3361   //                            from    to
3362   //          cloned-peeled    <---+
3363   //    new_head_clone:            |    <--+
3364   //          cloned-not_peeled  in(0)    in(0)
3365   //          orig-peeled
3366 
3367   for(i = 0; i < loop->_body.size(); i++ ) {
3368     Node *n = loop->_body.at(i);
3369     if (!n->is_CFG()           && n->in(0) != NULL        &&
3370         not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3371       Node* n_clone = old_new[n->_idx];
3372       _igvn.replace_input_of(n_clone, 0, new_head_clone);
3373     }
3374   }
3375 
3376   // Backedge of the surviving new_head (the clone) is original last_peel
3377   _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3378 
3379   // Cut first node in original not_peel set
3380   _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
3381   new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
3382   new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
3383 
3384   // Copy head_clone back-branch info to original head
3385   // and remove original head's loop entry and
3386   // clone head's back-branch
3387   _igvn.rehash_node_delayed(head); // Multiple edge updates
3388   head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
3389   head->set_req(LoopNode::LoopBackControl, C->top());
3390   _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3391 
3392   // Similarly modify the phis
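       // Each phi at the original head now takes, as its entry value, the value
       // its clone carried around the backedge (i.e. the value after one peeled
       // iteration), and both backedge inputs (original and clone) are cut to top.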
3393   for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3394     Node* use = head->fast_out(k);
3395     if (use->is_Phi() && use->outcnt() > 0) {
3396       Node* use_clone = old_new[use->_idx];
3397       _igvn.rehash_node_delayed(use); // Multiple edge updates
3398       use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
3399       use->set_req(LoopNode::LoopBackControl, C->top());
3400       _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3401     }
3402   }
3403 
3404   // Step 4: update dominator tree and dominator depth
3405 
3406   set_idom(head, orig_tail_clone, dd);
3407   recompute_dom_depth();
3408 
3409   // Inhibit more partial peeling on this loop
3410   new_head_clone->set_partial_peel_loop();
3411   C->set_major_progress();
3412   loop->record_for_igvn();
3413 
3414 #if !defined(PRODUCT)
3415   if (TracePartialPeeling) {
3416     tty->print_cr("\nafter partial peel one iteration");
3417     Node_List wl(area);
3418     Node* t = last_peel;
3419     while (true) {
3420       wl.push(t);
3421       if (t == head_clone) break;
3422       t = idom(t);
3423     }
3424     while (wl.size() > 0) {
3425       Node* tt = wl.pop();
3426       if (tt == head) tty->print_cr("orig head");
3427       else if (tt == new_head_clone) tty->print_cr("new head");
3428       else if (tt == head_clone) tty->print_cr("clone head");
3429       tt->dump();
3430     }
3431   }
3432 #endif
3433   return true;
3434 }
3435 
3436 //------------------------------reorg_offsets----------------------------------
3437 // Reorganize offset computations to lower register pressure.  Mostly
3438 // prevent loop-fallout uses of the pre-incremented trip counter (which would
3439 // otherwise be live at the same time as the post-incremented trip counter,
3440 // forcing an extra register move).
3441 void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
3442   // Perform it only for canonical counted loops.
3443   // Loop's shape could be messed up by iteration_split_impl.
3444   if (!loop->_head->is_CountedLoop())
3445     return;
3446   if (!loop->_head->as_Loop()->is_valid_counted_loop())
3447     return;
3448 
3449   CountedLoopNode *cl = loop->_head->as_CountedLoop();
3450   CountedLoopEndNode *cle = cl->loopexit();
3451   Node *exit = cle->proj_out(false);
3452   Node *phi = cl->phi();
3453 
3454   // Check for the special case of folks using the pre-incremented
3455   // trip-counter on the fall-out path (forces the pre-incremented
3456   // and post-incremented trip counter to be live at the same time).
3457   // Fix this by adjusting to use the post-increment trip counter.
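       // Since incr is phi + stride, the code below rebuilds the fall-out value
       // as incr + (-stride), which is equal to phi but is expressed in terms of
       // the post-incremented counter; the Opaque2 node hides the expression so
       // igvn does not simply fold it back into phi.  For example, with a stride
       // of 1 a fall-out use of phi becomes a use of (incr - 1).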
3458 
3459   bool progress = true;
3460   while (progress) {
3461     progress = false;
3462     for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3463       Node* use = phi->fast_out(i);   // User of trip-counter
3464       if (!has_ctrl(use))  continue;
3465       Node *u_ctrl = get_ctrl(use);
3466       if (use->is_Phi()) {
3467         u_ctrl = NULL;
3468         for (uint j = 1; j < use->req(); j++)
3469           if (use->in(j) == phi)
3470             u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3471       }
3472       IdealLoopTree *u_loop = get_loop(u_ctrl);
3473       // Look for loop-invariant use
3474       if (u_loop == loop) continue;
3475       if (loop->is_member(u_loop)) continue;
3476       // Check that use is live out the bottom.  Assuming the trip-counter
3477       // update is right at the bottom, uses in the middle of the loop are OK.
3478       if (dom_lca(exit, u_ctrl) != exit) continue;
3479       // Hit!  Refactor use to use the post-incremented tripcounter.
3480       // Compute a post-increment tripcounter.
3481       Node *opaq = new Opaque2Node( C, cle->incr() );
3482       register_new_node(opaq, exit);
3483       Node *neg_stride = _igvn.intcon(-cle->stride_con());
3484       set_ctrl(neg_stride, C->root());
3485       Node *post = new AddINode( opaq, neg_stride);
3486       register_new_node(post, exit);
3487       _igvn.rehash_node_delayed(use);
3488       for (uint j = 1; j < use->req(); j++) {
3489         if (use->in(j) == phi)
3490           use->set_req(j, post);
3491       }
3492       // Since DU info changed, rerun loop
3493       progress = true;
3494       break;
3495     }
3496   }
3497 
3498 }