/*
 * Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp"
#include "opto/addnode.hpp"
#include "opto/callnode.hpp"
#include "opto/castnode.hpp"
#include "opto/connode.hpp"
// NOTE(review): castnode.hpp is included twice (see above); harmless due to
// include guards, but the duplicate could be removed in a cleanup pass.
#include "opto/castnode.hpp"
#include "opto/divnode.hpp"
#include "opto/inlinetypenode.hpp"
#include "opto/loopnode.hpp"
#include "opto/matcher.hpp"
#include "opto/mulnode.hpp"
#include "opto/movenode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/subnode.hpp"
#include "opto/subtypenode.hpp"
#include "utilities/macros.hpp"

//=============================================================================
//------------------------------split_thru_phi---------------------------------
// Split Node 'n' through merge point if there is enough win.
// 'policy' is the minimum number of 'wins' (per-path clones that constant-fold,
// simplify via Identity, or common up with an existing node) required for the
// split to be considered profitable. Returns the new Phi that replaces 'n',
// or NULL if splitting is unsafe or not profitable enough.
Node* PhaseIdealLoop::split_thru_phi(Node* n, Node* region, int policy) {
  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
    // ConvI2L may have type information on it which is unsafe to push up
    // so disable this for now
    return NULL;
  }

  // Splitting range check CastIIs through a loop induction Phi can
  // cause new Phis to be created that are left unrelated to the loop
  // induction Phi and prevent optimizations (vectorization)
  if (n->Opcode() == Op_CastII && region->is_CountedLoop() &&
      n->in(1) == region->as_CountedLoop()->phi()) {
    return NULL;
  }

  // Inline types should not be split through Phis because they cannot be merged
  // through Phi nodes but each value input needs to be merged individually.
  if (n->is_InlineType()) {
    return NULL;
  }

  // A Div/Mod whose divisor is a loop phi may not be split to the backedge
  // if the backedge value could be zero (see cannot_split_division below).
  if (cannot_split_division(n, region)) {
    return NULL;
  }

  int wins = 0;
  assert(!n->is_CFG(), "");
  assert(region->is_Region(), "");

  const Type* type = n->bottom_type();
  const TypeOopPtr* t_oop = _igvn.type(n)->isa_oopptr();
  Node* phi;
  if (t_oop != NULL && t_oop->is_known_instance_field()) {
    // Known-instance fields keep their precise alias info on the new Phi.
    int iid    = t_oop->instance_id();
    int index  = C->get_alias_index(t_oop);
    int offset = t_oop->offset();
    phi = new PhiNode(region, type, NULL, iid, index, offset);
  } else {
    phi = PhiNode::make_blank(region, n);
  }
  uint old_unique = C->unique();
  for (uint i = 1; i < region->req(); i++) {
    Node* x;
    Node* the_clone = NULL;
    if (region->in(i) == C->top()) {
      x = C->top();             // Dead path?  Use a dead data op
    } else {
      x = n->clone();           // Else clone up the data op
      the_clone = x;            // Remember for possible deletion.
      // Alter data node to use pre-phi inputs
      if (n->in(0) == region)
        x->set_req( 0, region->in(i) );
      for (uint j = 1; j < n->req(); j++) {
        Node* in = n->in(j);
        if (in->is_Phi() && in->in(0) == region)
          x->set_req(j, in->in(i)); // Use pre-Phi input for the clone
      }
    }
    // Check for a 'win' on some paths
    const Type* t = x->Value(&_igvn);

    bool singleton = t->singleton();

    // A TOP singleton indicates that there are no possible values incoming
    // along a particular edge. In most cases, this is OK, and the Phi will
    // be eliminated later in an Ideal call. However, we can't allow this to
    // happen if the singleton occurs on loop entry, as the elimination of
    // the PhiNode may cause the resulting node to migrate back to a previous
    // loop iteration.
    if (singleton && t == Type::TOP) {
      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
      // irreducible loop may not be indicated by an affirmative is_Loop());
      // therefore, the only top we can split thru a phi is on a backedge of
      // a loop.
      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
    }

    if (singleton) {
      wins++;
      x = ((PhaseGVN&)_igvn).makecon(t);
    } else {
      // We now call Identity to try to simplify the cloned node.
      // Note that some Identity methods call phase->type(this).
      // Make sure that the type array is big enough for
      // our new node, even though we may throw the node away.
      // (Note: This tweaking with igvn only works because x is a new node.)
      _igvn.set_type(x, t);
      // If x is a TypeNode, capture any more-precise type permanently into Node
      // otherwise it will be not updated during igvn->transform since
      // igvn->type(x) is set to x->Value() already.
      x->raise_bottom_type(t);
      Node* y = x->Identity(&_igvn);
      if (y != x) {
        wins++;
        x = y;
      } else {
        y = _igvn.hash_find(x);
        if (y) {
          wins++;
          x = y;
        } else {
          // Else x is a new node we are keeping
          // We do not need register_new_node_with_optimizer
          // because set_type has already been called.
          _igvn._worklist.push(x);
        }
      }
    }
    if (x != the_clone && the_clone != NULL)
      _igvn.remove_dead_node(the_clone);
    phi->set_req( i, x );
  }
  // Too few wins?
  if (wins <= policy) {
    _igvn.remove_dead_node(phi);
    return NULL;
  }

  // Record Phi
  register_new_node( phi, region );

  for (uint i2 = 1; i2 < phi->req(); i2++) {
    Node *x = phi->in(i2);
    // If we commoned up the cloned 'x' with another existing Node,
    // the existing Node picks up a new use.  We need to make the
    // existing Node occur higher up so it dominates its uses.
    Node *old_ctrl;
    IdealLoopTree *old_loop;

    if (x->is_Con()) {
      // Constant's control is always root.
      set_ctrl(x, C->root());
      continue;
    }
    // The occasional new node
    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
      old_ctrl = NULL;
      old_loop = NULL;               // Not in any prior loop
    } else {
      old_ctrl = get_ctrl(x);
      old_loop = get_loop(old_ctrl); // Get prior loop
    }
    // New late point must dominate new use
    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
    if (new_ctrl == old_ctrl) // Nothing is changed
      continue;

    IdealLoopTree *new_loop = get_loop(new_ctrl);

    // Don't move x into a loop if its uses are
    // outside of loop. Otherwise x will be cloned
    // for each use outside of this loop.
    IdealLoopTree *use_loop = get_loop(region);
    if (!new_loop->is_member(use_loop) &&
        (old_loop == NULL || !new_loop->is_member(old_loop))) {
      // Take early control, later control will be recalculated
      // during next iteration of loop optimizations.
      new_ctrl = get_early_ctrl(x);
      new_loop = get_loop(new_ctrl);
    }
    // Set new location
    set_ctrl(x, new_ctrl);
    // If changing loop bodies, see if we need to collect into new body
    if (old_loop != new_loop) {
      if (old_loop && !old_loop->_child)
        old_loop->_body.yank(x);
      if (!new_loop->_child)
        new_loop->_body.push(x);   // Collect body info
    }
  }

  return phi;
}

// Return true if 'n' is a Div or Mod node (without zero check If node which was removed earlier) with a loop phi divisor
// of a trip-counted (integer or long) loop with a backedge input that could be zero (include zero in its type range). In
// this case, we cannot split the division to the backedge as it could freely float above the loop exit check resulting in
// a division by zero. This situation is possible because the type of an increment node of an iv phi (trip-counter) could
// include zero while the iv phi does not (see PhiNode::Value() for trip-counted loops where we improve types of iv phis).
// We also need to check other loop phis as they could have been created in the same split-if pass when applying
// PhaseIdealLoop::split_thru_phi() to split nodes through an iv phi.
bool PhaseIdealLoop::cannot_split_division(const Node* n, const Node* region) const {
  const Type* zero;
  switch (n->Opcode()) {
    case Op_DivI:
    case Op_ModI:
      zero = TypeInt::ZERO;
      break;
    case Op_DivL:
    case Op_ModL:
      zero = TypeLong::ZERO;
      break;
    default:
      // Not a division at all: always safe to split.
      return false;
  }

  assert(n->in(0) == NULL, "divisions with zero check should already have bailed out earlier in split-if");
  Node* divisor = n->in(2);
  return is_divisor_counted_loop_phi(divisor, region) &&
         loop_phi_backedge_type_contains_zero(divisor, zero);
}

// True if 'divisor' is a Phi merging at 'loop' and 'loop' is a (long or int)
// counted loop head.
bool PhaseIdealLoop::is_divisor_counted_loop_phi(const Node* divisor, const Node* loop) {
  return loop->is_BaseCountedLoop() && divisor->is_Phi() && divisor->in(0) == loop;
}

// True if the type of the phi's backedge input intersects 'zero', i.e. the
// divisor could be zero on the backedge.
bool PhaseIdealLoop::loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const {
  return _igvn.type(phi_divisor->in(LoopNode::LoopBackControl))->filter_speculative(zero) != Type::TOP;
}

//------------------------------dominated_by------------------------------------
// Replace the dominated test with an obvious true or false.  Place it on the
// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
// live path up to the dominating control.
// 'prevdom' is the dominating projection of the dominating test, 'iff' the
// dominated test. 'flip' inverts which projection of 'iff' is considered the
// live path. 'exclude_loop_predicate' suppresses the data-node rewiring when
// the dominated test feeds a predicate/range-check uncommon trap.
void PhaseIdealLoop::dominated_by(IfProjNode* prevdom, IfNode* iff, bool flip, bool exclude_loop_predicate) {
  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }

  // prevdom is the dominating projection of the dominating test.
  assert(iff->Opcode() == Op_If ||
         iff->Opcode() == Op_CountedLoopEnd ||
         iff->Opcode() == Op_LongCountedLoopEnd ||
         iff->Opcode() == Op_RangeCheck,
         "Check this code when new subtype is added");

  int pop = prevdom->Opcode();
  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
  if (flip) {
    if (pop == Op_IfTrue)
      pop = Op_IfFalse;
    else
      pop = Op_IfTrue;
  }
  // 'con' is set to true or false to kill the dominated test.
  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root()); // Constant gets a new use
  // Hack the dominated test
  _igvn.replace_input_of(iff, 1, con);

  // If I dont have a reachable TRUE and FALSE path following the IfNode then
  // I can assume this path reaches an infinite loop.  In this case it's not
  // important to optimize the data Nodes - either the whole compilation will
  // be tossed or this path (and all data Nodes) will go dead.
  if (iff->outcnt() != 2) return;

  // Make control-dependent data Nodes on the live path (path that will remain
  // once the dominated IF is removed) become control-dependent on the
  // dominating projection.
  Node* dp = iff->proj_out_or_null(pop == Op_IfTrue);

  // Loop predicates may have depending checks which should not
  // be skipped. For example, range check predicate has two checks
  // for lower and upper bounds.
  if (dp == NULL)
    return;

  ProjNode* dp_proj  = dp->as_Proj();
  ProjNode* unc_proj = iff->proj_out(1 - dp_proj->_con)->as_Proj();
  if (exclude_loop_predicate &&
      (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_profile_predicate) != NULL ||
       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
    // If this is a range check (IfNode::is_range_check), do not
    // reorder because Compile::allow_range_check_smearing might have
    // changed the check.
    return; // Let IGVN transformation change control dependence.
  }

  IdealLoopTree* old_loop = get_loop(dp);

  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
    Node* cd = dp->fast_out(i); // Control-dependent node
    // Do not rewire Div and Mod nodes which could have a zero divisor to avoid skipping their zero check.
    if (cd->depends_only_on_test() && _igvn.no_dependent_zero_check(cd)) {
      assert(cd->in(0) == dp, "");
      _igvn.replace_input_of(cd, 0, prevdom);
      set_early_ctrl(cd, false);
      IdealLoopTree* new_loop = get_loop(get_ctrl(cd));
      if (old_loop != new_loop) {
        if (!old_loop->_child) {
          old_loop->_body.yank(cd);
        }
        if (!new_loop->_child) {
          new_loop->_body.push(cd);
        }
      }
      // Rewiring removed an edge from dp's out-list; step the fast
      // iterator back so no successor is skipped.
      --i;
      --imax;
    }
  }
}

//------------------------------has_local_phi_input----------------------------
// Return TRUE if 'n' has Phi inputs from its local block and no other
// block-local inputs (all non-local-phi inputs come from earlier blocks)
Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
  Node *n_ctrl = get_ctrl(n);
  // See if some inputs come from a Phi in this block, or from before
  // this block.
  uint i;
  for( i = 1; i < n->req(); i++ ) {
    Node *phi = n->in(i);
    if( phi->is_Phi() && phi->in(0) == n_ctrl )
      break;
  }
  if( i >= n->req() )
    return NULL;                // No Phi inputs; nowhere to clone thru

  // Check for inputs created between 'n' and the Phi input.  These
  // must split as well; they have already been given the chance
  // (courtesy of a post-order visit) and since they did not we must
  // recover the 'cost' of splitting them by being very profitable
  // when splitting 'n'.  Since this is unlikely we simply give up.
  for( i = 1; i < n->req(); i++ ) {
    Node *m = n->in(i);
    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
      // We allow the special case of AddP's with no local inputs.
      // This allows us to split-up address expressions.
      if (m->is_AddP() &&
          get_ctrl(m->in(AddPNode::Base)) != n_ctrl &&
          get_ctrl(m->in(AddPNode::Address)) != n_ctrl &&
          get_ctrl(m->in(AddPNode::Offset)) != n_ctrl) {
        // Move the AddP up to the dominating point. That's fine because control of m's inputs
        // must dominate get_ctrl(m) == n_ctrl and we just checked that the input controls are != n_ctrl.
        Node* c = find_non_split_ctrl(idom(n_ctrl));
        if (c->is_OuterStripMinedLoop()) {
          c->as_Loop()->verify_strip_mined(1);
          c = c->in(LoopNode::EntryControl);
        }
        set_ctrl_and_loop(m, c);
        continue;
      }
      return NULL;
    }
    assert(n->is_Phi() || m->is_Phi() || is_dominator(get_ctrl(m), n_ctrl), "m has strange control");
  }

  return n_ctrl;
}

// Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
// Helper for remix_address_expressions(): for integral type 'bt' (T_INT or
// T_LONG), rewrite (var + invar) << scale into (var << scale) + (invar << scale)
// so the invariant shift can be hoisted. Returns the replacement node, or NULL
// if the pattern does not apply.
Node* PhaseIdealLoop::remix_address_expressions_add_left_shift(Node* n, IdealLoopTree* n_loop, Node* n_ctrl, BasicType bt) {
  assert(bt == T_INT || bt == T_LONG, "only for integers");
  int n_op = n->Opcode();

  if (n_op == Op_LShift(bt)) {
    // Scale is loop invariant
    Node* scale = n->in(2);
    Node* scale_ctrl = get_ctrl(scale);
    IdealLoopTree* scale_loop = get_loop(scale_ctrl);
    if (n_loop == scale_loop || !scale_loop->is_member(n_loop)) {
      return NULL;
    }
    const TypeInt* scale_t = scale->bottom_type()->isa_int();
    if (scale_t != NULL && scale_t->is_con() && scale_t->get_con() >= 16) {
      return NULL;              // Dont bother with byte/short masking
    }
    // Add must vary with loop (else shift would be loop-invariant)
    Node* add = n->in(1);
    Node* add_ctrl = get_ctrl(add);
    IdealLoopTree* add_loop = get_loop(add_ctrl);
    if (n_loop != add_loop) {
      return NULL;  // happens w/ evil ZKM loops
    }

    // Convert I-V into I+ (0-V); same for V-I
    if (add->Opcode() == Op_Sub(bt) &&
        _igvn.type(add->in(1)) != TypeInteger::zero(bt)) {
      assert(add->Opcode() == Op_SubI || add->Opcode() == Op_SubL, "");
      Node* zero = _igvn.integercon(0, bt);
      set_ctrl(zero, C->root());
      Node* neg = SubNode::make(zero, add->in(2), bt);
      register_new_node(neg, get_ctrl(add->in(2)));
      add = AddNode::make(add->in(1), neg, bt);
      register_new_node(add, add_ctrl);
    }
    if (add->Opcode() != Op_Add(bt)) return NULL;
    assert(add->Opcode() == Op_AddI || add->Opcode() == Op_AddL, "");
    // See if one add input is loop invariant
    Node* add_var = add->in(1);
    Node* add_var_ctrl = get_ctrl(add_var);
    IdealLoopTree* add_var_loop = get_loop(add_var_ctrl);
    Node* add_invar = add->in(2);
    Node* add_invar_ctrl = get_ctrl(add_invar);
    IdealLoopTree* add_invar_loop = get_loop(add_invar_ctrl);
    if (add_invar_loop == n_loop) {
      // Swap to find the invariant part
      add_invar = add_var;
      add_invar_ctrl = add_var_ctrl;
      add_invar_loop = add_var_loop;
      add_var = add->in(2);
    } else if (add_var_loop != n_loop) { // Else neither input is loop invariant
      return NULL;
    }
    if (n_loop == add_invar_loop || !add_invar_loop->is_member(n_loop)) {
      return NULL;              // No invariant part of the add?
    }

    // Yes!  Reshape address expression!
    Node* inv_scale = LShiftNode::make(add_invar, scale, bt);
    // Place the invariant shift at the deeper of its two input controls.
    Node* inv_scale_ctrl =
            dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
            add_invar_ctrl : scale_ctrl;
    register_new_node(inv_scale, inv_scale_ctrl);
    Node* var_scale = LShiftNode::make(add_var, scale, bt);
    register_new_node(var_scale, n_ctrl);
    Node* var_add = AddNode::make(var_scale, inv_scale, bt);
    register_new_node(var_add, n_ctrl);
    _igvn.replace_node(n, var_add);
    return var_add;
  }
  return NULL;
}

//------------------------------remix_address_expressions----------------------
// Rework addressing expressions to get the most loop-invariant stuff
// moved out.  We'd like to do all associative operators, but it's especially
// important (common) to do address expressions.
Node* PhaseIdealLoop::remix_address_expressions(Node* n) {
  if (!has_ctrl(n)) return NULL;
  Node* n_ctrl = get_ctrl(n);
  IdealLoopTree* n_loop = get_loop(n_ctrl);

  // See if 'n' mixes loop-varying and loop-invariant inputs and
  // itself is loop-varying.

  // Only interested in binary ops (and AddP)
  if (n->req() < 3 || n->req() > 4) return NULL;

  Node* n1_ctrl = get_ctrl(n->in(1));
  Node* n2_ctrl = get_ctrl(n->in(2));
  Node* n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
  IdealLoopTree* n1_loop = get_loop(n1_ctrl);
  IdealLoopTree* n2_loop = get_loop(n2_ctrl);
  IdealLoopTree* n3_loop = get_loop(n3_ctrl);

  // Does one of my inputs spin in a tighter loop than self?
  if ((n_loop->is_member(n1_loop) && n_loop != n1_loop) ||
      (n_loop->is_member(n2_loop) && n_loop != n2_loop) ||
      (n_loop->is_member(n3_loop) && n_loop != n3_loop)) {
    return NULL;                // Leave well enough alone
  }

  // Is at least one of my inputs loop-invariant?
  if (n1_loop == n_loop &&
      n2_loop == n_loop &&
      n3_loop == n_loop) {
    return NULL;                // No loop-invariant inputs
  }

  Node* res = remix_address_expressions_add_left_shift(n, n_loop, n_ctrl, T_INT);
  if (res != NULL) {
    return res;
  }
  res = remix_address_expressions_add_left_shift(n, n_loop, n_ctrl, T_LONG);
  if (res != NULL) {
    return res;
  }

  int n_op = n->Opcode();
  // Replace (I+V) with (V+I)
  if (n_op == Op_AddI ||
      n_op == Op_AddL ||
      n_op == Op_AddF ||
      n_op == Op_AddD ||
      n_op == Op_MulI ||
      n_op == Op_MulL ||
      n_op == Op_MulF ||
      n_op == Op_MulD) {
    if (n2_loop == n_loop) {
      assert(n1_loop != n_loop, "");
      n->swap_edges(1, 2);
    }
  }

  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
  // but not if I2 is a constant.
  if (n_op == Op_AddP) {
    if (n2_loop == n_loop && n3_loop != n_loop) {
      if (n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con()) {
        Node* n22_ctrl = get_ctrl(n->in(2)->in(2));
        Node* n23_ctrl = get_ctrl(n->in(2)->in(3));
        IdealLoopTree* n22loop = get_loop(n22_ctrl);
        IdealLoopTree* n23_loop = get_loop(n23_ctrl);
        if (n22loop != n_loop && n22loop->is_member(n_loop) &&
            n23_loop == n_loop) {
          Node* add1 = new AddPNode(n->in(1), n->in(2)->in(2), n->in(3));
          // Stuff new AddP in the loop preheader
          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
          Node* add2 = new AddPNode(n->in(1), add1, n->in(2)->in(3));
          register_new_node(add2, n_ctrl);
          _igvn.replace_node(n, add2);
          return add2;
        }
      }
    }

    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
    if (n2_loop != n_loop && n3_loop == n_loop) {
      if (n->in(3)->Opcode() == Op_AddX) {
        Node* V = n->in(3)->in(1);
        Node* I = n->in(3)->in(2);
        if (is_member(n_loop,get_ctrl(V))) {
          // V is the loop-varying input already; keep as-is.
        } else {
          Node *tmp = V; V = I; I = tmp;
        }
        if (!is_member(n_loop,get_ctrl(I))) {
          Node* add1 = new AddPNode(n->in(1), n->in(2), I);
          // Stuff new AddP in the loop preheader
          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
          Node* add2 = new AddPNode(n->in(1), add1, V);
          register_new_node(add2, n_ctrl);
          _igvn.replace_node(n, add2);
          return add2;
        }
      }
    }
  }

  return NULL;
}

// Optimize ((in1[2*i] * in2[2*i]) + (in1[2*i+1] * in2[2*i+1]))
// into a MulAddS2I node when the platform supports it. Returns the
// replacement node, or NULL if the pattern does not match.
Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
  assert(n->Opcode() == Op_AddI, "sanity");
  Node * nn = NULL;
  Node * in1 = n->in(1);
  Node * in2 = n->in(2);
  if (in1->Opcode() == Op_MulI && in2->Opcode() == Op_MulI) {
    IdealLoopTree* loop_n = get_loop(get_ctrl(n));
    if (loop_n->is_counted() &&
        loop_n->_head->as_Loop()->is_valid_counted_loop(T_INT) &&
        Matcher::match_rule_supported(Op_MulAddVS2VI) &&
        Matcher::match_rule_supported(Op_MulAddS2I)) {
      Node* mul_in1 = in1->in(1);
      Node* mul_in2 = in1->in(2);
      Node* mul_in3 = in2->in(1);
      Node* mul_in4 = in2->in(2);
      if (mul_in1->Opcode() == Op_LoadS &&
          mul_in2->Opcode() == Op_LoadS &&
          mul_in3->Opcode() == Op_LoadS &&
          mul_in4->Opcode() == Op_LoadS) {
        IdealLoopTree* loop1 = get_loop(get_ctrl(mul_in1));
        IdealLoopTree* loop2 = get_loop(get_ctrl(mul_in2));
        IdealLoopTree* loop3 = get_loop(get_ctrl(mul_in3));
        IdealLoopTree* loop4 = get_loop(get_ctrl(mul_in4));
        IdealLoopTree* loop5 = get_loop(get_ctrl(in1));
        IdealLoopTree* loop6 = get_loop(get_ctrl(in2));
        // All nodes should be in the same counted loop.
        if (loop_n == loop1 && loop_n == loop2 && loop_n == loop3 &&
            loop_n == loop4 && loop_n == loop5 && loop_n == loop6) {
          Node* adr1 = mul_in1->in(MemNode::Address);
          Node* adr2 = mul_in2->in(MemNode::Address);
          Node* adr3 = mul_in3->in(MemNode::Address);
          Node* adr4 = mul_in4->in(MemNode::Address);
          if (adr1->is_AddP() && adr2->is_AddP() && adr3->is_AddP() && adr4->is_AddP()) {
            // Pair up the loads by matching base pointers so each MulAddS2I
            // operand pair reads from the same array.
            if ((adr1->in(AddPNode::Base) == adr3->in(AddPNode::Base)) &&
                (adr2->in(AddPNode::Base) == adr4->in(AddPNode::Base))) {
              nn = new MulAddS2INode(mul_in1, mul_in2, mul_in3, mul_in4);
              register_new_node(nn, get_ctrl(n));
              _igvn.replace_node(n, nn);
              return nn;
            } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) &&
                       (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
              nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3);
              register_new_node(nn, get_ctrl(n));
              _igvn.replace_node(n, nn);
              return nn;
            }
          }
        }
      }
    }
  }
  return nn;
}

//------------------------------conditional_move-------------------------------
// Attempt to replace a Phi with a conditional move.  We have some pretty
// strict profitability requirements.  All Phis at the merge point must
// be converted, so we can remove the control flow.  We need to limit the
// number of c-moves to a small handful.  All code that was in the side-arms
// of the CFG diamond is now speculatively executed.  This code has to be
// "cheap enough".  We are pretty much limited to CFG diamonds that merge
// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
// Returns the Bool input of the If on success, NULL otherwise.
Node *PhaseIdealLoop::conditional_move( Node *region ) {

  assert(region->is_Region(), "sanity check");
  if (region->req() != 3) return NULL;

  // Check for CFG diamond
  Node *lp = region->in(1);
  Node *rp = region->in(2);
  if (!lp || !rp) return NULL;
  Node *lp_c = lp->in(0);
  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
  IfNode *iff = lp_c->as_If();

  // Check for ops pinned in an arm of the diamond.
  // Can't remove the control flow in this case
  if (lp->outcnt() > 1) return NULL;
  if (rp->outcnt() > 1) return NULL;

  IdealLoopTree* r_loop = get_loop(region);
  assert(r_loop == get_loop(iff), "sanity");
  // Always convert to CMOVE if all results are used only outside this loop.
  bool used_inside_loop = (r_loop == _ltree_root);

  // Check profitability
  int cost = 0;
  int phis = 0;
  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
    Node *out = region->fast_out(i);
    if (!out->is_Phi()) continue; // Ignore other control edges, etc
    phis++;
    PhiNode* phi = out->as_Phi();
    BasicType bt = phi->type()->basic_type();
    switch (bt) {
      case T_DOUBLE:
      case T_FLOAT:
        if (C->use_cmove()) {
          continue; //TODO: maybe we want to add some cost
        }
        cost += Matcher::float_cmove_cost(); // Could be very expensive
        break;
      case T_LONG: {
        cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
      }
      // Intentional fall through: a long CMOV also pays the base cost below.
      case T_INT:                 // These all CMOV fine
      case T_ADDRESS: {           // (RawPtr)
        cost++;
        break;
      }
      case T_NARROWOOP: // Fall through
      case T_OBJECT: {            // Base oops are OK, but not derived oops
        const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
        // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
        // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
        // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
        // have a Phi for the base here that we convert to a CMOVE all is well
        // and good.  But if the base is dead, we'll not make a CMOVE.  Later
        // the allocator will have to produce a base by creating a CMOVE of the
        // relevant bases.  This puts the allocator in the business of
        // manufacturing expensive instructions, generally a bad plan.
        // Just Say No to Conditionally-Moved Derived Pointers.
        if (tp && tp->offset() != 0)
          return NULL;
        cost++;
        break;
      }
      default:
        return NULL;              // In particular, can't do memory or I/O
    }
    // Add in cost any speculative ops
    for (uint j = 1; j < region->req(); j++) {
      Node *proj = region->in(j);
      Node *inp = phi->in(j);
      if (get_ctrl(inp) == proj) { // Found local op
        cost++;
        // Check for a chain of dependent ops; these will all become
        // speculative in a CMOV.
        for (uint k = 1; k < inp->req(); k++)
          if (get_ctrl(inp->in(k)) == proj)
            cost += ConditionalMoveLimit; // Too much speculative goo
      }
    }
    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
    // This will likely Split-If, a higher-payoff operation.
    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
      Node* use = phi->fast_out(k);
      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
        cost += ConditionalMoveLimit;
      // Is there a use inside the loop?
      // Note: check only basic types since CMoveP is pinned.
      if (!used_inside_loop && is_java_primitive(bt)) {
        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
          used_inside_loop = true;
        }
      }
    }
  }//for
  Node* bol = iff->in(1);
  if (bol->Opcode() == Op_Opaque4) {
    return NULL; // Ignore loop predicate checks (the Opaque4 ensures they will go away)
  }
  assert(bol->Opcode() == Op_Bool, "Unexpected node");
  int cmp_op = bol->in(1)->Opcode();
  if (cmp_op == Op_SubTypeCheck) { // SubTypeCheck expansion expects an IfNode
    return NULL;
  }
  // It is expensive to generate flags from a float compare.
  // Avoid duplicated float compare.
  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;

  float infrequent_prob = PROB_UNLIKELY_MAG(3);
  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
  if (used_inside_loop) {
    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo

    // BlockLayoutByFrequency optimization moves infrequent branch
    // from hot path. No point in CMOV'ing in such case (110 is used
    // instead of 100 to take into account not exactness of float value).
    if (BlockLayoutByFrequency) {
      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
    }
  }
  // Check for highly predictable branch.  No point in CMOV'ing if
  // we are going to predict accurately all the time.
  if (C->use_cmove() && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) {
    //keep going
  } else if (iff->_prob < infrequent_prob ||
             iff->_prob > (1.0f - infrequent_prob))
    return NULL;

  // --------------
  // Now replace all Phis with CMOV's
  Node *cmov_ctrl = iff->in(0);
  uint flip = (lp->Opcode() == Op_IfTrue);
  Node_List wq;
  while (1) {
    PhiNode* phi = NULL;
    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
      Node *out = region->fast_out(i);
      if (out->is_Phi()) {
        phi = out->as_Phi();
        break;
      }
    }
    if (phi == NULL || _igvn.type(phi) == Type::TOP) {
      break;
    }
    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
    // Move speculative ops
    wq.push(phi);
    while (wq.size() > 0) {
      Node *n = wq.pop();
      for (uint j = 1; j < n->req(); j++) {
        Node* m = n->in(j);
        if (m != NULL && !is_dominator(get_ctrl(m), cmov_ctrl)) {
#ifndef PRODUCT
          if (PrintOpto && VerifyLoopOptimizations) {
            tty->print("  speculate: ");
            m->dump();
          }
#endif
          set_ctrl(m, cmov_ctrl);
          wq.push(m);
        }
      }
    }
    Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
    register_new_node( cmov, cmov_ctrl );
    _igvn.replace_node( phi, cmov );
#ifndef PRODUCT
    if (TraceLoopOpts) {
      tty->print("CMOV  ");
      r_loop->dump_head();
      if (Verbose) {
        bol->in(1)->dump(1);
        cmov->dump(1);
      }
    }
    if (VerifyLoopOptimizations) verify();
#endif
  }

  // The useless CFG diamond will fold up later; see the optimization in
  // RegionNode::Ideal.
  _igvn._worklist.push(region);

  return iff->in(1);
}

// Enqueue all CFG uses of 'm' onto 'wq'. NeverBranch nodes are looked
// through: their projection's CFG uses are enqueued instead.
static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
  for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
    Node* u = m->fast_out(i);
    if (u->is_CFG()) {
      if (u->is_NeverBranch()) {
        u = u->as_NeverBranch()->proj_out(0);
        enqueue_cfg_uses(u, wq);
      } else {
        wq.push(u);
      }
    }
  }
}

// Try moving a store out of a loop, right before the loop.
// Returns the store if it was moved, NULL otherwise.
Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
  // Store has to be first in the loop body
  IdealLoopTree *n_loop = get_loop(n_ctrl);
  if (n->is_Store() && n_loop != _ltree_root &&
      n_loop->is_loop() && n_loop->_head->is_Loop() &&
      n->in(0) != NULL) {
    Node* address = n->in(MemNode::Address);
    Node* value = n->in(MemNode::ValueIn);
    Node* mem = n->in(MemNode::Memory);
    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
    IdealLoopTree* value_loop = get_loop(get_ctrl(value));

    // - address and value must be loop invariant
    // - memory must be a memory Phi for the loop
    // - Store must be the only store on this memory slice in the
    // loop: if there's another store following this one then value
    // written at iteration i by the second store could be overwritten
    // at iteration i+n by the first store: it's not safe to move the
    // first store out of the loop
    // - nothing must observe the memory Phi: it guarantees no read
    // before the store, we are also guaranteed the store post
    // dominates the loop head (ignoring a possible early
    // exit). Otherwise there would be extra Phi involved between the
    // loop's Phi and the store.
    // - there must be no early exit from the loop before the Store
    // (such an exit most of the time would be an extra use of the
    // memory Phi but sometimes is a bottom memory Phi that takes the
    // store as input).

    if (!n_loop->is_member(address_loop) &&
        !n_loop->is_member(value_loop) &&
        mem->is_Phi() && mem->in(0) == n_loop->_head &&
        mem->outcnt() == 1 &&
        mem->in(LoopNode::LoopBackControl) == n) {

      assert(n_loop->_tail != NULL, "need a tail");
      assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");

      // Verify that there's no early exit of the loop before the store.
      bool ctrl_ok = false;
      {
        // Follow control from loop head until n, we exit the loop or
        // we reach the tail
        ResourceMark rm;
        Unique_Node_List wq;
        wq.push(n_loop->_head);

        for (uint next = 0; next < wq.size(); ++next) {
          Node *m = wq.at(next);
          if (m == n->in(0)) {
            ctrl_ok = true;
            continue;
          }
          assert(!has_ctrl(m), "should be CFG");
          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
            ctrl_ok = false;
            break;
          }
          enqueue_cfg_uses(m, wq);
          // Bounded search: bail out if the control structure is too
          // large to cheaply verify.
          if (wq.size() > 10) {
            ctrl_ok = false;
            break;
          }
        }
      }
      if (ctrl_ok) {
        // move the Store
        _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
        _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
        _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
        // Disconnect the phi now. An empty phi can confuse other
        // optimizations in this pass of loop opts.
        _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
        n_loop->_body.yank(mem);

        set_ctrl_and_loop(n, n->in(0));

        return n;
      }
    }
  }
  return NULL;
}

// Try moving a store out of a loop, right after the loop
void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
  if (n->is_Store() && n->in(0) != NULL) {
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);
    // Store must be in a loop
    if (n_loop != _ltree_root && !n_loop->_irreducible) {
      Node* address = n->in(MemNode::Address);
      Node* value = n->in(MemNode::ValueIn);
      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
      // address must be loop invariant
      if (!n_loop->is_member(address_loop)) {
        // Store must be last on this memory slice in the loop and
        // nothing in the loop must observe it
        Node* phi = NULL;
        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
          Node* u = n->fast_out(i);
          if (has_ctrl(u)) { // control use?
            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
            // Uses outside the loop are fine; they don't observe the
            // store from inside the loop.
            if (!n_loop->is_member(u_loop)) {
              continue;
            }
            // The only acceptable in-loop use is this loop's memory Phi.
            if (u->is_Phi() && u->in(0) == n_loop->_head) {
              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
              // multiple phis on the same slice are possible
              if (phi != NULL) {
                return;
              }
              phi = u;
              continue;
            }
          }
          return;
        }
        if (phi != NULL) {
          // Nothing in the loop before the store (next iteration)
          // must observe the stored value
          bool mem_ok = true;
          {
            ResourceMark rm;
            Unique_Node_List wq;
            wq.push(phi);
            // Walk memory uses from the Phi; only other Stores/Phis that
            // eventually reach 'n' are acceptable (bounded to 10 nodes).
            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
              Node *m = wq.at(next);
              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
                Node* u = m->fast_out(i);
                if (u->is_Store() || u->is_Phi()) {
                  if (u != n) {
                    wq.push(u);
                    mem_ok = (wq.size() <= 10);
                  }
                } else {
                  mem_ok = false;
                  break;
                }
              }
            }
          }
          if (mem_ok) {
            // Move the store out of the loop if the LCA of all
            // users (except for the phi) is outside the loop.
            // 'hook' temporarily stands in for 'n' in the Phi while the
            // store's late control is computed.
            Node* hook = new Node(1);
            hook->init_req(0, n_ctrl); // Add an input to prevent hook from being dead
            _igvn.rehash_node_delayed(phi);
            int count = phi->replace_edge(n, hook, &_igvn);
            assert(count > 0, "inconsistent phi");

            // Compute latest point this store can go
            Node* lca = get_late_ctrl(n, get_ctrl(n));
            if (lca->is_OuterStripMinedLoop()) {
              lca = lca->in(LoopNode::EntryControl);
            }
            if (n_loop->is_member(get_loop(lca))) {
              // LCA is in the loop - bail out
              _igvn.replace_node(hook, n);
              return;
            }
#ifdef ASSERT
            if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
              assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
              n_loop->_head->as_Loop()->verify_strip_mined(1);
              Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
              IdealLoopTree* outer_loop = get_loop(outer);
              assert(n_loop->_parent == outer_loop, "broken loop tree");
              assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
            }
#endif
            lca = place_outside_loop(lca, n_loop);
            assert(!n_loop->is_member(get_loop(lca)), "control must not be back in the loop");
            assert(get_loop(lca)->_nest < n_loop->_nest || lca->in(0)->is_NeverBranch(), "must not be moved into inner loop");

            // Move store out of the loop
            _igvn.replace_node(hook, n->in(MemNode::Memory));
            _igvn.replace_input_of(n, 0, lca);
            set_ctrl_and_loop(n, lca);

            // Disconnect the phi now. An empty phi can confuse other
            // optimizations in this pass of loop opts..
            if (phi->in(LoopNode::LoopBackControl) == phi) {
              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
              n_loop->_body.yank(phi);
            }
          }
        }
      }
    }
  }
}

// If UseArrayMarkWordCheck is enabled, we can't use immutable memory for the flat array check
// because we are loading the mark word which is mutable. Although the bits we are interested in
// are immutable (we check for markWord::unlocked_value), we need to use raw memory to not break
// anti dependency analysis. Below code will attempt to still move flat array checks out of loops,
// mainly to enable loop unswitching.
void PhaseIdealLoop::move_flat_array_check_out_of_loop(Node* n) {
  // Skip checks for more than one array
  if (n->req() > 3) {
    return;
  }
  Node* mem = n->in(FlatArrayCheckNode::Memory);
  Node* array = n->in(FlatArrayCheckNode::ArrayOrKlass)->uncast();
  IdealLoopTree* check_loop = get_loop(get_ctrl(n));
  IdealLoopTree* ary_loop = get_loop(get_ctrl(array));

  // Check if array is loop invariant
  if (!check_loop->is_member(ary_loop)) {
    // Walk up memory graph from the check until we leave the loop
    VectorSet wq;
    wq.set(mem->_idx);
    while (check_loop->is_member(get_loop(ctrl_or_self(mem)))) {
      // Step one node up the memory graph, depending on node kind.
      if (mem->is_Phi()) {
        mem = mem->in(1);
      } else if (mem->is_MergeMem()) {
        mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
      } else if (mem->is_Proj()) {
        mem = mem->in(0);
      } else if (mem->is_MemBar() || mem->is_SafePoint()) {
        mem = mem->in(TypeFunc::Memory);
      } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
        mem = mem->in(MemNode::Memory);
      } else {
#ifdef ASSERT
        mem->dump();
#endif
        ShouldNotReachHere();
      }
      // Cycle detection: give up if we revisit a memory node.
      if (wq.test_set(mem->_idx)) {
        return;
      }
    }
    // Replace memory input and re-compute ctrl to move the check out of the loop
    _igvn.replace_input_of(n, 1, mem);
    set_ctrl_and_loop(n, get_early_ctrl(n));
    Node* bol = n->unique_out();
    set_ctrl_and_loop(bol, get_early_ctrl(bol));
  }
}

//------------------------------split_if_with_blocks_pre-----------------------
// Do the real work in a non-recursive function.  Data nodes want to be
// cloned in the pre-order so they can feed each other nicely.
Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
  // Cloning these guys is unlikely to win
  int n_op = n->Opcode();
  if (n_op == Op_MergeMem) {
    return n;
  }
  if (n->is_Proj()) {
    return n;
  }

  if (UseArrayMarkWordCheck && n->isa_FlatArrayCheck()) {
    move_flat_array_check_out_of_loop(n);
    return n;
  }

  // Do not clone-up CmpFXXX variations, as these are always
  // followed by a CmpI
  if (n->is_Cmp()) {
    return n;
  }
  // Attempt to use a conditional move instead of a phi/branch
  if (ConditionalMoveLimit > 0 && n_op == Op_Region) {
    Node *cmov = conditional_move( n );
    if (cmov) {
      return cmov;
    }
  }
  if (n->is_CFG() || n->is_LoadStore()) {
    return n;
  }
  if (n->is_Opaque1()) { // Opaque nodes cannot be mod'd
    if (!C->major_progress()) { // If chance of no more loop opts...
      _igvn._worklist.push(n); // maybe we'll remove them
    }
    return n;
  }

  if (n->is_Con()) {
    return n; // No cloning for Con nodes
  }

  Node *n_ctrl = get_ctrl(n);
  if (!n_ctrl) {
    return n; // Dead node
  }

  Node* res = try_move_store_before_loop(n, n_ctrl);
  if (res != NULL) {
    return n;
  }

  // Attempt to remix address expressions for loop invariants
  Node *m = remix_address_expressions( n );
  if( m ) return m;

  if (n_op == Op_AddI) {
    Node *nn = convert_add_to_muladd( n );
    if ( nn ) return nn;
  }

  if (n->is_ConstraintCast()) {
    Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
    // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
    // Node control inputs don't necessarily agree with loop control info (due to
    // transformations happened in between), thus additional dominance check is needed
    // to keep loop info valid.
    if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
      _igvn.replace_node(n, dom_cast);
      return dom_cast;
    }
  }

  // Determine if the Node has inputs from some local Phi.
  // Returns the block to clone thru.
  Node *n_blk = has_local_phi_input( n );
  if( !n_blk ) return n;

  // Do not clone the trip counter through on a CountedLoop
  // (messes up the canonical shape).
  if (((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_loop_nest_inner_loop())) && n->Opcode() == Op_AddI) ||
      (n_blk->is_LongCountedLoop() && n->Opcode() == Op_AddL)) {
    return n;
  }
  // Pushing a shift through the iv Phi can get in the way of addressing optimizations or range check elimination
  if (n_blk->is_BaseCountedLoop() && n->Opcode() == Op_LShift(n_blk->as_BaseCountedLoop()->bt()) &&
      n->in(1) == n_blk->as_BaseCountedLoop()->phi()) {
    return n;
  }

  // Check for having no control input; not pinned.  Allow
  // dominating control.
  if (n->in(0)) {
    Node *dom = idom(n_blk);
    if (dom_lca(n->in(0), dom) != n->in(0)) {
      return n;
    }
  }
  // Policy: when is it profitable.  You must get more wins than
  // policy before it is considered profitable.  Policy is usually 0,
  // so 1 win is considered profitable.  Big merges will require big
  // cloning, so get a larger policy.
  int policy = n_blk->req() >> 2;

  // If the loop is a candidate for range check elimination,
  // delay splitting through it's phi until a later loop optimization
  if (n_blk->is_BaseCountedLoop()) {
    IdealLoopTree *lp = get_loop(n_blk);
    if (lp && lp->_rce_candidate) {
      return n;
    }
  }

  if (must_throttle_split_if()) return n;

  // Split 'n' through the merge point if it is profitable
  Node *phi = split_thru_phi( n, n_blk, policy );
  if (!phi) return n;

  // Found a Phi to split thru!
  // Replace 'n' with the new phi
  _igvn.replace_node( n, phi );
  // Moved a load around the loop, 'en-registering' something.
  if (n_blk->is_Loop() && n->is_Load() &&
      !phi->in(LoopNode::LoopBackControl)->is_Load())
    C->set_major_progress();

  return phi;
}

// Estimate how much a split-if at 'region' would clone (sum of the use
// counts of the region's outputs) and refuse when that weight is too
// large relative to the remaining node budget.
static bool merge_point_too_heavy(Compile* C, Node* region) {
  // Bail out if the region and its phis have too many users.
  int weight = 0;
  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
    weight += region->fast_out(i)->outcnt();
  }
  int nodes_left = C->max_node_limit() - C->live_nodes();
  if (weight * 8 > nodes_left) {
    if (PrintOpto) {
      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
    }
    return true;
  } else {
    return false;
  }
}

static bool merge_point_safe(Node* region) {
  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
  // having a PhiNode input. This sidesteps the dangerous case where the split
  // ConvI2LNode may become TOP if the input Value() does not
  // overlap the ConvI2L range, leaving a node which may not dominate its
  // uses.
  // A better fix for this problem can be found in the BugTraq entry, but
  // expediency for Mantis demands this hack.
#ifdef _LP64
  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
    Node* n = region->fast_out(i);
    if (n->is_Phi()) {
      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* m = n->fast_out(j);
        if (m->Opcode() == Op_ConvI2L)
          return false;
        if (m->is_CastII()) {
          return false;
        }
      }
    }
  }
#endif
  return true;
}


//------------------------------place_outside_loop---------------------------------
// Place some computation outside of this loop on the path to the use passed as argument
Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
  Node* head = loop->_head;
  assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
  // For strip-mined loops, place outside the outer strip-mined loop too.
  if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
    loop = loop->_parent;
    assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
  }

  // Pick control right outside the loop
  for (;;) {
    Node* dom = idom(useblock);
    if (loop->is_member(get_loop(dom)) ||
        // NeverBranch nodes are not assigned to the loop when constructed
        (dom->is_NeverBranch() && loop->is_member(get_loop(dom->in(0))))) {
      break;
    }
    useblock = dom;
  }
  assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
  return useblock;
}


// True when 'n' is an If whose region is dominated by another If with the
// same boolean input, and every region input is dominated by one of that
// dominating If's projections.
bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
  if (!n->is_If() || n->is_BaseCountedLoopEnd()) {
    return false;
  }
  if (!n->in(0)->is_Region()) {
    return false;
  }

  Node* region = n->in(0);
  Node* dom = idom(region);
  if (!dom->is_If() || dom->in(1) != n->in(1)) {
    return false;
  }
  IfNode* dom_if = dom->as_If();
  Node* proj_true = dom_if->proj_out(1);
  Node* proj_false = dom_if->proj_out(0);

  for (uint i = 1; i < region->req(); i++) {
    if (is_dominator(proj_true, region->in(i))) {
      continue;
    }
    if (is_dominator(proj_false, region->in(i))) {
      continue;
    }
    return false;
  }

  return true;
}


bool PhaseIdealLoop::can_split_if(Node* n_ctrl) {
  if (must_throttle_split_if()) {
    return false;
  }

  // Do not do 'split-if' if irreducible loops are present.
  if (_has_irreducible_loops) {
    return false;
  }

  if (merge_point_too_heavy(C, n_ctrl)) {
    return false;
  }

  // Do not do 'split-if' if some paths are dead.  First do dead code
  // elimination and then see if its still profitable.
  for (uint i = 1; i < n_ctrl->req(); i++) {
    if (n_ctrl->in(i) == C->top()) {
      return false;
    }
  }

  // If trying to do a 'Split-If' at the loop head, it is only
  // profitable if the cmp folds up on BOTH paths.  Otherwise we
  // risk peeling a loop forever.

  // CNC - Disabled for now.  Requires careful handling of loop
  // body selection for the cloned code.  Also, make sure we check
  // for any input path not being in the same loop as n_ctrl. For
  // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
  // because the alternative loop entry points won't be converted
  // into LoopNodes.
  IdealLoopTree *n_loop = get_loop(n_ctrl);
  for (uint j = 1; j < n_ctrl->req(); j++) {
    if (get_loop(n_ctrl->in(j)) != n_loop) {
      return false;
    }
  }

  // Check for safety of the merge point.
  if (!merge_point_safe(n_ctrl)) {
    return false;
  }

  return true;
}

// Detect if the node is the inner strip-mined loop
// Return: NULL if it's not the case, or the exit of outer strip-mined loop
static Node* is_inner_of_stripmined_loop(const Node* out) {
  Node* out_le = NULL;

  if (out->is_CountedLoopEnd()) {
    const CountedLoopNode* loop = out->as_CountedLoopEnd()->loopnode();

    if (loop != NULL && loop->is_strip_mined()) {
      out_le = loop->in(LoopNode::EntryControl)->as_OuterStripMinedLoop()->outer_loop_exit();
    }
  }

  return out_le;
}

bool PhaseIdealLoop::flatten_array_element_type_check(Node *n) {
  // If the CmpP is a subtype check for a value that has just been
  // loaded from an array, the subtype check guarantees the value
  // can't be stored in a flattened array and the load of the value
  // happens with a flattened array check then: push the type check
  // through the phi of the flattened array check. This needs special
  // logic because the subtype check's input is not a phi but a
  // LoadKlass that must first be cloned through the phi.
  if (n->Opcode() != Op_CmpP) {
    return false;
  }

  Node* klassptr = n->in(1);
  Node* klasscon = n->in(2);

  // Look through a narrow-klass decode to the klass load itself.
  if (klassptr->is_DecodeNarrowPtr()) {
    klassptr = klassptr->in(1);
  }

  if (klassptr->Opcode() != Op_LoadKlass && klassptr->Opcode() != Op_LoadNKlass) {
    return false;
  }

  if (!klasscon->is_Con()) {
    return false;
  }

  Node* addr = klassptr->in(MemNode::Address);

  if (!addr->is_AddP()) {
    return false;
  }

  intptr_t offset;
  Node* obj = AddPNode::Ideal_base_and_offset(addr, &_igvn, offset);

  if (obj == NULL) {
    return false;
  }

  assert(obj != NULL && addr->in(AddPNode::Base) == addr->in(AddPNode::Address), "malformed AddP?");
  if (obj->Opcode() == Op_CastPP) {
    obj = obj->in(1);
  }

  if (!obj->is_Phi()) {
    return false;
  }

  Node* region = obj->in(0);

  // Clone the (optional CastPP ->) AddP -> LoadKlass chain down each
  // path of the Phi, collecting the per-path klass loads in a new Phi.
  Node* phi = PhiNode::make_blank(region, n->in(1));
  for (uint i = 1; i < region->req(); i++) {
    Node* in = obj->in(i);
    Node* ctrl = region->in(i);
    if (addr->in(AddPNode::Base) != obj) {
      Node* cast = addr->in(AddPNode::Base);
      assert(cast->Opcode() == Op_CastPP && cast->in(0) != NULL, "inconsistent subgraph");
      Node* cast_clone = cast->clone();
      cast_clone->set_req(0, ctrl);
      cast_clone->set_req(1, in);
      register_new_node(cast_clone, ctrl);
      const Type* tcast = cast_clone->Value(&_igvn);
      _igvn.set_type(cast_clone, tcast);
      cast_clone->as_Type()->set_type(tcast);
      in = cast_clone;
    }
    Node* addr_clone = addr->clone();
    addr_clone->set_req(AddPNode::Base, in);
    addr_clone->set_req(AddPNode::Address, in);
    register_new_node(addr_clone, ctrl);
    _igvn.set_type(addr_clone, addr_clone->Value(&_igvn));
    Node* klassptr_clone = klassptr->clone();
    klassptr_clone->set_req(2, addr_clone);
    register_new_node(klassptr_clone, ctrl);
    _igvn.set_type(klassptr_clone, klassptr_clone->Value(&_igvn));
    // Re-wrap in a cloned decode if the original CmpP input was narrow.
    if (klassptr != n->in(1)) {
      Node* decode = n->in(1);
      assert(decode->is_DecodeNarrowPtr(), "inconsistent subgraph");
      Node* decode_clone = decode->clone();
      decode_clone->set_req(1, klassptr_clone);
      register_new_node(decode_clone, ctrl);
      _igvn.set_type(decode_clone, decode_clone->Value(&_igvn));
      klassptr_clone = decode_clone;
    }
    phi->set_req(i, klassptr_clone);
  }
  register_new_node(phi, region);
  Node* orig = n->in(1);
  _igvn.replace_input_of(n, 1, phi);
  split_if_with_blocks_post(n);
  // If the split did not consume 'n', undo the rewiring.
  if (n->outcnt() != 0) {
    _igvn.replace_input_of(n, 1, orig);
    _igvn.remove_dead_node(phi);
  }
  return true;
}

//------------------------------split_if_with_blocks_post----------------------
// Do the real work in a non-recursive function.  CFG hackery wants to be
// in the post-order, so it can dirty the I-DOM info and not use the dirtied
// info.
void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {

  if (flatten_array_element_type_check(n)) {
    return;
  }

  // Cloning Cmp through Phi's involves the split-if transform.
  // FastLock is not used by an If
  if (n->is_Cmp() && !n->is_FastLock()) {
    Node *n_ctrl = get_ctrl(n);
    // Determine if the Node has inputs from some local Phi.
    // Returns the block to clone thru.
    Node *n_blk = has_local_phi_input(n);
    if (n_blk != n_ctrl) {
      return;
    }

    if (!can_split_if(n_ctrl)) {
      return;
    }

    if (n->outcnt() != 1) {
      return; // Multiple bool's from 1 compare?
    }
    Node *bol = n->unique_out();
    assert(bol->is_Bool(), "expect a bool here");
    if (bol->outcnt() != 1) {
      return;// Multiple branches from 1 compare?
    }
    Node *iff = bol->unique_out();

    // Check some safety conditions
    if (iff->is_If()) {        // Classic split-if?
      if (iff->in(0) != n_ctrl) {
        return; // Compare must be in same blk as if
      }
    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
      // Can't split CMove with different control edge.
      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
        return;
      }
      if (get_ctrl(iff->in(2)) == n_ctrl ||
          get_ctrl(iff->in(3)) == n_ctrl) {
        return;             // Inputs not yet split-up
      }
      if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
        return;             // Loop-invar test gates loop-varying CMOVE
      }
    } else {
      return;  // some other kind of node, such as an Allocate
    }

    // When is split-if profitable?  Every 'win' on means some control flow
    // goes dead, so it's almost always a win.
    int policy = 0;
    // Split compare 'n' through the merge point if it is profitable
    Node *phi = split_thru_phi( n, n_ctrl, policy);
    if (!phi) {
      return;
    }

    // Found a Phi to split thru!
    // Replace 'n' with the new phi
    _igvn.replace_node(n, phi);

    // Now split the bool up thru the phi
    Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
    guarantee(bolphi != NULL, "null boolean phi node");

    _igvn.replace_node(bol, bolphi);
    assert(iff->in(1) == bolphi, "");

    if (bolphi->Value(&_igvn)->singleton()) {
      return;
    }

    // Conditional-move?  Must split up now
    if (!iff->is_If()) {
      Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
      _igvn.replace_node(iff, cmovphi);
      return;
    }

    // Now split the IF
    do_split_if(iff);
    return;
  }

  // Two identical ifs back to back can be merged
  if (try_merge_identical_ifs(n)) {
    return;
  }

  // Check for an IF ready to split; one that has its
  // condition codes input coming from a Phi at the block start.
  int n_op = n->Opcode();

  // Check for an IF being dominated by another IF same test
  if (n_op == Op_If ||
      n_op == Op_RangeCheck) {
    Node *bol = n->in(1);
    uint max = bol->outcnt();
    // Check for same test used more than once?
    if (max > 1 && bol->is_Bool()) {
      // Search up IDOMs to see if this IF is dominated.
      Node *cutoff = get_ctrl(bol);

      // Now search up IDOMs till cutoff, looking for a dominating test
      Node *prevdom = n;
      Node *dom = idom(prevdom);
      while (dom != cutoff) {
        if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom &&
            safe_for_if_replacement(dom)) {
          // It's invalid to move control dependent data nodes in the inner
          // strip-mined loop, because:
          // 1) break validation of LoopNode::verify_strip_mined()
          // 2) move code with side-effect in strip-mined loop
          // Move to the exit of outer strip-mined loop in that case.
          Node* out_le = is_inner_of_stripmined_loop(dom);
          if (out_le != NULL) {
            prevdom = out_le;
          }
          // Replace the dominated test with an obvious true or false.
          // Place it on the IGVN worklist for later cleanup.
          C->set_major_progress();
          dominated_by(prevdom->as_IfProj(), n->as_If(), false, true);
#ifndef PRODUCT
          if( VerifyLoopOptimizations ) verify();
#endif
          return;
        }
        prevdom = dom;
        dom = idom(prevdom);
      }
    }
  }

  try_sink_out_of_loop(n);

  try_move_store_after_loop(n);

  // Remove multiple allocations of the same inline type
  if (n->is_InlineType()) {
    n->as_InlineType()->remove_redundant_allocations(this);
  }
}

// Transform:
//
// if (some_condition) {
//   // body 1
// } else {
//   // body 2
// }
// if (some_condition) {
//   // body 3
// } else {
//   // body 4
// }
//
// into:
//
//
// if (some_condition) {
//   // body 1
//   // body 3
// } else {
//   // body 2
//   // body 4
// }
bool PhaseIdealLoop::try_merge_identical_ifs(Node* n) {
  if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
    Node *n_ctrl = n->in(0);
    IfNode* dom_if = idom(n_ctrl)->as_If();
    ProjNode* dom_proj_true = dom_if->proj_out(1);
    ProjNode* dom_proj_false = dom_if->proj_out(0);

    // Now split the IF
    RegionNode* new_false_region;
    RegionNode* new_true_region;
    do_split_if(n, &new_false_region, &new_true_region);
    assert(new_false_region->req() == new_true_region->req(), "");
#ifdef ASSERT
    for (uint i = 1; i < new_false_region->req(); ++i) {
      assert(new_false_region->in(i)->in(0) == new_true_region->in(i)->in(0), "unexpected shape following split if");
      assert(i == new_false_region->req() - 1 || new_false_region->in(i)->in(0)->in(1) == new_false_region->in(i + 1)->in(0)->in(1), "unexpected shape following split if");
    }
#endif
    assert(new_false_region->in(1)->in(0)->in(1) == dom_if->in(1), "dominating if and dominated if after split must share test");

    // We now have:
    // if (some_condition) {
    //   // body 1
    //   if (some_condition) {
    //     body3: // new_true_region
    //     // body3
    //   } else {
    //     goto body4;
    //   }
    // } else {
    //   // body 2
    //   if (some_condition) {
    //     goto body3;
    //   } else {
    //     body4: // new_false_region
    //     // body4;
    //   }
    // }
    //

    // clone pinned nodes thru the resulting regions
    push_pinned_nodes_thru_region(dom_if, new_true_region);
    push_pinned_nodes_thru_region(dom_if, new_false_region);

    // Optimize out the cloned ifs. Because pinned nodes were cloned, this also allows a CastPP that would be dependent
    // on a projection of n to have the dom_if as a control dependency. We don't want the CastPP to end up with an
    // unrelated control dependency.
    for (uint i = 1; i < new_false_region->req(); i++) {
      if (is_dominator(dom_proj_true, new_false_region->in(i))) {
        dominated_by(dom_proj_true->as_IfProj(), new_false_region->in(i)->in(0)->as_If(), false, false);
      } else {
        assert(is_dominator(dom_proj_false, new_false_region->in(i)), "bad if");
        dominated_by(dom_proj_false->as_IfProj(), new_false_region->in(i)->in(0)->as_If(), false, false);
      }
    }
    return true;
  }
  return false;
}

// For every node control-dependent on 'region' whose data inputs all
// dominate 'dom_if', clone the node down each region path and merge the
// clones with a new Phi, replacing the original.
void PhaseIdealLoop::push_pinned_nodes_thru_region(IfNode* dom_if, Node* region) {
  for (DUIterator i = region->outs(); region->has_out(i); i++) {
    Node* u = region->out(i);
    if (!has_ctrl(u) || u->is_Phi() || !u->depends_only_on_test() || !_igvn.no_dependent_zero_check(u)) {
      continue;
    }
    assert(u->in(0) == region, "not a control dependent node?");
    uint j = 1;
    // All data inputs must be available above the dominating if.
    for (; j < u->req(); ++j) {
      Node* in = u->in(j);
      if (!is_dominator(ctrl_or_self(in), dom_if)) {
        break;
      }
    }
    if (j == u->req()) {
      Node *phi = PhiNode::make_blank(region, u);
      for (uint k = 1; k < region->req(); ++k) {
        Node* clone = u->clone();
        clone->set_req(0, region->in(k));
        register_new_node(clone, region->in(k));
        phi->init_req(k, clone);
      }
      register_new_node(phi, region);
      _igvn.replace_node(u, phi);
      --i;
    }
  }
}

// Dominating-test replacement is not safe when 'dom' is the exit test of a
// main counted loop that may still be unrolled (its exit condition can change).
bool PhaseIdealLoop::safe_for_if_replacement(const Node* dom) const {
  if (!dom->is_CountedLoopEnd()) {
    return true;
  }
  CountedLoopEndNode* le = dom->as_CountedLoopEnd();
  CountedLoopNode* cl = le->loopnode();
  if (cl == NULL) {
    return true;
  }
  if (!cl->is_main_loop()) {
    return true;
  }
  if (cl->is_canonical_loop_entry() == NULL) {
    return true;
  }
  // Further unrolling is possible so loop exit condition might change
  return false;
}

// See if a shared loop-varying computation has no loop-varying uses.
// Happens if something is only used for JVM state in uncommon trap exits,
// like various versions of induction variable+offset.  Clone the
// computation per usage to allow it to sink out of the loop.
void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
  if (has_ctrl(n) &&
      !n->is_Phi() &&
      !n->is_Bool() &&
      !n->is_Proj() &&
      !n->is_MergeMem() &&
      !n->is_CMove() &&
      n->Opcode() != Op_Opaque4 &&
      !n->is_Type()) {
    Node *n_ctrl = get_ctrl(n);
    IdealLoopTree *n_loop = get_loop(n_ctrl);

    if (n->in(0) != NULL) {
      IdealLoopTree* loop_ctrl = get_loop(n->in(0));
      if (n_loop != loop_ctrl && n_loop->is_member(loop_ctrl)) {
        // n has a control input inside a loop but get_ctrl() is member of an outer loop. This could happen, for example,
        // for Div nodes inside a loop (control input inside loop) without a use except for an UCT (outside the loop).
        // Rewire control of n to right outside of the loop, regardless if its input(s) are later sunk or not.
        _igvn.replace_input_of(n, 0, place_outside_loop(n_ctrl, loop_ctrl));
      }
    }
    if (n_loop != _ltree_root && n->outcnt() > 1) {
      // Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
      // previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
      Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
      if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
          ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
        assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
        Node* outer_loop_clone = NULL;
        for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
          Node* u = n->last_out(j); // Clone private computation per use
          _igvn.rehash_node_delayed(u);
          Node* x = n->clone(); // Clone computation
          Node* x_ctrl = NULL;
          if (u->is_Phi()) {
            // Replace all uses of normal nodes.  Replace Phi uses
            // individually, so the separate Nodes can sink down
            // different paths.
            uint k = 1;
            while (u->in(k) != n) k++;
            u->set_req(k, x);
            // x goes next to Phi input path
            x_ctrl = u->in(0)->in(k);
            // Find control for 'x' next to use but not inside inner loops.
            x_ctrl = place_outside_loop(x_ctrl, n_loop);
            --j;
          } else {              // Normal use
            if (has_ctrl(u)) {
              x_ctrl = get_ctrl(u);
            } else {
              x_ctrl = u->in(0);
            }
            // Find control for 'x' next to use but not inside inner loops.
            x_ctrl = place_outside_loop(x_ctrl, n_loop);
            // Replace all uses
            if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
              // If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
              // anymore now that we're going to pin n as well
              _igvn.replace_node(u, x);
              --j;
            } else {
              int nb = u->replace_edge(n, x, &_igvn);
              j -= nb;
            }
          }

          if (n->is_Load()) {
            // For loads, add a control edge to a CFG node outside of the loop
            // to force them to not combine and return back inside the loop
            // during GVN optimization (4641526).
            assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");

            IdealLoopTree* x_loop = get_loop(x_ctrl);
            Node* x_head = x_loop->_head;
            if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
              // Do not add duplicate LoadNodes to the outer strip mined loop
              if (outer_loop_clone != NULL) {
                _igvn.replace_node(x, outer_loop_clone);
                continue;
              }
              outer_loop_clone = x;
            }
            x->set_req(0, x_ctrl);
          } else if (n->in(0) != NULL){
            x->set_req(0, x_ctrl);
          }
          assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
          assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
          register_new_node(x, x_ctrl);

          // Chain of AddP: (AddP base (AddP base )) must keep the same base after sinking so:
          // 1- We don't add a CastPP here when the first one is sunk so if the second one is not, their bases remain
          // the same.
          // (see 2- below)
          assert(!x->is_AddP() || !x->in(AddPNode::Address)->is_AddP() ||
                 x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base) ||
                 !x->in(AddPNode::Address)->in(AddPNode::Base)->eqv_uncast(x->in(AddPNode::Base)), "unexpected AddP shape");
          if (x->in(0) == NULL && !x->is_DecodeNarrowPtr() &&
              !(x->is_AddP() && x->in(AddPNode::Address)->is_AddP() && x->in(AddPNode::Address)->in(AddPNode::Base) == x->in(AddPNode::Base))) {
            assert(!x->is_Load(), "load should be pinned");
            // Use a cast node to pin clone out of loop
            Node* cast = NULL;
            for (uint k = 0; k < x->req(); k++) {
              Node* in = x->in(k);
              // Cast the first loop-varying input; the cast's control
              // dependency keeps the clone outside the loop.
              if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
                const Type* in_t = _igvn.type(in);
                cast = ConstraintCastNode::make_cast_for_type(x_ctrl, in, in_t, ConstraintCastNode::UnconditionalDependency);
              }
              if (cast != NULL) {
                register_new_node(cast, x_ctrl);
                x->replace_edge(in, cast);
                // Chain of AddP:
                // 2- A CastPP of the base is only added now that both AddP nodes are sunk
                if (x->is_AddP() && k == AddPNode::Base) {
                  for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
                    Node* u = x->fast_out(i);
                    if (u->is_AddP() && u->in(AddPNode::Base) == n->in(AddPNode::Base)) {
                      _igvn.replace_input_of(u, AddPNode::Base, cast);
                      assert(u->find_out_with(Op_AddP) == NULL, "more than 2 chained AddP nodes?");
                    }
                  }
                }
                break;
              }
            }
            assert(cast != NULL, "must have added a cast to pin the node");
          }
        }
        _igvn.remove_dead_node(n);
      }
      // NOTE(review): resets the per-round dom-LCA tag counter — presumably
      // invalidates cached tags used by the LCA queries above; confirm in
      // the dom_lca implementation.
      _dom_lca_tags_round = 0;
    }
  }
}

// Compute the early control of a node by following its inputs until we reach
// nodes that are pinned. Then compute the LCA of the control of all pinned nodes.
Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
  Node* early_ctrl = NULL;
  ResourceMark rm;
  Unique_Node_List wq;
  // Worklist-based walk over the transitive inputs of n, stopping at CFG
  // nodes and at pinned nodes (whose in(0) is their control).
  wq.push(n);
  for (uint i = 0; i < wq.size(); i++) {
    Node* m = wq.at(i);
    Node* c = NULL;
    if (m->is_CFG()) {
      c = m;
    } else if (m->pinned()) {
      c = m->in(0);
    } else {
      for (uint j = 0; j < m->req(); j++) {
        Node* in = m->in(j);
        if (in != NULL) {
          wq.push(in);
        }
      }
    }
    if (c != NULL) {
      assert(is_dominator(c, n_ctrl), "control input must dominate current control");
      // Keep the deepest (most dominated) of the pinning controls: the early
      // control must be dominated by every control found on the inputs.
      if (early_ctrl == NULL || is_dominator(early_ctrl, c)) {
        early_ctrl = c;
      }
    }
  }
  assert(is_dominator(early_ctrl, n_ctrl), "early control must dominate current control");
  return early_ctrl;
}

// Returns true iff the control of every use of n is out of n_loop (and out of
// its inner loops). Phi uses are checked per input path, against the
// corresponding region input's control.
bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* u = n->fast_out(i);
    if (u->is_Opaque1()) {
      return false; // Found loop limit, bugfix for 4677003
    }
    // We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
    // get_late_ctrl_with_anti_dep() use their own tag
    _dom_lca_tags_round++;
    assert(_dom_lca_tags_round != 0, "shouldn't wrap around");

    if (u->is_Phi()) {
      for (uint j = 1; j < u->req(); ++j) {
        if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
          return false;
        }
      }
    } else {
      Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
      if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
        return false;
      }
    }
  }
  return true;
}

// Helper for ctrl_of_all_uses_out_of_loop(): true iff the given use control is
// neither in n_loop itself nor in one of its inner loops. For loads, the use
// control is first adjusted for anti-dependences.
bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
  if (n->is_Load()) {
    ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
  }
  IdealLoopTree *u_loop = get_loop(ctrl);
  if (u_loop == n_loop) {
    return false; // Found loop-varying use
  }
  if (n_loop->is_member(u_loop)) {
    return false; // Found use in inner loop
  }
  return true;
}

//------------------------------split_if_with_blocks---------------------------
// Check for aggressive application of 'split-if' optimization,
// using basic block level info.
// Non-recursive depth-first traversal of the whole graph from root, using
// nstack as an explicit stack: pre-visit work on the way down, post-visit
// (the actual split-if transformations) on the way back up.
void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
  Node* root = C->root();
  visited.set(root->_idx); // first, mark root as visited
  // Do pre-visit work for root
  Node* n = split_if_with_blocks_pre(root);
  uint cnt = n->outcnt();
  uint i = 0;

  while (true) {
    // Visit all children
    if (i < cnt) {
      Node* use = n->raw_out(i);
      ++i;
      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
        // Now do pre-visit work for this use
        use = split_if_with_blocks_pre(use);
        nstack.push(n, i); // Save parent and next use's index.
        n   = use;         // Process all children of current use.
        cnt = use->outcnt();
        i   = 0;
      }
    }
    else {
      // All of n's children have been processed, complete post-processing.
      if (cnt != 0 && !n->is_Con()) {
        assert(has_node(n), "no dead nodes");
        split_if_with_blocks_post(n);
      }
      if (must_throttle_split_if()) {
        // Node budget exhausted: abandon the remaining traversal.
        nstack.clear();
      }
      if (nstack.is_empty()) {
        // Finished all nodes on stack.
        break;
      }
      // Get saved parent node and next use's index. Visit the rest of uses.
      n   = nstack.node();
      cnt = n->outcnt();
      i   = nstack.index();
      nstack.pop();
    }
  }
}


//=============================================================================
//
//                   C L O N E   A   L O O P   B O D Y
//

//------------------------------clone_iff--------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
Node* PhaseIdealLoop::clone_iff(PhiNode* phi) {

  // Convert this Phi into a Phi merging Bools
  uint i;
  for (i = 1; i < phi->req(); i++) {
    Node *b = phi->in(i);
    if (b->is_Phi()) {
      // Nested merge point: recurse so each input is a Bool (or Opaque4).
      _igvn.replace_input_of(phi, i, clone_iff(b->as_Phi()));
    } else {
      assert(b->is_Bool() || b->Opcode() == Op_Opaque4, "");
    }
  }

  // Use the first input as the template; an Opaque4 wraps its Bool.
  Node* n = phi->in(1);
  Node* sample_opaque = NULL;
  Node *sample_bool = NULL;
  if (n->Opcode() == Op_Opaque4) {
    sample_opaque = n;
    sample_bool = n->in(1);
    assert(sample_bool->is_Bool(), "wrong type");
  } else {
    sample_bool = n;
  }
  Node* sample_cmp = sample_bool->in(1);
  const Type* t = Type::TOP;
  const TypePtr* at = NULL;
  if (sample_cmp->is_FlatArrayCheck()) {
    // Left input of a FlatArrayCheckNode is memory, set the (adr) type of the phi accordingly
    assert(sample_cmp->in(1)->bottom_type() == Type::MEMORY, "unexpected input type");
    t = Type::MEMORY;
    at = TypeRawPtr::BOTTOM;
  }

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode(phi->in(0), t, at);
  PhiNode *phi2 = new PhiNode(phi->in(0), Type::TOP);
  for (i = 1; i < phi->req(); i++) {
    // Dig down to the Cmp's operands: with an Opaque4 wrapper there is one
    // extra level of in(1) to traverse.
    Node *n1 = sample_opaque == NULL ? phi->in(i)->in(1)->in(1) : phi->in(i)->in(1)->in(1)->in(1);
    Node *n2 = sample_opaque == NULL ? phi->in(i)->in(1)->in(2) : phi->in(i)->in(1)->in(1)->in(2);
    phi1->set_req(i, n1);
    phi2->set_req(i, n2);
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }
  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if (hit1) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);      // Remove new phi
    assert(hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;             // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if (hit2) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);      // Remove new phi
    assert(hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;             // Use existing phi
  } else {                             // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req(1, phi1);
  cmp->set_req(2, phi2);
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  // Make a new Bool
  Node *b = sample_bool->clone();
  b->set_req(1,cmp);
  _igvn.register_new_node_with_optimizer(b);
  set_ctrl(b, phi->in(0));

  if (sample_opaque != NULL) {
    // Preserve the Opaque4 wrapper around the rebuilt Bool.
    Node* opaque = sample_opaque->clone();
    opaque->set_req(1, b);
    _igvn.register_new_node_with_optimizer(opaque);
    set_ctrl(opaque, phi->in(0));
    return opaque;
  }

  assert(b->is_Bool(), "");
  return b;
}

//------------------------------clone_bool-------------------------------------
// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
// "Nearly" because all Nodes have been cloned from the original in the loop,
// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
// through the Phi recursively, and return a Bool.
CmpNode*PhaseIdealLoop::clone_bool(PhiNode* phi) {
  uint i;
  // Convert this Phi into a Phi merging Bools
  for( i = 1; i < phi->req(); i++ ) {
    Node *b = phi->in(i);
    if( b->is_Phi() ) {
      // Nested merge point: recurse so each input is a Cmp (or TOP).
      _igvn.replace_input_of(phi, i, clone_bool(b->as_Phi()));
    } else {
      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
    }
  }

  Node *sample_cmp = phi->in(1);

  // Make Phis to merge the Cmp's inputs.
  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
  for( uint j = 1; j < phi->req(); j++ ) {
    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
    Node *n1, *n2;
    if( cmp_top->is_Cmp() ) {
      n1 = cmp_top->in(1);
      n2 = cmp_top->in(2);
    } else {
      // TOP input: feed TOP into both operand Phis on this path.
      n1 = n2 = cmp_top;
    }
    phi1->set_req( j, n1 );
    phi2->set_req( j, n2 );
    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
  }

  // See if these Phis have been made before.
  // Register with optimizer
  Node *hit1 = _igvn.hash_find_insert(phi1);
  if( hit1 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi1);       // Remove new phi
    assert( hit1->is_Phi(), "" );
    phi1 = (PhiNode*)hit1;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi1);
  }
  Node *hit2 = _igvn.hash_find_insert(phi2);
  if( hit2 ) {                          // Hit, toss just made Phi
    _igvn.remove_dead_node(phi2);       // Remove new phi
    assert( hit2->is_Phi(), "" );
    phi2 = (PhiNode*)hit2;              // Use existing phi
  } else {                              // Miss
    _igvn.register_new_node_with_optimizer(phi2);
  }
  // Register Phis with loop/block info
  set_ctrl(phi1, phi->in(0));
  set_ctrl(phi2, phi->in(0));
  // Make a new Cmp
  Node *cmp = sample_cmp->clone();
  cmp->set_req( 1, phi1 );
  cmp->set_req( 2, phi2 );
  _igvn.register_new_node_with_optimizer(cmp);
  set_ctrl(cmp, phi->in(0));

  assert( cmp->is_Cmp(), "" );
  return (CmpNode*)cmp;
}

//------------------------------sink_use---------------------------------------
// If 'use' was in the loop-exit block, it now needs to be sunk
// below the post-loop merge point.
// Recursively sinks the whole data chain hanging off 'use' that was pinned to
// the old exit block (post_loop->in(2)).
void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
    set_ctrl(use, post_loop);
    for (DUIterator j = use->outs(); use->has_out(j); j++)
      sink_use(use->out(j), post_loop);
  }
}

// Fix up uses of 'old' (a node of the just-cloned loop body) that live outside
// both the loop and its (strip-mined) outer loop: queue If/Bool/CreateEx uses
// for later splitting and insert Phis at the exit merge points so each such
// use merges the old and new loop-exit values.
void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
                                                 IdealLoopTree* loop, IdealLoopTree* outer_loop,
                                                 Node_List*& split_if_set, Node_List*& split_bool_set,
                                                 Node_List*& split_cex_set, Node_List& worklist,
                                                 uint new_counter, CloneLoopMode mode) {
  Node* nnn = old_new[old->_idx];
  // Copy uses to a worklist, so I can munge the def-use info
  // with impunity.
  for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
    worklist.push(old->fast_out(j));

  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use))  continue; // Ignore dead nodes
    if (use->in(0) == C->top())  continue;
    IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
    // Check for data-use outside of loop - at least one of OLD or USE
    // must not be a CFG node.
#ifdef ASSERT
    if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
      Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
      assert(mode != IgnoreStripMined, "incorrect cloning mode");
      assert((mode == ControlAroundStripMined && use == sfpt) || !use->is_reachable_from_root(), "missed a node");
    }
#endif
    if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {

      // If the Data use is an IF, that means we have an IF outside of the
      // loop that is switching on a condition that is set inside of the
      // loop.  Happens if people set a loop-exit flag; then test the flag
      // in the loop to break the loop, then test is again outside of the
      // loop to determine which way the loop exited.
      // Loop predicate If node connects to Bool node through Opaque1 node.
      //
      // If the use is an AllocateArray through its ValidLengthTest input,
      // make sure the Bool/Cmp input is cloned down to avoid a Phi between
      // the AllocateArray node and its ValidLengthTest input that could cause
      // split if to break.
      if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4 ||
          (use->Opcode() == Op_AllocateArray && use->in(AllocateNode::ValidLengthTest) == old)) {
        // Since this code is highly unlikely, we lazily build the worklist
        // of such Nodes to go split.
        if (!split_if_set) {
          split_if_set = new Node_List();
        }
        split_if_set->push(use);
      }
      if (use->is_Bool()) {
        if (!split_bool_set) {
          split_bool_set = new Node_List();
        }
        split_bool_set->push(use);
      }
      if (use->Opcode() == Op_CreateEx) {
        if (!split_cex_set) {
          split_cex_set = new Node_List();
        }
        split_cex_set->push(use);
      }


      // Get "block" use is in
      uint idx = 0;
      while( use->in(idx) != old ) idx++;
      Node *prev = use->is_CFG() ? use : get_ctrl(use);
      assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
      // Nodes created by this cloning pass (idx >= new_counter) are the merge
      // Regions made in fix_ctrl_uses(): their in(2) is the old-loop exit path.
      Node *cfg = prev->_idx >= new_counter
        ? prev->in(2)
        : idom(prev);
      if( use->is_Phi() )     // Phi use is in prior block
        cfg = prev->in(idx);  // NOT in block of Phi itself
      if (cfg->is_top()) {    // Use is dead?
        _igvn.replace_input_of(use, idx, C->top());
        continue;
      }

      // If use is referenced through control edge... (idx == 0)
      if (mode == IgnoreStripMined && idx == 0) {
        LoopNode *head = loop->_head->as_Loop();
        if (head->is_strip_mined() && is_dominator(head->outer_loop_exit(), prev)) {
          // That node is outside the inner loop, leave it outside the
          // outer loop as well to not confuse verification code.
          assert(!loop->_parent->is_member(use_loop), "should be out of the outer loop");
          _igvn.replace_input_of(use, 0, head->outer_loop_exit());
          continue;
        }
      }

      // Walk up the dominator tree (through new merge Regions) until we reach
      // a block that belongs to the (outer) loop: that is the loop exit the
      // old value flowed through.
      while(!outer_loop->is_member(get_loop(cfg))) {
        prev = cfg;
        cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
      }
      // If the use occurs after merging several exits from the loop, then
      // old value must have dominated all those exits.  Since the same old
      // value was used on all those exits we did not need a Phi at this
      // merge point.  NOW we do need a Phi here.  Each loop exit value
      // is now merged with the peeled body exit; each exit gets its own
      // private Phi and those Phis need to be merged here.
      Node *phi;
      if( prev->is_Region() ) {
        if( idx == 0 ) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          phi = PhiNode::make( prev, old );
          // Now recursively fix up the new uses of old!
          for( uint i = 1; i < prev->req(); i++ ) {
            worklist.push(phi); // Onto worklist once for each 'old' input
          }
        }
      } else {
        // Get new RegionNode merging old and new loop exits
        prev = old_new[prev->_idx];
        assert( prev, "just made this in step 7" );
        if( idx == 0 ) {      // Updating control edge?
          phi = prev;         // Just use existing control
        } else {              // Else need a new Phi
          // Make a new Phi merging data values properly
          phi = PhiNode::make( prev, old );
          phi->set_req( 1, nnn );
        }
      }
      // If inserting a new Phi, check for prior hits
      if( idx != 0 ) {
        Node *hit = _igvn.hash_find_insert(phi);
        if( hit == NULL ) {
          _igvn.register_new_node_with_optimizer(phi); // Register new phi
        } else {                                      // or
          // Remove the new phi from the graph and use the hit
          _igvn.remove_dead_node(phi);
          phi = hit;                                  // Use existing phi
        }
        set_ctrl(phi, prev);
      }
      // Make 'use' use the Phi instead of the old loop body exit value
      _igvn.replace_input_of(use, idx, phi);
      if( use->_idx >= new_counter ) { // If updating new phis
        // Not needed for correctness, but prevents a weak assert
        // in AddPNode from tripping (when we end up with different
        // base & derived Phis that will become the same after
        // IGVN does CSE).
        Node *hit = _igvn.hash_find_insert(use);
        if( hit )      // Go ahead and re-hash for hits.
          _igvn.replace_node( use, hit );
      }

      // If 'use' was in the loop-exit block, it now needs to be sunk
      // below the post-loop merge point.
      sink_use( use, prev );
    }
  }
}

// Collect (into wq) data nodes whose control is in the outer strip-mined loop
// but that are not kept there by the safepoint: either their get_ctrl() is in
// the outer loop, or they are pinned (in(0)) to a control in the outer loop.
// When check_old_new is true, nodes that already have a clone are skipped.
static void collect_nodes_in_outer_loop_not_reachable_from_sfpt(Node* n, const IdealLoopTree *loop, const IdealLoopTree* outer_loop,
                                                                const Node_List &old_new, Unique_Node_List& wq, PhaseIdealLoop* phase,
                                                                bool check_old_new) {
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* u = n->fast_out(j);
    assert(check_old_new || old_new[u->_idx] == NULL, "shouldn't have been cloned");
    if (!u->is_CFG() && (!check_old_new || old_new[u->_idx] == NULL)) {
      Node* c = phase->get_ctrl(u);
      IdealLoopTree* u_loop = phase->get_loop(c);
      assert(!loop->is_member(u_loop) || !loop->_body.contains(u), "can be in outer loop or out of both loops only");
      if (!loop->is_member(u_loop)) {
        if (outer_loop->is_member(u_loop)) {
          wq.push(u);
        } else {
          // nodes pinned with control in the outer loop but not referenced from the safepoint must be moved out of
          // the outer loop too
          Node* u_c = u->in(0);
          if (u_c != NULL) {
            IdealLoopTree* u_c_loop = phase->get_loop(u_c);
            if (outer_loop->is_member(u_c_loop) && !loop->is_member(u_c_loop)) {
              wq.push(u);
            }
          }
        }
      }
    }
  }
}

// Clone the skeleton of the outer strip-mined loop (outer Loop, tail, loop
// end, safepoint, inner-loop exit projection) — or, when the outer loop is not
// included in the clone, strip the cloned head — and then clone/relocate the
// data nodes that were assigned a control in the outer loop.
void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
                                      IdealLoopTree* outer_loop, int dd, Node_List &old_new,
                                      Node_List& extra_data_nodes) {
  if (head->is_strip_mined() && mode != IgnoreStripMined) {
    CountedLoopNode* cl = head->as_CountedLoop();
    Node* l = cl->outer_loop();
    Node* tail = cl->outer_loop_tail();
    IfNode* le = cl->outer_loop_end();
    Node* sfpt = cl->outer_safepoint();
    CountedLoopEndNode* cle = cl->loopexit();
    CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
    CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit_or_null();
    Node* cle_out = cle->proj_out(false);

    Node* new_sfpt = NULL;
    Node* new_cle_out = cle_out->clone();
    old_new.map(cle_out->_idx, new_cle_out);
    if (mode == CloneIncludesStripMined) {
      // clone outer loop body
      Node* new_l = l->clone();
      Node* new_tail = tail->clone();
      IfNode* new_le = le->clone()->as_If();
      new_sfpt = sfpt->clone();

      // The cloned outer-loop skeleton lives in the parent of the outer loop.
      set_loop(new_l, outer_loop->_parent);
      set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
      set_loop(new_cle_out, outer_loop->_parent);
      set_idom(new_cle_out, new_cle, dd);
      set_loop(new_sfpt, outer_loop->_parent);
      set_idom(new_sfpt, new_cle_out, dd);
      set_loop(new_le, outer_loop->_parent);
      set_idom(new_le, new_sfpt, dd);
      set_loop(new_tail, outer_loop->_parent);
      set_idom(new_tail, new_le, dd);
      set_idom(new_cl, new_l, dd);

      old_new.map(l->_idx, new_l);
      old_new.map(tail->_idx, new_tail);
      old_new.map(le->_idx, new_le);
      old_new.map(sfpt->_idx, new_sfpt);

      // Wire the cloned skeleton: l -> cl ... cle -> cle_out -> sfpt -> le -> tail -> l
      new_l->set_req(LoopNode::LoopBackControl, new_tail);
      new_l->set_req(0, new_l);
      new_tail->set_req(0, new_le);
      new_le->set_req(0, new_sfpt);
      new_sfpt->set_req(0, new_cle_out);
      new_cle_out->set_req(0, new_cle);
      new_cl->set_req(LoopNode::EntryControl, new_l);

      _igvn.register_new_node_with_optimizer(new_l);
      _igvn.register_new_node_with_optimizer(new_tail);
      _igvn.register_new_node_with_optimizer(new_le);
    } else {
      // The clone does not get an outer strip-mined loop: mark it as such and
      // bypass the old outer loop header on its entry control.
      Node *newhead = old_new[loop->_head->_idx];
      newhead->as_Loop()->clear_strip_mined();
      _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
      set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
    }
    // Look at data node that were assigned a control in the outer
    // loop: they are kept in the outer loop by the safepoint so start
    // from the safepoint node's inputs.
    // NOTE(review): this local shadows the 'outer_loop' parameter; both appear
    // to denote the outer strip-mined loop here — confirm intentional.
    IdealLoopTree* outer_loop = get_loop(l);
    Node_Stack stack(2);
    stack.push(sfpt, 1);
    uint new_counter = C->unique();
    while (stack.size() > 0) {
      Node* n = stack.node();
      uint i = stack.index();
      // Skip inputs that don't need cloning: absent, not floating, not in the
      // outer loop, or already cloned by this pass.
      while (i < n->req() &&
             (n->in(i) == NULL ||
              !has_ctrl(n->in(i)) ||
              get_loop(get_ctrl(n->in(i))) != outer_loop ||
              (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
        i++;
      }
      if (i < n->req()) {
        stack.set_index(i+1);
        stack.push(n->in(i), 0);
      } else {
        // All inputs handled (post-order): clone n itself.
        assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
        Node* m = n == sfpt ? new_sfpt : n->clone();
        if (m != NULL) {
          for (uint i = 0; i < n->req(); i++) {
            if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
              m->set_req(i, old_new[m->in(i)->_idx]);
            }
          }
        } else {
          assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
        }
        if (n != sfpt) {
          extra_data_nodes.push(n);
          _igvn.register_new_node_with_optimizer(m);
          assert(get_ctrl(n) == cle_out, "what other control?");
          set_ctrl(m, new_cle_out);
          old_new.map(n->_idx, m);
        }
        stack.pop();
      }
    }
    if (mode == CloneIncludesStripMined) {
      _igvn.register_new_node_with_optimizer(new_sfpt);
      _igvn.register_new_node_with_optimizer(new_cle_out);
    }
    // Some other transformation may have pessimistically assigned some
    // data nodes to the outer loop. Set their control so they are out
    // of the outer loop.
    ResourceMark rm;
    Unique_Node_List wq;
    for (uint i = 0; i < extra_data_nodes.size(); i++) {
      Node* old = extra_data_nodes.at(i);
      collect_nodes_in_outer_loop_not_reachable_from_sfpt(old, loop, outer_loop, old_new, wq, this, true);
    }

    for (uint i = 0; i < loop->_body.size(); i++) {
      Node* old = loop->_body.at(i);
      collect_nodes_in_outer_loop_not_reachable_from_sfpt(old, loop, outer_loop, old_new, wq, this, true);
    }

    Node* inner_out = sfpt->in(0);
    if (inner_out->outcnt() > 1) {
      collect_nodes_in_outer_loop_not_reachable_from_sfpt(inner_out, loop, outer_loop, old_new, wq, this, true);
    }

    // Move every collected node (and, transitively, its qualifying uses) to
    // the outer loop exit, out of the loop nest.
    Node* new_ctrl = cl->outer_loop_exit();
    assert(get_loop(new_ctrl) != outer_loop, "must be out of the loop nest");
    for (uint i = 0; i < wq.size(); i++) {
      Node* n = wq.at(i);
      set_ctrl(n, new_ctrl);
      if (n->in(0) != NULL) {
        _igvn.replace_input_of(n, 0, new_ctrl);
      }
      collect_nodes_in_outer_loop_not_reachable_from_sfpt(n, loop, outer_loop, old_new, wq, this, false);
    }
  } else {
    Node *newhead = old_new[loop->_head->_idx];
    set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
  }
}

//------------------------------clone_loop-------------------------------------
//
//                   C L O N E   A   L O O P   B O D Y
//
// This is the basic building block of the loop optimizations.  It clones an
// entire loop body.  It makes an old_new loop body mapping; with this mapping
// you can find the new-loop equivalent to an old-loop node.  All new-loop
// nodes are exactly equal to their old-loop counterparts, all edges are the
// same.  All exits from the old-loop now have a RegionNode that merges the
// equivalent new-loop path.  This is true even for the normal "loop-exit"
// condition.  All uses of loop-invariant old-loop values now come from (one
// or more) Phis that merge their new-loop equivalents.
//
// This operation leaves the graph in an illegal state: there are two valid
// control edges coming from the loop pre-header to both loop bodies.  I'll
// definitely have to hack the graph after running this transform.
//
// From this building block I will further edit edges to perform loop peeling
// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
//
// Parameter side_by_side_idom:
//   When side_by_side_idom is NULL, the dominator tree is constructed for
//      the clone loop to dominate the original.  Used in construction of
//      pre-main-post loop sequence.
//   When nonnull, the clone and original are side-by-side, both are
//      dominated by the side_by_side_idom node.  Used in construction of
//      unswitched loops.
void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
                                 CloneLoopMode mode, Node* side_by_side_idom) {

  LoopNode* head = loop->_head->as_Loop();
  head->verify_strip_mined(1);

  if (C->do_vector_loop() && PrintOpto) {
    const char* mname = C->method()->name()->as_quoted_ascii();
    if (mname != NULL) {
      tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
    }
  }

  CloneMap& cm = C->clone_map();
  if (C->do_vector_loop()) {
    // Track clone generations for vectorization support.
    cm.set_clone_idx(cm.max_gen()+1);
#ifndef PRODUCT
    if (PrintOpto) {
      tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
      loop->dump_head();
    }
#endif
  }

  // Step 1: Clone the loop body.  Make the old->new mapping.
  clone_loop_body(loop->_body, old_new, &cm);

  IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;

  // Step 2: Fix the edges in the new body.  If the old input is outside the
  // loop use it.  If the old input is INside the loop, use the corresponding
  // new node instead.
  fix_body_edges(loop->_body, loop, old_new, dd, outer_loop->_parent, false);

  Node_List extra_data_nodes; // data nodes in the outer strip mined loop
  clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);

  // Step 3: Now fix control uses.  Loop varying control uses have already
  // been fixed up (as part of all input edges in Step 2).  Loop invariant
  // control uses must be either an IfFalse or an IfTrue.  Make a merge
  // point to merge the old and new IfFalse/IfTrue nodes; make the use
  // refer to this.
  Node_List worklist;
  uint new_counter = C->unique();
  fix_ctrl_uses(loop->_body, loop, old_new, mode, side_by_side_idom, &cm, worklist);

  // Step 4: If loop-invariant use is not control, it must be dominated by a
  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
  // there if needed.  Make a Phi there merging old and new used values.
  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  fix_data_uses(loop->_body, loop, mode, outer_loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);

  for (uint i = 0; i < extra_data_nodes.size(); i++) {
    Node* old = extra_data_nodes.at(i);
    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
                                split_bool_set, split_cex_set, worklist, new_counter,
                                mode);
  }

  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
  // the loop uses a condition set in the loop.  The original IF probably
  // takes control from one or more OLD Regions (which in turn get from NEW
  // Regions).  In any case, there will be a set of Phis for each merge point
  // from the IF up to where the original BOOL def exits the loop.
  finish_clone_loop(split_if_set, split_bool_set, split_cex_set);

}

// Drain the worklists accumulated by fix_data_uses()/clone_loop_handle_data_uses():
// clone Bool/Cmp chains through the Phis feeding split Ifs (and AllocateArray
// ValidLengthTest inputs) and split queued Bool and CreateEx nodes.
void PhaseIdealLoop::finish_clone_loop(Node_List* split_if_set, Node_List* split_bool_set, Node_List* split_cex_set) {
  if (split_if_set) {
    while (split_if_set->size()) {
      Node *iff = split_if_set->pop();
      // AllocateArray nodes were queued via their ValidLengthTest input;
      // everything else tests its in(1).
      uint input = iff->Opcode() == Op_AllocateArray ? AllocateNode::ValidLengthTest : 1;
      if (iff->in(input)->is_Phi()) {
        Node *b = clone_iff(iff->in(input)->as_Phi());
        _igvn.replace_input_of(iff, input, b);
      }
    }
  }
  if (split_bool_set) {
    while (split_bool_set->size()) {
      Node *b = split_bool_set->pop();
      Node *phi = b->in(1);
      assert(phi->is_Phi(), "");
      CmpNode *cmp = clone_bool((PhiNode*) phi);
      _igvn.replace_input_of(b, 1, cmp);
    }
  }
  if (split_cex_set) {
    while (split_cex_set->size()) {
      Node *b = split_cex_set->pop();
      assert(b->in(0)->is_Region(), "");
      assert(b->in(1)->is_Phi(), "");
      assert(b->in(0)->in(0) == b->in(1)->in(0), "");
      split_up(b, b->in(0), NULL);
    }
  }
}

// Step 4 of clone_loop(): run clone_loop_handle_data_uses() over every node of
// the loop body, accumulating the split worklists for finish_clone_loop().
void PhaseIdealLoop::fix_data_uses(Node_List& body, IdealLoopTree* loop, CloneLoopMode mode, IdealLoopTree* outer_loop,
                                   uint new_counter, Node_List &old_new, Node_List &worklist, Node_List*& split_if_set,
                                   Node_List*& split_bool_set, Node_List*& split_cex_set) {
  for(uint i = 0; i < body.size(); i++ ) {
    Node* old = body.at(i);
    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
                                split_bool_set, split_cex_set, worklist, new_counter,
                                mode);
  }
}

void PhaseIdealLoop::fix_ctrl_uses(const Node_List& body, const IdealLoopTree* loop, Node_List &old_new, CloneLoopMode mode,
                                   Node* side_by_side_idom, CloneMap* cm, Node_List &worklist) {
  LoopNode* head = loop->_head->as_Loop();
  for(uint i = 0; i < body.size(); i++ ) {
    Node* old = body.at(i);
    if( !old->is_CFG() ) continue;

    // Copy
uses to a worklist, so I can munge the def-use info 2664 // with impunity. 2665 for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++) { 2666 worklist.push(old->fast_out(j)); 2667 } 2668 2669 while (worklist.size()) { // Visit all uses 2670 Node *use = worklist.pop(); 2671 if (!has_node(use)) continue; // Ignore dead nodes 2672 IdealLoopTree *use_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use ); 2673 if (!loop->is_member(use_loop) && use->is_CFG()) { 2674 // Both OLD and USE are CFG nodes here. 2675 assert(use->is_Proj(), "" ); 2676 Node* nnn = old_new[old->_idx]; 2677 2678 Node* newuse = NULL; 2679 if (head->is_strip_mined() && mode != IgnoreStripMined) { 2680 CountedLoopNode* cl = head->as_CountedLoop(); 2681 CountedLoopEndNode* cle = cl->loopexit(); 2682 Node* cle_out = cle->proj_out_or_null(false); 2683 if (use == cle_out) { 2684 IfNode* le = cl->outer_loop_end(); 2685 use = le->proj_out(false); 2686 use_loop = get_loop(use); 2687 if (mode == CloneIncludesStripMined) { 2688 nnn = old_new[le->_idx]; 2689 } else { 2690 newuse = old_new[cle_out->_idx]; 2691 } 2692 } 2693 } 2694 if (newuse == NULL) { 2695 newuse = use->clone(); 2696 } 2697 2698 // Clone the loop exit control projection 2699 if (C->do_vector_loop() && cm != NULL) { 2700 cm->verify_insert_and_clone(use, newuse, cm->clone_idx()); 2701 } 2702 newuse->set_req(0,nnn); 2703 _igvn.register_new_node_with_optimizer(newuse); 2704 set_loop(newuse, use_loop); 2705 set_idom(newuse, nnn, dom_depth(nnn) + 1 ); 2706 2707 // We need a Region to merge the exit from the peeled body and the 2708 // exit from the old loop body. 2709 RegionNode *r = new RegionNode(3); 2710 // Map the old use to the new merge point 2711 old_new.map( use->_idx, r ); 2712 uint dd_r = MIN2(dom_depth(newuse), dom_depth(use)); 2713 assert(dd_r >= dom_depth(dom_lca(newuse, use)), "" ); 2714 2715 // The original user of 'use' uses 'r' instead. 
2716 for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) { 2717 Node* useuse = use->last_out(l); 2718 _igvn.rehash_node_delayed(useuse); 2719 uint uses_found = 0; 2720 if (useuse->in(0) == use) { 2721 useuse->set_req(0, r); 2722 uses_found++; 2723 if (useuse->is_CFG()) { 2724 // This is not a dom_depth > dd_r because when new 2725 // control flow is constructed by a loop opt, a node and 2726 // its dominator can end up at the same dom_depth 2727 assert(dom_depth(useuse) >= dd_r, ""); 2728 set_idom(useuse, r, dom_depth(useuse)); 2729 } 2730 } 2731 for (uint k = 1; k < useuse->req(); k++) { 2732 if( useuse->in(k) == use ) { 2733 useuse->set_req(k, r); 2734 uses_found++; 2735 if (useuse->is_Loop() && k == LoopNode::EntryControl) { 2736 // This is not a dom_depth > dd_r because when new 2737 // control flow is constructed by a loop opt, a node 2738 // and its dominator can end up at the same dom_depth 2739 assert(dom_depth(useuse) >= dd_r , ""); 2740 set_idom(useuse, r, dom_depth(useuse)); 2741 } 2742 } 2743 } 2744 l -= uses_found; // we deleted 1 or more copies of this edge 2745 } 2746 2747 // Now finish up 'r' 2748 r->set_req(1, newuse); 2749 r->set_req(2, use); 2750 _igvn.register_new_node_with_optimizer(r); 2751 set_loop(r, use_loop); 2752 set_idom(r, (side_by_side_idom == NULL) ? 
newuse->in(0) : side_by_side_idom, dd_r); 2753 } // End of if a loop-exit test 2754 } 2755 } 2756 } 2757 2758 void PhaseIdealLoop::fix_body_edges(const Node_List &body, IdealLoopTree* loop, const Node_List &old_new, int dd, 2759 IdealLoopTree* parent, bool partial) { 2760 for(uint i = 0; i < body.size(); i++ ) { 2761 Node *old = body.at(i); 2762 Node *nnn = old_new[old->_idx]; 2763 // Fix CFG/Loop controlling the new node 2764 if (has_ctrl(old)) { 2765 set_ctrl(nnn, old_new[get_ctrl(old)->_idx]); 2766 } else { 2767 set_loop(nnn, parent); 2768 if (old->outcnt() > 0) { 2769 Node* dom = idom(old); 2770 if (old_new[dom->_idx] != NULL) { 2771 dom = old_new[dom->_idx]; 2772 set_idom(nnn, dom, dd ); 2773 } 2774 } 2775 } 2776 // Correct edges to the new node 2777 for (uint j = 0; j < nnn->req(); j++) { 2778 Node *n = nnn->in(j); 2779 if (n != NULL) { 2780 IdealLoopTree *old_in_loop = get_loop(has_ctrl(n) ? get_ctrl(n) : n); 2781 if (loop->is_member(old_in_loop)) { 2782 if (old_new[n->_idx] != NULL) { 2783 nnn->set_req(j, old_new[n->_idx]); 2784 } else { 2785 assert(!body.contains(n), ""); 2786 assert(partial, "node not cloned"); 2787 } 2788 } 2789 } 2790 } 2791 _igvn.hash_find_insert(nnn); 2792 } 2793 } 2794 2795 void PhaseIdealLoop::clone_loop_body(const Node_List& body, Node_List &old_new, CloneMap* cm) { 2796 for (uint i = 0; i < body.size(); i++) { 2797 Node* old = body.at(i); 2798 Node* nnn = old->clone(); 2799 old_new.map(old->_idx, nnn); 2800 if (old->is_reduction()) { 2801 // Reduction flag is not copied by default. Copy it here when cloning the entire loop body. 
2802 nnn->add_flag(Node::Flag_is_reduction); 2803 } 2804 if (C->do_vector_loop() && cm != NULL) { 2805 cm->verify_insert_and_clone(old, nnn, cm->clone_idx()); 2806 } 2807 _igvn.register_new_node_with_optimizer(nnn); 2808 } 2809 } 2810 2811 2812 //---------------------- stride_of_possible_iv ------------------------------------- 2813 // Looks for an iff/bool/comp with one operand of the compare 2814 // being a cycle involving an add and a phi, 2815 // with an optional truncation (left-shift followed by a right-shift) 2816 // of the add. Returns zero if not an iv. 2817 int PhaseIdealLoop::stride_of_possible_iv(Node* iff) { 2818 Node* trunc1 = NULL; 2819 Node* trunc2 = NULL; 2820 const TypeInteger* ttype = NULL; 2821 if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) { 2822 return 0; 2823 } 2824 BoolNode* bl = iff->in(1)->as_Bool(); 2825 Node* cmp = bl->in(1); 2826 if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) { 2827 return 0; 2828 } 2829 // Must have an invariant operand 2830 if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) { 2831 return 0; 2832 } 2833 Node* add2 = NULL; 2834 Node* cmp1 = cmp->in(1); 2835 if (cmp1->is_Phi()) { 2836 // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) ))) 2837 Node* phi = cmp1; 2838 for (uint i = 1; i < phi->req(); i++) { 2839 Node* in = phi->in(i); 2840 Node* add = CountedLoopNode::match_incr_with_optional_truncation(in, 2841 &trunc1, &trunc2, &ttype, T_INT); 2842 if (add && add->in(1) == phi) { 2843 add2 = add->in(2); 2844 break; 2845 } 2846 } 2847 } else { 2848 // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) 
add2)) ))) 2849 Node* addtrunc = cmp1; 2850 Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc, 2851 &trunc1, &trunc2, &ttype, T_INT); 2852 if (add && add->in(1)->is_Phi()) { 2853 Node* phi = add->in(1); 2854 for (uint i = 1; i < phi->req(); i++) { 2855 if (phi->in(i) == addtrunc) { 2856 add2 = add->in(2); 2857 break; 2858 } 2859 } 2860 } 2861 } 2862 if (add2 != NULL) { 2863 const TypeInt* add2t = _igvn.type(add2)->is_int(); 2864 if (add2t->is_con()) { 2865 return add2t->get_con(); 2866 } 2867 } 2868 return 0; 2869 } 2870 2871 2872 //---------------------- stay_in_loop ------------------------------------- 2873 // Return the (unique) control output node that's in the loop (if it exists.) 2874 Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) { 2875 Node* unique = NULL; 2876 if (!n) return NULL; 2877 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { 2878 Node* use = n->fast_out(i); 2879 if (!has_ctrl(use) && loop->is_member(get_loop(use))) { 2880 if (unique != NULL) { 2881 return NULL; 2882 } 2883 unique = use; 2884 } 2885 } 2886 return unique; 2887 } 2888 2889 //------------------------------ register_node ------------------------------------- 2890 // Utility to register node "n" with PhaseIdealLoop 2891 void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) { 2892 _igvn.register_new_node_with_optimizer(n); 2893 loop->_body.push(n); 2894 if (n->is_CFG()) { 2895 set_loop(n, loop); 2896 set_idom(n, pred, ddepth); 2897 } else { 2898 set_ctrl(n, pred); 2899 } 2900 } 2901 2902 //------------------------------ proj_clone ------------------------------------- 2903 // Utility to create an if-projection 2904 ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) { 2905 ProjNode* c = p->clone()->as_Proj(); 2906 c->set_req(0, iff); 2907 return c; 2908 } 2909 2910 //------------------------------ short_circuit_if ------------------------------------- 2911 // Force the iff control 
output to be the live_proj 2912 Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) { 2913 guarantee(live_proj != NULL, "null projection"); 2914 int proj_con = live_proj->_con; 2915 assert(proj_con == 0 || proj_con == 1, "false or true projection"); 2916 Node *con = _igvn.intcon(proj_con); 2917 set_ctrl(con, C->root()); 2918 if (iff) { 2919 iff->set_req(1, con); 2920 } 2921 return con; 2922 } 2923 2924 //------------------------------ insert_if_before_proj ------------------------------------- 2925 // Insert a new if before an if projection (* - new node) 2926 // 2927 // before 2928 // if(test) 2929 // / \ 2930 // v v 2931 // other-proj proj (arg) 2932 // 2933 // after 2934 // if(test) 2935 // / \ 2936 // / v 2937 // | * proj-clone 2938 // v | 2939 // other-proj v 2940 // * new_if(relop(cmp[IU](left,right))) 2941 // / \ 2942 // v v 2943 // * new-proj proj 2944 // (returned) 2945 // 2946 ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) { 2947 IfNode* iff = proj->in(0)->as_If(); 2948 IdealLoopTree *loop = get_loop(proj); 2949 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj(); 2950 int ddepth = dom_depth(proj); 2951 2952 _igvn.rehash_node_delayed(iff); 2953 _igvn.rehash_node_delayed(proj); 2954 2955 proj->set_req(0, NULL); // temporary disconnect 2956 ProjNode* proj2 = proj_clone(proj, iff); 2957 register_node(proj2, loop, iff, ddepth); 2958 2959 Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right); 2960 register_node(cmp, loop, proj2, ddepth); 2961 2962 BoolNode* bol = new BoolNode(cmp, relop); 2963 register_node(bol, loop, proj2, ddepth); 2964 2965 int opcode = iff->Opcode(); 2966 assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode"); 2967 IfNode* new_if = (opcode == Op_If) ? 
new IfNode(proj2, bol, iff->_prob, iff->_fcnt): 2968 new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt); 2969 register_node(new_if, loop, proj2, ddepth); 2970 2971 proj->set_req(0, new_if); // reattach 2972 set_idom(proj, new_if, ddepth); 2973 2974 ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj(); 2975 guarantee(new_exit != NULL, "null exit node"); 2976 register_node(new_exit, get_loop(other_proj), new_if, ddepth); 2977 2978 return new_exit; 2979 } 2980 2981 //------------------------------ insert_region_before_proj ------------------------------------- 2982 // Insert a region before an if projection (* - new node) 2983 // 2984 // before 2985 // if(test) 2986 // / | 2987 // v | 2988 // proj v 2989 // other-proj 2990 // 2991 // after 2992 // if(test) 2993 // / | 2994 // v | 2995 // * proj-clone v 2996 // | other-proj 2997 // v 2998 // * new-region 2999 // | 3000 // v 3001 // * dum_if 3002 // / \ 3003 // v \ 3004 // * dum-proj v 3005 // proj 3006 // 3007 RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) { 3008 IfNode* iff = proj->in(0)->as_If(); 3009 IdealLoopTree *loop = get_loop(proj); 3010 ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj(); 3011 int ddepth = dom_depth(proj); 3012 3013 _igvn.rehash_node_delayed(iff); 3014 _igvn.rehash_node_delayed(proj); 3015 3016 proj->set_req(0, NULL); // temporary disconnect 3017 ProjNode* proj2 = proj_clone(proj, iff); 3018 register_node(proj2, loop, iff, ddepth); 3019 3020 RegionNode* reg = new RegionNode(2); 3021 reg->set_req(1, proj2); 3022 register_node(reg, loop, iff, ddepth); 3023 3024 IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt); 3025 register_node(dum_if, loop, reg, ddepth); 3026 3027 proj->set_req(0, dum_if); // reattach 3028 set_idom(proj, dum_if, ddepth); 3029 3030 ProjNode* dum_proj = proj_clone(other_proj, dum_if); 3031 register_node(dum_proj, loop, dum_if, ddepth); 3032 3033 return reg; 3034 } 3035 3036 
//------------------------------ insert_cmpi_loop_exit -------------------------------------
// Clone a signed compare loop exit from an unsigned compare and
// insert it before the unsigned cmp on the stay-in-loop path.
// All new nodes inserted in the dominator tree between the original
// if and it's projections.  The original if test is replaced with
// a constant to force the stay-in-loop path.
//
// This is done to make sure that the original if and it's projections
// still dominate the same set of control nodes, that the ctrl() relation
// from data nodes to them is preserved, and that their loop nesting is
// preserved.
//
// before
//          if(i <u limit)    unsigned compare loop exit
//         /       |
//        v        v
//   exit-proj   stay-in-loop-proj
//
// after
//          if(stay-in-loop-const)  original if
//         /       |
//        /        v
//       /  if(i <  limit)    new signed test
//      /  /       |
//     /  /        v
//    /  /  if( i <u limit)   new cloned unsigned test
//   /  /   /      |
//   v  v  v       |
//    region       |
//        |        |
//    dum-if       |
//   /     |       |
//   ether |       |
//         v       v
//   exit-proj   stay-in-loop-proj
//
IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
  const bool Signed   = true;
  const bool Unsigned = false;

  BoolNode* bol = if_cmpu->in(1)->as_Bool();
  // Only handle the canonical (i <u limit) shape.
  if (bol->_test._test != BoolTest::lt) return NULL;
  CmpNode* cmpu = bol->in(1)->as_Cmp();
  if (cmpu->Opcode() != Op_CmpU) return NULL;
  int stride = stride_of_possible_iv(if_cmpu);
  if (stride == 0) return NULL;

  Node* lp_proj = stay_in_loop(if_cmpu, loop);
  guarantee(lp_proj != NULL, "null loop node");

  ProjNode* lp_continue = lp_proj->as_Proj();
  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
  if (!lp_exit->is_IfFalse()) {
    // The loop exit condition is (i <u limit) ==> (i >= 0 && i < limit).
    // We therefore can't add a single exit condition.
    return NULL;
  }
  // The loop exit condition is !(i <u limit) ==> (i < 0 || i >= limit).
  // Split out the exit condition (i < 0) for stride < 0 or (i >= limit) for stride > 0.
  Node* limit = NULL;
  if (stride > 0) {
    limit = cmpu->in(2);
  } else {
    limit = _igvn.makecon(TypeInt::ZERO);
    set_ctrl(limit, C->root());
  }
  // Create a new region on the exit path
  RegionNode* reg = insert_region_before_proj(lp_exit);
  guarantee(reg != NULL, "null region node");

  // Clone the if-cmpu-true-false using a signed compare
  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
  reg->add_req(cmpi_exit);

  // Clone the if-cmpu-true-false
  BoolTest::mask rel_u = bol->_test._test;
  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
  reg->add_req(cmpu_exit);

  // Force original if to stay in loop.
  short_circuit_if(if_cmpu, lp_continue);

  // Return the new signed-compare exit If (the dominating one).
  return cmpi_exit->in(0)->as_If();
}

//------------------------------ remove_cmpi_loop_exit -------------------------------------
// Remove a previously inserted signed compare loop exit.
void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
  Node* lp_proj = stay_in_loop(if_cmp, loop);
  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
         stay_in_loop(lp_proj, loop)->is_If() &&
         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
  // Replace the signed test with a constant that keeps control in the loop,
  // effectively disabling the extra exit.
  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
  set_ctrl(con, C->root());
  if_cmp->set_req(1, con);
}

//------------------------------ scheduled_nodelist -------------------------------------
// Create a post order schedule of nodes that are in the
// "member" set.  The list is returned in "sched".
// The first node in "sched" is the loop head, followed by
// nodes which have no inputs in the "member" set, and then
// followed by the nodes that have an immediate input dependence
// on a node in "sched".
void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {

  assert(member.test(loop->_head->_idx), "loop head must be in member set");
  VectorSet visited;
  Node_Stack nstack(loop->_body.size());

  Node* n  = loop->_head;  // top of stack is cached in "n"
  uint idx = 0;
  visited.set(n->_idx);

  // Initially push all with no inputs from within member set
  for(uint i = 0; i < loop->_body.size(); i++ ) {
    Node *elt = loop->_body.at(i);
    if (member.test(elt->_idx)) {
      bool found = false;
      for (uint j = 0; j < elt->req(); j++) {
        Node* def = elt->in(j);
        if (def && member.test(def->_idx) && def != elt) {
          found = true;
          break;
        }
      }
      if (!found && elt != loop->_head) {
        nstack.push(n, idx);
        n = elt;
        assert(!visited.test(n->_idx), "not seen yet");
        visited.set(n->_idx);
      }
    }
  }

  // traverse out's that are in the member set
  while (true) {
    if (idx < n->outcnt()) {
      Node* use = n->raw_out(idx);
      idx++;
      if (!visited.test_set(use->_idx)) {
        if (member.test(use->_idx)) {
          // Descend into this use first (DFS); remember where we were.
          nstack.push(n, idx);
          n = use;
          idx = 0;
        }
      }
    } else {
      // All outputs processed
      sched.push(n);
      if (nstack.is_empty()) break;
      n   = nstack.node();
      idx = nstack.index();
      nstack.pop();
    }
  }
}


//------------------------------ has_use_in_set -------------------------------------
// Has a use in the vector set
bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if (vset.test(use->_idx)) {
      return true;
    }
  }
  return false;
}


//------------------------------ has_use_internal_to_set -------------------------------------
// Has use internal to the vector set (ie. not in a phi at the loop head)
bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
  Node* head = loop->_head;
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    // A use in a loop-head phi only merges values around the backedge and is
    // not considered "internal" to the set.
    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
      return true;
    }
  }
  return false;
}


//------------------------------ clone_for_use_outside_loop -------------------------------------
// clone "n" for uses that are outside of loop
// Returns the number of clones made, or -1 if the node budget would be
// exceeded (compilation bails out of this optimization).
int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
  int cloned = 0;
  assert(worklist.size() == 0, "should be empty");
  // Collect all uses of n whose controlling node lies outside the loop.
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
      worklist.push(use);
    }
  }

  // Respect the compile-wide node budget before committing to cloning.
  if (C->check_node_count(worklist.size() + NodeLimitFudgeFactor,
                          "Too many clones required in clone_for_use_outside_loop in partial peeling")) {
    return -1;
  }

  while( worklist.size() ) {
    Node *use = worklist.pop();
    if (!has_node(use) || use->in(0) == C->top()) continue;
    uint j;
    for (j = 0; j < use->req(); j++) {
      if (use->in(j) == n) break;
    }
    assert(j < use->req(), "must be there");

    // clone "n" and insert it between the inputs of "n" and the use outside the loop
    Node* n_clone = n->clone();
    _igvn.replace_input_of(use, j, n_clone);
    cloned++;
    Node* use_c;
    if (!use->is_Phi()) {
      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
    } else {
      // Use in a phi is considered a use in the associated predecessor block
      use_c = use->in(0)->in(j);
    }
    set_ctrl(n_clone, use_c);
    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
    get_loop(use_c)->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
    }
#endif
  }
  return cloned;
}


//------------------------------ clone_for_special_use_inside_loop -------------------------------------
// clone "n" for special uses that are in the not_peeled region.
// If these def-uses occur in separate blocks, the code generator
// marks the method as not compilable.  For example, if a "BoolNode"
// is in a different basic block than the "IfNode" that uses it, then
// the compilation is aborted in the code generator.
void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
  // Phis and Loads are never cloned here.
  if (n->is_Phi() || n->is_Load()) {
    return;
  }
  assert(worklist.size() == 0, "should be empty");
  // Collect If/CMove/Bool uses in the not-peeled region that consume n
  // directly as their condition input (input 1).
  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
    Node* use = n->fast_out(j);
    if ( not_peel.test(use->_idx) &&
         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
         use->in(1) == n)  {
      worklist.push(use);
    }
  }
  if (worklist.size() > 0) {
    // clone "n" and insert it between inputs of "n" and the use
    Node* n_clone = n->clone();
    loop->_body.push(n_clone);
    _igvn.register_new_node_with_optimizer(n_clone);
    set_ctrl(n_clone, get_ctrl(n));
    sink_list.push(n_clone);
    // The clone stays with its users in the not-peeled region.
    not_peel.set(n_clone->_idx);
#ifndef PRODUCT
    if (TracePartialPeeling) {
      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
    }
#endif
    while( worklist.size() ) {
      Node *use = worklist.pop();
      _igvn.rehash_node_delayed(use);
      for (uint j = 1; j < use->req(); j++) {
        if (use->in(j) == n) {
          use->set_req(j, n_clone);
        }
      }
    }
  }
}


//------------------------------ insert_phi_for_loop -------------------------------------
// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
  Node *phi = PhiNode::make(lp, back_edge_val);
  phi->set_req(LoopNode::EntryControl, lp_entry_val);
  // Use existing phi if it already exists
  Node *hit = _igvn.hash_find_insert(phi);
  if( hit == NULL ) {
    _igvn.register_new_node_with_optimizer(phi);
    set_ctrl(phi, lp);
  } else {
    // Remove the new phi from the graph and use the hit
    _igvn.remove_dead_node(phi);
    phi = hit;
  }
  _igvn.replace_input_of(use, idx, phi);
}

#ifdef ASSERT
//------------------------------ is_valid_loop_partition -------------------------------------
// Validate the loop partition sets: peel and not_peel
bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
                                              VectorSet& not_peel ) {
  uint i;
  // Check that peel_list entries are in the peel set
  for (i = 0; i < peel_list.size(); i++) {
    if (!peel.test(peel_list.at(i)->_idx)) {
      return false;
    }
  }
  // Check at loop members are in one of peel set or not_peel set
  for (i = 0; i < loop->_body.size(); i++ ) {
    Node *def = loop->_body.at(i);
    uint di = def->_idx;
    // Check that peel set elements are in peel_list
    if (peel.test(di)) {
      // A node must not be in both sets.
      if (not_peel.test(di)) {
        return false;
      }
      // Must be in peel_list also
      bool found = false;
      for (uint j = 0; j < peel_list.size(); j++) {
        if (peel_list.at(j)->_idx == di) {
          found = true;
          break;
        }
      }
      if (!found) {
        return false;
      }
    } else if (not_peel.test(di)) {
      if (peel.test(di)) {
        return false;
      }
    } else {
      // Every loop member must be in exactly one of the two sets.
      return false;
    }
  }
  return true;
}

//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
// Ensure a use outside of loop is of the right form
bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
  // Valid shape: a Phi merging two paths at a Region whose exit_idx input is
  // an If/Jump projection controlled from inside the loop.
  return (use->is_Phi() &&
          use_c->is_Region() && use_c->req() == 3 &&
          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
}

//------------------------------ is_valid_clone_loop_form -------------------------------------
// Ensure that all uses outside of loop are of the right form
bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
                                               uint orig_exit_idx, uint clone_exit_idx) {
  uint len = peel_list.size();
  for (uint i = 0; i < len; i++) {
    Node *def = peel_list.at(i);

    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
      Node *use = def->fast_out(j);
      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
      if (!loop->is_member(get_loop(use_c))) {
        // use is not in the loop, check for correct structure
        if (use->in(0) == def) {
          // Okay
        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
          return false;
        }
      }
    }
  }
  return true;
}
#endif

//------------------------------ partial_peel -------------------------------------
// Partially peel (aka loop rotation) the top portion of a loop (called
// the peel section below) by cloning it and placing one copy just before
// the new loop head and the other copy at the bottom of the new loop.
3422 // 3423 // before after where it came from 3424 // 3425 // stmt1 stmt1 3426 // loop: stmt2 clone 3427 // stmt2 if condA goto exitA clone 3428 // if condA goto exitA new_loop: new 3429 // stmt3 stmt3 clone 3430 // if !condB goto loop if condB goto exitB clone 3431 // exitB: stmt2 orig 3432 // stmt4 if !condA goto new_loop orig 3433 // exitA: goto exitA 3434 // exitB: 3435 // stmt4 3436 // exitA: 3437 // 3438 // Step 1: find the cut point: an exit test on probable 3439 // induction variable. 3440 // Step 2: schedule (with cloning) operations in the peel 3441 // section that can be executed after the cut into 3442 // the section that is not peeled. This may need 3443 // to clone operations into exit blocks. For 3444 // instance, a reference to A[i] in the not-peel 3445 // section and a reference to B[i] in an exit block 3446 // may cause a left-shift of i by 2 to be placed 3447 // in the peel block. This step will clone the left 3448 // shift into the exit block and sink the left shift 3449 // from the peel to the not-peel section. 3450 // Step 3: clone the loop, retarget the control, and insert 3451 // phis for values that are live across the new loop 3452 // head. This is very dependent on the graph structure 3453 // from clone_loop. It creates region nodes for 3454 // exit control and associated phi nodes for values 3455 // flow out of the loop through that exit. The region 3456 // node is dominated by the clone's control projection. 3457 // So the clone's peel section is placed before the 3458 // new loop head, and the clone's not-peel section is 3459 // forms the top part of the new loop. The original 3460 // peel section forms the tail of the new loop. 3461 // Step 4: update the dominator tree and recompute the 3462 // dominator depth. 
3463 // 3464 // orig 3465 // 3466 // stmt1 3467 // | 3468 // v 3469 // loop predicate 3470 // | 3471 // v 3472 // loop<----+ 3473 // | | 3474 // stmt2 | 3475 // | | 3476 // v | 3477 // ifA | 3478 // / | | 3479 // v v | 3480 // false true ^ <-- last_peel 3481 // / | | 3482 // / ===|==cut | 3483 // / stmt3 | <-- first_not_peel 3484 // / | | 3485 // | v | 3486 // v ifB | 3487 // exitA: / \ | 3488 // / \ | 3489 // v v | 3490 // false true | 3491 // / \ | 3492 // / ----+ 3493 // | 3494 // v 3495 // exitB: 3496 // stmt4 3497 // 3498 // 3499 // after clone loop 3500 // 3501 // stmt1 3502 // | 3503 // v 3504 // loop predicate 3505 // / \ 3506 // clone / \ orig 3507 // / \ 3508 // / \ 3509 // v v 3510 // +---->loop loop<----+ 3511 // | | | | 3512 // | stmt2 stmt2 | 3513 // | | | | 3514 // | v v | 3515 // | ifA ifA | 3516 // | | \ / | | 3517 // | v v v v | 3518 // ^ true false false true ^ <-- last_peel 3519 // | | ^ \ / | | 3520 // | cut==|== \ \ / ===|==cut | 3521 // | stmt3 \ \ / stmt3 | <-- first_not_peel 3522 // | | dom | | | | 3523 // | v \ 1v v2 v | 3524 // | ifB regionA ifB | 3525 // | / \ | / \ | 3526 // | / \ v / \ | 3527 // | v v exitA: v v | 3528 // | true false false true | 3529 // | / ^ \ / \ | 3530 // +---- \ \ / ----+ 3531 // dom \ / 3532 // \ 1v v2 3533 // regionB 3534 // | 3535 // v 3536 // exitB: 3537 // stmt4 3538 // 3539 // 3540 // after partial peel 3541 // 3542 // stmt1 3543 // | 3544 // v 3545 // loop predicate 3546 // / 3547 // clone / orig 3548 // / TOP 3549 // / \ 3550 // v v 3551 // TOP->loop loop----+ 3552 // | | | 3553 // stmt2 stmt2 | 3554 // | | | 3555 // v v | 3556 // ifA ifA | 3557 // | \ / | | 3558 // v v v v | 3559 // true false false true | <-- last_peel 3560 // | ^ \ / +------|---+ 3561 // +->newloop \ \ / === ==cut | | 3562 // | stmt3 \ \ / TOP | | 3563 // | | dom | | stmt3 | | <-- first_not_peel 3564 // | v \ 1v v2 v | | 3565 // | ifB regionA ifB ^ v 3566 // | / \ | / \ | | 3567 // | / \ v / \ | | 3568 // | v v exitA: v v | | 3569 // | 
//              true  false           false  true
//               |   /      ^    \     /  \   |   |
//               |  |        \    \   /    v  |   |
//               |  |        dom   \ /    TOP |   |
//               |  |          \   1v v2      |   |
//               ^  v           regionB       |   |
//               |  |              |          |   |
//               |  |              v          ^   v
//               |  |            exitB:       |   |
//               |  |            stmt4        |   |
//               |  +------------>------------+   |
//               |                                |
//               +-----------------<--------------+
//
//
//              final graph
//
//                  stmt1
//                    |
//                    v
//              loop predicate
//                    |
//                    v
//                  stmt2 clone
//                    |
//                    v
//         ........> ifA clone
//         :        / |
//        dom      /  |
//         :      v   v
//         :  false   true
//         :  |       |
//         :  |       v
//         :  |    newloop<-----+
//         :  |        |        |
//         :  |     stmt3 clone |
//         :  |        |        |
//         :  |        v        |
//         :  |       ifB       |
//         :  |      / \        |
//         :  |     v   v       |
//         :  |  false true     |
//         :  |   |     |       |
//         :  |   v    stmt2    |
//         :  | exitB:  |       |
//         :  | stmt4   v       |
//         :  |       ifA orig  |
//         :  |      /  \       |
//         :  |     /    \      |
//         :  |    v      v     |
//         :  |  false  true    |
//         :  |  /        \     |
//         :  v v          -----+
//          RegionA
//             |
//             v
//           exitA
//
bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {

  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
  if (!loop->_head->is_Loop()) {
    return false;
  }
  LoopNode *head = loop->_head->as_Loop();

  // Don't try again on a loop we already partially peeled, or on one
  // where a previous attempt was abandoned.
  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
    return false;
  }

  // Check for complex exit control: calls and multi-way branches inside
  // the loop make the peel-region cut ill-defined, so bail out.
  for (uint ii = 0; ii < loop->_body.size(); ii++) {
    Node *n = loop->_body.at(ii);
    int opc = n->Opcode();
    if (n->is_Call() ||
        opc == Op_Catch ||
        opc == Op_CatchProj ||
        opc == Op_Jump ||
        opc == Op_JumpProj) {
#ifndef PRODUCT
      if (TracePartialPeeling) {
        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
      }
#endif
      return false;
    }
  }

  int dd = dom_depth(head);

  // Step 1: find cut point

  // Walk up dominators to loop head looking for first loop exit
  // which is executed on every path thru loop.
  IfNode *peel_if = NULL;       // candidate exit test on a signed (CmpI) compare
  IfNode *peel_if_cmpu = NULL;  // candidate exit test on an unsigned (CmpU) compare

  Node *iff = loop->tail();
  while (iff != head) {
    if (iff->is_If()) {
      Node *ctrl = get_ctrl(iff->in(1));
      if (ctrl->is_top()) return false; // Dead test on live IF.
      // If loop-varying exit-test, check for induction variable
      if (loop->is_member(get_loop(ctrl)) &&
          loop->is_loop_exit(iff) &&
          is_possible_iv_test(iff)) {
        Node* cmp = iff->in(1)->in(1);
        if (cmp->Opcode() == Op_CmpI) {
          peel_if = iff->as_If();
        } else {
          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
          peel_if_cmpu = iff->as_If();
        }
      }
    }
    iff = idom(iff);
  }

  // Prefer signed compare over unsigned compare.  If only an unsigned
  // test was found, try to materialize an equivalent signed test next
  // to it so a counted loop can be formed later.
  IfNode* new_peel_if = NULL;
  if (peel_if == NULL) {
    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
      return false;   // No peel point found
    }
    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
    if (new_peel_if == NULL) {
      return false;   // No peel point found
    }
    peel_if = new_peel_if;
  }
  // The cut is placed just after the chosen exit test: last_peel is the
  // last CFG node peeled, first_not_peeled is the first one kept in the loop.
  Node* last_peel = stay_in_loop(peel_if, loop);
  Node* first_not_peeled = stay_in_loop(last_peel, loop);
  if (first_not_peeled == NULL || first_not_peeled == head) {
    return false;
  }

#ifndef PRODUCT
  if (TraceLoopOpts) {
    tty->print("PartialPeel  ");
    loop->dump_head();
  }

  if (TracePartialPeeling) {
    tty->print_cr("before partial peel one iteration");
    Node_List wl;
    Node* t = head->in(2);  // walk up from the backedge control
    while (true) {
      wl.push(t);
      if (t == head) break;
      t = idom(t);
    }
    while (wl.size() > 0) {
      Node* tt = wl.pop();
      tt->dump();
      if (tt == last_peel) tty->print_cr("-- cut --");
    }
  }
#endif
  VectorSet peel;        // node indices in the peeled region
  VectorSet not_peel;    // node indices staying in the loop
  Node_List peel_list;   // scheduled list of peeled nodes
  Node_List worklist;
  Node_List sink_list;   // nodes moved (sunk) below the cut

  uint estimate = loop->est_loop_clone_sz(1);
  if (exceeding_node_budget(estimate)) {
    return false;
  }

  // Set of cfg nodes to peel are those that are executable from
  // the head through last_peel.
  assert(worklist.size() == 0, "should be empty");
  worklist.push(head);
  peel.set(head->_idx);
  while (worklist.size() > 0) {
    Node *n = worklist.pop();
    if (n != last_peel) {
      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
        Node* use = n->fast_out(j);
        if (use->is_CFG() &&
            loop->is_member(get_loop(use)) &&
            !peel.test_set(use->_idx)) {
          worklist.push(use);
        }
      }
    }
  }

  // Set of non-cfg nodes to peel are those that are control
  // dependent on the cfg nodes.
  for (uint i = 0; i < loop->_body.size(); i++) {
    Node *n = loop->_body.at(i);
    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
    if (peel.test(n_c->_idx)) {
      peel.set(n->_idx);
    } else {
      not_peel.set(n->_idx);
    }
  }

  // Step 2: move operations from the peeled section down into the
  //         not-peeled section

  // Get a post order schedule of nodes in the peel region
  // Result in right-most operand.
  scheduled_nodelist(loop, peel, peel_list);

  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");

  // For future check for too many new phis
  uint old_phi_cnt = 0;
  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
    Node* use = head->fast_out(j);
    if (use->is_Phi()) old_phi_cnt++;
  }

#ifndef PRODUCT
  if (TracePartialPeeling) {
    tty->print_cr("\npeeled list");
  }
#endif

  // Evacuate nodes in peel region into the not_peeled region if possible
  bool too_many_clones = false;
  uint new_phi_cnt = 0;
  uint cloned_for_outside_use = 0;
  for (uint i = 0; i < peel_list.size();) {
    Node* n = peel_list.at(i);
#ifndef PRODUCT
    if (TracePartialPeeling) n->dump();
#endif
    bool incr = true;  // false when peel_list.remove(i) shifts the next element into slot i
    if (!n->is_CFG()) {
      if (has_use_in_set(n, not_peel)) {
        // If not used internal to the peeled region,
        // move "n" from peeled to not_peeled region.
        if (!has_use_internal_to_set(n, peel, loop)) {
          // if not pinned and not a load (which maybe anti-dependent on a store)
          // and not a CMove (Matcher expects only bool->cmove).
          if (n->in(0) == NULL && !n->is_Load() && !n->is_CMove()) {
            int new_clones = clone_for_use_outside_loop(loop, n, worklist);
            if (new_clones == -1) {
              too_many_clones = true;
              break;
            }
            cloned_for_outside_use += new_clones;
            sink_list.push(n);
            peel.remove(n->_idx);
            not_peel.set(n->_idx);
            peel_list.remove(i);
            incr = false;
#ifndef PRODUCT
            if (TracePartialPeeling) {
              tty->print_cr("sink to not_peeled region: %d newbb: %d",
                            n->_idx, get_ctrl(n)->_idx);
            }
#endif
          }
        } else {
          // Otherwise check for special def-use cases that span
          // the peel/not_peel boundary such as bool->if
          clone_for_special_use_inside_loop(loop, n, not_peel, sink_list, worklist);
          new_phi_cnt++;
        }
      }
    }
    if (incr) i++;
  }

  estimate += cloned_for_outside_use + new_phi_cnt;
  bool exceed_node_budget = !may_require_nodes(estimate);
  bool exceed_phi_limit = new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta;

  if (too_many_clones || exceed_node_budget || exceed_phi_limit) {
#ifndef PRODUCT
    if (TracePartialPeeling && exceed_phi_limit) {
      tty->print_cr("\nToo many new phis: %d  old %d  new cmpi: %c",
                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
    }
#endif
    // Undo the signed test inserted above, if any, before giving up.
    if (new_peel_if != NULL) {
      remove_cmpi_loop_exit(new_peel_if, loop);
    }
    // Inhibit more partial peeling on this loop
    assert(!head->is_partial_peel_loop(), "not partial peeled");
    head->mark_partial_peel_failed();
    if (cloned_for_outside_use > 0) {
      // Terminate this round of loop opts because
      // the graph outside this loop was changed.
      C->set_major_progress();
      return true;
    }
    return false;
  }

  // Step 3: clone loop, retarget control, and insert new phis

  // Create new loop head for new phis and to hang
  // the nodes being moved (sinked) from the peel region.
  LoopNode* new_head = new LoopNode(last_peel, last_peel);
  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
  _igvn.register_new_node_with_optimizer(new_head);
  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
  _igvn.replace_input_of(first_not_peeled, 0, new_head);
  set_loop(new_head, loop);
  loop->_body.push(new_head);
  not_peel.set(new_head->_idx);
  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));

  // Sunk nodes now hang off the new head.
  while (sink_list.size() > 0) {
    Node* n = sink_list.pop();
    set_ctrl(n, new_head);
  }

  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");

  clone_loop(loop, old_new, dd, IgnoreStripMined);

  const uint clone_exit_idx = 1;
  const uint orig_exit_idx  = 2;
  assert(is_valid_clone_loop_form(loop, peel_list, orig_exit_idx, clone_exit_idx), "bad clone loop");

  Node* head_clone = old_new[head->_idx];
  LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop();
  Node* orig_tail_clone = head_clone->in(2);

  // Add phi if "def" node is in peel set and "use" is not

  for (uint i = 0; i < peel_list.size(); i++) {
    Node *def = peel_list.at(i);
    if (!def->is_CFG()) {
      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
        Node *use = def->fast_out(j);
        if (has_node(use) && use->in(0) != C->top() &&
            (!peel.test(use->_idx) ||
             (use->is_Phi() && use->in(0) == head)) ) {
          worklist.push(use);
        }
      }
      while( worklist.size() ) {
        Node *use = worklist.pop();
        for (uint j = 1; j < use->req(); j++) {
          Node* n = use->in(j);
          if (n == def) {

            // "def" is in peel set, "use" is not in peel set
            // or "use" is in the entry boundary (a phi) of the peel set

            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;

            if ( loop->is_member(get_loop( use_c )) ) {
              // use is in loop
              if (old_new[use->_idx] != NULL) { // null for dead code
                Node* use_clone = old_new[use->_idx];
                _igvn.replace_input_of(use, j, C->top());
                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
              }
            } else {
              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
              // use is not in the loop, check if the live range includes the cut
              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
              if (not_peel.test(lp_if->_idx)) {
                assert(j == orig_exit_idx, "use from original loop");
                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
              }
            }
          }
        }
      }
    }
  }

  // Step 3b: retarget control

  // Redirect control to the new loop head if a cloned node in
  // the not_peeled region has control that points into the peeled region.
  // This necessary because the cloned peeled region will be outside
  // the loop.
  //                            from    to
  //          cloned-peeled    <---+
  //    new_head_clone:            |    <--+
  //          cloned-not_peeled  in(0)    in(0)
  //          orig-peeled
  //
  for (uint i = 0; i < loop->_body.size(); i++) {
    Node *n = loop->_body.at(i);
    if (!n->is_CFG() && n->in(0) != NULL &&
        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
      Node* n_clone = old_new[n->_idx];
      _igvn.replace_input_of(n_clone, 0, new_head_clone);
    }
  }

  // Backedge of the surviving new_head (the clone) is original last_peel
  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);

  // Cut first node in original not_peel set
  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls

  // Copy head_clone back-branch info to original head
  // and remove original head's loop entry and
  // clone head's back-branch
  _igvn.rehash_node_delayed(head); // Multiple edge updates
  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
  head->set_req(LoopNode::LoopBackControl, C->top());
  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());

  // Similarly modify the phis
  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
    Node* use = head->fast_out(k);
    if (use->is_Phi() && use->outcnt() > 0) {
      Node* use_clone = old_new[use->_idx];
      _igvn.rehash_node_delayed(use); // Multiple edge updates
      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
      use->set_req(LoopNode::LoopBackControl, C->top());
      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
    }
  }

  // Step 4: update dominator tree and dominator depth

  set_idom(head, orig_tail_clone,
           dd);
  recompute_dom_depth();

  // Inhibit more partial peeling on this loop
  new_head_clone->set_partial_peel_loop();
  C->set_major_progress();
  loop->record_for_igvn();

#ifndef PRODUCT
  if (TracePartialPeeling) {
    tty->print_cr("\nafter partial peel one iteration");
    Node_List wl;
    Node* t = last_peel;
    while (true) {
      wl.push(t);
      if (t == head_clone) break;
      t = idom(t);
    }
    while (wl.size() > 0) {
      Node* tt = wl.pop();
      if (tt == head) tty->print_cr("orig head");
      else if (tt == new_head_clone) tty->print_cr("new head");
      else if (tt == head_clone) tty->print_cr("clone head");
      tt->dump();
    }
  }
#endif
  return true;
}

// Transform:
//
// loop<-----------------+
//  |                    |
// stmt1 stmt2 .. stmtn  |
//  |     |        |     |
//  \     |       /      |
//   v    v      v       |
//        region         |
//          |            |
//      shared_stmt      |
//          |            |
//          v            |
//          if           |
//         / \           |
//        |   -----------+
//        v
//
// into:
//
//    loop<-------------------+
//     |                      |
//     v                      |
// +->loop                    |
// |   |                      |
// | stmt1 stmt2 .. stmtn     |
// |   |     |       |        |
// |   |      \     /         |
// |   |       v   v          |
// |   |      region1         |
// |   |         |            |
// | shared_stmt shared_stmt  |
// |   |         |            |
// |   v         v            |
// |   if        if           |
// |   /\        / \          |
// +--   |      |   ----------+
//        \    /
//         v  v
//        region2
//
// (region2 is shown to merge mirrored projections of the loop exit
// ifs to make the diagram clearer but they really merge the same
// projection)
//
// Conditions for this transformation to trigger:
// - the path through stmt1 is frequent enough
// - the inner loop will be turned into a counted loop after transformation
bool PhaseIdealLoop::duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new) {
  if (!DuplicateBackedge) {
    return false;
  }
  assert(!loop->_head->is_CountedLoop() || StressDuplicateBackedge, "Non-counted loop only");
  if (!loop->_head->is_Loop()) {
    return false;
  }

  uint estimate = loop->est_loop_clone_sz(1);
  if (exceeding_node_budget(estimate)) {
    return false;
  }

  LoopNode *head = loop->_head->as_Loop();

  Node* region = NULL;         // "region" in the diagram above (merge of stmt1..stmtn)
  IfNode* exit_test = NULL;    // loop exit test; stays NULL on the stress path
  uint inner;                  // input index of region picked as the frequent path
  float f;                     // frequency of that input; only set when exit_test is set
  if (StressDuplicateBackedge) {
    if (head->is_strip_mined()) {
      return false;
    }
    // Stress mode: pick the region closest to the head on the backedge
    // dominator chain, regardless of frequency.
    Node* c = head->in(LoopNode::LoopBackControl);

    while (c != head) {
      if (c->is_Region()) {
        region = c;
      }
      c = idom(c);
    }

    if (region == NULL) {
      return false;
    }

    inner = 1;
  } else {
    // Is the shape of the loop that of a counted loop...
    Node* back_control = loop_exit_control(head, loop);
    if (back_control == NULL) {
      return false;
    }

    BoolTest::mask bt = BoolTest::illegal;
    float cl_prob = 0;
    Node* incr = NULL;
    Node* limit = NULL;
    Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
    if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
      return false;
    }

    // With an extra phi for the candidate iv?
    // Or the region node is the loop head
    if (!incr->is_Phi() || incr->in(0) == head) {
      return false;
    }

    PathFrequency pf(head, this);
    region = incr->in(0);

    // Go over all paths for the extra phi's region and see if that
    // path is frequent enough and would match the expected iv shape
    // if the extra phi is removed
    inner = 0;
    for (uint i = 1; i < incr->req(); ++i) {
      Node* in = incr->in(i);
      Node* trunc1 = NULL;
      Node* trunc2 = NULL;
      const TypeInteger* iv_trunc_t = NULL;
      Node* orig_in = in;
      if (!(in = CountedLoopNode::match_incr_with_optional_truncation(in, &trunc1, &trunc2, &iv_trunc_t, T_INT))) {
        continue;
      }
      assert(in->Opcode() == Op_AddI, "wrong increment code");
      Node* xphi = NULL;
      Node* stride = loop_iv_stride(in, loop, xphi);

      if (stride == NULL) {
        continue;
      }

      PhiNode* phi = loop_iv_phi(xphi, NULL, head, loop);
      if (phi == NULL ||
          (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
          (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1)) {
        return false;
      }

      // Frequent enough? (> 50% of the backedge executions)
      f = pf.to(region->in(i));
      if (f > 0.5) {
        inner = i;
        break;
      }
    }

    if (inner == 0) {
      return false;
    }

    exit_test = back_control->in(0)->as_If();
  }

  if (idom(region)->is_Catch()) {
    return false;
  }

  // Collect all control nodes that need to be cloned (shared_stmt in the diagram)
  Unique_Node_List wq;
  wq.push(head->in(LoopNode::LoopBackControl));
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    assert(get_loop(c) == loop, "not in the right loop?");
    if (c->is_Region()) {
      if (c != region) {
        for (uint j = 1; j < c->req(); ++j) {
          wq.push(c->in(j));
        }
      }
    } else {
      wq.push(c->in(0));
    }
    assert(!is_dominator(c, region) || c == region, "shouldn't go above region");
  }

  // NOTE(review): region_dom looks unused in the rest of this function —
  // confirm whether it can be removed.
  Node* region_dom = idom(region);

  // Can't do the transformation if this would cause a membar pair to
  // be split
  for (uint i = 0; i < wq.size(); i++) {
    Node* c = wq.at(i);
    if (c->is_MemBar() && (c->as_MemBar()->trailing_store() || c->as_MemBar()->trailing_load_store())) {
      assert(c->as_MemBar()->leading_membar()->trailing_membar() == c, "bad membar pair");
      if (!wq.member(c->as_MemBar()->leading_membar())) {
        return false;
      }
    }
  }

  // Collect data nodes that need to be clones as well
  int dd = dom_depth(head);

  for (uint i = 0; i < loop->_body.size(); ++i) {
    Node* n = loop->_body.at(i);
    if (has_ctrl(n)) {
      Node* c = get_ctrl(n);
      if (wq.member(c)) {
        wq.push(n);
      }
    } else {
      set_idom(n, idom(n), dd);
    }
  }

  // clone shared_stmt
  clone_loop_body(wq, old_new, NULL);

  // Disconnect the frequent path from the clone; it stays only on the
  // original region (which becomes the inner-loop copy of shared_stmt).
  Node* region_clone = old_new[region->_idx];
  region_clone->set_req(inner, C->top());
  set_idom(region, region->in(inner), dd);

  // Prepare the outer loop
  Node* outer_head = new LoopNode(head->in(LoopNode::EntryControl), old_new[head->in(LoopNode::LoopBackControl)->_idx]);
  register_control(outer_head, loop->_parent, outer_head->in(LoopNode::EntryControl));
  _igvn.replace_input_of(head, LoopNode::EntryControl, outer_head);
  set_idom(head, outer_head, dd);

  fix_body_edges(wq, loop, old_new, dd, loop->_parent, true);

  // Make one of the shared_stmt copies only reachable from stmt1, the
  // other only from stmt2..stmtn.
  Node* dom = NULL;
  for (uint i = 1; i < region->req(); ++i) {
    if (i != inner) {
      _igvn.replace_input_of(region, i, C->top());
    }
    Node* in = region_clone->in(i);
    if (in->is_top()) {
      continue;
    }
    if (dom == NULL) {
      dom = in;
    } else {
      dom = dom_lca(dom, in);
    }
  }

  set_idom(region_clone, dom, dd);

  // Set up the outer loop: every phi on the inner head gets a matching
  // phi on the outer head whose backedge value comes from the clone.
  for (uint i = 0; i < head->outcnt(); i++) {
    Node* u = head->raw_out(i);
    if (u->is_Phi()) {
      Node* outer_phi = u->clone();
      outer_phi->set_req(0, outer_head);
      Node* backedge = old_new[u->in(LoopNode::LoopBackControl)->_idx];
      if (backedge == NULL) {  // backedge value was not cloned
        backedge = u->in(LoopNode::LoopBackControl);
      }
      outer_phi->set_req(LoopNode::LoopBackControl, backedge);
      register_new_node(outer_phi, outer_head);
      _igvn.replace_input_of(u, LoopNode::EntryControl, outer_phi);
    }
  }

  // create control and data nodes for out of loop uses (including region2)
  Node_List worklist;
  uint new_counter = C->unique();
  fix_ctrl_uses(wq, loop, old_new, ControlAroundStripMined, outer_head, NULL, worklist);

  Node_List *split_if_set = NULL;
  Node_List *split_bool_set = NULL;
  Node_List *split_cex_set = NULL;
  fix_data_uses(wq, loop, ControlAroundStripMined, head->is_strip_mined() ? loop->_parent : loop, new_counter, old_new, worklist, split_if_set, split_bool_set, split_cex_set);

  finish_clone_loop(split_if_set, split_bool_set, split_cex_set);

  // Split the exit test's profile count between the two copies according
  // to the measured path frequency f (non-stress path only; on the stress
  // path exit_test is NULL and f was never set).
  if (exit_test != NULL) {
    float cnt = exit_test->_fcnt;
    if (cnt != COUNT_UNKNOWN) {
      exit_test->_fcnt = cnt * f;
      old_new[exit_test->_idx]->as_If()->_fcnt = cnt * (1 - f);
    }
  }

  C->set_major_progress();

  return true;
}