1 /* 2 * Copyright (c) 2006, 2025, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "memory/allocation.inline.hpp" 26 #include "opto/castnode.hpp" 27 #include "opto/cfgnode.hpp" 28 #include "opto/connode.hpp" 29 #include "opto/convertnode.hpp" 30 #include "opto/loopnode.hpp" 31 #include "opto/opaquenode.hpp" 32 #include "opto/predicates.hpp" 33 #include "opto/rootnode.hpp" 34 35 // Multiversioning: 36 // A loop is cloned, and a selector If decides which loop is taken at run-time: the true-path-loop (original) or the 37 // false-path-loop (cloned). 38 // 39 // Use-cases: 40 // - Speculative compilation: 41 // The selector If checks some assumptions which allow stronger optimization in the true-path-loop. If the assumptions 42 // do not hold, we can still execute in the false-path-loop, although with fewer optimizations. 43 // See: PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks 44 // PhaseIdealLoop::create_new_if_for_multiversion 45 // 46 // - Unswitching: 47 // The selector If has the same (loop invariant) condition as some unswitching candidate If inside the loop. This 48 // allows us to constant-fold the unswitching candidate If to true in the true-path-loop and to false in the 49 // false-path-loop, thus eliminating the unswitching candidate If from the loop. 50 // 51 // 52 // Loop Unswitching is a loop optimization to move an invariant, non-loop-exiting test in the loop body before the loop. 53 // Such a test is either always true or always false in all loop iterations and could therefore only be executed once. 54 // To achieve that, we duplicate the loop and change the original and cloned loop as follows: 55 // - Original loop -> true-path-loop: 56 // The true-path of the invariant, non-loop-exiting test in the original loop 57 // is kept while the false-path is killed. We call this unswitched loop version 58 // the true-path-loop. 59 // - Cloned loop -> false-path-loop: 60 // The false-path of the invariant, non-loop-exiting test in the cloned loop 61 // is kept while the true-path is killed. We call this unswitched loop version 62 // the false-path loop. 63 // 64 // The invariant, non-loop-exiting test can now be moved before both loops (to only execute it once) and turned into a 65 // loop selector If node to select at runtime which unswitched loop version should be executed. 66 // - Loop selector true? Execute the true-path-loop. 67 // - Loop selector false? Execute the false-path-loop. 68 // 69 // Note that even though an invariant test that exits the loop could also be optimized with Loop Unswitching, it is more 70 // efficient to simply peel the loop which achieves the same result in a simpler manner (also see policy_peeling()). 71 // 72 // The following graphs summarizes the Loop Unswitching optimization. 73 // We start with the original loop: 74 // 75 // [Predicates] 76 // | 77 // Original Loop 78 // stmt1 79 // if (invariant-test) 80 // if-path 81 // else 82 // else-path 83 // stmt2 84 // Endloop 85 // 86 // 87 // which is unswitched into a true-path-loop and a false-path-loop together with a loop selector: 88 // 89 // 90 // [Initialized Assertion Predicates] 91 // | 92 // loop selector If (invariant-test) 93 // / \ 94 // true? false? 95 // / \ 96 // [Cloned Parse Predicates] [Cloned Parse Predicates] 97 // [Cloned Template [Cloned Template 98 // Assertion Predicates] Assertion Predicates] 99 // | | 100 // True-Path-Loop False-Path-Loop 101 // cloned stmt1 cloned stmt1 102 // cloned if-path cloned else-path 103 // cloned stmt2 cloned stmt2 104 // Endloop Endloop 105 106 107 // Return true if the loop should be unswitched or false otherwise. 108 bool IdealLoopTree::policy_unswitching(PhaseIdealLoop* phase) const { 109 if (!LoopUnswitching) { 110 return false; 111 } 112 if (!_head->is_Loop()) { 113 return false; 114 } 115 116 // If nodes are depleted, some transform has miscalculated its needs. 117 assert(!phase->exceeding_node_budget(), "sanity"); 118 119 // check for vectorized loops, any unswitching was already applied 120 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) { 121 return false; 122 } 123 124 LoopNode* head = _head->as_Loop(); 125 if (head->unswitch_count() + 1 > head->unswitch_max()) { 126 return false; 127 } 128 129 if (head->is_flat_arrays()) { 130 return false; 131 } 132 133 if (no_unswitch_candidate()) { 134 return false; 135 } 136 137 // Too speculative if running low on nodes. 138 return phase->may_require_nodes(est_loop_clone_sz(2)); 139 } 140 141 // Check the absence of any If node that can be used for Loop Unswitching. In that case, no Loop Unswitching can be done. 142 bool IdealLoopTree::no_unswitch_candidate() const { 143 ResourceMark rm; 144 Node_List dont_care; 145 return _phase->find_unswitch_candidates(this, dont_care) == nullptr; 146 } 147 148 // Find an invariant test in the loop body that does not exit the loop. If multiple tests are found, we pick the first 149 // one in the loop body as "unswitch candidate" to apply Loop Unswitching on. 150 // Depending on whether we find such a candidate and if we do, whether it's a flat array check, we do the following: 151 // (1) Candidate is not a flat array check: 152 // Return the unique unswitch candidate. 153 // (2) Candidate is a flat array check: 154 // Collect all remaining non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks' 155 // list in order to create an unswitched loop version without any flat array checks and a version with checks 156 // (i.e. same as original loop). Return the initially found candidate which could be unique if no further flat array 157 // checks are found. 158 // (3) No candidate is initially found: 159 // As in (2), we collect all non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks' 160 // list. Pick the first collected flat array check as unswitch candidate, which could be unique, and return it (a). 161 // If there are no flat array checks, we cannot apply Loop Unswitching (b). 162 // 163 // Note that for both (2) and (3a), if there are multiple flat array checks, then the candidate's FlatArrayCheckNode is 164 // later updated in Loop Unswitching to perform a flat array check on all collected flat array checks. 165 IfNode* PhaseIdealLoop::find_unswitch_candidates(const IdealLoopTree* loop, Node_List& flat_array_checks) const { 166 IfNode* unswitch_candidate = find_unswitch_candidate_from_idoms(loop); 167 if (unswitch_candidate != nullptr && !unswitch_candidate->is_flat_array_check(&_igvn)) { 168 // Case (1) 169 return unswitch_candidate; 170 } 171 172 collect_flat_array_checks(loop, flat_array_checks); 173 if (unswitch_candidate != nullptr) { 174 // Case (2) 175 assert(unswitch_candidate->is_flat_array_check(&_igvn), "is a flat array check"); 176 return unswitch_candidate; 177 } else if (flat_array_checks.size() > 0) { 178 // Case (3a): Pick first one found as candidate (there could be multiple). 179 return flat_array_checks[0]->as_If(); 180 } 181 182 // Case (3b): No suitable unswitch candidate found. 183 return nullptr; 184 } 185 186 // Find an unswitch candidate by following the idom chain from the loop back edge. 187 IfNode* PhaseIdealLoop::find_unswitch_candidate_from_idoms(const IdealLoopTree* loop) const { 188 LoopNode* head = loop->_head->as_Loop(); 189 IfNode* unswitch_candidate = nullptr; 190 Node* n = head->in(LoopNode::LoopBackControl); 191 while (n != head) { 192 Node* n_dom = idom(n); 193 if (n->is_Region()) { 194 if (n_dom->is_If()) { 195 IfNode* iff = n_dom->as_If(); 196 if (iff->in(1)->is_Bool()) { 197 BoolNode* bol = iff->in(1)->as_Bool(); 198 if (bol->in(1)->is_Cmp()) { 199 // If condition is invariant and not a loop exit, 200 // then found reason to unswitch. 201 if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) { 202 assert(iff->Opcode() == Op_If || iff->is_RangeCheck() || iff->is_BaseCountedLoopEnd(), "valid ifs"); 203 unswitch_candidate = iff; 204 } 205 } 206 } 207 } 208 } 209 n = n_dom; 210 } 211 return unswitch_candidate; 212 } 213 214 // Collect all flat array checks in the provided 'flat_array_checks' list. 215 void PhaseIdealLoop::collect_flat_array_checks(const IdealLoopTree* loop, Node_List& flat_array_checks) const { 216 assert(flat_array_checks.size() == 0, "should be empty initially"); 217 for (uint i = 0; i < loop->_body.size(); i++) { 218 Node* next = loop->_body.at(i); 219 if (next->is_If() && next->as_If()->is_flat_array_check(&_igvn) && loop->is_invariant(next->in(1)) && 220 !loop->is_loop_exit(next)) { 221 flat_array_checks.push(next); 222 } 223 } 224 } 225 226 // This class represents an "unswitch candidate" which is an If that can be used to perform Loop Unswitching on. If the 227 // candidate is a flat array check candidate, then we also collect all remaining non-loop-exiting flat array checks. 228 // These are candidates as well. We want to get rid of all these flat array checks in the true-path-loop for the 229 // following reason: 230 // 231 // FlatArrayCheckNodes are used with array accesses to switch between a flat and a non-flat array access. We want 232 // the performance impact on non-flat array accesses to be as small as possible. We therefore create the following 233 // loops in Loop Unswitching: 234 // - True-path-loop: We remove all non-loop-exiting flat array checks to get a loop with only non-flat array accesses 235 // (i.e. a fast path loop). 236 // - False-path-loop: We keep all flat array checks in this loop (i.e. a slow path loop). 237 class UnswitchCandidate : public StackObj { 238 PhaseIdealLoop* const _phase; 239 const Node_List& _old_new; 240 Node* const _original_loop_entry; 241 // If _candidate is a flat array check, this list contains all non-loop-exiting flat array checks in the loop body. 242 Node_List _flat_array_check_candidates; 243 IfNode* const _candidate; 244 245 public: 246 UnswitchCandidate(IdealLoopTree* loop, const Node_List& old_new) 247 : _phase(loop->_phase), 248 _old_new(old_new), 249 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)), 250 _flat_array_check_candidates(), 251 _candidate(find_unswitch_candidate(loop)) {} 252 NONCOPYABLE(UnswitchCandidate); 253 254 IfNode* find_unswitch_candidate(IdealLoopTree* loop) { 255 IfNode* unswitch_candidate = _phase->find_unswitch_candidates(loop, _flat_array_check_candidates); 256 assert(unswitch_candidate != nullptr, "guaranteed to exist by policy_unswitching"); 257 assert(_phase->is_member(loop, unswitch_candidate), "must be inside original loop"); 258 return unswitch_candidate; 259 } 260 261 IfNode* candidate() const { 262 return _candidate; 263 } 264 265 // Is the candidate a flat array check and are there other flat array checks as well? 266 bool has_multiple_flat_array_check_candidates() const { 267 return _flat_array_check_candidates.size() > 1; 268 } 269 270 // Remove all candidates from the true-path-loop which are now dominated by the loop selector 271 // (i.e. 'true_path_loop_proj'). The removed candidates are folded in the next IGVN round. 272 void update_in_true_path_loop(IfTrueNode* true_path_loop_proj) const { 273 remove_from_loop(true_path_loop_proj, _candidate); 274 if (has_multiple_flat_array_check_candidates()) { 275 remove_flat_array_checks(true_path_loop_proj); 276 } 277 } 278 279 // Remove a unique candidate from the false-path-loop which is now dominated by the loop selector 280 // (i.e. 'false_path_loop_proj'). The removed candidate is folded in the next IGVN round. If there are multiple 281 // candidates (i.e. flat array checks), then we leave them in the false-path-loop and only mark the loop such that it 282 // is not unswitched anymore in later loop opts rounds. 283 void update_in_false_path_loop(IfFalseNode* false_path_loop_proj, LoopNode* false_path_loop) const { 284 if (has_multiple_flat_array_check_candidates()) { 285 // Leave the flat array checks in the false-path-loop and prevent it from being unswitched again based on these 286 // checks. 287 false_path_loop->mark_flat_arrays(); 288 } else { 289 remove_from_loop(false_path_loop_proj, _old_new[_candidate->_idx]->as_If()); 290 } 291 } 292 293 private: 294 void remove_from_loop(IfProjNode* dominating_proj, IfNode* candidate) const { 295 _phase->igvn().rehash_node_delayed(candidate); 296 _phase->dominated_by(dominating_proj, candidate); 297 } 298 299 void remove_flat_array_checks(IfProjNode* dominating_proj) const { 300 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) { 301 IfNode* flat_array_check = _flat_array_check_candidates.at(i)->as_If(); 302 _phase->igvn().rehash_node_delayed(flat_array_check); 303 _phase->dominated_by(dominating_proj, flat_array_check); 304 } 305 } 306 307 public: 308 // Merge all flat array checks into a single new BoolNode and return it. 309 BoolNode* merge_flat_array_checks() const { 310 assert(has_multiple_flat_array_check_candidates(), "must have multiple flat array checks to merge"); 311 assert(_candidate->in(1)->as_Bool()->_test._test == BoolTest::ne, "IfTrue proj must point to flat array"); 312 BoolNode* merged_flat_array_check_bool = create_bool_node(); 313 create_flat_array_check_node(merged_flat_array_check_bool); 314 return merged_flat_array_check_bool; 315 } 316 317 private: 318 BoolNode* create_bool_node() const { 319 BoolNode* merged_flat_array_check_bool = _candidate->in(1)->clone()->as_Bool(); 320 _phase->register_new_node(merged_flat_array_check_bool, _original_loop_entry); 321 return merged_flat_array_check_bool; 322 } 323 324 void create_flat_array_check_node(BoolNode* merged_flat_array_check_bool) const { 325 FlatArrayCheckNode* cloned_flat_array_check = merged_flat_array_check_bool->in(1)->clone()->as_FlatArrayCheck(); 326 _phase->register_new_node(cloned_flat_array_check, _original_loop_entry); 327 merged_flat_array_check_bool->set_req(1, cloned_flat_array_check); 328 set_flat_array_check_inputs(cloned_flat_array_check); 329 } 330 331 // Combine all checks into a single one that fails if one array is flat. 332 void set_flat_array_check_inputs(FlatArrayCheckNode* cloned_flat_array_check) const { 333 assert(cloned_flat_array_check->req() == 3, "unexpected number of inputs for FlatArrayCheck"); 334 cloned_flat_array_check->add_req_batch(_phase->C->top(), _flat_array_check_candidates.size() - 1); 335 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) { 336 Node* array = _flat_array_check_candidates.at(i)->in(1)->in(1)->in(FlatArrayCheckNode::ArrayOrKlass); 337 cloned_flat_array_check->set_req(FlatArrayCheckNode::ArrayOrKlass + i, array); 338 } 339 } 340 341 public: 342 #ifndef PRODUCT 343 void trace_flat_array_checks() const { 344 if (has_multiple_flat_array_check_candidates()) { 345 tty->print_cr("- Unswitched and Merged Flat Array Checks:"); 346 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) { 347 Node* unswitch_iff = _flat_array_check_candidates.at(i); 348 Node* cloned_unswitch_iff = _old_new[unswitch_iff->_idx]; 349 assert(cloned_unswitch_iff != nullptr, "must exist"); 350 tty->print_cr(" - %d %s -> %d %s", unswitch_iff->_idx, unswitch_iff->Name(), 351 cloned_unswitch_iff->_idx, cloned_unswitch_iff->Name()); 352 } 353 } 354 } 355 #endif // NOT PRODUCT 356 }; 357 358 // LoopSelector is used for loop multiversioning and unswitching. This class creates an If node (i.e. loop selector) 359 // that selects if the true-path-loop or the false-path-loop should be executed at runtime. 360 class LoopSelector : public StackObj { 361 // Cached fields for construction. 362 PhaseIdealLoop* const _phase; 363 IdealLoopTree* const _outer_loop; 364 Node* const _original_loop_entry; 365 const uint _dom_depth; // of original_loop_entry 366 367 // Constructed selector if with its projections. 368 IfNode* const _selector; 369 IfTrueNode* const _true_path_loop_proj; 370 IfFalseNode* const _false_path_loop_proj; 371 372 enum PathToLoop { 373 TRUE_PATH, FALSE_PATH 374 }; 375 376 public: 377 // For multiversioning: create a new selector (multiversion_if) from a bol condition. 378 LoopSelector(IdealLoopTree* loop, Node* bol, float prob, float fcnt) 379 : _phase(loop->_phase), 380 _outer_loop(loop->skip_strip_mined()->_parent), 381 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)), 382 _dom_depth(_phase->dom_depth(_original_loop_entry)), 383 _selector(create_multiversioning_if(bol, prob, fcnt)), // multiversioning 384 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()), 385 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) { 386 } 387 388 // For unswitching: create an unswitching if before the loop, from a pre-existing 389 // unswitching_candidate inside the loop. 390 LoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate) 391 : _phase(loop->_phase), 392 _outer_loop(loop->skip_strip_mined()->_parent), 393 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)), 394 _dom_depth(_phase->dom_depth(_original_loop_entry)), 395 _selector(create_unswitching_if(unswitch_candidate)), // unswitching 396 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()), 397 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) { 398 } 399 NONCOPYABLE(LoopSelector); 400 401 private: 402 IfNode* create_multiversioning_if(Node* bol, float prob, float fcnt) { 403 _phase->igvn().rehash_node_delayed(_original_loop_entry); 404 IfNode* selector_if = new IfNode(_original_loop_entry, bol, prob, fcnt); 405 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, _dom_depth); 406 return selector_if; 407 } 408 409 IfNode* create_unswitching_if(const UnswitchCandidate& unswitch_candidate) { 410 const uint dom_depth = _phase->dom_depth(_original_loop_entry); 411 _phase->igvn().rehash_node_delayed(_original_loop_entry); 412 IfNode* unswitch_candidate_if = unswitch_candidate.candidate(); 413 BoolNode* selector_bool; 414 if (unswitch_candidate.has_multiple_flat_array_check_candidates()) { 415 selector_bool = unswitch_candidate.merge_flat_array_checks(); 416 } else { 417 selector_bool = unswitch_candidate_if->in(1)->as_Bool(); 418 } 419 IfNode* selector_if = IfNode::make_with_same_profile(unswitch_candidate_if, _original_loop_entry, selector_bool); 420 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, dom_depth); 421 return selector_if; 422 } 423 424 IfProjNode* create_proj_to_loop(const PathToLoop path_to_loop) { 425 IfProjNode* proj_to_loop; 426 if (path_to_loop == TRUE_PATH) { 427 proj_to_loop = new IfTrueNode(_selector); 428 } else { 429 proj_to_loop = new IfFalseNode(_selector); 430 } 431 _phase->register_node(proj_to_loop, _outer_loop, _selector, _dom_depth); 432 return proj_to_loop; 433 } 434 435 public: 436 IfNode* selector() const { 437 return _selector; 438 } 439 440 IfTrueNode* true_path_loop_proj() const { 441 return _true_path_loop_proj; 442 } 443 444 IfFalseNode* false_path_loop_proj() const { 445 return _false_path_loop_proj; 446 } 447 }; 448 449 // This class creates an If node (i.e. loop selector) that selects if the true-path-loop or the false-path-loop should be 450 // executed at runtime. This is done by finding an invariant and non-loop-exiting unswitch candidate If node (guaranteed 451 // to exist at this point) to perform Loop Unswitching on. 452 class UnswitchedLoopSelector : public StackObj { 453 const UnswitchCandidate& _unswitch_candidate; 454 const LoopSelector _loop_selector; 455 456 public: 457 UnswitchedLoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate) 458 : _unswitch_candidate(unswitch_candidate), 459 _loop_selector(loop, _unswitch_candidate) {} 460 NONCOPYABLE(UnswitchedLoopSelector); 461 462 IfNode* selector_if() const { 463 return _loop_selector.selector(); 464 } 465 466 const LoopSelector& loop_selector() const { 467 return _loop_selector; 468 } 469 }; 470 471 // Class to unswitch the original loop and create Predicates at the new unswitched loop versions. The newly cloned loop 472 // becomes the false-path-loop while original loop becomes the true-path-loop. 473 class OriginalLoop : public StackObj { 474 LoopNode* const _loop_head; 475 LoopNode* const _outer_loop_head; // OuterStripMinedLoopNode if loop strip mined, else just the loop head. 476 IdealLoopTree* const _loop; 477 Node_List& _old_new; 478 PhaseIdealLoop* const _phase; 479 480 public: 481 OriginalLoop(IdealLoopTree* loop, Node_List& old_new) 482 : _loop_head(loop->_head->as_Loop()), 483 _outer_loop_head(loop->_head->as_Loop()->skip_strip_mined()), 484 _loop(loop), 485 _old_new(old_new), 486 _phase(loop->_phase) {} 487 NONCOPYABLE(OriginalLoop); 488 489 // Unswitch the original loop on the invariant loop selector by creating a true-path-loop and a false-path-loop. 490 // Remove the unswitch candidate If from both unswitched loop versions which are now covered by the loop selector If. 491 void unswitch(const UnswitchedLoopSelector& unswitched_loop_selector) { 492 multiversion(unswitched_loop_selector.loop_selector()); 493 } 494 495 // Multiversion the original loop. The loop selector if selects between the original loop (true-path-loop), and 496 // a copy of it (false-path-loop). 497 void multiversion(const LoopSelector& loop_selector) { 498 const uint first_false_path_loop_node_index = _phase->C->unique(); 499 clone_loop(loop_selector); 500 501 move_parse_and_template_assertion_predicates_to_unswitched_loops(loop_selector, 502 first_false_path_loop_node_index); 503 DEBUG_ONLY(verify_loop_versions(_loop->_head->as_Loop(), loop_selector);) 504 505 _phase->recompute_dom_depth(); 506 } 507 508 private: 509 void clone_loop(const LoopSelector& loop_selector) { 510 _phase->clone_loop(_loop, _old_new, _phase->dom_depth(_outer_loop_head), 511 PhaseIdealLoop::CloneIncludesStripMined, loop_selector.selector()); 512 fix_loop_entries(loop_selector); 513 } 514 515 void fix_loop_entries(const LoopSelector& loop_selector) const { 516 _phase->replace_loop_entry(_outer_loop_head, loop_selector.true_path_loop_proj()); 517 LoopNode* false_path_loop_strip_mined_head = old_to_new(_outer_loop_head)->as_Loop(); 518 _phase->replace_loop_entry(false_path_loop_strip_mined_head, 519 loop_selector.false_path_loop_proj()); 520 } 521 522 // Moves the Parse And Template Assertion Predicates to the true and false path loop. They are inserted between the 523 // loop heads and the loop selector If projections. The old Parse and Template Assertion Predicates before 524 // the unswitched loop selector are killed. 525 void move_parse_and_template_assertion_predicates_to_unswitched_loops( 526 const LoopSelector& loop_selector, const uint first_false_path_loop_node_index) const { 527 const NodeInOriginalLoopBody node_in_true_path_loop_body(first_false_path_loop_node_index, _old_new); 528 const NodeInClonedLoopBody node_in_false_path_loop_body(first_false_path_loop_node_index); 529 CloneUnswitchedLoopPredicatesVisitor 530 clone_unswitched_loop_predicates_visitor(_loop_head, old_to_new(_loop_head)->as_Loop(), node_in_true_path_loop_body, 531 node_in_false_path_loop_body, _phase); 532 Node* source_loop_entry = loop_selector.selector()->in(0); 533 PredicateIterator predicate_iterator(source_loop_entry); 534 predicate_iterator.for_each(clone_unswitched_loop_predicates_visitor); 535 } 536 537 #ifdef ASSERT 538 void verify_loop_versions(LoopNode* true_path_loop_head, 539 const LoopSelector& loop_selector) const { 540 verify_loop_version(true_path_loop_head, 541 loop_selector.true_path_loop_proj()); 542 verify_loop_version(old_to_new(true_path_loop_head)->as_Loop(), 543 loop_selector.false_path_loop_proj()); 544 } 545 546 static void verify_loop_version(LoopNode* loop_head, IfProjNode* loop_selector_if_proj) { 547 Node* entry = loop_head->skip_strip_mined()->in(LoopNode::EntryControl); 548 const Predicates predicates(entry); 549 // When skipping all predicates, we should end up at 'loop_selector_if_proj'. 550 assert(loop_selector_if_proj == predicates.entry(), "should end up at loop selector If"); 551 } 552 #endif // ASSERT 553 554 Node* old_to_new(const Node* old) const { 555 return _old_new[old->_idx]; 556 } 557 }; 558 559 // See comments below file header for more information about Loop Unswitching. 560 void PhaseIdealLoop::do_unswitching(IdealLoopTree* loop, Node_List& old_new) { 561 assert(LoopUnswitching, "LoopUnswitching must be enabled"); 562 563 LoopNode* original_head = loop->_head->as_Loop(); 564 if (has_control_dependencies_from_predicates(original_head)) { 565 NOT_PRODUCT(trace_loop_unswitching_impossible(original_head);) 566 return; 567 } 568 569 NOT_PRODUCT(trace_loop_unswitching_count(loop, original_head);) 570 C->print_method(PHASE_BEFORE_LOOP_UNSWITCHING, 4, original_head); 571 572 revert_to_normal_loop(original_head); 573 574 const UnswitchCandidate unswitch_candidate(loop, old_new); 575 const UnswitchedLoopSelector unswitched_loop_selector(loop, unswitch_candidate); 576 OriginalLoop original_loop(loop, old_new); 577 original_loop.unswitch(unswitched_loop_selector); 578 579 unswitch_candidate.update_in_true_path_loop(unswitched_loop_selector.loop_selector().true_path_loop_proj()); 580 unswitch_candidate.update_in_false_path_loop(unswitched_loop_selector.loop_selector().false_path_loop_proj(), 581 old_new[original_head->_idx]->as_Loop()); 582 hoist_invariant_check_casts(loop, old_new, unswitch_candidate, unswitched_loop_selector.selector_if()); 583 add_unswitched_loop_version_bodies_to_igvn(loop, old_new); 584 585 LoopNode* new_head = old_new[original_head->_idx]->as_Loop(); 586 increment_unswitch_counts(original_head, new_head); 587 588 NOT_PRODUCT(trace_loop_unswitching_result(unswitched_loop_selector, unswitch_candidate, original_head, new_head);) 589 C->print_method(PHASE_AFTER_LOOP_UNSWITCHING, 4, new_head); 590 C->set_major_progress(); 591 } 592 593 void PhaseIdealLoop::do_multiversioning(IdealLoopTree* lpt, Node_List& old_new) { 594 #ifndef PRODUCT 595 if (TraceLoopOpts || TraceLoopMultiversioning) { 596 tty->print("Multiversion "); 597 lpt->dump_head(); 598 } 599 #endif 600 assert(LoopMultiversioning, "LoopMultiversioning must be enabled"); 601 602 CountedLoopNode* original_head = lpt->_head->as_CountedLoop(); 603 C->print_method(PHASE_BEFORE_LOOP_MULTIVERSIONING, 4, original_head); 604 605 Node* one = _igvn.intcon(1); 606 set_ctrl(one, C->root()); 607 Node* opaque = new OpaqueMultiversioningNode(C, one); 608 set_ctrl(opaque, C->root()); 609 _igvn.register_new_node_with_optimizer(opaque); 610 _igvn.set_type(opaque, TypeInt::BOOL); 611 612 const LoopSelector loop_selector(lpt, opaque, PROB_LIKELY_MAG(3), COUNT_UNKNOWN); 613 OriginalLoop original_loop(lpt, old_new); 614 original_loop.multiversion(loop_selector); 615 616 add_unswitched_loop_version_bodies_to_igvn(lpt, old_new); 617 618 CountedLoopNode* new_head = old_new[original_head->_idx]->as_CountedLoop(); 619 original_head->set_multiversion_fast_loop(); 620 new_head->set_multiversion_delayed_slow_loop(); 621 622 NOT_PRODUCT(trace_loop_multiversioning_result(loop_selector, original_head, new_head);) 623 C->print_method(PHASE_AFTER_LOOP_MULTIVERSIONING, 4, new_head); 624 C->set_major_progress(); 625 } 626 627 // Create a new if in the multiversioning pattern, adding an additional condition for the 628 // multiversioning fast-loop. 629 // 630 // Before: 631 // entry opaque 632 // | | 633 // multiversion_if 634 // | | 635 // +----------------+ +---------------+ 636 // | | 637 // multiversion_fast_proj multiversion_slow_proj 638 // | 639 // +--------+ 640 // | 641 // slow_path 642 // 643 // 644 // After: 645 // entry opaque <-- to be replaced by caller 646 // | | 647 // new_if 648 // | | 649 // | +-----------------------------+ 650 // | | 651 // new_if_true opaque new_if_false 652 // | | | 653 // multiversion_if | 654 // | | | 655 // +----------------+ +---------------+ | 656 // | | | 657 // multiversion_fast_proj new_multiversion_slow_proj | 658 // | | 659 // +------+ | 660 // | | 661 // region 662 // | 663 // slow_path 664 // 665 IfTrueNode* PhaseIdealLoop::create_new_if_for_multiversion(IfTrueNode* multiversioning_fast_proj) { 666 // Give all nodes in the old sub-graph a name. 667 IfNode* multiversion_if = multiversioning_fast_proj->in(0)->as_If(); 668 Node* entry = multiversion_if->in(0); 669 OpaqueMultiversioningNode* opaque = multiversion_if->in(1)->as_OpaqueMultiversioning(); 670 IfFalseNode* multiversion_slow_proj = multiversion_if->proj_out(0)->as_IfFalse(); 671 Node* slow_path = multiversion_slow_proj->unique_ctrl_out(); 672 673 // The slow_loop may still be delayed, and waiting for runtime-checks to be added to the 674 // multiversion_if. Now that we have at least one condition for the multiversioning, 675 // we should resume optimizations for the slow loop. 676 opaque->notify_slow_loop_that_it_can_resume_optimizations(); 677 678 // Create new_if with its projections. 679 IfNode* new_if = IfNode::make_with_same_profile(multiversion_if, entry, opaque); 680 IdealLoopTree* lp = get_loop(entry); 681 register_control(new_if, lp, entry); 682 683 IfTrueNode* new_if_true = new IfTrueNode(new_if); 684 IfFalseNode* new_if_false = new IfFalseNode(new_if); 685 register_control(new_if_true, lp, new_if); 686 register_control(new_if_false, lp, new_if); 687 688 // Hook new_if_true into multiversion_if. 689 _igvn.replace_input_of(multiversion_if, 0, new_if_true); 690 691 // Clone multiversion_slow_path - this allows us to easily carry the dependencies to 692 // the new region below. 693 IfFalseNode* new_multiversion_slow_proj = multiversion_slow_proj->clone()->as_IfFalse(); 694 register_control(new_multiversion_slow_proj, lp, multiversion_if); 695 696 // Create new Region. 697 RegionNode* region = new RegionNode(1); 698 region->add_req(new_multiversion_slow_proj); 699 region->add_req(new_if_false); 700 register_control(region, lp, new_multiversion_slow_proj); 701 702 // Hook region into slow_path, in stead of the multiversion_slow_proj. 703 // This also moves all other dependencies of the multiversion_slow_proj to the region. 704 _igvn.replace_node(multiversion_slow_proj, region); 705 706 return new_if_true; 707 } 708 709 OpaqueMultiversioningNode* find_multiversion_opaque_from_multiversion_if_false(Node* maybe_multiversion_if_false) { 710 IfFalseNode* multiversion_if_false = maybe_multiversion_if_false->isa_IfFalse(); 711 if (multiversion_if_false == nullptr) { return nullptr; } 712 IfNode* multiversion_if = multiversion_if_false->in(0)->isa_If(); 713 if (multiversion_if == nullptr) { return nullptr; } 714 return multiversion_if->in(1)->isa_OpaqueMultiversioning(); 715 } 716 717 bool PhaseIdealLoop::try_resume_optimizations_for_delayed_slow_loop(IdealLoopTree* lpt) { 718 CountedLoopNode* cl = lpt->_head->as_CountedLoop(); 719 assert(cl->is_multiversion_delayed_slow_loop(), "must currently be delayed"); 720 721 // Find multiversion_if. 722 Node* entry = cl->skip_strip_mined()->in(LoopNode::EntryControl); 723 const Predicates predicates(entry); 724 725 Node* slow_path = predicates.entry(); 726 727 // Find opaque. 728 OpaqueMultiversioningNode* opaque = nullptr; 729 if (slow_path->is_Region()) { 730 for (uint i = 1; i < slow_path->req(); i++) { 731 Node* n = slow_path->in(i); 732 opaque = find_multiversion_opaque_from_multiversion_if_false(n); 733 if (opaque != nullptr) { break; } 734 } 735 } else { 736 opaque = find_multiversion_opaque_from_multiversion_if_false(slow_path); 737 } 738 assert(opaque != nullptr, "must have found multiversion opaque node"); 739 if (opaque == nullptr) { return false; } 740 741 // We may still be delayed, if there were not yet any runtime-checks added 742 // for the multiversioning. We may never add any, and then this loop would 743 // fold away. So we wait until some runtime-checks are added, then we know 744 // that this loop will be reachable and it is worth optimizing further. 745 if (opaque->is_delayed_slow_loop()) { return false; } 746 747 // Clear away the "delayed" status, i.e. resume optimizations. 748 cl->set_no_multiversion(); 749 cl->set_multiversion_slow_loop(); 750 #ifndef PRODUCT 751 if (TraceLoopOpts) { 752 tty->print("Resume Optimizations "); 753 lpt->dump_head(); 754 } 755 #endif 756 return true; 757 } 758 759 bool PhaseIdealLoop::has_control_dependencies_from_predicates(LoopNode* head) { 760 Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl); 761 const Predicates predicates(entry); 762 if (predicates.has_any()) { 763 assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate"); 764 if (entry->outcnt() > 1) { 765 // Bailout if there are predicates from which there are additional control dependencies (i.e. from loop 766 // entry 'entry') to previously partially peeled statements since this case is not handled and can lead 767 // to a wrong execution. Remove this bailout, once this is fixed. 768 return true; 769 } 770 } 771 return false; 772 } 773 774 #ifndef PRODUCT 775 void PhaseIdealLoop::trace_loop_unswitching_impossible(const LoopNode* original_head) { 776 if (TraceLoopUnswitching) { 777 tty->print_cr("Loop Unswitching \"%d %s\" not possible due to control dependencies", 778 original_head->_idx, original_head->Name()); 779 } 780 } 781 782 void PhaseIdealLoop::trace_loop_unswitching_count(IdealLoopTree* loop, LoopNode* original_head) { 783 if (TraceLoopOpts) { 784 tty->print("Unswitch %d ", original_head->unswitch_count() + 1); 785 loop->dump_head(); 786 } 787 } 788 789 void PhaseIdealLoop::trace_loop_unswitching_result(const UnswitchedLoopSelector& unswitched_loop_selector, 790 const UnswitchCandidate& unswitch_candidate, 791 const LoopNode* original_head, const LoopNode* new_head) { 792 if (TraceLoopUnswitching) { 793 IfNode* unswitch_candidate_if = unswitch_candidate.candidate(); 794 IfNode* loop_selector = unswitched_loop_selector.selector_if(); 795 tty->print_cr("Loop Unswitching:"); 796 tty->print_cr("- Unswitch-Candidate-If: %d %s", unswitch_candidate_if->_idx, unswitch_candidate_if->Name()); 797 tty->print_cr("- Loop-Selector-If: %d %s", loop_selector->_idx, loop_selector->Name()); 798 tty->print_cr("- True-Path-Loop (=Orig): %d %s", original_head->_idx, original_head->Name()); 799 tty->print_cr("- False-Path-Loop (=Clone): %d %s", new_head->_idx, new_head->Name()); 800 unswitch_candidate.trace_flat_array_checks(); 801 } 802 } 803 804 void PhaseIdealLoop::trace_loop_multiversioning_result(const LoopSelector& loop_selector, 805 const LoopNode* original_head, const LoopNode* new_head) { 806 if (TraceLoopMultiversioning) { 807 IfNode* selector_if = loop_selector.selector(); 808 tty->print_cr("Loop Multiversioning:"); 809 tty->print_cr("- Loop-Selector-If: %d %s", selector_if->_idx, selector_if->Name()); 810 tty->print_cr("- True-Path-Loop (=Orig / Fast): %d %s", original_head->_idx, original_head->Name()); 811 tty->print_cr("- False-Path-Loop (=Clone / Slow): %d %s", new_head->_idx, new_head->Name()); 812 } 813 } 814 #endif 815 816 // When unswitching a counted loop, we need to convert it back to a normal loop since it's not a proper pre, main or, 817 // post loop anymore after loop unswitching. We also lose the multiversion structure, with access to the multiversion_if. 818 void PhaseIdealLoop::revert_to_normal_loop(const LoopNode* loop_head) { 819 CountedLoopNode* cl = loop_head->isa_CountedLoop(); 820 if (cl == nullptr) { return; } 821 if (!cl->is_normal_loop()) { cl->set_normal_loop(); } 822 if (cl->is_multiversion()) { cl->set_no_multiversion(); } 823 } 824 825 // Hoist invariant CheckCastPPNodes out of each unswitched loop version to the appropriate loop selector If projection. 826 void PhaseIdealLoop::hoist_invariant_check_casts(const IdealLoopTree* loop, const Node_List& old_new, 827 const UnswitchCandidate& unswitch_candidate, 828 const IfNode* loop_selector) { 829 ResourceMark rm; 830 GrowableArray<CheckCastPPNode*> loop_invariant_check_casts; 831 const IfNode* unswitch_candidate_if = unswitch_candidate.candidate(); 832 for (DUIterator_Fast imax, i = unswitch_candidate_if->fast_outs(imax); i < imax; i++) { 833 IfProjNode* proj = unswitch_candidate_if->fast_out(i)->as_IfProj(); 834 // Copy to a worklist for easier manipulation 835 for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) { 836 CheckCastPPNode* check_cast = proj->fast_out(j)->isa_CheckCastPP(); 837 if (check_cast != nullptr && loop->is_invariant(check_cast->in(1))) { 838 loop_invariant_check_casts.push(check_cast); 839 } 840 } 841 IfProjNode* loop_selector_if_proj = loop_selector->proj_out(proj->_con)->as_IfProj(); 842 while (loop_invariant_check_casts.length() > 0) { 843 CheckCastPPNode* cast = loop_invariant_check_casts.pop(); 844 Node* cast_clone = cast->clone(); 845 cast_clone->set_req(0, loop_selector_if_proj); 846 _igvn.replace_input_of(cast, 1, cast_clone); 847 register_new_node(cast_clone, loop_selector_if_proj); 848 // Same for the false-path-loop if there are not multiple flat array checks (in that case we leave the 849 // false-path-loop unchanged). 850 if (!unswitch_candidate.has_multiple_flat_array_check_candidates()) { 851 Node* use_clone = old_new[cast->_idx]; 852 _igvn.replace_input_of(use_clone, 1, cast_clone); 853 } 854 } 855 } 856 } 857 858 // Enable more optimizations possibilities in the next IGVN round. 859 void PhaseIdealLoop::add_unswitched_loop_version_bodies_to_igvn(IdealLoopTree* loop, const Node_List& old_new) { 860 loop->record_for_igvn(); 861 for (int i = loop->_body.size() - 1; i >= 0; i--) { 862 Node* n = loop->_body[i]; 863 Node* n_clone = old_new[n->_idx]; 864 _igvn._worklist.push(n_clone); 865 } 866 } 867 868 void PhaseIdealLoop::increment_unswitch_counts(LoopNode* original_head, LoopNode* new_head) { 869 const int unswitch_count = original_head->unswitch_count() + 1; 870 original_head->set_unswitch_count(unswitch_count); 871 new_head->set_unswitch_count(unswitch_count); 872 }