1 /*
2 * Copyright (c) 2006, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "memory/allocation.inline.hpp"
26 #include "opto/castnode.hpp"
27 #include "opto/cfgnode.hpp"
28 #include "opto/connode.hpp"
29 #include "opto/convertnode.hpp"
30 #include "opto/loopnode.hpp"
31 #include "opto/opaquenode.hpp"
32 #include "opto/predicates.hpp"
33 #include "opto/rootnode.hpp"
34
35 // Multiversioning:
36 // A loop is cloned, and a selector If decides which loop is taken at run-time: the true-path-loop (original) or the
37 // false-path-loop (cloned).
38 //
39 // Use-cases:
40 // - Speculative compilation:
41 // The selector If checks some assumptions which allow stronger optimization in the true-path-loop. If the assumptions
42 // do not hold, we can still execute in the false-path-loop, although with fewer optimizations.
43 // See: PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks
44 // PhaseIdealLoop::create_new_if_for_multiversion
45 //
46 // - Unswitching:
47 // The selector If has the same (loop invariant) condition as some unswitching candidate If inside the loop. This
48 // allows us to constant-fold the unswitching candidate If to true in the true-path-loop and to false in the
49 // false-path-loop, thus eliminating the unswitching candidate If from the loop.
50 //
51 //
52 // Loop Unswitching is a loop optimization to move an invariant, non-loop-exiting test in the loop body before the loop.
53 // Such a test is either always true or always false in all loop iterations and could therefore only be executed once.
54 // To achieve that, we duplicate the loop and change the original and cloned loop as follows:
55 // - Original loop -> true-path-loop:
56 // The true-path of the invariant, non-loop-exiting test in the original loop
57 // is kept while the false-path is killed. We call this unswitched loop version
58 // the true-path-loop.
59 // - Cloned loop -> false-path-loop:
60 // The false-path of the invariant, non-loop-exiting test in the cloned loop
61 // is kept while the true-path is killed. We call this unswitched loop version
62 // the false-path loop.
63 //
64 // The invariant, non-loop-exiting test can now be moved before both loops (to only execute it once) and turned into a
65 // loop selector If node to select at runtime which unswitched loop version should be executed.
66 // - Loop selector true? Execute the true-path-loop.
67 // - Loop selector false? Execute the false-path-loop.
68 //
69 // Note that even though an invariant test that exits the loop could also be optimized with Loop Unswitching, it is more
70 // efficient to simply peel the loop which achieves the same result in a simpler manner (also see policy_peeling()).
71 //
72 // The following graphs summarizes the Loop Unswitching optimization.
73 // We start with the original loop:
74 //
75 // [Predicates]
76 // |
77 // Original Loop
78 // stmt1
79 // if (invariant-test)
80 // if-path
81 // else
82 // else-path
83 // stmt2
84 // Endloop
85 //
86 //
87 // which is unswitched into a true-path-loop and a false-path-loop together with a loop selector:
88 //
89 //
90 // [Initialized Assertion Predicates]
91 // |
92 // loop selector If (invariant-test)
93 // / \
94 // true? false?
95 // / \
96 // [Cloned Parse Predicates] [Cloned Parse Predicates]
97 // [Cloned Template [Cloned Template
98 // Assertion Predicates] Assertion Predicates]
99 // | |
100 // True-Path-Loop False-Path-Loop
101 // cloned stmt1 cloned stmt1
102 // cloned if-path cloned else-path
103 // cloned stmt2 cloned stmt2
104 // Endloop Endloop
105
106
107 // Return true if the loop should be unswitched or false otherwise.
108 bool IdealLoopTree::policy_unswitching(PhaseIdealLoop* phase) const {
109 if (!LoopUnswitching) {
110 return false;
111 }
112 if (!_head->is_Loop()) {
113 return false;
114 }
115
116 // If nodes are depleted, some transform has miscalculated its needs.
117 assert(!phase->exceeding_node_budget(), "sanity");
118
119 // check for vectorized loops, any unswitching was already applied
120 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
121 return false;
122 }
123
124 LoopNode* head = _head->as_Loop();
125 if (head->unswitch_count() + 1 > head->unswitch_max()) {
126 return false;
127 }
128
129 if (head->is_flat_arrays()) {
130 return false;
131 }
132
133 if (no_unswitch_candidate()) {
134 return false;
135 }
136
137 // Too speculative if running low on nodes.
138 return phase->may_require_nodes(est_loop_clone_sz(2));
139 }
140
141 // Check the absence of any If node that can be used for Loop Unswitching. In that case, no Loop Unswitching can be done.
142 bool IdealLoopTree::no_unswitch_candidate() const {
143 ResourceMark rm;
144 Node_List dont_care;
145 return _phase->find_unswitch_candidates(this, dont_care) == nullptr;
146 }
147
148 // Find an invariant test in the loop body that does not exit the loop. If multiple tests are found, we pick the first
149 // one in the loop body as "unswitch candidate" to apply Loop Unswitching on.
150 // Depending on whether we find such a candidate and if we do, whether it's a flat array check, we do the following:
151 // (1) Candidate is not a flat array check:
152 // Return the unique unswitch candidate.
153 // (2) Candidate is a flat array check:
154 // Collect all remaining non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks'
155 // list in order to create an unswitched loop version without any flat array checks and a version with checks
156 // (i.e. same as original loop). Return the initially found candidate which could be unique if no further flat array
157 // checks are found.
158 // (3) No candidate is initially found:
159 // As in (2), we collect all non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks'
160 // list. Pick the first collected flat array check as unswitch candidate, which could be unique, and return it (a).
161 // If there are no flat array checks, we cannot apply Loop Unswitching (b).
162 //
163 // Note that for both (2) and (3a), if there are multiple flat array checks, then the candidate's FlatArrayCheckNode is
164 // later updated in Loop Unswitching to perform a flat array check on all collected flat array checks.
165 IfNode* PhaseIdealLoop::find_unswitch_candidates(const IdealLoopTree* loop, Node_List& flat_array_checks) const {
166 IfNode* unswitch_candidate = find_unswitch_candidate_from_idoms(loop);
167 if (unswitch_candidate != nullptr && !unswitch_candidate->is_flat_array_check(&_igvn)) {
168 // Case (1)
169 return unswitch_candidate;
170 }
171
172 collect_flat_array_checks(loop, flat_array_checks);
173 if (unswitch_candidate != nullptr) {
174 // Case (2)
175 assert(unswitch_candidate->is_flat_array_check(&_igvn), "is a flat array check");
176 return unswitch_candidate;
177 } else if (flat_array_checks.size() > 0) {
178 // Case (3a): Pick first one found as candidate (there could be multiple).
179 return flat_array_checks[0]->as_If();
180 }
181
182 // Case (3b): No suitable unswitch candidate found.
183 return nullptr;
184 }
185
186 // Find an unswitch candidate by following the idom chain from the loop back edge.
187 IfNode* PhaseIdealLoop::find_unswitch_candidate_from_idoms(const IdealLoopTree* loop) const {
188 LoopNode* head = loop->_head->as_Loop();
189 IfNode* unswitch_candidate = nullptr;
190 Node* n = head->in(LoopNode::LoopBackControl);
191 while (n != head) {
192 Node* n_dom = idom(n);
193 if (n->is_Region()) {
194 if (n_dom->is_If()) {
195 IfNode* iff = n_dom->as_If();
196 if (iff->in(1)->is_Bool()) {
197 BoolNode* bol = iff->in(1)->as_Bool();
198 if (bol->in(1)->is_Cmp()) {
199 // If condition is invariant and not a loop exit,
200 // then found reason to unswitch.
201 if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) {
202 assert(iff->Opcode() == Op_If || iff->is_RangeCheck() || iff->is_BaseCountedLoopEnd(), "valid ifs");
203 unswitch_candidate = iff;
204 }
205 }
206 }
207 }
208 }
209 n = n_dom;
210 }
211 return unswitch_candidate;
212 }
213
214 // Collect all flat array checks in the provided 'flat_array_checks' list.
215 void PhaseIdealLoop::collect_flat_array_checks(const IdealLoopTree* loop, Node_List& flat_array_checks) const {
216 assert(flat_array_checks.size() == 0, "should be empty initially");
217 for (uint i = 0; i < loop->_body.size(); i++) {
218 Node* next = loop->_body.at(i);
219 if (next->is_If() && next->as_If()->is_flat_array_check(&_igvn) && loop->is_invariant(next->in(1)) &&
220 !loop->is_loop_exit(next)) {
221 flat_array_checks.push(next);
222 }
223 }
224 }
225
226 // This class represents an "unswitch candidate" which is an If that can be used to perform Loop Unswitching on. If the
227 // candidate is a flat array check candidate, then we also collect all remaining non-loop-exiting flat array checks.
228 // These are candidates as well. We want to get rid of all these flat array checks in the true-path-loop for the
229 // following reason:
230 //
231 // FlatArrayCheckNodes are used with array accesses to switch between a flat and a non-flat array access. We want
232 // the performance impact on non-flat array accesses to be as small as possible. We therefore create the following
233 // loops in Loop Unswitching:
234 // - True-path-loop: We remove all non-loop-exiting flat array checks to get a loop with only non-flat array accesses
235 // (i.e. a fast path loop).
236 // - False-path-loop: We keep all flat array checks in this loop (i.e. a slow path loop).
237 class UnswitchCandidate : public StackObj {
238 PhaseIdealLoop* const _phase;
239 const Node_List& _old_new;
240 Node* const _original_loop_entry;
241 // If _candidate is a flat array check, this list contains all non-loop-exiting flat array checks in the loop body.
242 Node_List _flat_array_check_candidates;
243 IfNode* const _candidate;
244
245 public:
246 UnswitchCandidate(IdealLoopTree* loop, const Node_List& old_new)
247 : _phase(loop->_phase),
248 _old_new(old_new),
249 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
250 _flat_array_check_candidates(),
251 _candidate(find_unswitch_candidate(loop)) {}
252 NONCOPYABLE(UnswitchCandidate);
253
254 IfNode* find_unswitch_candidate(IdealLoopTree* loop) {
255 IfNode* unswitch_candidate = _phase->find_unswitch_candidates(loop, _flat_array_check_candidates);
256 assert(unswitch_candidate != nullptr, "guaranteed to exist by policy_unswitching");
257 assert(_phase->is_member(loop, unswitch_candidate), "must be inside original loop");
258 return unswitch_candidate;
259 }
260
261 IfNode* candidate() const {
262 return _candidate;
263 }
264
265 // Is the candidate a flat array check and are there other flat array checks as well?
266 bool has_multiple_flat_array_check_candidates() const {
267 return _flat_array_check_candidates.size() > 1;
268 }
269
270 // Remove all candidates from the true-path-loop which are now dominated by the loop selector
271 // (i.e. 'true_path_loop_proj'). The removed candidates are folded in the next IGVN round.
272 void update_in_true_path_loop(IfTrueNode* true_path_loop_proj) const {
273 remove_from_loop(true_path_loop_proj, _candidate);
274 if (has_multiple_flat_array_check_candidates()) {
275 remove_flat_array_checks(true_path_loop_proj);
276 }
277 }
278
279 // Remove a unique candidate from the false-path-loop which is now dominated by the loop selector
280 // (i.e. 'false_path_loop_proj'). The removed candidate is folded in the next IGVN round. If there are multiple
281 // candidates (i.e. flat array checks), then we leave them in the false-path-loop and only mark the loop such that it
282 // is not unswitched anymore in later loop opts rounds.
283 void update_in_false_path_loop(IfFalseNode* false_path_loop_proj, LoopNode* false_path_loop) const {
284 if (has_multiple_flat_array_check_candidates()) {
285 // Leave the flat array checks in the false-path-loop and prevent it from being unswitched again based on these
286 // checks.
287 false_path_loop->mark_flat_arrays();
288 } else {
289 remove_from_loop(false_path_loop_proj, _old_new[_candidate->_idx]->as_If());
290 }
291 }
292
293 private:
294 void remove_from_loop(IfProjNode* dominating_proj, IfNode* candidate) const {
295 _phase->igvn().rehash_node_delayed(candidate);
296 _phase->dominated_by(dominating_proj, candidate);
297 }
298
299 void remove_flat_array_checks(IfProjNode* dominating_proj) const {
300 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
301 IfNode* flat_array_check = _flat_array_check_candidates.at(i)->as_If();
302 _phase->igvn().rehash_node_delayed(flat_array_check);
303 _phase->dominated_by(dominating_proj, flat_array_check);
304 }
305 }
306
307 public:
308 // Merge all flat array checks into a single new BoolNode and return it.
309 BoolNode* merge_flat_array_checks() const {
310 assert(has_multiple_flat_array_check_candidates(), "must have multiple flat array checks to merge");
311 assert(_candidate->in(1)->as_Bool()->_test._test == BoolTest::ne, "IfTrue proj must point to flat array");
312 BoolNode* merged_flat_array_check_bool = create_bool_node();
313 create_flat_array_check_node(merged_flat_array_check_bool);
314 return merged_flat_array_check_bool;
315 }
316
317 private:
318 BoolNode* create_bool_node() const {
319 BoolNode* merged_flat_array_check_bool = _candidate->in(1)->clone()->as_Bool();
320 _phase->register_new_node(merged_flat_array_check_bool, _original_loop_entry);
321 return merged_flat_array_check_bool;
322 }
323
324 void create_flat_array_check_node(BoolNode* merged_flat_array_check_bool) const {
325 FlatArrayCheckNode* cloned_flat_array_check = merged_flat_array_check_bool->in(1)->clone()->as_FlatArrayCheck();
326 _phase->register_new_node(cloned_flat_array_check, _original_loop_entry);
327 merged_flat_array_check_bool->set_req(1, cloned_flat_array_check);
328 set_flat_array_check_inputs(cloned_flat_array_check);
329 }
330
331 // Combine all checks into a single one that fails if one array is flat.
332 void set_flat_array_check_inputs(FlatArrayCheckNode* cloned_flat_array_check) const {
333 uint total = FlatArrayCheckNode::ArrayOrKlass;
334 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
335 Node* flat_array_check = _flat_array_check_candidates.at(i)->in(1)->in(1);
336 total += flat_array_check->req() - FlatArrayCheckNode::ArrayOrKlass;
337 }
338 cloned_flat_array_check->add_req_batch(_phase->C->top(), total - cloned_flat_array_check->req());
339 uint k = FlatArrayCheckNode::ArrayOrKlass;
340 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
341 Node* flat_array_check = _flat_array_check_candidates.at(i)->in(1)->in(1);
342 for (uint j = FlatArrayCheckNode::ArrayOrKlass; j < flat_array_check->req(); j++) {
343 cloned_flat_array_check->set_req(k, flat_array_check->in(j));
344 k++;
345 }
346 }
347 assert(k == total, "missed some checks?");
348 }
349
350 public:
351 #ifndef PRODUCT
352 void trace_flat_array_checks() const {
353 if (has_multiple_flat_array_check_candidates()) {
354 tty->print_cr("- Unswitched and Merged Flat Array Checks:");
355 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
356 Node* unswitch_iff = _flat_array_check_candidates.at(i);
357 Node* cloned_unswitch_iff = _old_new[unswitch_iff->_idx];
358 assert(cloned_unswitch_iff != nullptr, "must exist");
359 tty->print_cr(" - %d %s -> %d %s", unswitch_iff->_idx, unswitch_iff->Name(),
360 cloned_unswitch_iff->_idx, cloned_unswitch_iff->Name());
361 }
362 }
363 }
364 #endif // NOT PRODUCT
365 };
366
367 // LoopSelector is used for loop multiversioning and unswitching. This class creates an If node (i.e. loop selector)
368 // that selects if the true-path-loop or the false-path-loop should be executed at runtime.
369 class LoopSelector : public StackObj {
370 // Cached fields for construction.
371 PhaseIdealLoop* const _phase;
372 IdealLoopTree* const _outer_loop;
373 Node* const _original_loop_entry;
374 const uint _dom_depth; // of original_loop_entry
375
376 // Constructed selector if with its projections.
377 IfNode* const _selector;
378 IfTrueNode* const _true_path_loop_proj;
379 IfFalseNode* const _false_path_loop_proj;
380
381 enum PathToLoop {
382 TRUE_PATH, FALSE_PATH
383 };
384
385 public:
386 // For multiversioning: create a new selector (multiversion_if) from a bol condition.
387 LoopSelector(IdealLoopTree* loop, Node* bol, float prob, float fcnt)
388 : _phase(loop->_phase),
389 _outer_loop(loop->skip_strip_mined()->_parent),
390 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
391 _dom_depth(_phase->dom_depth(_original_loop_entry)),
392 _selector(create_multiversioning_if(bol, prob, fcnt)), // multiversioning
393 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()),
394 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) {
395 }
396
397 // For unswitching: create an unswitching if before the loop, from a pre-existing
398 // unswitching_candidate inside the loop.
399 LoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate)
400 : _phase(loop->_phase),
401 _outer_loop(loop->skip_strip_mined()->_parent),
402 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
403 _dom_depth(_phase->dom_depth(_original_loop_entry)),
404 _selector(create_unswitching_if(unswitch_candidate)), // unswitching
405 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()),
406 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) {
407 }
408 NONCOPYABLE(LoopSelector);
409
410 private:
411 IfNode* create_multiversioning_if(Node* bol, float prob, float fcnt) {
412 _phase->igvn().rehash_node_delayed(_original_loop_entry);
413 IfNode* selector_if = new IfNode(_original_loop_entry, bol, prob, fcnt);
414 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, _dom_depth);
415 return selector_if;
416 }
417
418 IfNode* create_unswitching_if(const UnswitchCandidate& unswitch_candidate) {
419 const uint dom_depth = _phase->dom_depth(_original_loop_entry);
420 _phase->igvn().rehash_node_delayed(_original_loop_entry);
421 IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
422 BoolNode* selector_bool;
423 if (unswitch_candidate.has_multiple_flat_array_check_candidates()) {
424 selector_bool = unswitch_candidate.merge_flat_array_checks();
425 } else {
426 selector_bool = unswitch_candidate_if->in(1)->as_Bool();
427 }
428 IfNode* selector_if = IfNode::make_with_same_profile(unswitch_candidate_if, _original_loop_entry, selector_bool);
429 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, dom_depth);
430 return selector_if;
431 }
432
433 IfProjNode* create_proj_to_loop(const PathToLoop path_to_loop) {
434 IfProjNode* proj_to_loop;
435 if (path_to_loop == TRUE_PATH) {
436 proj_to_loop = new IfTrueNode(_selector);
437 } else {
438 proj_to_loop = new IfFalseNode(_selector);
439 }
440 _phase->register_node(proj_to_loop, _outer_loop, _selector, _dom_depth);
441 return proj_to_loop;
442 }
443
444 public:
445 IfNode* selector() const {
446 return _selector;
447 }
448
449 IfTrueNode* true_path_loop_proj() const {
450 return _true_path_loop_proj;
451 }
452
453 IfFalseNode* false_path_loop_proj() const {
454 return _false_path_loop_proj;
455 }
456 };
457
458 // This class creates an If node (i.e. loop selector) that selects if the true-path-loop or the false-path-loop should be
459 // executed at runtime. This is done by finding an invariant and non-loop-exiting unswitch candidate If node (guaranteed
460 // to exist at this point) to perform Loop Unswitching on.
461 class UnswitchedLoopSelector : public StackObj {
462 const UnswitchCandidate& _unswitch_candidate;
463 const LoopSelector _loop_selector;
464
465 public:
466 UnswitchedLoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate)
467 : _unswitch_candidate(unswitch_candidate),
468 _loop_selector(loop, _unswitch_candidate) {}
469 NONCOPYABLE(UnswitchedLoopSelector);
470
471 IfNode* selector_if() const {
472 return _loop_selector.selector();
473 }
474
475 const LoopSelector& loop_selector() const {
476 return _loop_selector;
477 }
478 };
479
480 // Class to unswitch the original loop and create Predicates at the new unswitched loop versions. The newly cloned loop
481 // becomes the false-path-loop while original loop becomes the true-path-loop.
482 class OriginalLoop : public StackObj {
483 LoopNode* const _loop_head;
484 LoopNode* const _outer_loop_head; // OuterStripMinedLoopNode if loop strip mined, else just the loop head.
485 IdealLoopTree* const _loop;
486 Node_List& _old_new;
487 PhaseIdealLoop* const _phase;
488
489 public:
490 OriginalLoop(IdealLoopTree* loop, Node_List& old_new)
491 : _loop_head(loop->_head->as_Loop()),
492 _outer_loop_head(loop->_head->as_Loop()->skip_strip_mined()),
493 _loop(loop),
494 _old_new(old_new),
495 _phase(loop->_phase) {}
496 NONCOPYABLE(OriginalLoop);
497
498 // Unswitch the original loop on the invariant loop selector by creating a true-path-loop and a false-path-loop.
499 // Remove the unswitch candidate If from both unswitched loop versions which are now covered by the loop selector If.
500 void unswitch(const UnswitchedLoopSelector& unswitched_loop_selector) {
501 multiversion(unswitched_loop_selector.loop_selector());
502 }
503
504 // Multiversion the original loop. The loop selector if selects between the original loop (true-path-loop), and
505 // a copy of it (false-path-loop).
506 void multiversion(const LoopSelector& loop_selector) {
507 const uint first_false_path_loop_node_index = _phase->C->unique();
508 clone_loop(loop_selector);
509
510 move_parse_and_template_assertion_predicates_to_unswitched_loops(loop_selector,
511 first_false_path_loop_node_index);
512 DEBUG_ONLY(verify_loop_versions(_loop->_head->as_Loop(), loop_selector);)
513
514 _phase->recompute_dom_depth();
515 }
516
517 private:
518 void clone_loop(const LoopSelector& loop_selector) {
519 _phase->clone_loop(_loop, _old_new, _phase->dom_depth(_outer_loop_head),
520 PhaseIdealLoop::CloneIncludesStripMined, loop_selector.selector());
521 fix_loop_entries(loop_selector);
522 }
523
524 void fix_loop_entries(const LoopSelector& loop_selector) const {
525 _phase->replace_loop_entry(_outer_loop_head, loop_selector.true_path_loop_proj());
526 LoopNode* false_path_loop_strip_mined_head = old_to_new(_outer_loop_head)->as_Loop();
527 _phase->replace_loop_entry(false_path_loop_strip_mined_head,
528 loop_selector.false_path_loop_proj());
529 }
530
531 // Moves the Parse And Template Assertion Predicates to the true and false path loop. They are inserted between the
532 // loop heads and the loop selector If projections. The old Parse and Template Assertion Predicates before
533 // the unswitched loop selector are killed.
534 void move_parse_and_template_assertion_predicates_to_unswitched_loops(
535 const LoopSelector& loop_selector, const uint first_false_path_loop_node_index) const {
536 const NodeInOriginalLoopBody node_in_true_path_loop_body(first_false_path_loop_node_index, _old_new);
537 const NodeInClonedLoopBody node_in_false_path_loop_body(first_false_path_loop_node_index);
538 CloneUnswitchedLoopPredicatesVisitor
539 clone_unswitched_loop_predicates_visitor(_loop_head, old_to_new(_loop_head)->as_Loop(), node_in_true_path_loop_body,
540 node_in_false_path_loop_body, _phase);
541 Node* source_loop_entry = loop_selector.selector()->in(0);
542 PredicateIterator predicate_iterator(source_loop_entry);
543 predicate_iterator.for_each(clone_unswitched_loop_predicates_visitor);
544 }
545
546 #ifdef ASSERT
547 void verify_loop_versions(LoopNode* true_path_loop_head,
548 const LoopSelector& loop_selector) const {
549 verify_loop_version(true_path_loop_head,
550 loop_selector.true_path_loop_proj());
551 verify_loop_version(old_to_new(true_path_loop_head)->as_Loop(),
552 loop_selector.false_path_loop_proj());
553 }
554
555 static void verify_loop_version(LoopNode* loop_head, IfProjNode* loop_selector_if_proj) {
556 Node* entry = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
557 const Predicates predicates(entry);
558 // When skipping all predicates, we should end up at 'loop_selector_if_proj'.
559 assert(loop_selector_if_proj == predicates.entry(), "should end up at loop selector If");
560 }
561 #endif // ASSERT
562
563 Node* old_to_new(const Node* old) const {
564 return _old_new[old->_idx];
565 }
566 };
567
568 // See comments below file header for more information about Loop Unswitching.
569 void PhaseIdealLoop::do_unswitching(IdealLoopTree* loop, Node_List& old_new) {
570 assert(LoopUnswitching, "LoopUnswitching must be enabled");
571
572 LoopNode* original_head = loop->_head->as_Loop();
573 if (has_control_dependencies_from_predicates(original_head)) {
574 NOT_PRODUCT(trace_loop_unswitching_impossible(original_head);)
575 return;
576 }
577
578 NOT_PRODUCT(trace_loop_unswitching_count(loop, original_head);)
579 C->print_method(PHASE_BEFORE_LOOP_UNSWITCHING, 4, original_head);
580
581 revert_to_normal_loop(original_head);
582
583 const UnswitchCandidate unswitch_candidate(loop, old_new);
584 const UnswitchedLoopSelector unswitched_loop_selector(loop, unswitch_candidate);
585 OriginalLoop original_loop(loop, old_new);
586 original_loop.unswitch(unswitched_loop_selector);
587
588 unswitch_candidate.update_in_true_path_loop(unswitched_loop_selector.loop_selector().true_path_loop_proj());
589 unswitch_candidate.update_in_false_path_loop(unswitched_loop_selector.loop_selector().false_path_loop_proj(),
590 old_new[original_head->_idx]->as_Loop());
591 hoist_invariant_check_casts(loop, old_new, unswitch_candidate, unswitched_loop_selector.selector_if());
592 add_unswitched_loop_version_bodies_to_igvn(loop, old_new);
593
594 LoopNode* new_head = old_new[original_head->_idx]->as_Loop();
595 increment_unswitch_counts(original_head, new_head);
596
597 NOT_PRODUCT(trace_loop_unswitching_result(unswitched_loop_selector, unswitch_candidate, original_head, new_head);)
598 C->print_method(PHASE_AFTER_LOOP_UNSWITCHING, 4, new_head);
599 C->set_major_progress();
600 }
601
602 void PhaseIdealLoop::do_multiversioning(IdealLoopTree* lpt, Node_List& old_new) {
603 #ifndef PRODUCT
604 if (TraceLoopOpts || TraceLoopMultiversioning) {
605 tty->print("Multiversion ");
606 lpt->dump_head();
607 }
608 #endif
609 assert(LoopMultiversioning, "LoopMultiversioning must be enabled");
610
611 CountedLoopNode* original_head = lpt->_head->as_CountedLoop();
612 C->print_method(PHASE_BEFORE_LOOP_MULTIVERSIONING, 4, original_head);
613
614 Node* one = _igvn.intcon(1);
615 set_ctrl(one, C->root());
616 Node* opaque = new OpaqueMultiversioningNode(C, one);
617 set_ctrl(opaque, C->root());
618 _igvn.register_new_node_with_optimizer(opaque);
619 _igvn.set_type(opaque, TypeInt::BOOL);
620
621 const LoopSelector loop_selector(lpt, opaque, PROB_LIKELY_MAG(3), COUNT_UNKNOWN);
622 OriginalLoop original_loop(lpt, old_new);
623 original_loop.multiversion(loop_selector);
624
625 add_unswitched_loop_version_bodies_to_igvn(lpt, old_new);
626
627 CountedLoopNode* new_head = old_new[original_head->_idx]->as_CountedLoop();
628 original_head->set_multiversion_fast_loop();
629 new_head->set_multiversion_delayed_slow_loop();
630
631 NOT_PRODUCT(trace_loop_multiversioning_result(loop_selector, original_head, new_head);)
632 C->print_method(PHASE_AFTER_LOOP_MULTIVERSIONING, 4, new_head);
633 C->set_major_progress();
634 }
635
636 // Create a new if in the multiversioning pattern, adding an additional condition for the
637 // multiversioning fast-loop.
638 //
639 // Before:
640 // entry opaque
641 // | |
642 // multiversion_if
643 // | |
644 // +----------------+ +---------------+
645 // | |
646 // multiversion_fast_proj multiversion_slow_proj
647 // |
648 // +--------+
649 // |
650 // slow_path
651 //
652 //
653 // After:
654 // entry opaque <-- to be replaced by caller
655 // | |
656 // new_if
657 // | |
658 // | +-----------------------------+
659 // | |
660 // new_if_true opaque new_if_false
661 // | | |
662 // multiversion_if |
663 // | | |
664 // +----------------+ +---------------+ |
665 // | | |
666 // multiversion_fast_proj new_multiversion_slow_proj |
667 // | |
668 // +------+ |
669 // | |
670 // region
671 // |
672 // slow_path
673 //
674 // For more descriptions on multiversioning:
675 // See: PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks
676 IfTrueNode* PhaseIdealLoop::create_new_if_for_multiversion(IfTrueNode* multiversioning_fast_proj) {
677 // Give all nodes in the old sub-graph a name.
678 IfNode* multiversion_if = multiversioning_fast_proj->in(0)->as_If();
679 Node* entry = multiversion_if->in(0);
680 OpaqueMultiversioningNode* opaque = multiversion_if->in(1)->as_OpaqueMultiversioning();
681 IfFalseNode* multiversion_slow_proj = multiversion_if->false_proj();
682 Node* slow_path = multiversion_slow_proj->unique_ctrl_out();
683
684 // The slow_loop may still be delayed, and waiting for runtime-checks to be added to the
685 // multiversion_if. Now that we have at least one condition for the multiversioning,
686 // we should resume optimizations for the slow loop.
687 opaque->notify_slow_loop_that_it_can_resume_optimizations();
688
689 // Create new_if with its projections.
690 IfNode* new_if = IfNode::make_with_same_profile(multiversion_if, entry, opaque);
691 IdealLoopTree* lp = get_loop(entry);
692 register_control(new_if, lp, entry);
693
694 IfTrueNode* new_if_true = new IfTrueNode(new_if);
695 IfFalseNode* new_if_false = new IfFalseNode(new_if);
696 register_control(new_if_true, lp, new_if);
697 register_control(new_if_false, lp, new_if);
698
699 // Hook new_if_true into multiversion_if.
700 _igvn.replace_input_of(multiversion_if, 0, new_if_true);
701
702 // Clone multiversion_slow_path - this allows us to easily carry the dependencies to
703 // the new region below.
704 IfFalseNode* new_multiversion_slow_proj = multiversion_slow_proj->clone()->as_IfFalse();
705 register_control(new_multiversion_slow_proj, lp, multiversion_if);
706
707 // Create new Region.
708 RegionNode* region = new RegionNode(1);
709 region->add_req(new_multiversion_slow_proj);
710 region->add_req(new_if_false);
711 register_control(region, lp, new_multiversion_slow_proj);
712
713 // Hook region into slow_path, instead of the multiversion_slow_proj.
714 // This also moves all other dependencies of the multiversion_slow_proj to the region.
715 // The replace_node_and_forward_ctrl ensures that any get_ctrl that used to have
716 // multiversion_slow_proj as their control are forwarded to the new region node as
717 // their control.
718 replace_node_and_forward_ctrl(multiversion_slow_proj, region);
719
720 return new_if_true;
721 }
722
723 OpaqueMultiversioningNode* find_multiversion_opaque_from_multiversion_if_false(Node* maybe_multiversion_if_false) {
724 IfFalseNode* multiversion_if_false = maybe_multiversion_if_false->isa_IfFalse();
725 if (multiversion_if_false == nullptr) { return nullptr; }
726 IfNode* multiversion_if = multiversion_if_false->in(0)->isa_If();
727 if (multiversion_if == nullptr) { return nullptr; }
728 return multiversion_if->in(1)->isa_OpaqueMultiversioning();
729 }
730
731 bool PhaseIdealLoop::try_resume_optimizations_for_delayed_slow_loop(IdealLoopTree* lpt) {
732 CountedLoopNode* cl = lpt->_head->as_CountedLoop();
733 assert(cl->is_multiversion_delayed_slow_loop(), "must currently be delayed");
734
735 // Find multiversion_if.
736 Node* entry = cl->skip_strip_mined()->in(LoopNode::EntryControl);
737 const Predicates predicates(entry);
738
739 Node* slow_path = predicates.entry();
740
741 // Find opaque.
742 OpaqueMultiversioningNode* opaque = nullptr;
743 if (slow_path->is_Region()) {
744 for (uint i = 1; i < slow_path->req(); i++) {
745 Node* n = slow_path->in(i);
746 opaque = find_multiversion_opaque_from_multiversion_if_false(n);
747 if (opaque != nullptr) { break; }
748 }
749 } else {
750 opaque = find_multiversion_opaque_from_multiversion_if_false(slow_path);
751 }
752 assert(opaque != nullptr, "must have found multiversion opaque node");
753 if (opaque == nullptr) { return false; }
754
755 // We may still be delayed, if there were not yet any runtime-checks added
756 // for the multiversioning. We may never add any, and then this loop would
757 // fold away. So we wait until some runtime-checks are added, then we know
758 // that this loop will be reachable and it is worth optimizing further.
759 if (opaque->is_delayed_slow_loop()) { return false; }
760
761 // Clear away the "delayed" status, i.e. resume optimizations.
762 cl->set_no_multiversion();
763 cl->set_multiversion_slow_loop();
764 #ifndef PRODUCT
765 if (TraceLoopOpts) {
766 tty->print("Resume Optimizations ");
767 lpt->dump_head();
768 }
769 #endif
770 return true;
771 }
772
773 bool PhaseIdealLoop::has_control_dependencies_from_predicates(LoopNode* head) {
774 Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
775 const Predicates predicates(entry);
776 if (predicates.has_any()) {
777 assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
778 if (entry->outcnt() > 1) {
779 // Bailout if there are predicates from which there are additional control dependencies (i.e. from loop
780 // entry 'entry') to previously partially peeled statements since this case is not handled and can lead
781 // to a wrong execution. Remove this bailout, once this is fixed.
782 return true;
783 }
784 }
785 return false;
786 }
787
788 #ifndef PRODUCT
789 void PhaseIdealLoop::trace_loop_unswitching_impossible(const LoopNode* original_head) {
790 if (TraceLoopUnswitching) {
791 tty->print_cr("Loop Unswitching \"%d %s\" not possible due to control dependencies",
792 original_head->_idx, original_head->Name());
793 }
794 }
795
796 void PhaseIdealLoop::trace_loop_unswitching_count(IdealLoopTree* loop, LoopNode* original_head) {
797 if (TraceLoopOpts) {
798 tty->print("Unswitch %d ", original_head->unswitch_count() + 1);
799 loop->dump_head();
800 }
801 }
802
803 void PhaseIdealLoop::trace_loop_unswitching_result(const UnswitchedLoopSelector& unswitched_loop_selector,
804 const UnswitchCandidate& unswitch_candidate,
805 const LoopNode* original_head, const LoopNode* new_head) {
806 if (TraceLoopUnswitching) {
807 IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
808 IfNode* loop_selector = unswitched_loop_selector.selector_if();
809 tty->print_cr("Loop Unswitching:");
810 tty->print_cr("- Unswitch-Candidate-If: %d %s", unswitch_candidate_if->_idx, unswitch_candidate_if->Name());
811 tty->print_cr("- Loop-Selector-If: %d %s", loop_selector->_idx, loop_selector->Name());
812 tty->print_cr("- True-Path-Loop (=Orig): %d %s", original_head->_idx, original_head->Name());
813 tty->print_cr("- False-Path-Loop (=Clone): %d %s", new_head->_idx, new_head->Name());
814 unswitch_candidate.trace_flat_array_checks();
815 }
816 }
817
818 void PhaseIdealLoop::trace_loop_multiversioning_result(const LoopSelector& loop_selector,
819 const LoopNode* original_head, const LoopNode* new_head) {
820 if (TraceLoopMultiversioning) {
821 IfNode* selector_if = loop_selector.selector();
822 tty->print_cr("Loop Multiversioning:");
823 tty->print_cr("- Loop-Selector-If: %d %s", selector_if->_idx, selector_if->Name());
824 tty->print_cr("- True-Path-Loop (=Orig / Fast): %d %s", original_head->_idx, original_head->Name());
825 tty->print_cr("- False-Path-Loop (=Clone / Slow): %d %s", new_head->_idx, new_head->Name());
826 }
827 }
828 #endif
829
830 // When unswitching a counted loop, we need to convert it back to a normal loop since it's not a proper pre, main or,
831 // post loop anymore after loop unswitching. We also lose the multiversion structure, with access to the multiversion_if.
832 void PhaseIdealLoop::revert_to_normal_loop(const LoopNode* loop_head) {
833 CountedLoopNode* cl = loop_head->isa_CountedLoop();
834 if (cl == nullptr) { return; }
835 if (!cl->is_normal_loop()) { cl->set_normal_loop(); }
836 if (cl->is_multiversion()) { cl->set_no_multiversion(); }
837 }
838
839 // Hoist invariant CheckCastPPNodes out of each unswitched loop version to the appropriate loop selector If projection.
840 void PhaseIdealLoop::hoist_invariant_check_casts(const IdealLoopTree* loop, const Node_List& old_new,
841 const UnswitchCandidate& unswitch_candidate,
842 const IfNode* loop_selector) {
843 ResourceMark rm;
844 GrowableArray<CheckCastPPNode*> loop_invariant_check_casts;
845 const IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
846 for (DUIterator_Fast imax, i = unswitch_candidate_if->fast_outs(imax); i < imax; i++) {
847 IfProjNode* proj = unswitch_candidate_if->fast_out(i)->as_IfProj();
848 // Copy to a worklist for easier manipulation
849 for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
850 CheckCastPPNode* check_cast = proj->fast_out(j)->isa_CheckCastPP();
851 if (check_cast != nullptr && loop->is_invariant(check_cast->in(1))) {
852 loop_invariant_check_casts.push(check_cast);
853 }
854 }
855 IfProjNode* loop_selector_if_proj = loop_selector->proj_out(proj->_con)->as_IfProj();
856 while (loop_invariant_check_casts.length() > 0) {
857 CheckCastPPNode* cast = loop_invariant_check_casts.pop();
858 Node* cast_clone = cast->clone();
859 cast_clone->set_req(0, loop_selector_if_proj);
860 _igvn.replace_input_of(cast, 1, cast_clone);
861 register_new_node(cast_clone, loop_selector_if_proj);
862 // Same for the false-path-loop if there are not multiple flat array checks (in that case we leave the
863 // false-path-loop unchanged).
864 if (!unswitch_candidate.has_multiple_flat_array_check_candidates()) {
865 Node* use_clone = old_new[cast->_idx];
866 _igvn.replace_input_of(use_clone, 1, cast_clone);
867 }
868 }
869 }
870 }
871
872 // Enable more optimizations possibilities in the next IGVN round.
873 void PhaseIdealLoop::add_unswitched_loop_version_bodies_to_igvn(IdealLoopTree* loop, const Node_List& old_new) {
874 loop->record_for_igvn();
875 for (int i = loop->_body.size() - 1; i >= 0; i--) {
876 Node* n = loop->_body[i];
877 Node* n_clone = old_new[n->_idx];
878 _igvn._worklist.push(n_clone);
879 }
880 }
881
882 void PhaseIdealLoop::increment_unswitch_counts(LoopNode* original_head, LoopNode* new_head) {
883 const int unswitch_count = original_head->unswitch_count() + 1;
884 original_head->set_unswitch_count(unswitch_count);
885 new_head->set_unswitch_count(unswitch_count);
886 }