1 /*
2 * Copyright (c) 2006, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "memory/allocation.inline.hpp"
26 #include "opto/castnode.hpp"
27 #include "opto/cfgnode.hpp"
28 #include "opto/connode.hpp"
29 #include "opto/convertnode.hpp"
30 #include "opto/loopnode.hpp"
31 #include "opto/opaquenode.hpp"
32 #include "opto/predicates.hpp"
33 #include "opto/rootnode.hpp"
34
35 // Multiversioning:
36 // A loop is cloned, and a selector If decides which loop is taken at run-time: the true-path-loop (original) or the
37 // false-path-loop (cloned).
38 //
39 // Use-cases:
40 // - Speculative compilation:
41 // The selector If checks some assumptions which allow stronger optimization in the true-path-loop. If the assumptions
42 // do not hold, we can still execute in the false-path-loop, although with fewer optimizations.
43 // See: PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks
44 // PhaseIdealLoop::create_new_if_for_multiversion
45 //
46 // - Unswitching:
47 // The selector If has the same (loop invariant) condition as some unswitching candidate If inside the loop. This
48 // allows us to constant-fold the unswitching candidate If to true in the true-path-loop and to false in the
49 // false-path-loop, thus eliminating the unswitching candidate If from the loop.
50 //
51 //
52 // Loop Unswitching is a loop optimization to move an invariant, non-loop-exiting test in the loop body before the loop.
53 // Such a test is either always true or always false in all loop iterations and could therefore only be executed once.
54 // To achieve that, we duplicate the loop and change the original and cloned loop as follows:
55 // - Original loop -> true-path-loop:
56 // The true-path of the invariant, non-loop-exiting test in the original loop
57 // is kept while the false-path is killed. We call this unswitched loop version
58 // the true-path-loop.
59 // - Cloned loop -> false-path-loop:
60 // The false-path of the invariant, non-loop-exiting test in the cloned loop
61 // is kept while the true-path is killed. We call this unswitched loop version
62 // the false-path loop.
63 //
64 // The invariant, non-loop-exiting test can now be moved before both loops (to only execute it once) and turned into a
65 // loop selector If node to select at runtime which unswitched loop version should be executed.
66 // - Loop selector true? Execute the true-path-loop.
67 // - Loop selector false? Execute the false-path-loop.
68 //
69 // Note that even though an invariant test that exits the loop could also be optimized with Loop Unswitching, it is more
70 // efficient to simply peel the loop which achieves the same result in a simpler manner (also see policy_peeling()).
71 //
72 // The following graphs summarizes the Loop Unswitching optimization.
73 // We start with the original loop:
74 //
75 // [Predicates]
76 // |
77 // Original Loop
78 // stmt1
79 // if (invariant-test)
80 // if-path
81 // else
82 // else-path
83 // stmt2
84 // Endloop
85 //
86 //
87 // which is unswitched into a true-path-loop and a false-path-loop together with a loop selector:
88 //
89 //
90 // [Initialized Assertion Predicates]
91 // |
92 // loop selector If (invariant-test)
93 // / \
94 // true? false?
95 // / \
96 // [Cloned Parse Predicates] [Cloned Parse Predicates]
97 // [Cloned Template [Cloned Template
98 // Assertion Predicates] Assertion Predicates]
99 // | |
100 // True-Path-Loop False-Path-Loop
101 // cloned stmt1 cloned stmt1
102 // cloned if-path cloned else-path
103 // cloned stmt2 cloned stmt2
104 // Endloop Endloop
105
106
107 // Return true if the loop should be unswitched or false otherwise.
108 bool IdealLoopTree::policy_unswitching(PhaseIdealLoop* phase) const {
109 if (!LoopUnswitching) {
110 return false;
111 }
112 if (!_head->is_Loop()) {
113 return false;
114 }
115
116 // If nodes are depleted, some transform has miscalculated its needs.
117 assert(!phase->exceeding_node_budget(), "sanity");
118
119 // check for vectorized loops, any unswitching was already applied
120 if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
121 return false;
122 }
123
124 LoopNode* head = _head->as_Loop();
125 if (head->unswitch_count() + 1 > head->unswitch_max()) {
126 return false;
127 }
128
129 if (head->is_flat_arrays()) {
130 return false;
131 }
132
133 if (no_unswitch_candidate()) {
134 return false;
135 }
136
137 // Too speculative if running low on nodes.
138 return phase->may_require_nodes(est_loop_clone_sz(2));
139 }
140
141 // Check the absence of any If node that can be used for Loop Unswitching. In that case, no Loop Unswitching can be done.
142 bool IdealLoopTree::no_unswitch_candidate() const {
143 ResourceMark rm;
144 Node_List dont_care;
145 return _phase->find_unswitch_candidates(this, dont_care) == nullptr;
146 }
147
148 // Find an invariant test in the loop body that does not exit the loop. If multiple tests are found, we pick the first
149 // one in the loop body as "unswitch candidate" to apply Loop Unswitching on.
150 // Depending on whether we find such a candidate and if we do, whether it's a flat array check, we do the following:
151 // (1) Candidate is not a flat array check:
152 // Return the unique unswitch candidate.
153 // (2) Candidate is a flat array check:
154 // Collect all remaining non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks'
155 // list in order to create an unswitched loop version without any flat array checks and a version with checks
156 // (i.e. same as original loop). Return the initially found candidate which could be unique if no further flat array
157 // checks are found.
158 // (3) No candidate is initially found:
159 // As in (2), we collect all non-loop-exiting flat array checks in the loop body in the provided 'flat_array_checks'
160 // list. Pick the first collected flat array check as unswitch candidate, which could be unique, and return it (a).
161 // If there are no flat array checks, we cannot apply Loop Unswitching (b).
162 //
163 // Note that for both (2) and (3a), if there are multiple flat array checks, then the candidate's FlatArrayCheckNode is
164 // later updated in Loop Unswitching to perform a flat array check on all collected flat array checks.
165 IfNode* PhaseIdealLoop::find_unswitch_candidates(const IdealLoopTree* loop, Node_List& flat_array_checks) const {
166 IfNode* unswitch_candidate = find_unswitch_candidate_from_idoms(loop);
167 if (unswitch_candidate != nullptr && !unswitch_candidate->is_flat_array_check(&_igvn)) {
168 // Case (1)
169 return unswitch_candidate;
170 }
171
172 collect_flat_array_checks(loop, flat_array_checks);
173 if (unswitch_candidate != nullptr) {
174 // Case (2)
175 assert(unswitch_candidate->is_flat_array_check(&_igvn), "is a flat array check");
176 return unswitch_candidate;
177 } else if (flat_array_checks.size() > 0) {
178 // Case (3a): Pick first one found as candidate (there could be multiple).
179 return flat_array_checks[0]->as_If();
180 }
181
182 // Case (3b): No suitable unswitch candidate found.
183 return nullptr;
184 }
185
186 // Find an unswitch candidate by following the idom chain from the loop back edge.
187 IfNode* PhaseIdealLoop::find_unswitch_candidate_from_idoms(const IdealLoopTree* loop) const {
188 LoopNode* head = loop->_head->as_Loop();
189 IfNode* unswitch_candidate = nullptr;
190 Node* n = head->in(LoopNode::LoopBackControl);
191 while (n != head) {
192 Node* n_dom = idom(n);
193 if (n->is_Region()) {
194 if (n_dom->is_If()) {
195 IfNode* iff = n_dom->as_If();
196 if (iff->in(1)->is_Bool()) {
197 BoolNode* bol = iff->in(1)->as_Bool();
198 if (bol->in(1)->is_Cmp()) {
199 // If condition is invariant and not a loop exit,
200 // then found reason to unswitch.
201 if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) {
202 assert(iff->Opcode() == Op_If || iff->is_RangeCheck() || iff->is_BaseCountedLoopEnd(), "valid ifs");
203 unswitch_candidate = iff;
204 }
205 }
206 }
207 }
208 }
209 n = n_dom;
210 }
211 return unswitch_candidate;
212 }
213
214 // Collect all flat array checks in the provided 'flat_array_checks' list.
215 void PhaseIdealLoop::collect_flat_array_checks(const IdealLoopTree* loop, Node_List& flat_array_checks) const {
216 assert(flat_array_checks.size() == 0, "should be empty initially");
217 for (uint i = 0; i < loop->_body.size(); i++) {
218 Node* next = loop->_body.at(i);
219 if (next->is_If() && next->as_If()->is_flat_array_check(&_igvn) && loop->is_invariant(next->in(1)) &&
220 !loop->is_loop_exit(next)) {
221 flat_array_checks.push(next);
222 }
223 }
224 }
225
226 // This class represents an "unswitch candidate" which is an If that can be used to perform Loop Unswitching on. If the
227 // candidate is a flat array check candidate, then we also collect all remaining non-loop-exiting flat array checks.
228 // These are candidates as well. We want to get rid of all these flat array checks in the true-path-loop for the
229 // following reason:
230 //
231 // FlatArrayCheckNodes are used with array accesses to switch between a flat and a non-flat array access. We want
232 // the performance impact on non-flat array accesses to be as small as possible. We therefore create the following
233 // loops in Loop Unswitching:
234 // - True-path-loop: We remove all non-loop-exiting flat array checks to get a loop with only non-flat array accesses
235 // (i.e. a fast path loop).
236 // - False-path-loop: We keep all flat array checks in this loop (i.e. a slow path loop).
237 class UnswitchCandidate : public StackObj {
238 PhaseIdealLoop* const _phase;
239 const Node_List& _old_new;
240 Node* const _original_loop_entry;
241 // If _candidate is a flat array check, this list contains all non-loop-exiting flat array checks in the loop body.
242 Node_List _flat_array_check_candidates;
243 IfNode* const _candidate;
244
245 public:
246 UnswitchCandidate(IdealLoopTree* loop, const Node_List& old_new)
247 : _phase(loop->_phase),
248 _old_new(old_new),
249 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
250 _flat_array_check_candidates(),
251 _candidate(find_unswitch_candidate(loop)) {}
252 NONCOPYABLE(UnswitchCandidate);
253
254 IfNode* find_unswitch_candidate(IdealLoopTree* loop) {
255 IfNode* unswitch_candidate = _phase->find_unswitch_candidates(loop, _flat_array_check_candidates);
256 assert(unswitch_candidate != nullptr, "guaranteed to exist by policy_unswitching");
257 assert(_phase->is_member(loop, unswitch_candidate), "must be inside original loop");
258 return unswitch_candidate;
259 }
260
261 IfNode* candidate() const {
262 return _candidate;
263 }
264
265 // Is the candidate a flat array check and are there other flat array checks as well?
266 bool has_multiple_flat_array_check_candidates() const {
267 return _flat_array_check_candidates.size() > 1;
268 }
269
270 // Remove all candidates from the true-path-loop which are now dominated by the loop selector
271 // (i.e. 'true_path_loop_proj'). The removed candidates are folded in the next IGVN round.
272 void update_in_true_path_loop(IfTrueNode* true_path_loop_proj) const {
273 remove_from_loop(true_path_loop_proj, _candidate);
274 if (has_multiple_flat_array_check_candidates()) {
275 remove_flat_array_checks(true_path_loop_proj);
276 }
277 }
278
279 // Remove a unique candidate from the false-path-loop which is now dominated by the loop selector
280 // (i.e. 'false_path_loop_proj'). The removed candidate is folded in the next IGVN round. If there are multiple
281 // candidates (i.e. flat array checks), then we leave them in the false-path-loop and only mark the loop such that it
282 // is not unswitched anymore in later loop opts rounds.
283 void update_in_false_path_loop(IfFalseNode* false_path_loop_proj, LoopNode* false_path_loop) const {
284 if (has_multiple_flat_array_check_candidates()) {
285 // Leave the flat array checks in the false-path-loop and prevent it from being unswitched again based on these
286 // checks.
287 false_path_loop->mark_flat_arrays();
288 } else {
289 remove_from_loop(false_path_loop_proj, _old_new[_candidate->_idx]->as_If());
290 }
291 }
292
293 private:
294 void remove_from_loop(IfProjNode* dominating_proj, IfNode* candidate) const {
295 _phase->igvn().rehash_node_delayed(candidate);
296 _phase->dominated_by(dominating_proj, candidate);
297 }
298
299 void remove_flat_array_checks(IfProjNode* dominating_proj) const {
300 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
301 IfNode* flat_array_check = _flat_array_check_candidates.at(i)->as_If();
302 _phase->igvn().rehash_node_delayed(flat_array_check);
303 _phase->dominated_by(dominating_proj, flat_array_check);
304 }
305 }
306
307 public:
308 // Merge all flat array checks into a single new BoolNode and return it.
309 BoolNode* merge_flat_array_checks() const {
310 assert(has_multiple_flat_array_check_candidates(), "must have multiple flat array checks to merge");
311 assert(_candidate->in(1)->as_Bool()->_test._test == BoolTest::ne, "IfTrue proj must point to flat array");
312 BoolNode* merged_flat_array_check_bool = create_bool_node();
313 create_flat_array_check_node(merged_flat_array_check_bool);
314 return merged_flat_array_check_bool;
315 }
316
317 private:
318 BoolNode* create_bool_node() const {
319 BoolNode* merged_flat_array_check_bool = _candidate->in(1)->clone()->as_Bool();
320 _phase->register_new_node(merged_flat_array_check_bool, _original_loop_entry);
321 return merged_flat_array_check_bool;
322 }
323
324 void create_flat_array_check_node(BoolNode* merged_flat_array_check_bool) const {
325 FlatArrayCheckNode* cloned_flat_array_check = merged_flat_array_check_bool->in(1)->clone()->as_FlatArrayCheck();
326 _phase->register_new_node(cloned_flat_array_check, _original_loop_entry);
327 merged_flat_array_check_bool->set_req(1, cloned_flat_array_check);
328 set_flat_array_check_inputs(cloned_flat_array_check);
329 }
330
331 // Combine all checks into a single one that fails if one array is flat.
332 void set_flat_array_check_inputs(FlatArrayCheckNode* cloned_flat_array_check) const {
333 assert(cloned_flat_array_check->req() == 3, "unexpected number of inputs for FlatArrayCheck");
334 cloned_flat_array_check->add_req_batch(_phase->C->top(), _flat_array_check_candidates.size() - 1);
335 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
336 Node* array = _flat_array_check_candidates.at(i)->in(1)->in(1)->in(FlatArrayCheckNode::ArrayOrKlass);
337 cloned_flat_array_check->set_req(FlatArrayCheckNode::ArrayOrKlass + i, array);
338 }
339 }
340
341 public:
342 #ifndef PRODUCT
343 void trace_flat_array_checks() const {
344 if (has_multiple_flat_array_check_candidates()) {
345 tty->print_cr("- Unswitched and Merged Flat Array Checks:");
346 for (uint i = 0; i < _flat_array_check_candidates.size(); i++) {
347 Node* unswitch_iff = _flat_array_check_candidates.at(i);
348 Node* cloned_unswitch_iff = _old_new[unswitch_iff->_idx];
349 assert(cloned_unswitch_iff != nullptr, "must exist");
350 tty->print_cr(" - %d %s -> %d %s", unswitch_iff->_idx, unswitch_iff->Name(),
351 cloned_unswitch_iff->_idx, cloned_unswitch_iff->Name());
352 }
353 }
354 }
355 #endif // NOT PRODUCT
356 };
357
358 // LoopSelector is used for loop multiversioning and unswitching. This class creates an If node (i.e. loop selector)
359 // that selects if the true-path-loop or the false-path-loop should be executed at runtime.
360 class LoopSelector : public StackObj {
361 // Cached fields for construction.
362 PhaseIdealLoop* const _phase;
363 IdealLoopTree* const _outer_loop;
364 Node* const _original_loop_entry;
365 const uint _dom_depth; // of original_loop_entry
366
367 // Constructed selector if with its projections.
368 IfNode* const _selector;
369 IfTrueNode* const _true_path_loop_proj;
370 IfFalseNode* const _false_path_loop_proj;
371
372 enum PathToLoop {
373 TRUE_PATH, FALSE_PATH
374 };
375
376 public:
377 // For multiversioning: create a new selector (multiversion_if) from a bol condition.
378 LoopSelector(IdealLoopTree* loop, Node* bol, float prob, float fcnt)
379 : _phase(loop->_phase),
380 _outer_loop(loop->skip_strip_mined()->_parent),
381 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
382 _dom_depth(_phase->dom_depth(_original_loop_entry)),
383 _selector(create_multiversioning_if(bol, prob, fcnt)), // multiversioning
384 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()),
385 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) {
386 }
387
388 // For unswitching: create an unswitching if before the loop, from a pre-existing
389 // unswitching_candidate inside the loop.
390 LoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate)
391 : _phase(loop->_phase),
392 _outer_loop(loop->skip_strip_mined()->_parent),
393 _original_loop_entry(loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl)),
394 _dom_depth(_phase->dom_depth(_original_loop_entry)),
395 _selector(create_unswitching_if(unswitch_candidate)), // unswitching
396 _true_path_loop_proj(create_proj_to_loop(TRUE_PATH)->as_IfTrue()),
397 _false_path_loop_proj(create_proj_to_loop(FALSE_PATH)->as_IfFalse()) {
398 }
399 NONCOPYABLE(LoopSelector);
400
401 private:
402 IfNode* create_multiversioning_if(Node* bol, float prob, float fcnt) {
403 _phase->igvn().rehash_node_delayed(_original_loop_entry);
404 IfNode* selector_if = new IfNode(_original_loop_entry, bol, prob, fcnt);
405 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, _dom_depth);
406 return selector_if;
407 }
408
409 IfNode* create_unswitching_if(const UnswitchCandidate& unswitch_candidate) {
410 const uint dom_depth = _phase->dom_depth(_original_loop_entry);
411 _phase->igvn().rehash_node_delayed(_original_loop_entry);
412 IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
413 BoolNode* selector_bool;
414 if (unswitch_candidate.has_multiple_flat_array_check_candidates()) {
415 selector_bool = unswitch_candidate.merge_flat_array_checks();
416 } else {
417 selector_bool = unswitch_candidate_if->in(1)->as_Bool();
418 }
419 IfNode* selector_if = IfNode::make_with_same_profile(unswitch_candidate_if, _original_loop_entry, selector_bool);
420 _phase->register_node(selector_if, _outer_loop, _original_loop_entry, dom_depth);
421 return selector_if;
422 }
423
424 IfProjNode* create_proj_to_loop(const PathToLoop path_to_loop) {
425 IfProjNode* proj_to_loop;
426 if (path_to_loop == TRUE_PATH) {
427 proj_to_loop = new IfTrueNode(_selector);
428 } else {
429 proj_to_loop = new IfFalseNode(_selector);
430 }
431 _phase->register_node(proj_to_loop, _outer_loop, _selector, _dom_depth);
432 return proj_to_loop;
433 }
434
435 public:
436 IfNode* selector() const {
437 return _selector;
438 }
439
440 IfTrueNode* true_path_loop_proj() const {
441 return _true_path_loop_proj;
442 }
443
444 IfFalseNode* false_path_loop_proj() const {
445 return _false_path_loop_proj;
446 }
447 };
448
449 // This class creates an If node (i.e. loop selector) that selects if the true-path-loop or the false-path-loop should be
450 // executed at runtime. This is done by finding an invariant and non-loop-exiting unswitch candidate If node (guaranteed
451 // to exist at this point) to perform Loop Unswitching on.
452 class UnswitchedLoopSelector : public StackObj {
453 const UnswitchCandidate& _unswitch_candidate;
454 const LoopSelector _loop_selector;
455
456 public:
457 UnswitchedLoopSelector(IdealLoopTree* loop, const UnswitchCandidate& unswitch_candidate)
458 : _unswitch_candidate(unswitch_candidate),
459 _loop_selector(loop, _unswitch_candidate) {}
460 NONCOPYABLE(UnswitchedLoopSelector);
461
462 IfNode* selector_if() const {
463 return _loop_selector.selector();
464 }
465
466 const LoopSelector& loop_selector() const {
467 return _loop_selector;
468 }
469 };
470
471 // Class to unswitch the original loop and create Predicates at the new unswitched loop versions. The newly cloned loop
472 // becomes the false-path-loop while original loop becomes the true-path-loop.
473 class OriginalLoop : public StackObj {
474 LoopNode* const _loop_head;
475 LoopNode* const _outer_loop_head; // OuterStripMinedLoopNode if loop strip mined, else just the loop head.
476 IdealLoopTree* const _loop;
477 Node_List& _old_new;
478 PhaseIdealLoop* const _phase;
479
480 public:
481 OriginalLoop(IdealLoopTree* loop, Node_List& old_new)
482 : _loop_head(loop->_head->as_Loop()),
483 _outer_loop_head(loop->_head->as_Loop()->skip_strip_mined()),
484 _loop(loop),
485 _old_new(old_new),
486 _phase(loop->_phase) {}
487 NONCOPYABLE(OriginalLoop);
488
489 // Unswitch the original loop on the invariant loop selector by creating a true-path-loop and a false-path-loop.
490 // Remove the unswitch candidate If from both unswitched loop versions which are now covered by the loop selector If.
491 void unswitch(const UnswitchedLoopSelector& unswitched_loop_selector) {
492 multiversion(unswitched_loop_selector.loop_selector());
493 }
494
495 // Multiversion the original loop. The loop selector if selects between the original loop (true-path-loop), and
496 // a copy of it (false-path-loop).
497 void multiversion(const LoopSelector& loop_selector) {
498 const uint first_false_path_loop_node_index = _phase->C->unique();
499 clone_loop(loop_selector);
500
501 move_parse_and_template_assertion_predicates_to_unswitched_loops(loop_selector,
502 first_false_path_loop_node_index);
503 DEBUG_ONLY(verify_loop_versions(_loop->_head->as_Loop(), loop_selector);)
504
505 _phase->recompute_dom_depth();
506 }
507
508 private:
509 void clone_loop(const LoopSelector& loop_selector) {
510 _phase->clone_loop(_loop, _old_new, _phase->dom_depth(_outer_loop_head),
511 PhaseIdealLoop::CloneIncludesStripMined, loop_selector.selector());
512 fix_loop_entries(loop_selector);
513 }
514
515 void fix_loop_entries(const LoopSelector& loop_selector) const {
516 _phase->replace_loop_entry(_outer_loop_head, loop_selector.true_path_loop_proj());
517 LoopNode* false_path_loop_strip_mined_head = old_to_new(_outer_loop_head)->as_Loop();
518 _phase->replace_loop_entry(false_path_loop_strip_mined_head,
519 loop_selector.false_path_loop_proj());
520 }
521
522 // Moves the Parse And Template Assertion Predicates to the true and false path loop. They are inserted between the
523 // loop heads and the loop selector If projections. The old Parse and Template Assertion Predicates before
524 // the unswitched loop selector are killed.
525 void move_parse_and_template_assertion_predicates_to_unswitched_loops(
526 const LoopSelector& loop_selector, const uint first_false_path_loop_node_index) const {
527 const NodeInOriginalLoopBody node_in_true_path_loop_body(first_false_path_loop_node_index, _old_new);
528 const NodeInClonedLoopBody node_in_false_path_loop_body(first_false_path_loop_node_index);
529 CloneUnswitchedLoopPredicatesVisitor
530 clone_unswitched_loop_predicates_visitor(_loop_head, old_to_new(_loop_head)->as_Loop(), node_in_true_path_loop_body,
531 node_in_false_path_loop_body, _phase);
532 Node* source_loop_entry = loop_selector.selector()->in(0);
533 PredicateIterator predicate_iterator(source_loop_entry);
534 predicate_iterator.for_each(clone_unswitched_loop_predicates_visitor);
535 }
536
537 #ifdef ASSERT
538 void verify_loop_versions(LoopNode* true_path_loop_head,
539 const LoopSelector& loop_selector) const {
540 verify_loop_version(true_path_loop_head,
541 loop_selector.true_path_loop_proj());
542 verify_loop_version(old_to_new(true_path_loop_head)->as_Loop(),
543 loop_selector.false_path_loop_proj());
544 }
545
546 static void verify_loop_version(LoopNode* loop_head, IfProjNode* loop_selector_if_proj) {
547 Node* entry = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
548 const Predicates predicates(entry);
549 // When skipping all predicates, we should end up at 'loop_selector_if_proj'.
550 assert(loop_selector_if_proj == predicates.entry(), "should end up at loop selector If");
551 }
552 #endif // ASSERT
553
554 Node* old_to_new(const Node* old) const {
555 return _old_new[old->_idx];
556 }
557 };
558
559 // See comments below file header for more information about Loop Unswitching.
560 void PhaseIdealLoop::do_unswitching(IdealLoopTree* loop, Node_List& old_new) {
561 assert(LoopUnswitching, "LoopUnswitching must be enabled");
562
563 LoopNode* original_head = loop->_head->as_Loop();
564 if (has_control_dependencies_from_predicates(original_head)) {
565 NOT_PRODUCT(trace_loop_unswitching_impossible(original_head);)
566 return;
567 }
568
569 NOT_PRODUCT(trace_loop_unswitching_count(loop, original_head);)
570 C->print_method(PHASE_BEFORE_LOOP_UNSWITCHING, 4, original_head);
571
572 revert_to_normal_loop(original_head);
573
574 const UnswitchCandidate unswitch_candidate(loop, old_new);
575 const UnswitchedLoopSelector unswitched_loop_selector(loop, unswitch_candidate);
576 OriginalLoop original_loop(loop, old_new);
577 original_loop.unswitch(unswitched_loop_selector);
578
579 unswitch_candidate.update_in_true_path_loop(unswitched_loop_selector.loop_selector().true_path_loop_proj());
580 unswitch_candidate.update_in_false_path_loop(unswitched_loop_selector.loop_selector().false_path_loop_proj(),
581 old_new[original_head->_idx]->as_Loop());
582 hoist_invariant_check_casts(loop, old_new, unswitch_candidate, unswitched_loop_selector.selector_if());
583 add_unswitched_loop_version_bodies_to_igvn(loop, old_new);
584
585 LoopNode* new_head = old_new[original_head->_idx]->as_Loop();
586 increment_unswitch_counts(original_head, new_head);
587
588 NOT_PRODUCT(trace_loop_unswitching_result(unswitched_loop_selector, unswitch_candidate, original_head, new_head);)
589 C->print_method(PHASE_AFTER_LOOP_UNSWITCHING, 4, new_head);
590 C->set_major_progress();
591 }
592
593 void PhaseIdealLoop::do_multiversioning(IdealLoopTree* lpt, Node_List& old_new) {
594 #ifndef PRODUCT
595 if (TraceLoopOpts || TraceLoopMultiversioning) {
596 tty->print("Multiversion ");
597 lpt->dump_head();
598 }
599 #endif
600 assert(LoopMultiversioning, "LoopMultiversioning must be enabled");
601
602 CountedLoopNode* original_head = lpt->_head->as_CountedLoop();
603 C->print_method(PHASE_BEFORE_LOOP_MULTIVERSIONING, 4, original_head);
604
605 Node* one = _igvn.intcon(1);
606 set_ctrl(one, C->root());
607 Node* opaque = new OpaqueMultiversioningNode(C, one);
608 set_ctrl(opaque, C->root());
609 _igvn.register_new_node_with_optimizer(opaque);
610 _igvn.set_type(opaque, TypeInt::BOOL);
611
612 const LoopSelector loop_selector(lpt, opaque, PROB_LIKELY_MAG(3), COUNT_UNKNOWN);
613 OriginalLoop original_loop(lpt, old_new);
614 original_loop.multiversion(loop_selector);
615
616 add_unswitched_loop_version_bodies_to_igvn(lpt, old_new);
617
618 CountedLoopNode* new_head = old_new[original_head->_idx]->as_CountedLoop();
619 original_head->set_multiversion_fast_loop();
620 new_head->set_multiversion_delayed_slow_loop();
621
622 NOT_PRODUCT(trace_loop_multiversioning_result(loop_selector, original_head, new_head);)
623 C->print_method(PHASE_AFTER_LOOP_MULTIVERSIONING, 4, new_head);
624 C->set_major_progress();
625 }
626
627 // Create a new if in the multiversioning pattern, adding an additional condition for the
628 // multiversioning fast-loop.
629 //
630 // Before:
631 // entry opaque
632 // | |
633 // multiversion_if
634 // | |
635 // +----------------+ +---------------+
636 // | |
637 // multiversion_fast_proj multiversion_slow_proj
638 // |
639 // +--------+
640 // |
641 // slow_path
642 //
643 //
644 // After:
645 // entry opaque <-- to be replaced by caller
646 // | |
647 // new_if
648 // | |
649 // | +-----------------------------+
650 // | |
651 // new_if_true opaque new_if_false
652 // | | |
653 // multiversion_if |
654 // | | |
655 // +----------------+ +---------------+ |
656 // | | |
657 // multiversion_fast_proj new_multiversion_slow_proj |
658 // | |
659 // +------+ |
660 // | |
661 // region
662 // |
663 // slow_path
664 //
665 // For more descriptions on multiversioning:
666 // See: PhaseIdealLoop::maybe_multiversion_for_auto_vectorization_runtime_checks
667 IfTrueNode* PhaseIdealLoop::create_new_if_for_multiversion(IfTrueNode* multiversioning_fast_proj) {
668 // Give all nodes in the old sub-graph a name.
669 IfNode* multiversion_if = multiversioning_fast_proj->in(0)->as_If();
670 Node* entry = multiversion_if->in(0);
671 OpaqueMultiversioningNode* opaque = multiversion_if->in(1)->as_OpaqueMultiversioning();
672 IfFalseNode* multiversion_slow_proj = multiversion_if->proj_out(0)->as_IfFalse();
673 Node* slow_path = multiversion_slow_proj->unique_ctrl_out();
674
675 // The slow_loop may still be delayed, and waiting for runtime-checks to be added to the
676 // multiversion_if. Now that we have at least one condition for the multiversioning,
677 // we should resume optimizations for the slow loop.
678 opaque->notify_slow_loop_that_it_can_resume_optimizations();
679
680 // Create new_if with its projections.
681 IfNode* new_if = IfNode::make_with_same_profile(multiversion_if, entry, opaque);
682 IdealLoopTree* lp = get_loop(entry);
683 register_control(new_if, lp, entry);
684
685 IfTrueNode* new_if_true = new IfTrueNode(new_if);
686 IfFalseNode* new_if_false = new IfFalseNode(new_if);
687 register_control(new_if_true, lp, new_if);
688 register_control(new_if_false, lp, new_if);
689
690 // Hook new_if_true into multiversion_if.
691 _igvn.replace_input_of(multiversion_if, 0, new_if_true);
692
693 // Clone multiversion_slow_path - this allows us to easily carry the dependencies to
694 // the new region below.
695 IfFalseNode* new_multiversion_slow_proj = multiversion_slow_proj->clone()->as_IfFalse();
696 register_control(new_multiversion_slow_proj, lp, multiversion_if);
697
698 // Create new Region.
699 RegionNode* region = new RegionNode(1);
700 region->add_req(new_multiversion_slow_proj);
701 region->add_req(new_if_false);
702 register_control(region, lp, new_multiversion_slow_proj);
703
704 // Hook region into slow_path, in stead of the multiversion_slow_proj.
705 // This also moves all other dependencies of the multiversion_slow_proj to the region.
706 _igvn.replace_node(multiversion_slow_proj, region);
707
708 return new_if_true;
709 }
710
711 OpaqueMultiversioningNode* find_multiversion_opaque_from_multiversion_if_false(Node* maybe_multiversion_if_false) {
712 IfFalseNode* multiversion_if_false = maybe_multiversion_if_false->isa_IfFalse();
713 if (multiversion_if_false == nullptr) { return nullptr; }
714 IfNode* multiversion_if = multiversion_if_false->in(0)->isa_If();
715 if (multiversion_if == nullptr) { return nullptr; }
716 return multiversion_if->in(1)->isa_OpaqueMultiversioning();
717 }
718
719 bool PhaseIdealLoop::try_resume_optimizations_for_delayed_slow_loop(IdealLoopTree* lpt) {
720 CountedLoopNode* cl = lpt->_head->as_CountedLoop();
721 assert(cl->is_multiversion_delayed_slow_loop(), "must currently be delayed");
722
723 // Find multiversion_if.
724 Node* entry = cl->skip_strip_mined()->in(LoopNode::EntryControl);
725 const Predicates predicates(entry);
726
727 Node* slow_path = predicates.entry();
728
729 // Find opaque.
730 OpaqueMultiversioningNode* opaque = nullptr;
731 if (slow_path->is_Region()) {
732 for (uint i = 1; i < slow_path->req(); i++) {
733 Node* n = slow_path->in(i);
734 opaque = find_multiversion_opaque_from_multiversion_if_false(n);
735 if (opaque != nullptr) { break; }
736 }
737 } else {
738 opaque = find_multiversion_opaque_from_multiversion_if_false(slow_path);
739 }
740 assert(opaque != nullptr, "must have found multiversion opaque node");
741 if (opaque == nullptr) { return false; }
742
743 // We may still be delayed, if there were not yet any runtime-checks added
744 // for the multiversioning. We may never add any, and then this loop would
745 // fold away. So we wait until some runtime-checks are added, then we know
746 // that this loop will be reachable and it is worth optimizing further.
747 if (opaque->is_delayed_slow_loop()) { return false; }
748
749 // Clear away the "delayed" status, i.e. resume optimizations.
750 cl->set_no_multiversion();
751 cl->set_multiversion_slow_loop();
752 #ifndef PRODUCT
753 if (TraceLoopOpts) {
754 tty->print("Resume Optimizations ");
755 lpt->dump_head();
756 }
757 #endif
758 return true;
759 }
760
761 bool PhaseIdealLoop::has_control_dependencies_from_predicates(LoopNode* head) {
762 Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
763 const Predicates predicates(entry);
764 if (predicates.has_any()) {
765 assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
766 if (entry->outcnt() > 1) {
767 // Bailout if there are predicates from which there are additional control dependencies (i.e. from loop
768 // entry 'entry') to previously partially peeled statements since this case is not handled and can lead
769 // to a wrong execution. Remove this bailout, once this is fixed.
770 return true;
771 }
772 }
773 return false;
774 }
775
776 #ifndef PRODUCT
777 void PhaseIdealLoop::trace_loop_unswitching_impossible(const LoopNode* original_head) {
778 if (TraceLoopUnswitching) {
779 tty->print_cr("Loop Unswitching \"%d %s\" not possible due to control dependencies",
780 original_head->_idx, original_head->Name());
781 }
782 }
783
784 void PhaseIdealLoop::trace_loop_unswitching_count(IdealLoopTree* loop, LoopNode* original_head) {
785 if (TraceLoopOpts) {
786 tty->print("Unswitch %d ", original_head->unswitch_count() + 1);
787 loop->dump_head();
788 }
789 }
790
791 void PhaseIdealLoop::trace_loop_unswitching_result(const UnswitchedLoopSelector& unswitched_loop_selector,
792 const UnswitchCandidate& unswitch_candidate,
793 const LoopNode* original_head, const LoopNode* new_head) {
794 if (TraceLoopUnswitching) {
795 IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
796 IfNode* loop_selector = unswitched_loop_selector.selector_if();
797 tty->print_cr("Loop Unswitching:");
798 tty->print_cr("- Unswitch-Candidate-If: %d %s", unswitch_candidate_if->_idx, unswitch_candidate_if->Name());
799 tty->print_cr("- Loop-Selector-If: %d %s", loop_selector->_idx, loop_selector->Name());
800 tty->print_cr("- True-Path-Loop (=Orig): %d %s", original_head->_idx, original_head->Name());
801 tty->print_cr("- False-Path-Loop (=Clone): %d %s", new_head->_idx, new_head->Name());
802 unswitch_candidate.trace_flat_array_checks();
803 }
804 }
805
806 void PhaseIdealLoop::trace_loop_multiversioning_result(const LoopSelector& loop_selector,
807 const LoopNode* original_head, const LoopNode* new_head) {
808 if (TraceLoopMultiversioning) {
809 IfNode* selector_if = loop_selector.selector();
810 tty->print_cr("Loop Multiversioning:");
811 tty->print_cr("- Loop-Selector-If: %d %s", selector_if->_idx, selector_if->Name());
812 tty->print_cr("- True-Path-Loop (=Orig / Fast): %d %s", original_head->_idx, original_head->Name());
813 tty->print_cr("- False-Path-Loop (=Clone / Slow): %d %s", new_head->_idx, new_head->Name());
814 }
815 }
816 #endif
817
818 // When unswitching a counted loop, we need to convert it back to a normal loop since it's not a proper pre, main or,
819 // post loop anymore after loop unswitching. We also lose the multiversion structure, with access to the multiversion_if.
820 void PhaseIdealLoop::revert_to_normal_loop(const LoopNode* loop_head) {
821 CountedLoopNode* cl = loop_head->isa_CountedLoop();
822 if (cl == nullptr) { return; }
823 if (!cl->is_normal_loop()) { cl->set_normal_loop(); }
824 if (cl->is_multiversion()) { cl->set_no_multiversion(); }
825 }
826
827 // Hoist invariant CheckCastPPNodes out of each unswitched loop version to the appropriate loop selector If projection.
828 void PhaseIdealLoop::hoist_invariant_check_casts(const IdealLoopTree* loop, const Node_List& old_new,
829 const UnswitchCandidate& unswitch_candidate,
830 const IfNode* loop_selector) {
831 ResourceMark rm;
832 GrowableArray<CheckCastPPNode*> loop_invariant_check_casts;
833 const IfNode* unswitch_candidate_if = unswitch_candidate.candidate();
834 for (DUIterator_Fast imax, i = unswitch_candidate_if->fast_outs(imax); i < imax; i++) {
835 IfProjNode* proj = unswitch_candidate_if->fast_out(i)->as_IfProj();
836 // Copy to a worklist for easier manipulation
837 for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
838 CheckCastPPNode* check_cast = proj->fast_out(j)->isa_CheckCastPP();
839 if (check_cast != nullptr && loop->is_invariant(check_cast->in(1))) {
840 loop_invariant_check_casts.push(check_cast);
841 }
842 }
843 IfProjNode* loop_selector_if_proj = loop_selector->proj_out(proj->_con)->as_IfProj();
844 while (loop_invariant_check_casts.length() > 0) {
845 CheckCastPPNode* cast = loop_invariant_check_casts.pop();
846 Node* cast_clone = cast->clone();
847 cast_clone->set_req(0, loop_selector_if_proj);
848 _igvn.replace_input_of(cast, 1, cast_clone);
849 register_new_node(cast_clone, loop_selector_if_proj);
850 // Same for the false-path-loop if there are not multiple flat array checks (in that case we leave the
851 // false-path-loop unchanged).
852 if (!unswitch_candidate.has_multiple_flat_array_check_candidates()) {
853 Node* use_clone = old_new[cast->_idx];
854 _igvn.replace_input_of(use_clone, 1, cast_clone);
855 }
856 }
857 }
858 }
859
860 // Enable more optimizations possibilities in the next IGVN round.
861 void PhaseIdealLoop::add_unswitched_loop_version_bodies_to_igvn(IdealLoopTree* loop, const Node_List& old_new) {
862 loop->record_for_igvn();
863 for (int i = loop->_body.size() - 1; i >= 0; i--) {
864 Node* n = loop->_body[i];
865 Node* n_clone = old_new[n->_idx];
866 _igvn._worklist.push(n_clone);
867 }
868 }
869
870 void PhaseIdealLoop::increment_unswitch_counts(LoopNode* original_head, LoopNode* new_head) {
871 const int unswitch_count = original_head->unswitch_count() + 1;
872 original_head->set_unswitch_count(unswitch_count);
873 new_head->set_unswitch_count(unswitch_count);
874 }