  // the semantics so it does not appear in the hash & cmp functions.
  virtual uint size_of() const { return sizeof(*this); }
protected:
  uint _loop_flags;
  // Names for flag bitfields
  enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
         MainHasNoPreLoop      = 1<<2,
         HasExactTripCount     = 1<<3,
         InnerLoop             = 1<<4,
         PartialPeelLoop       = 1<<5,
         PartialPeelFailed     = 1<<6,
         WasSlpAnalyzed        = 1<<7,
         PassedSlpAnalysis     = 1<<8,
         DoUnrollOnly          = 1<<9,
         VectorizedLoop        = 1<<10,
         HasAtomicPostLoop     = 1<<11,
         StripMined            = 1<<12,
         SubwordLoop           = 1<<13,
         ProfileTripFailed     = 1<<14,
         LoopNestInnerLoop     = 1<<15,
         LoopNestLongOuterLoop = 1<<16,
         FlatArrays            = 1<<17 };
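
  // Pre, Main and Post name the role a counted loop plays after iteration
  // splitting. An illustrative shape (a sketch in Java-like pseudocode, not
  // the exact IR):
  //   for (i = lo; i < pre_limit; i++)  body(i);  // Pre:  peel until aligned/RCE-safe
  //   for (; i < main_limit; i += U)    U copies of body;  // Main: unrolled by U
  //   for (; i < hi; i++)               body(i);  // Post: remaining iterations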
  char _unswitch_count;
  enum { _unswitch_max=3 };

  // Expected trip count from profile data
  float _profile_trip_cnt;

public:
  // Names for edge indices
  enum { Self=0, EntryControl, LoopBackControl };

  bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  void set_inner_loop() { _loop_flags |= InnerLoop; }

  bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
  bool is_strip_mined() const { return _loop_flags & StripMined; }
  bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
  bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
  bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
  bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
  bool is_flat_arrays() const { return _loop_flags & FlatArrays; }

  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
  void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
  void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
  void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
  void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
  void mark_strip_mined() { _loop_flags |= StripMined; }
  void clear_strip_mined() { _loop_flags &= ~StripMined; }
  void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
  void mark_subword_loop() { _loop_flags |= SubwordLoop; }
  void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
  void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
  void mark_flat_arrays() { _loop_flags |= FlatArrays; }

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }

  void set_unswitch_count(int val) {
    assert(val <= unswitch_max(), "too many unswitches");
    _unswitch_count = val;
  }

  void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
  float profile_trip_cnt() { return _profile_trip_cnt; }

  LoopNode(Node *entry, Node *backedge)
    : RegionNode(3), _loop_flags(0), _unswitch_count(0),
      _profile_trip_cnt(COUNT_UNKNOWN) {
    init_class_id(Class_Loop);
    init_req(EntryControl, entry);
    init_req(LoopBackControl, backedge);
  }

// ...
  bool has_control_dependencies_from_predicates(LoopNode* head) const;
  void verify_fast_loop(LoopNode* head, const ProjNode* proj_true) const NOT_DEBUG_RETURN;
public:
  // Change the control input of expensive nodes to allow commoning by
  // IGVN when it is guaranteed not to result in a more frequent
  // execution of the expensive node. Return true if progress.
  bool process_expensive_nodes();

  // Check whether a node has become unreachable.
  bool is_node_unreachable(Node *n) const {
    return !has_node(n) || n->is_unreachable(_igvn);
  }

  // Eliminate range checks and other trip-counter vs loop-invariant tests.
  void do_range_check(IdealLoopTree *loop, Node_List &old_new);

  // Create a slow version of the loop by cloning the loop
  // and inserting an if to select the fast or slow version.
  // Return the inserted if.
  IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
                                      Node_List &old_new,
                                      Node_List &unswitch_iffs,
                                      CloneLoopMode mode);

  // Clone the loop with an invariant test (one that does not exit the
  // loop) and insert a clone of the test that selects which version to
  // execute.
  void do_unswitching(IdealLoopTree *loop, Node_List &old_new);
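
  // Illustrative effect of unswitching (a sketch in Java-like pseudocode,
  // not the exact IR shape):
  //   for (i ...) { if (inv) A(i); else B(i); }   // inv is loop invariant
  // becomes
  //   if (inv) for (i ...) A(i);   // fast version
  //   else     for (i ...) B(i);   // slow version
  // The cloned test is the one returned by create_slow_version_of_loop().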
1423
1424 // Find candidate "if" for unswitching
1425 IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
1426
1427 // Range Check Elimination uses this function!
1428 // Constrain the main loop iterations so the affine function:
1429 // low_limit <= scale_con * I + offset < upper_limit
1430 // always holds true. That is, either increase the number of iterations in
1431 // the pre-loop or the post-loop until the condition holds true in the main
1432 // loop. Scale_con, offset and limit are all loop invariant.
1433 void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
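
  // A worked example (a sketch under simple assumptions): for a range check
  // on a[i] with stride_con = 1, scale_con = 1, offset = 0, low_limit = 0
  // and upper_limit = a.length, the pre-loop limit is raised so the main
  // loop starts at i >= 0, and the main loop limit is lowered toward
  // a.length, so
  //   0 <= i < a.length
  // holds throughout the main loop and its range check can be removed.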
  // Helper function for add_constraint().
  Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);

  // Partially peel the loop up through the last_peel node.
  bool partial_peel(IdealLoopTree *loop, Node_List &old_new);
  bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
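
  // Illustrative effect of partial peeling (a sketch, not the exact IR):
  //   loop:  stmt1;  if (exit_cond) goto done;  stmt2;  goto loop;
  // after peeling up through stmt1 (the last_peel node):
  //   stmt1;
  //   loop:  if (exit_cond) goto done;  stmt2;  stmt1;  goto loop;
  // The exit test now guards the loop head, enabling further optimizations.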

  // Move UnorderedReduction out of the loop if possible.
  void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);
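
  // Illustrative effect (a sketch, assuming a vectorized sum reduction):
  //   v = zero_vector; for (...) { v = VectorAdd(v, load); sum = reduce(v); }
  // becomes
  //   v = zero_vector; for (...) { v = VectorAdd(v, load); }  sum = reduce(v);
  // The scalar reduce runs once after the loop, which is only legal because
  // an unordered reduction may be re-associated freely.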

  // Create a scheduled list of nodes control dependent on the ctrl set.
  void scheduled_nodelist(IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched);

// ...

  bool intrinsify_fill(IdealLoopTree* lpt);
  bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
                       Node*& shift, Node*& offset);
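
  // Illustrative pattern matched above (a sketch, assuming a simple counted
  // loop storing a loop-invariant value):
  //   for (int i = lo; i < hi; i++) { a[i] = v; }
  // The loop is replaced by a call to an array-fill stub covering [lo, hi).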

private:
  // Return a type based on condition control flow.
  const TypeInt* filtered_type(Node *n, Node* n_ctrl);
  const TypeInt* filtered_type(Node *n) { return filtered_type(n, nullptr); }
  // Helpers for filtered type.
  const TypeInt* filtered_type_from_dominators(Node* val, Node *val_ctrl);
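
  // For example (a sketch): if the dominating test i < 10 guards n_ctrl, a
  // use of the int value i there can be narrowed to the type int:<=9; this
  // is the information filtered_type_from_dominators() recovers.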

  // Helper functions
  Node *spinup(Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache);
  Node *find_use_block(Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true);
  void handle_use(Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true);
  bool split_up(Node *n, Node *blk1, Node *blk2);

  Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
  Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
  void try_move_store_after_loop(Node* n);
  void move_flat_array_check_out_of_loop(Node* n);
  bool identical_backtoback_ifs(Node *n);
  bool flat_array_element_type_check(Node *n);
  bool can_split_if(Node *n_ctrl);
  bool cannot_split_division(const Node* n, const Node* region) const;
  static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
  bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;

  // Determine if a method is too big for a/another round of split-if, based
  // on a magic (approximate) ratio derived from the equally magic constant
  // 35000, previously used for this purpose (but without relating to the
  // node limit).
  bool must_throttle_split_if() {
    uint threshold = C->max_node_limit() * 2 / 5;
    return C->live_nodes() > threshold;
  }
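
  // For example, assuming HotSpot's default MaxNodeLimit of 80000:
  //   threshold = 80000 * 2 / 5 = 32000 live nodes,
  // which approximates the historical constant 35000 while scaling with the
  // configured node limit.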

  // A simplistic node-request tracking mechanism, where
  //   = UINT_MAX  Request not valid or made final.
  //   < UINT_MAX  Nodes currently requested (estimate).
  uint _nodes_required;

  enum { REQUIRE_MIN = 70 };
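
  // Typical life-cycle of _nodes_required (a sketch; the accessors managing
  // it live elsewhere in this class):
  //   UINT_MAX         no request in flight
  //   0                a request was opened
  //   += estimate      a transform registers how many nodes it may create
  //                    (never less than REQUIRE_MIN per registration)
  //   UINT_MAX again   the request is finalized and checked against the
  //                    node budget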