59 virtual uint size_of() const { return sizeof(*this); }
60 protected:
61 uint _loop_flags;
62 // Names for flag bitfields
63 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
64 MainHasNoPreLoop = 1<<2,
65 HasExactTripCount = 1<<3,
66 InnerLoop = 1<<4,
67 PartialPeelLoop = 1<<5,
68 PartialPeelFailed = 1<<6,
69 WasSlpAnalyzed = 1<<7,
70 PassedSlpAnalysis = 1<<8,
71 DoUnrollOnly = 1<<9,
72 VectorizedLoop = 1<<10,
73 HasAtomicPostLoop = 1<<11,
74 IsMultiversioned = 1<<12,
75 StripMined = 1<<13,
76 SubwordLoop = 1<<14,
77 ProfileTripFailed = 1<<15,
78 LoopNestInnerLoop = 1<<16,
79 LoopNestLongOuterLoop = 1<<17};
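// The low two bits encode the pre/main/post role of a counted loop and are read
// through PreMainPostFlagsMask; the remaining bits are independent flags set by
// the marker methods below. Illustrative sketch (matching the usual counted-loop
// accessors, not shown in this excerpt): a main loop is recognized with
//   (_loop_flags & PreMainPostFlagsMask) == Main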
80 char _unswitch_count;
81 enum { _unswitch_max=3 };
82 char _postloop_flags;
83 enum { RCEPostLoop = 1 };
84
85 // Expected trip count from profile data
86 float _profile_trip_cnt;
87
88 public:
89 // Names for edge indices
90 enum { Self=0, EntryControl, LoopBackControl };
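// Self is the region's usual self-input; in(EntryControl) is the control edge
// entering the loop from above, and in(LoopBackControl) is the control edge
// arriving over the backedge.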
91
92 bool is_inner_loop() const { return _loop_flags & InnerLoop; }
93 void set_inner_loop() { _loop_flags |= InnerLoop; }
94
95 bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
96 bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
97 bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
98 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
99 bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
100 bool is_strip_mined() const { return _loop_flags & StripMined; }
101 bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
102 bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
103 bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
104 bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
105
106 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
107 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
108 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
109 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
110 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
111 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
112 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
113 void mark_strip_mined() { _loop_flags |= StripMined; }
114 void clear_strip_mined() { _loop_flags &= ~StripMined; }
115 void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
116 void mark_subword_loop() { _loop_flags |= SubwordLoop; }
117 void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
118 void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
119
120 int unswitch_max() { return _unswitch_max; }
121 int unswitch_count() { return _unswitch_count; }
122
123 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
124 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
125
126 void set_unswitch_count(int val) {
127 assert (val <= unswitch_max(), "too many unswitches");
128 _unswitch_count = val;
129 }
130
131 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
132 float profile_trip_cnt() { return _profile_trip_cnt; }
133
134 LoopNode(Node *entry, Node *backedge)
135 : RegionNode(3), _loop_flags(0), _unswitch_count(0),
136 _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
137 init_class_id(Class_Loop);
138 init_req(EntryControl, entry);
1433
1434 // Check whether node has become unreachable
1435 bool is_node_unreachable(Node *n) const {
1436 return !has_node(n) || n->is_unreachable(_igvn);
1437 }
1438
1439 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1440 void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1441
1442 // Process post loops which have range checks and try to build a multi-version
1443 // guard to safely determine if we can execute the post loop which was RCE'd.
1444 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1445
1446 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning.
1447 void poison_rce_post_loop(IdealLoopTree *rce_loop);
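// Clarifying note, in sketch form: after RCE the main loop runs without range
// checks, but the leftover post loop can only drop its checks if a runtime guard
// proves its remaining iterations are in range. multi_version_post_loops() above
// tries to build that guard; when it cannot, the RCE'd copy is poisoned here so
// it folds away and only the original (checked) post loop remains.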
1448
1449 // Create a slow version of the loop by cloning the loop
1450 // and inserting an if to select fast-slow versions.
1451 // Return the inserted if.
1452 IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1453 Node_List &old_new,
1454 IfNode* unswitch_iff,
1455 CloneLoopMode mode);
1456
1457 // Clone a loop and return the clone head (clone_loop_head).
1458 // Added nodes include int(1), int(0) - disconnected, If, IfTrue and IfFalse.
1459 // This routine was created for use in CountedLoopReserveKit.
1460 //
1461 //   int(1) -> If -> IfTrue  -> original_loop_head
1462 //             |
1463 //             v
1464 //             IfFalse -> clone_loop_head (returned by the function)
1465 //
1466 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1467 // Clone loop with an invariant test (that does not exit) and
1468 // insert a clone of the test that selects which version to
1469 // execute.
1470 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1471
1472 // Find candidate "if" for unswitching
1473 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1474
1475 // Range Check Elimination uses this function!
1476 // Constrain the main loop iterations so the affine function:
1477 // low_limit <= scale_con * I + offset < upper_limit
1478 // always holds true. That is, either increase the number of iterations in
1479 // the pre-loop or the post-loop until the condition holds true in the main
1480 // loop. Scale_con, offset and limit are all loop invariant.
1481 void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
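// Worked example (illustrative only, not from the source): for a range check of
// the form 0 <= 4*i + off < length with a positive stride, the pre-loop limit is
// raised so the main loop only starts once 4*i + off >= 0, and the main-loop
// limit is lowered so it exits before 4*i + off can reach length; the iterations
// outside that window execute in the pre- and post-loops, which keep their
// range checks.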
1482 // Helper function for add_constraint().
1483 Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1484
1485 // Partially peel loop up through last_peel node.
1486 bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1487 bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1488
1489 // Move UnorderedReduction out of loop if possible
1490 void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);
1491
1492 // Create a scheduled list of nodes control dependent on ctrl set.
1493 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1573 bool intrinsify_fill(IdealLoopTree* lpt);
1574 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1575 Node*& shift, Node*& offset);
1576
1577 private:
1578 // Return a type based on condition control flow
1579 const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1580 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1581 // Helpers for filtered type
1582 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1583
1584 // Helper functions
1585 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1586 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1587 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1588 bool split_up( Node *n, Node *blk1, Node *blk2 );
1589 void sink_use( Node *use, Node *post_loop );
1590 Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1591 Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1592 void try_move_store_after_loop(Node* n);
1593 bool identical_backtoback_ifs(Node *n);
1594 bool can_split_if(Node *n_ctrl);
1595 bool cannot_split_division(const Node* n, const Node* region) const;
1596 static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1597 bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
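// Clarifying note (rationale as commonly described, not spelled out here):
// splitting an If through a region may let a Div/Mod node float above the test
// that guarantees a non-zero divisor, so these helpers refuse the split when the
// divisor is a counted-loop phi whose backedge value may be zero.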
1598
1599 // Determine if a method is too big for a/another round of split-if, based on
1600 // a magic (approximate) ratio derived from the equally magic constant 35000,
1601 // previously used for this purpose (but without relating to the node limit).
1602 bool must_throttle_split_if() {
1603 uint threshold = C->max_node_limit() * 2 / 5;
1604 return C->live_nodes() > threshold;
1605 }
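// For scale: with the default MaxNodeLimit of 80000 this works out to
// 80000 * 2 / 5 = 32000 live nodes, in the same ballpark as the historical
// 35000 cutoff mentioned above (illustrative arithmetic; the real threshold
// follows whatever C->max_node_limit() returns for this compilation).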
1606
1607 // A simplistic node request tracking mechanism, where
1608 // = UINT_MAX Request not valid or made final.
1609 // < UINT_MAX Nodes currently requested (estimate).
1610 uint _nodes_required;
1611
1612 enum { REQUIRE_MIN = 70 };
1613
|
59 virtual uint size_of() const { return sizeof(*this); }
60 protected:
61 uint _loop_flags;
62 // Names for flag bitfields
63 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
64 MainHasNoPreLoop = 1<<2,
65 HasExactTripCount = 1<<3,
66 InnerLoop = 1<<4,
67 PartialPeelLoop = 1<<5,
68 PartialPeelFailed = 1<<6,
69 WasSlpAnalyzed = 1<<7,
70 PassedSlpAnalysis = 1<<8,
71 DoUnrollOnly = 1<<9,
72 VectorizedLoop = 1<<10,
73 HasAtomicPostLoop = 1<<11,
74 IsMultiversioned = 1<<12,
75 StripMined = 1<<13,
76 SubwordLoop = 1<<14,
77 ProfileTripFailed = 1<<15,
78 LoopNestInnerLoop = 1<<16,
79 LoopNestLongOuterLoop = 1<<17,
80 FlattenedArrays = 1<<18};
81 char _unswitch_count;
82 enum { _unswitch_max=3 };
83 char _postloop_flags;
84 enum { RCEPostLoop = 1 };
85
86 // Expected trip count from profile data
87 float _profile_trip_cnt;
88
89 public:
90 // Names for edge indices
91 enum { Self=0, EntryControl, LoopBackControl };
92
93 bool is_inner_loop() const { return _loop_flags & InnerLoop; }
94 void set_inner_loop() { _loop_flags |= InnerLoop; }
95
96 bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
97 bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
98 bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
99 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
100 bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
101 bool is_strip_mined() const { return _loop_flags & StripMined; }
102 bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
103 bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
104 bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
105 bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
106 bool is_flattened_arrays() const { return _loop_flags & FlattenedArrays; }
107
108 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
109 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
110 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
111 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
112 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
113 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
114 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
115 void mark_strip_mined() { _loop_flags |= StripMined; }
116 void clear_strip_mined() { _loop_flags &= ~StripMined; }
117 void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
118 void mark_subword_loop() { _loop_flags |= SubwordLoop; }
119 void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
120 void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
121 void mark_flattened_arrays() { _loop_flags |= FlattenedArrays; }
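// Assumption, based on the names only: FlattenedArrays marks loops that contain
// accesses to flat (inline-type) arrays, and is consulted by the flat-array
// specific helpers declared further down (move_flat_array_check_out_of_loop,
// flatten_array_element_type_check).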
122
123 int unswitch_max() { return _unswitch_max; }
124 int unswitch_count() { return _unswitch_count; }
125
126 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
127 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
128
129 void set_unswitch_count(int val) {
130 assert (val <= unswitch_max(), "too many unswitches");
131 _unswitch_count = val;
132 }
133
134 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
135 float profile_trip_cnt() { return _profile_trip_cnt; }
136
137 LoopNode(Node *entry, Node *backedge)
138 : RegionNode(3), _loop_flags(0), _unswitch_count(0),
139 _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
140 init_class_id(Class_Loop);
141 init_req(EntryControl, entry);
1436
1437 // Check whether node has become unreachable
1438 bool is_node_unreachable(Node *n) const {
1439 return !has_node(n) || n->is_unreachable(_igvn);
1440 }
1441
1442 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1443 void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1444
1445 // Process post loops which have range checks and try to build a multi-version
1446 // guard to safely determine if we can execute the post loop which was RCE'd.
1447 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1448
1449 // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning.
1450 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1451
1452 // Create a slow version of the loop by cloning the loop
1453 // and inserting an if to select fast-slow versions.
1454 // Return the inserted if.
1455 IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1456 Node_List &old_new,
1457 Node_List &unswitch_iffs,
1458 CloneLoopMode mode);
1459
1460 // Clone a loop and return the clone head (clone_loop_head).
1461 // Added nodes include int(1), int(0) - disconnected, If, IfTrue and IfFalse.
1462 // This routine was created for use in CountedLoopReserveKit.
1463 //
1464 //   int(1) -> If -> IfTrue  -> original_loop_head
1465 //             |
1466 //             v
1467 //             IfFalse -> clone_loop_head (returned by the function)
1468 //
1469 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1470 // Clone loop with an invariant test (that does not exit) and
1471 // insert a clone of the test that selects which version to
1472 // execute.
1473 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1474
1475 // Find candidate "if" for unswitching
1476 IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
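// Unswitching in sketch form: a loop-invariant, non-exiting test found by
// find_unswitching_candidate() is copied in front of the loop, the body is
// cloned, and each copy keeps only one arm of the test, so the hoisted test
// selects between the two loop versions at runtime.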
1477
1478 // Range Check Elimination uses this function!
1479 // Constrain the main loop iterations so the affine function:
1480 // low_limit <= scale_con * I + offset < upper_limit
1481 // always holds true. That is, either increase the number of iterations in
1482 // the pre-loop or the post-loop until the condition holds true in the main
1483 // loop. Scale_con, offset and limit are all loop invariant.
1484 void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1485 // Helper function for add_constraint().
1486 Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1487
1488 // Partially peel loop up through last_peel node.
1489 bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1490 bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1491
1492 // Move UnorderedReduction out of loop if possible
1493 void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);
1494
1495 // Create a scheduled list of nodes control dependent on ctrl set.
1496 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1576 bool intrinsify_fill(IdealLoopTree* lpt);
1577 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1578 Node*& shift, Node*& offset);
1579
1580 private:
1581 // Return a type based on condition control flow
1582 const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1583 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1584 // Helpers for filtered type
1585 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1586
1587 // Helper functions
1588 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1589 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1590 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1591 bool split_up( Node *n, Node *blk1, Node *blk2 );
1592 void sink_use( Node *use, Node *post_loop );
1593 Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1594 Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1595 void try_move_store_after_loop(Node* n);
1596 void move_flat_array_check_out_of_loop(Node* n);
1597 bool identical_backtoback_ifs(Node *n);
1598 bool flatten_array_element_type_check(Node *n);
1599 bool can_split_if(Node *n_ctrl);
1600 bool cannot_split_division(const Node* n, const Node* region) const;
1601 static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1602 bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
1603
1604 // Determine if a method is too big for a/another round of split-if, based on
1605 // a magic (approximate) ratio derived from the equally magic constant 35000,
1606 // previously used for this purpose (but without relating to the node limit).
1607 bool must_throttle_split_if() {
1608 uint threshold = C->max_node_limit() * 2 / 5;
1609 return C->live_nodes() > threshold;
1610 }
1611
1612 // A simplistic node request tracking mechanism, where
1613 // = UINT_MAX Request not valid or made final.
1614 // < UINT_MAX Nodes currently requested (estimate).
1615 uint _nodes_required;
1616
1617 enum { REQUIRE_MIN = 70 };
1618
|