60 protected:
61 uint _loop_flags; // bit set built from the flag values in the enum below
62 // Names for flag bitfields
63 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
64 MainHasNoPreLoop = 1<<2,
65 HasExactTripCount = 1<<3,
66 InnerLoop = 1<<4,
67 PartialPeelLoop = 1<<5,
68 PartialPeelFailed = 1<<6,
69 HasReductions = 1<<7,
70 WasSlpAnalyzed = 1<<8,
71 PassedSlpAnalysis = 1<<9,
72 DoUnrollOnly = 1<<10,
73 VectorizedLoop = 1<<11,
74 HasAtomicPostLoop = 1<<12,
75 HasRangeChecks = 1<<13,
76 IsMultiversioned = 1<<14,
77 StripMined = 1<<15,
78 SubwordLoop = 1<<16,
79 ProfileTripFailed = 1<<17,
80 LoopNestInnerLoop = 1 << 18,
81 LoopNestLongOuterLoop = 1 << 19};
82 char _unswitch_count; // number of unswitches performed; capped at _unswitch_max (see set_unswitch_count)
83 enum { _unswitch_max=3 };
84 char _postloop_flags; // RCE post-loop state; values from the enum below
85 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
86
87 // Expected trip count from profile data
88 float _profile_trip_cnt;
89
90 public:
91 // Names for edge indices
92 enum { Self=0, EntryControl, LoopBackControl };
93
// Queries of individual _loop_flags bits.
94 bool is_inner_loop() const { return _loop_flags & InnerLoop; }
95 void set_inner_loop() { _loop_flags |= InnerLoop; }
96
97 bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
98 bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
99 bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
100 bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
101 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
102 bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
103 bool is_strip_mined() const { return _loop_flags & StripMined; }
104 bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
105 bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
106 bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
107 bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
108
// Setters/clearers for individual _loop_flags bits.
109 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
110 void mark_has_reductions() { _loop_flags |= HasReductions; }
111 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
112 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
113 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
114 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
115 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
116 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
117 void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; }
118 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
119 void mark_strip_mined() { _loop_flags |= StripMined; }
120 void clear_strip_mined() { _loop_flags &= ~StripMined; }
121 void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
122 void mark_subword_loop() { _loop_flags |= SubwordLoop; }
123 void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
124 void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
125
126 int unswitch_max() { return _unswitch_max; }
127 int unswitch_count() { return _unswitch_count; }
128
// _postloop_flags state used by RCE post-loop multiversioning.
// NOTE(review): these predicates return int (the masked bits), not bool.
129 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
130 void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
131 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
132 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
133
// Store the unswitch count; asserts it stays within _unswitch_max.
134 void set_unswitch_count(int val) {
135 assert (val <= unswitch_max(), "too many unswitches");
136 _unswitch_count = val;
137 }
138
139 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
140 float profile_trip_cnt() { return _profile_trip_cnt; }
141
142 LoopNode(Node *entry, Node *backedge)
143 : RegionNode(3), _loop_flags(0), _unswitch_count(0),
144 _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
1408 return !has_node(n) || n->is_unreachable(_igvn);
1409 }
1410
1411 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1412 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1413
1414 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1415 void has_range_checks(IdealLoopTree *loop);
1416
1417 // Process post loops which have range checks and try to build a multi-version
1418 // guard to safely determine if we can execute the post loop which was RCE'd.
1419 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1420
1421 // Cause the rce'd post loop to be optimized away; this happens if we cannot complete multiversioning
1422 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1423
1424 // Create a slow version of the loop by cloning the loop
1425 // and inserting an if to select fast-slow versions.
1426 // Return the inserted if.
1427 IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1428 Node_List &old_new,
1429 IfNode* unswitch_iff,
1430 CloneLoopMode mode);
1431
1432 // Clone a loop and return the clone head (clone_loop_head).
1433 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1434 // This routine was created for usage in CountedLoopReserveKit.
1435 //
1436 // int(1) -> If -> IfTrue -> original_loop_head
1437 // |
1438 // V
1439 // IfFalse -> clone_loop_head (returned by function pointer)
1440 //
1441 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1442 // Clone loop with an invariant test (that does not exit) and
1443 // insert a clone of the test that selects which version to
1444 // execute.
1445 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1446
1447 // Find candidate "if" for unswitching
1448 IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1449
1450 // Range Check Elimination uses this function!
1451 // Constrain the main loop iterations so the affine function:
1452 // low_limit <= scale_con * I + offset < upper_limit
1453 // always holds true. That is, either increase the number of iterations in
1454 // the pre-loop or the post-loop until the condition holds true in the main
1455 // loop. Scale_con, offset and limit are all loop invariant.
1456 void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1457 // Helper function for add_constraint().
1458 Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1459
1460 // Partially peel loop up through last_peel node.
1461 bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1462 bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1463
1464 // Create a scheduled list of nodes control dependent on ctrl set.
1465 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1466 // Has a use in the vector set
1467 bool has_use_in_set( Node* n, VectorSet& vset );
1468 // Has use internal to the vector set (ie. not in a phi at the loop head)
1545 bool intrinsify_fill(IdealLoopTree* lpt);
1546 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1547 Node*& shift, Node*& offset);
1548
1549 private:
1550 // Return a type based on condition control flow
1551 const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1552 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
1553 // Helpers for filtered type
1554 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1555
1556 // Helper functions
1557 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1558 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1559 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1560 bool split_up( Node *n, Node *blk1, Node *blk2 );
1561 void sink_use( Node *use, Node *post_loop );
1562 Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1563 Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1564 void try_move_store_after_loop(Node* n);
1565 bool identical_backtoback_ifs(Node *n);
1566 bool can_split_if(Node *n_ctrl);
1567
1568 // Determine if a method is too big for a/another round of split-if, based on
1569 // a magic (approximate) ratio derived from the equally magic constant 35000,
1570 // previously used for this purpose (but without relating to the node limit).
1571 bool must_throttle_split_if() {
1572 // Throttle further split-if rounds once live nodes exceed ~40% (2/5) of the node limit.
1573 return C->live_nodes() > C->max_node_limit() * 2 / 5;
1574 }
1575
1576 // A simplistic node request tracking mechanism, where
1577 // = UINT_MAX Request not valid or made final.
1578 // < UINT_MAX Nodes currently requested (estimate).
1579 uint _nodes_required;
1580
1581 enum { REQUIRE_MIN = 70 }; // lower bound applied to node requests — NOTE(review): confirm against callers
1582
1583 uint nodes_required() const { return _nodes_required; }
1584
1585 // Given the _currently_ available number of nodes, check whether there is
|
60 protected:
61 uint _loop_flags; // bit set built from the flag values in the enum below
62 // Names for flag bitfields
63 enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
64 MainHasNoPreLoop = 1<<2,
65 HasExactTripCount = 1<<3,
66 InnerLoop = 1<<4,
67 PartialPeelLoop = 1<<5,
68 PartialPeelFailed = 1<<6,
69 HasReductions = 1<<7,
70 WasSlpAnalyzed = 1<<8,
71 PassedSlpAnalysis = 1<<9,
72 DoUnrollOnly = 1<<10,
73 VectorizedLoop = 1<<11,
74 HasAtomicPostLoop = 1<<12,
75 HasRangeChecks = 1<<13,
76 IsMultiversioned = 1<<14,
77 StripMined = 1<<15,
78 SubwordLoop = 1<<16,
79 ProfileTripFailed = 1<<17,
80 LoopNestInnerLoop = 1<< 18,
81 LoopNestLongOuterLoop = 1<< 19,
82 FlattenedArrays = 1<<20};
83
84 char _unswitch_count; // number of unswitches performed; capped at _unswitch_max (see set_unswitch_count)
85 enum { _unswitch_max=3 };
86 char _postloop_flags; // RCE post-loop state; values from the enum below
87 enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
88
89 // Expected trip count from profile data
90 float _profile_trip_cnt;
91
92 public:
93 // Names for edge indices
94 enum { Self=0, EntryControl, LoopBackControl };
95
// Queries of individual _loop_flags bits.
96 bool is_inner_loop() const { return _loop_flags & InnerLoop; }
97 void set_inner_loop() { _loop_flags |= InnerLoop; }
98
99 bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
100 bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
101 bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
102 bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
103 void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
104 bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
105 bool is_strip_mined() const { return _loop_flags & StripMined; }
106 bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
107 bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
108 bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
109 bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
110 bool is_flattened_arrays() const { return _loop_flags & FlattenedArrays; }
111
// Setters/clearers for individual _loop_flags bits.
112 void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
113 void mark_has_reductions() { _loop_flags |= HasReductions; }
114 void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
115 void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
116 void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
117 void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
118 void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
119 void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
120 void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; }
121 void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
122 void mark_strip_mined() { _loop_flags |= StripMined; }
123 void clear_strip_mined() { _loop_flags &= ~StripMined; }
124 void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
125 void mark_subword_loop() { _loop_flags |= SubwordLoop; }
126 void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
127 void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
128 void mark_flattened_arrays() { _loop_flags |= FlattenedArrays; }
129
130 int unswitch_max() { return _unswitch_max; }
131 int unswitch_count() { return _unswitch_count; }
132
// _postloop_flags state used by RCE post-loop multiversioning.
// NOTE(review): these predicates return int (the masked bits), not bool.
133 int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
134 void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
135 int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
136 void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
137
// Store the unswitch count; asserts it stays within _unswitch_max.
138 void set_unswitch_count(int val) {
139 assert (val <= unswitch_max(), "too many unswitches");
140 _unswitch_count = val;
141 }
142
143 void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
144 float profile_trip_cnt() { return _profile_trip_cnt; }
145
146 LoopNode(Node *entry, Node *backedge)
147 : RegionNode(3), _loop_flags(0), _unswitch_count(0),
148 _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN) {
1412 return !has_node(n) || n->is_unreachable(_igvn);
1413 }
1414
1415 // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1416 int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1417
1418 // Check to see if do_range_check(...) cleaned the main loop of range-checks
1419 void has_range_checks(IdealLoopTree *loop);
1420
1421 // Process post loops which have range checks and try to build a multi-version
1422 // guard to safely determine if we can execute the post loop which was RCE'd.
1423 bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1424
1425 // Cause the rce'd post loop to be optimized away; this happens if we cannot complete multiversioning
1426 void poison_rce_post_loop(IdealLoopTree *rce_loop);
1427
1428 // Create a slow version of the loop by cloning the loop
1429 // and inserting an if to select fast-slow versions.
1430 // Return the inserted if.
1431 IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1432 Node_List &old_new,
1433 Node_List &unswitch_iffs,
1434 CloneLoopMode mode);
1435
1436 // Clone a loop and return the clone head (clone_loop_head).
1437 // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
1438 // This routine was created for usage in CountedLoopReserveKit.
1439 //
1440 // int(1) -> If -> IfTrue -> original_loop_head
1441 // |
1442 // V
1443 // IfFalse -> clone_loop_head (returned by function pointer)
1444 //
1445 LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1446 // Clone loop with an invariant test (that does not exit) and
1447 // insert a clone of the test that selects which version to
1448 // execute.
1449 void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1450
1451 // Find candidate "if" for unswitching
1452 IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
1453
1454 // Range Check Elimination uses this function!
1455 // Constrain the main loop iterations so the affine function:
1456 // low_limit <= scale_con * I + offset < upper_limit
1457 // always holds true. That is, either increase the number of iterations in
1458 // the pre-loop or the post-loop until the condition holds true in the main
1459 // loop. Scale_con, offset and limit are all loop invariant.
1460 void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1461 // Helper function for add_constraint().
1462 Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1463
1464 // Partially peel loop up through last_peel node.
1465 bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1466 bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1467
1468 // Create a scheduled list of nodes control dependent on ctrl set.
1469 void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1470 // Has a use in the vector set
1471 bool has_use_in_set( Node* n, VectorSet& vset );
1472 // Has use internal to the vector set (ie. not in a phi at the loop head)
1549 bool intrinsify_fill(IdealLoopTree* lpt);
1550 bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1551 Node*& shift, Node*& offset);
1552
1553 private:
1554 // Return a type based on condition control flow
1555 const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1556 const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
1557 // Helpers for filtered type
1558 const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1559
1560 // Helper functions
1561 Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1562 Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1563 void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1564 bool split_up( Node *n, Node *blk1, Node *blk2 );
1565 void sink_use( Node *use, Node *post_loop );
1566 Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1567 Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1568 void try_move_store_after_loop(Node* n);
1569 void move_flat_array_check_out_of_loop(Node* n);
1570 bool identical_backtoback_ifs(Node *n);
1571 bool flatten_array_element_type_check(Node *n);
1572 bool can_split_if(Node *n_ctrl);
1573
1574 // Determine if a method is too big for a/another round of split-if, based on
1575 // a magic (approximate) ratio derived from the equally magic constant 35000,
1576 // previously used for this purpose (but without relating to the node limit).
1577 bool must_throttle_split_if() {
1578 // Throttle further split-if rounds once live nodes exceed ~40% (2/5) of the node limit.
1579 return C->live_nodes() > C->max_node_limit() * 2 / 5;
1580 }
1581
1582 // A simplistic node request tracking mechanism, where
1583 // = UINT_MAX Request not valid or made final.
1584 // < UINT_MAX Nodes currently requested (estimate).
1585 uint _nodes_required;
1586
1587 enum { REQUIRE_MIN = 70 }; // lower bound applied to node requests — NOTE(review): confirm against callers
1588
1589 uint nodes_required() const { return _nodes_required; }
1590
1591 // Given the _currently_ available number of nodes, check whether there is
|