src/hotspot/share/opto/loopnode.hpp

  59   virtual uint size_of() const { return sizeof(*this); }
  60 protected:
  61   uint _loop_flags;
  62   // Names for flag bitfields
  63   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  64          MainHasNoPreLoop      = 1<<2,
  65          HasExactTripCount     = 1<<3,
  66          InnerLoop             = 1<<4,
  67          PartialPeelLoop       = 1<<5,
  68          PartialPeelFailed     = 1<<6,
  69          WasSlpAnalyzed        = 1<<7,
  70          PassedSlpAnalysis     = 1<<8,
  71          DoUnrollOnly          = 1<<9,
  72          VectorizedLoop        = 1<<10,
  73          HasAtomicPostLoop     = 1<<11,
  74          IsMultiversioned      = 1<<12,
  75          StripMined            = 1<<13,
  76          SubwordLoop           = 1<<14,
  77          ProfileTripFailed     = 1<<15,
  78          LoopNestInnerLoop     = 1<<16,
  79          LoopNestLongOuterLoop = 1<<17};

  80   char _unswitch_count;
  81   enum { _unswitch_max=3 };
  82   char _postloop_flags;
  83   enum { RCEPostLoop = 1 };
  84 
  85   // Expected trip count from profile data
  86   float _profile_trip_cnt;
  87 
  88 public:
  89   // Names for edge indices
  90   enum { Self=0, EntryControl, LoopBackControl };
  91 
  92   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  93   void set_inner_loop() { _loop_flags |= InnerLoop; }
  94 
  95   bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  96   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  97   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  98   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  99   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
 100   bool is_strip_mined() const { return _loop_flags & StripMined; }
 101   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 102   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 103   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 104   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }

 105 
 106   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 107   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 108   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 109   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 110   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 111   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 112   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
 113   void mark_strip_mined() { _loop_flags |= StripMined; }
 114   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 115   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 116   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 117   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 118   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
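All of the is_*/mark_* helpers above operate on the _loop_flags bitset declared at the top of the class: the two low bits encode the Normal/Pre/Main/Post kind (masked by PreMainPostFlagsMask), and every higher bit is an independent boolean flag. The following is a minimal standalone sketch of that layout, illustrative only, reproducing just a few representative flag values:

    #include <cstdint>
    #include <cassert>

    enum : uint32_t {
      Normal = 0, Pre = 1, Main = 2, Post = 3, PreMainPostFlagsMask = 3,
      InnerLoop  = 1u << 4,    // representative independent flags
      StripMined = 1u << 13,
    };

    int main() {
      uint32_t flags = 0;
      flags = (flags & ~PreMainPostFlagsMask) | Main;   // record the loop kind
      flags |= StripMined;                              // cf. mark_strip_mined()
      assert((flags & PreMainPostFlagsMask) == Main);   // kind is unaffected by marking
      assert(flags & StripMined);                       // cf. is_strip_mined()
      flags &= ~StripMined;                             // cf. clear_strip_mined()
      assert(!(flags & StripMined));
      return 0;
    }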

 119 
 120   int unswitch_max() { return _unswitch_max; }
 121   int unswitch_count() { return _unswitch_count; }
 122 
 123   int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
 124   void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
 125 
 126   void set_unswitch_count(int val) {
 127     assert (val <= unswitch_max(), "too many unswitches");
 128     _unswitch_count = val;
 129   }
 130 
 131   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 132   float profile_trip_cnt()             { return _profile_trip_cnt; }
 133 
 134   LoopNode(Node *entry, Node *backedge)
 135     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 136       _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {
 137     init_class_id(Class_Loop);
 138     init_req(EntryControl, entry);

1433 
1434   // Check whether node has become unreachable
1435   bool is_node_unreachable(Node *n) const {
1436     return !has_node(n) || n->is_unreachable(_igvn);
1437   }
1438 
1439   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1440   void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1441 
1442   // Process post loops which have range checks and try to build a multi-version
1443   // guard to safely determine if we can execute the post loop which was RCE'd.
1444   bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1445 
1446   // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning.
1447   void poison_rce_post_loop(IdealLoopTree *rce_loop);
1448 
1449   // Create a slow version of the loop by cloning the loop
1450   // and inserting an if to select fast-slow versions.
1451   // Return the inserted if.
1452   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1453                                         Node_List &old_new,
1454                                         IfNode* unswitch_iff,
1455                                         CloneLoopMode mode);
1456 
1457   // Clone a loop and return the clone head (clone_loop_head).
1458   // Added nodes include int(1), int(0) (disconnected), If, IfTrue, IfFalse.
1459   // This routine was created for use in CountedLoopReserveKit.
1460   //
1461   //    int(1) -> If -> IfTrue -> original_loop_head
1462   //              |
1463   //              V
1464   //           IfFalse -> clone_loop_head (the LoopNode* returned by this function)
1465   //
1466   LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1467   // Clone loop with an invariant test (that does not exit) and
1468   // insert a clone of the test that selects which version to
1469   // execute.
1470   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1471 
1472   // Find candidate "if" for unswitching
1473   IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
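For orientation, this is the source-level effect of unswitching: the invariant, non-exiting test located by find_unswitching_candidate() is cloned in front of the loop, and each cloned loop body then runs without the test. Illustrative sketch only (the names inv, fast and slow are hypothetical); the actual transformation operates on the ideal graph, not on source code:

    // before: the invariant test is evaluated on every iteration
    void original(bool inv, int n, void (*fast)(int), void (*slow)(int)) {
      for (int i = 0; i < n; i++) {
        if (inv) fast(i); else slow(i);
      }
    }

    // after: a clone of the test selects which loop version executes
    void unswitched(bool inv, int n, void (*fast)(int), void (*slow)(int)) {
      if (inv) {
        for (int i = 0; i < n; i++) fast(i);
      } else {
        for (int i = 0; i < n; i++) slow(i);
      }
    }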
1474 
1475   // Range Check Elimination uses this function!
1476   // Constrain the main loop iterations so the affine function:
1477   //    low_limit <= scale_con * I + offset  <  upper_limit
1478   // always holds true.  That is, either increase the number of iterations in
1479   // the pre-loop or the post-loop until the condition holds true in the main
1480   // loop.  Scale_con, offset and limit are all loop invariant.
1481   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1482   // Helper function for add_constraint().
1483   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
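The constraint above can be read as a window on the trip variable I. The following is a self-contained sketch of the arithmetic for the positive-scale case, illustrative only; add_constraint() itself adjusts the pre- and main-loop limit nodes in the ideal graph and also handles negative scale and stride:

    #include <cstdint>

    static int64_t floor_div(int64_t a, int64_t b) {   // b > 0
      return a / b - ((a % b != 0 && ((a < 0) != (b < 0))) ? 1 : 0);
    }
    static int64_t ceil_div(int64_t a, int64_t b) {    // b > 0
      return a / b + ((a % b != 0 && ((a < 0) == (b < 0))) ? 1 : 0);
    }

    struct MainBounds { int64_t lo; int64_t hi; };     // inclusive range of trip values

    // Trip values I satisfying low_limit <= scale * I + offset < upper_limit,
    // assuming scale > 0. Iterations outside [lo, hi] must be left to the
    // pre- and post-loops, which keep their range checks.
    static MainBounds constrain_main_loop(int64_t scale, int64_t offset,
                                          int64_t low_limit, int64_t upper_limit) {
      return { ceil_div(low_limit - offset, scale),
               floor_div(upper_limit - offset - 1, scale) };
    }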
1484 
1485   // Partially peel loop up through last_peel node.
1486   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1487   bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1488 
1489   // Move UnorderedReduction out of loop if possible
1490   void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);
1491 
1492   // Create a scheduled list of nodes control dependent on ctrl set.
1493   void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );

1573   bool intrinsify_fill(IdealLoopTree* lpt);
1574   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1575                        Node*& shift, Node*& offset);
1576 
1577 private:
1578   // Return a type based on condition control flow
1579   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1580   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1581   // Helpers for filtered type
1582   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1583 
1584   // Helper functions
1585   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1586   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1587   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1588   bool split_up( Node *n, Node *blk1, Node *blk2 );
1589   void sink_use( Node *use, Node *post_loop );
1590   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1591   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1592   void try_move_store_after_loop(Node* n);

1593   bool identical_backtoback_ifs(Node *n);

1594   bool can_split_if(Node *n_ctrl);
1595   bool cannot_split_division(const Node* n, const Node* region) const;
1596   static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1597   bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
1598 
1599   // Determine if a method is too big for a/another round of split-if, based on
1600   // a magic (approximate) ratio derived from the equally magic constant 35000,
1601   // previously used for this purpose (but without relating to the node limit).
1602   bool must_throttle_split_if() {
1603     uint threshold = C->max_node_limit() * 2 / 5;
1604     return C->live_nodes() > threshold;
1605   }
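As a concrete data point, assuming the default MaxNodeLimit of 80000 nodes, the threshold evaluates to 80000 * 2 / 5 = 32000 live nodes, i.e. roughly the historical 35000-node cutoff referred to in the comment.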
1606 
1607   // A simplistic node request tracking mechanism, where
1608   //   = UINT_MAX   Request not valid or made final.
1609   //   < UINT_MAX   Nodes currently requested (estimate).
1610   uint _nodes_required;
1611 
1612   enum { REQUIRE_MIN = 70 };
1613 
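A minimal sketch of the request/finalize convention described by the comment, with UINT_MAX as the "no request outstanding" sentinel and REQUIRE_MIN presumably serving as a floor on each estimate. The type and member names below are hypothetical, not the actual API:

    #include <climits>
    #include <cassert>

    struct NodeBudget {
      unsigned _nodes_required = UINT_MAX;     // no request outstanding / made final
      enum { REQUIRE_MIN = 70 };

      void request(unsigned estimate) {        // start tracking a transformation's needs
        assert(_nodes_required == UINT_MAX);
        _nodes_required = estimate < REQUIRE_MIN ? REQUIRE_MIN : estimate;
      }
      bool would_exceed(unsigned live_nodes, unsigned node_limit) const {
        return _nodes_required != UINT_MAX && live_nodes + _nodes_required > node_limit;
      }
      void finalize() { _nodes_required = UINT_MAX; }
    };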

  59   virtual uint size_of() const { return sizeof(*this); }
  60 protected:
  61   uint _loop_flags;
  62   // Names for flag bitfields
  63   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  64          MainHasNoPreLoop      = 1<<2,
  65          HasExactTripCount     = 1<<3,
  66          InnerLoop             = 1<<4,
  67          PartialPeelLoop       = 1<<5,
  68          PartialPeelFailed     = 1<<6,
  69          WasSlpAnalyzed        = 1<<7,
  70          PassedSlpAnalysis     = 1<<8,
  71          DoUnrollOnly          = 1<<9,
  72          VectorizedLoop        = 1<<10,
  73          HasAtomicPostLoop     = 1<<11,
  74          IsMultiversioned      = 1<<12,
  75          StripMined            = 1<<13,
  76          SubwordLoop           = 1<<14,
  77          ProfileTripFailed     = 1<<15,
  78          LoopNestInnerLoop     = 1<<16,
  79          LoopNestLongOuterLoop = 1<<17,
  80          FlattenedArrays       = 1<<18};
  81   char _unswitch_count;
  82   enum { _unswitch_max=3 };
  83   char _postloop_flags;
  84   enum { RCEPostLoop = 1 };
  85 
  86   // Expected trip count from profile data
  87   float _profile_trip_cnt;
  88 
  89 public:
  90   // Names for edge indices
  91   enum { Self=0, EntryControl, LoopBackControl };
  92 
  93   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  94   void set_inner_loop() { _loop_flags |= InnerLoop; }
  95 
  96   bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  97   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  98   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  99   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
 100   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
 101   bool is_strip_mined() const { return _loop_flags & StripMined; }
 102   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 103   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 104   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 105   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
 106   bool is_flattened_arrays() const { return _loop_flags & FlattenedArrays; }
 107 
 108   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 109   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 110   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 111   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 112   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 113   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 114   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
 115   void mark_strip_mined() { _loop_flags |= StripMined; }
 116   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 117   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 118   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 119   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 120   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
 121   void mark_flattened_arrays() { _loop_flags |= FlattenedArrays; }
 122 
 123   int unswitch_max() { return _unswitch_max; }
 124   int unswitch_count() { return _unswitch_count; }
 125 
 126   int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
 127   void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
 128 
 129   void set_unswitch_count(int val) {
 130     assert (val <= unswitch_max(), "too many unswitches");
 131     _unswitch_count = val;
 132   }
 133 
 134   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 135   float profile_trip_cnt()             { return _profile_trip_cnt; }
 136 
 137   LoopNode(Node *entry, Node *backedge)
 138     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 139       _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {
 140     init_class_id(Class_Loop);
 141     init_req(EntryControl, entry);

1436 
1437   // Check whether node has become unreachable
1438   bool is_node_unreachable(Node *n) const {
1439     return !has_node(n) || n->is_unreachable(_igvn);
1440   }
1441 
1442   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1443   void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1444 
1445   // Process post loops which have range checks and try to build a multi-version
1446   // guard to safely determine if we can execute the post loop which was RCE'd.
1447   bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1448 
1449   // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning.
1450   void poison_rce_post_loop(IdealLoopTree *rce_loop);
1451 
1452   // Create a slow version of the loop by cloning the loop
1453   // and inserting an if to select fast-slow versions.
1454   // Return the inserted if.
1455   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1456                                       Node_List &old_new,
1457                                       Node_List &unswitch_iffs,
1458                                       CloneLoopMode mode);
1459 
1460   // Clone a loop and return the clone head (clone_loop_head).
1461   // Added nodes include int(1), int(0) (disconnected), If, IfTrue, IfFalse.
1462   // This routine was created for use in CountedLoopReserveKit.
1463   //
1464   //    int(1) -> If -> IfTrue -> original_loop_head
1465   //              |
1466   //              V
1467   //           IfFalse -> clone_loop_head (the LoopNode* returned by this function)
1468   //
1469   LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1470   // Clone loop with an invariant test (that does not exit) and
1471   // insert a clone of the test that selects which version to
1472   // execute.
1473   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1474 
1475   // Find candidate "if" for unswitching
1476   IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
1477 
1478   // Range Check Elimination uses this function!
1479   // Constrain the main loop iterations so the affine function:
1480   //    low_limit <= scale_con * I + offset  <  upper_limit
1481   // always holds true.  That is, either increase the number of iterations in
1482   // the pre-loop or the post-loop until the condition holds true in the main
1483   // loop.  Scale_con, offset and limit are all loop invariant.
1484   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1485   // Helper function for add_constraint().
1486   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1487 
1488   // Partially peel loop up through last_peel node.
1489   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1490   bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1491 
1492   // Move UnorderedReduction out of loop if possible
1493   void move_unordered_reduction_out_of_loop(IdealLoopTree* loop);
1494 
1495   // Create a scheduled list of nodes control dependent on ctrl set.
1496   void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );

1576   bool intrinsify_fill(IdealLoopTree* lpt);
1577   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1578                        Node*& shift, Node*& offset);
1579 
1580 private:
1581   // Return a type based on condition control flow
1582   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1583   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1584   // Helpers for filtered type
1585   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1586 
1587   // Helper functions
1588   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1589   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1590   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1591   bool split_up( Node *n, Node *blk1, Node *blk2 );
1592   void sink_use( Node *use, Node *post_loop );
1593   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1594   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1595   void try_move_store_after_loop(Node* n);
1596   void move_flat_array_check_out_of_loop(Node* n);
1597   bool identical_backtoback_ifs(Node *n);
1598   bool flatten_array_element_type_check(Node *n);
1599   bool can_split_if(Node *n_ctrl);
1600   bool cannot_split_division(const Node* n, const Node* region) const;
1601   static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1602   bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
1603 
1604   // Determine if a method is too big for a/another round of split-if, based on
1605   // a magic (approximate) ratio derived from the equally magic constant 35000,
1606   // previously used for this purpose (but without relating to the node limit).
1607   bool must_throttle_split_if() {
1608     uint threshold = C->max_node_limit() * 2 / 5;
1609     return C->live_nodes() > threshold;
1610   }
1611 
1612   // A simplistic node request tracking mechanism, where
1613   //   = UINT_MAX   Request not valid or made final.
1614   //   < UINT_MAX   Nodes currently requested (estimate).
1615   uint _nodes_required;
1616 
1617   enum { REQUIRE_MIN = 70 };
1618 