src/hotspot/share/opto/loopnode.hpp


  60   // the semantics so it does not appear in the hash & cmp functions.
  61   virtual uint size_of() const { return sizeof(*this); }
  62 protected:
  63   uint _loop_flags;
  64   // Names for flag bitfields
  65   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  66          MainHasNoPreLoop      = 1<<2,
  67          HasExactTripCount     = 1<<3,
  68          InnerLoop             = 1<<4,
  69          PartialPeelLoop       = 1<<5,
  70          PartialPeelFailed     = 1<<6,
  71          WasSlpAnalyzed        = 1<<7,
  72          PassedSlpAnalysis     = 1<<8,
  73          DoUnrollOnly          = 1<<9,
  74          VectorizedLoop        = 1<<10,
  75          HasAtomicPostLoop     = 1<<11,
  76          StripMined            = 1<<12,
  77          SubwordLoop           = 1<<13,
  78          ProfileTripFailed     = 1<<14,
  79          LoopNestInnerLoop     = 1<<15,
  80          LoopNestLongOuterLoop = 1<<16 };

  81   char _unswitch_count;
  82   enum { _unswitch_max=3 };
  83 
  84   // Expected trip count from profile data
  85   float _profile_trip_cnt;
  86 
  87 public:
  88   // Names for edge indices
  89   enum { Self=0, EntryControl, LoopBackControl };
  90 
  91   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  92   void set_inner_loop() { _loop_flags |= InnerLoop; }
  93 
  94   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  95   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  96   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  97   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
  98   bool is_strip_mined() const { return _loop_flags & StripMined; }
  99   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 100   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 101   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 102   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }

 103 
 104   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 105   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 106   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 107   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 108   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 109   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 110   void mark_strip_mined() { _loop_flags |= StripMined; }
 111   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 112   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 113   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 114   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 115   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }

 116 
 117   int unswitch_max() { return _unswitch_max; }
 118   int unswitch_count() { return _unswitch_count; }
 119 
 120   void set_unswitch_count(int val) {
 121     assert (val <= unswitch_max(), "too many unswitches");
 122     _unswitch_count = val;
 123   }
 124 
 125   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 126   float profile_trip_cnt()             { return _profile_trip_cnt; }
 127 
 128   LoopNode(Node *entry, Node *backedge)
 129     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 130       _profile_trip_cnt(COUNT_UNKNOWN) {
 131     init_class_id(Class_Loop);
 132     init_req(EntryControl, entry);
 133     init_req(LoopBackControl, backedge);
 134   }
 135 
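All of these markers live in the single _loop_flags word declared above: the low two bits encode the Normal/Pre/Main/Post kind (selected via PreMainPostFlagsMask), and each remaining bit is an independent flag that an is_* query masks out and a mark_*/clear_* mutator ORs in or ANDs away. A minimal, self-contained sketch of that bit-flag idiom (the struct below is an illustrative stand-in, not HotSpot code):

  #include <cassert>
  #include <cstdint>

  // Stand-in mirroring the LoopNode flag word and its test/set/clear idiom.
  struct LoopFlagsDemo {
    uint32_t _loop_flags = 0;
    enum { InnerLoop = 1 << 4, StripMined = 1 << 12 };

    bool is_inner_loop()  const { return (_loop_flags & InnerLoop) != 0; }
    void set_inner_loop()       { _loop_flags |= InnerLoop; }
    bool is_strip_mined() const { return (_loop_flags & StripMined) != 0; }
    void mark_strip_mined()     { _loop_flags |= StripMined; }
    void clear_strip_mined()    { _loop_flags &= ~StripMined; }
  };

  int main() {
    LoopFlagsDemo f;
    f.set_inner_loop();
    f.mark_strip_mined();
    f.clear_strip_mined();
    assert(f.is_inner_loop() && !f.is_strip_mined());
    return 0;
  }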

1391   bool has_control_dependencies_from_predicates(LoopNode* head) const;
1392   void verify_fast_loop(LoopNode* head, const ProjNode* proj_true) const NOT_DEBUG_RETURN;
1393  public:
1394   // Change the control input of expensive nodes to allow commoning by
1395   // IGVN when it is guaranteed to not result in a more frequent
1396   // execution of the expensive node. Return true if progress.
1397   bool process_expensive_nodes();
1398 
1399   // Check whether node has become unreachable
1400   bool is_node_unreachable(Node *n) const {
1401     return !has_node(n) || n->is_unreachable(_igvn);
1402   }
1403 
1404   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1405   void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1406 
1407   // Create a slow version of the loop by cloning the loop
1408   // and inserting an if to select fast-slow versions.
1409   // Return the inserted if.
1410   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1411                                         Node_List &old_new,
1412                                         IfNode* unswitch_iff,
1413                                         CloneLoopMode mode);
1414 
1415   // Clone loop with an invariant test (that does not exit) and
1416   // insert a clone of the test that selects which version to
1417   // execute.
1418   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1419 
1420   // Find candidate "if" for unswitching
1421   IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1422 
1423   // Range Check Elimination uses this function!
1424   // Constrain the main loop iterations so the affine function:
1425   //    low_limit <= scale_con * I + offset  <  upper_limit
1426   // always holds true.  That is, either increase the number of iterations in
1427   // the pre-loop or the post-loop until the condition holds true in the main
1428   // loop.  Scale_con, offset and limit are all loop invariant.
1429   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1430   // Helper function for add_constraint().
1431   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1432 
1433   // Partially peel loop up through last_peel node.
1434   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1435   bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1436 
1437   // AutoVectorize the loop: replace scalar ops with vector ops.
1438   enum AutoVectorizeStatus {
1439     Impossible,      // This loop has the wrong shape to even try vectorization.
1440     Success,         // We just successfully vectorized the loop.
1441     TriedAndFailed,  // We tried to vectorize, but failed.

1529   bool intrinsify_fill(IdealLoopTree* lpt);
1530   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1531                        Node*& shift, Node*& offset);
1532 
1533 private:
1534   // Return a type based on condition control flow
1535   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1536   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1537  // Helpers for filtered type
1538   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1539 
1540   // Helper functions
1541   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1542   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1543   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1544   bool split_up( Node *n, Node *blk1, Node *blk2 );
1545 
1546   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1547   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1548   void try_move_store_after_loop(Node* n);

1549   bool identical_backtoback_ifs(Node *n);

1550   bool can_split_if(Node *n_ctrl);
1551   bool cannot_split_division(const Node* n, const Node* region) const;
1552   static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1553   bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
1554 
1555   // Determine if a method is too big for a/another round of split-if, based on
1556   // a magic (approximate) ratio derived from the equally magic constant 35000,
1557   // previously used for this purpose (but without relating to the node limit).
1558   bool must_throttle_split_if() {
1559     uint threshold = C->max_node_limit() * 2 / 5;
1560     return C->live_nodes() > threshold;
1561   }
1562 
1563   // A simplistic node request tracking mechanism, where
1564   //   = UINT_MAX   Request not valid or made final.
1565   //   < UINT_MAX   Nodes currently requested (estimate).
1566   uint _nodes_required;
1567 
1568   enum { REQUIRE_MIN = 70 };
1569 

  60   // the semantics so it does not appear in the hash & cmp functions.
  61   virtual uint size_of() const { return sizeof(*this); }
  62 protected:
  63   uint _loop_flags;
  64   // Names for flag bitfields
  65   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  66          MainHasNoPreLoop      = 1<<2,
  67          HasExactTripCount     = 1<<3,
  68          InnerLoop             = 1<<4,
  69          PartialPeelLoop       = 1<<5,
  70          PartialPeelFailed     = 1<<6,
  71          WasSlpAnalyzed        = 1<<7,
  72          PassedSlpAnalysis     = 1<<8,
  73          DoUnrollOnly          = 1<<9,
  74          VectorizedLoop        = 1<<10,
  75          HasAtomicPostLoop     = 1<<11,
  76          StripMined            = 1<<12,
  77          SubwordLoop           = 1<<13,
  78          ProfileTripFailed     = 1<<14,
  79          LoopNestInnerLoop     = 1<<15,
  80          LoopNestLongOuterLoop = 1<<16,
  81          FlatArrays            = 1<<17};
  82   char _unswitch_count;
  83   enum { _unswitch_max=3 };
  84 
  85   // Expected trip count from profile data
  86   float _profile_trip_cnt;
  87 
  88 public:
  89   // Names for edge indices
  90   enum { Self=0, EntryControl, LoopBackControl };
  91 
  92   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  93   void set_inner_loop() { _loop_flags |= InnerLoop; }
  94 
  95   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
  96   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
  97   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
  98   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
  99   bool is_strip_mined() const { return _loop_flags & StripMined; }
 100   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 101   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 102   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 103   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
 104   bool is_flat_arrays() const { return _loop_flags & FlatArrays; }
 105 
 106   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 107   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 108   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 109   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 110   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 111   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 112   void mark_strip_mined() { _loop_flags |= StripMined; }
 113   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 114   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 115   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 116   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 117   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
 118   void mark_flat_arrays() { _loop_flags |= FlatArrays; }
 119 
 120   int unswitch_max() { return _unswitch_max; }
 121   int unswitch_count() { return _unswitch_count; }
 122 
 123   void set_unswitch_count(int val) {
 124     assert (val <= unswitch_max(), "too many unswitches");
 125     _unswitch_count = val;
 126   }
 127 
 128   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 129   float profile_trip_cnt()             { return _profile_trip_cnt; }
 130 
 131   LoopNode(Node *entry, Node *backedge)
 132     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 133       _profile_trip_cnt(COUNT_UNKNOWN) {
 134     init_class_id(Class_Loop);
 135     init_req(EntryControl, entry);
 136     init_req(LoopBackControl, backedge);
 137   }
 138 
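The FlatArrays bit (1<<17) added in this hunk follows the same pattern as the existing flags: is_flat_arrays() tests it and mark_flat_arrays() sets it. The constructor also shows how the edge-index enum is meant to be read: the loop header is a three-input region with the node itself in slot 0 (Self), the incoming control in slot 1 (EntryControl) and the back edge in slot 2 (LoopBackControl). A hedged two-line sketch of a call site reading those inputs (head is a hypothetical LoopNode* a pass already holds; in() is the ordinary Node input accessor):

  Node* entry    = head->in(LoopNode::EntryControl);    // control entering the loop
  Node* backedge = head->in(LoopNode::LoopBackControl); // control from the loop tail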

1394   bool has_control_dependencies_from_predicates(LoopNode* head) const;
1395   void verify_fast_loop(LoopNode* head, const ProjNode* proj_true) const NOT_DEBUG_RETURN;
1396  public:
1397   // Change the control input of expensive nodes to allow commoning by
1398   // IGVN when it is guaranteed to not result in a more frequent
1399   // execution of the expensive node. Return true if progress.
1400   bool process_expensive_nodes();
1401 
1402   // Check whether node has become unreachable
1403   bool is_node_unreachable(Node *n) const {
1404     return !has_node(n) || n->is_unreachable(_igvn);
1405   }
1406 
1407   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1408   void do_range_check(IdealLoopTree *loop, Node_List &old_new);
1409 
1410   // Create a slow version of the loop by cloning the loop
1411   // and inserting an if to select fast-slow versions.
1412   // Return the inserted if.
1413   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1414                                       Node_List &old_new,
1415                                       Node_List &unswitch_iffs,
1416                                       CloneLoopMode mode);
1417 
1418   // Clone loop with an invariant test (that does not exit) and
1419   // insert a clone of the test that selects which version to
1420   // execute.
1421   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1422 
1423   // Find candidate "if" for unswitching
1424   IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
1425 
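        // Illustrative sketch (source-level effect only, not C2 IR): unswitching on a
        // loop-invariant, non-exiting test inv duplicates the loop and hoists the test:
        //
        //   before:  for (int i = 0; i < n; i++) { if (inv) a[i] = 0; else b[i] = 0; }
        //
        //   after:   if (inv) { for (int i = 0; i < n; i++) a[i] = 0; }
        //            else     { for (int i = 0; i < n; i++) b[i] = 0; }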
1426   // Range Check Elimination uses this function!
1427   // Constrain the main loop iterations so the affine function:
1428   //    low_limit <= scale_con * I + offset  <  upper_limit
1429   // always holds true.  That is, either increase the number of iterations in
1430   // the pre-loop or the post-loop until the condition holds true in the main
1431   // loop.  Scale_con, offset and limit are all loop invariant.
1432   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1433   // Helper function for add_constraint().
1434   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1435 
1436   // Partially peel loop up through last_peel node.
1437   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1438   bool duplicate_loop_backedge(IdealLoopTree *loop, Node_List &old_new);
1439 
1440   // AutoVectorize the loop: replace scalar ops with vector ops.
1441   enum AutoVectorizeStatus {
1442     Impossible,      // This loop has the wrong shape to even try vectorization.
1443     Success,         // We just successfully vectorized the loop.
1444     TriedAndFailed,  // We tried to vectorize, but failed.
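The add_constraint() contract spelled out in the comment above is plain integer arithmetic: for a positive scale_con, low_limit <= scale_con * I + offset < upper_limit holds exactly for I in the half-open range [ceil((low_limit - offset) / scale_con), ceil((upper_limit - offset) / scale_con)), and range check elimination shrinks the main loop to such a range by growing the pre- and post-loop limits. A self-contained sketch of that bound computation (positive scale_con case only; this illustrates the arithmetic, not the HotSpot implementation):

  #include <cassert>

  // Ceiling division for a positive divisor and a numerator of either sign.
  long ceil_div(long a, long b) {            // assumes b > 0
    return (a >= 0) ? (a + b - 1) / b : -((-a) / b);
  }

  // Smallest half-open range [lo, hi) of I such that
  //   low_limit <= scale * I + offset < upper_limit    (scale > 0).
  void affine_range(long scale, long offset, long low_limit, long upper_limit,
                    long* lo, long* hi) {
    *lo = ceil_div(low_limit - offset, scale);
    *hi = ceil_div(upper_limit - offset, scale);
  }

  int main() {
    long lo, hi;
    // Example: 0 <= 2*I + 3 < 20 is satisfied exactly for I in [-1, 9).
    affine_range(2, 3, 0, 20, &lo, &hi);
    assert(lo == -1 && hi == 9);
    for (long I = lo; I < hi; I++) {
      assert(0 <= 2 * I + 3 && 2 * I + 3 < 20);
    }
    return 0;
  }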

1532   bool intrinsify_fill(IdealLoopTree* lpt);
1533   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1534                        Node*& shift, Node*& offset);
1535 
1536 private:
1537   // Return a type based on condition control flow
1538   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1539   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, nullptr); }
1540  // Helpers for filtered type
1541   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1542 
1543   // Helper functions
1544   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1545   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1546   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1547   bool split_up( Node *n, Node *blk1, Node *blk2 );
1548 
1549   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1550   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1551   void try_move_store_after_loop(Node* n);
1552   void move_flat_array_check_out_of_loop(Node* n);
1553   bool identical_backtoback_ifs(Node *n);
1554   bool flat_array_element_type_check(Node *n);
1555   bool can_split_if(Node *n_ctrl);
1556   bool cannot_split_division(const Node* n, const Node* region) const;
1557   static bool is_divisor_counted_loop_phi(const Node* divisor, const Node* loop);
1558   bool loop_phi_backedge_type_contains_zero(const Node* phi_divisor, const Type* zero) const;
1559 
1560   // Determine if a method is too big for a/another round of split-if, based on
1561   // a magic (approximate) ratio derived from the equally magic constant 35000,
1562   // previously used for this purpose (but without relating to the node limit).
1563   bool must_throttle_split_if() {
1564     uint threshold = C->max_node_limit() * 2 / 5;
1565     return C->live_nodes() > threshold;
1566   }
1567 
1568   // A simplistic node request tracking mechanism, where
1569   //   = UINT_MAX   Request not valid or made final.
1570   //   < UINT_MAX   Nodes currently requested (estimate).
1571   uint _nodes_required;
1572 
1573   enum { REQUIRE_MIN = 70 };
1574 
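As a rough worked number for the split-if throttle above: assuming the usual default MaxNodeLimit of 80000, must_throttle_split_if() trips at 80000 * 2 / 5 = 32000 live nodes, which is indeed in the same ballpark as the historical 35000 constant the comment refers to.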