
src/hotspot/share/opto/loopnode.hpp


  60 protected:
  61   uint _loop_flags;
  62   // Names for flag bitfields
  63   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  64          MainHasNoPreLoop    = 1<<2,
  65          HasExactTripCount   = 1<<3,
  66          InnerLoop           = 1<<4,
  67          PartialPeelLoop     = 1<<5,
  68          PartialPeelFailed   = 1<<6,
  69          HasReductions       = 1<<7,
  70          WasSlpAnalyzed      = 1<<8,
  71          PassedSlpAnalysis   = 1<<9,
  72          DoUnrollOnly        = 1<<10,
  73          VectorizedLoop      = 1<<11,
  74          HasAtomicPostLoop   = 1<<12,
  75          HasRangeChecks      = 1<<13,
  76          IsMultiversioned    = 1<<14,
  77          StripMined          = 1<<15,
  78          SubwordLoop         = 1<<16,
  79          ProfileTripFailed   = 1<<17,
  80          LoopNestInnerLoop   = 1<<18,
  81          LoopNestLongOuterLoop = 1<<19};


  82   char _unswitch_count;
  83   enum { _unswitch_max=3 };
  84   char _postloop_flags;
  85   enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
  86 
  87   // Expected trip count from profile data
  88   float _profile_trip_cnt;
  89 
  90 public:
  91   // Names for edge indices
  92   enum { Self=0, EntryControl, LoopBackControl };
  93 
  94   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  95   void set_inner_loop() { _loop_flags |= InnerLoop; }
  96 
  97   bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
  98   bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
  99   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
 100   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
 101   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
 102   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
 103   bool is_strip_mined() const { return _loop_flags & StripMined; }
 104   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 105   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 106   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 107   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }

 108 
 109   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 110   void mark_has_reductions() { _loop_flags |= HasReductions; }
 111   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 112   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 113   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 114   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 115   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 116   void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
 117   void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; }
 118   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
 119   void mark_strip_mined() { _loop_flags |= StripMined; }
 120   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 121   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 122   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 123   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 124   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }

 125 
 126   int unswitch_max() { return _unswitch_max; }
 127   int unswitch_count() { return _unswitch_count; }
 128 
 129   int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
 130   void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
 131   int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
 132   void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
 133 
 134   void set_unswitch_count(int val) {
 135     assert(val <= unswitch_max(), "too many unswitches");
 136     _unswitch_count = val;
 137   }
 138 
 139   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 140   float profile_trip_cnt()             { return _profile_trip_cnt; }
 141 
 142   LoopNode(Node *entry, Node *backedge)
 143     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 144       _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {

1364     return !has_node(n) || n->is_unreachable(_igvn);
1365   }
1366 
1367   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1368   int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1369 
1370   // Check to see if do_range_check(...) cleaned the main loop of range-checks
1371   void has_range_checks(IdealLoopTree *loop);
1372 
1373   // Process post loops which have range checks and try to build a multi-version
1374   // guard to safely determine if we can execute the post loop which was RCE'd.
1375   bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1376 
1377   // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1378   void poison_rce_post_loop(IdealLoopTree *rce_loop);
1379 
1380   // Create a slow version of the loop by cloning the loop
1381   // and inserting an if that selects between the fast and slow versions.
1382   // Return the inserted if.
1383   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1384                                         Node_List &old_new,
1385                                         IfNode* unswitch_iff,
1386                                         CloneLoopMode mode);
1387 
1388   // Clone a loop and return the clone head (clone_loop_head).
1389   // Added nodes include int(1), int(0) (disconnected), If, IfTrue, IfFalse.
1390   // This routine was created for use in CountedLoopReserveKit.
1391   //
1392   //    int(1) -> If -> IfTrue -> original_loop_head
1393   //              |
1394   //              V
1395   //           IfFalse -> clone_loop_head (the function's return value)
1396   //
1397   LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1398   // Clone loop with an invariant test (that does not exit) and
1399   // insert a clone of the test that selects which version to
1400   // execute.
1401   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
1402 
1403   // Find candidate "if" for unswitching
1404   IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
1405 
1406   // Range Check Elimination uses this function!
1407   // Constrain the main loop iterations so the affine function:
1408   //    low_limit <= scale_con * I + offset  <  upper_limit
1409   // always holds true.  That is, either increase the number of iterations in
1410   // the pre-loop or the post-loop until the condition holds true in the main
1411   // loop.  scale_con, offset and the limits are all loop invariant.
1412   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1413   // Helper function for add_constraint().
1414   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
1415 
1416   // Partially peel loop up through last_peel node.
1417   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1418 
1419   // Create a scheduled list of nodes control dependent on ctrl set.
1420   void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1421   // Has a use in the vector set
1422   bool has_use_in_set( Node* n, VectorSet& vset );
1423   // Has a use internal to the vector set (i.e. not in a phi at the loop head)
1424   bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );

1506   bool intrinsify_fill(IdealLoopTree* lpt);
1507   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1508                        Node*& shift, Node*& offset);
1509 
1510 private:
1511   // Return a type based on condition control flow
1512   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1513   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
1514   // Helpers for filtered type
1515   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1516 
1517   // Helper functions
1518   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1519   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1520   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1521   bool split_up( Node *n, Node *blk1, Node *blk2 );
1522   void sink_use( Node *use, Node *post_loop );
1523   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1524   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1525   void try_move_store_after_loop(Node* n);

1526   bool identical_backtoback_ifs(Node *n);

1527   bool can_split_if(Node *n_ctrl);
1528 
1529   // Determine if a method is too big for a/another round of split-if, based on
1530   // a magic (approximate) ratio derived from the equally magic constant 35000,
1531   // previously used for this purpose (but without relating to the node limit).
1532   bool must_throttle_split_if() {
1533     uint threshold = C->max_node_limit() * 2 / 5;
1534     return C->live_nodes() > threshold;
1535   }
1536 
1537   // A simplistic node request tracking mechanism, where
1538   //   = UINT_MAX   Request not valid or made final.
1539   //   < UINT_MAX   Nodes currently requested (estimate).
1540   uint _nodes_required;
1541 
1542   enum { REQUIRE_MIN = 70 };
1543 
1544   uint nodes_required() const { return _nodes_required; }
1545 
1546   // Given the _currently_  available number of nodes, check  whether there is

  60 protected:
  61   uint _loop_flags;
  62   // Names for flag bitfields
  63   enum { Normal=0, Pre=1, Main=2, Post=3, PreMainPostFlagsMask=3,
  64          MainHasNoPreLoop    = 1<<2,
  65          HasExactTripCount   = 1<<3,
  66          InnerLoop           = 1<<4,
  67          PartialPeelLoop     = 1<<5,
  68          PartialPeelFailed   = 1<<6,
  69          HasReductions       = 1<<7,
  70          WasSlpAnalyzed      = 1<<8,
  71          PassedSlpAnalysis   = 1<<9,
  72          DoUnrollOnly        = 1<<10,
  73          VectorizedLoop      = 1<<11,
  74          HasAtomicPostLoop   = 1<<12,
  75          HasRangeChecks      = 1<<13,
  76          IsMultiversioned    = 1<<14,
  77          StripMined          = 1<<15,
  78          SubwordLoop         = 1<<16,
  79          ProfileTripFailed   = 1<<17,
  80          LoopNestInnerLoop   = 1<<18,
  81          LoopNestLongOuterLoop = 1<<19,
  82          FlattenedArrays     = 1<<20};
  83 
  84   char _unswitch_count;
  85   enum { _unswitch_max=3 };
  86   char _postloop_flags;
  87   enum { LoopNotRCEChecked = 0, LoopRCEChecked = 1, RCEPostLoop = 2 };
  88 
  89   // Expected trip count from profile data
  90   float _profile_trip_cnt;
  91 
  92 public:
  93   // Names for edge indices
  94   enum { Self=0, EntryControl, LoopBackControl };
  95 
  96   bool is_inner_loop() const { return _loop_flags & InnerLoop; }
  97   void set_inner_loop() { _loop_flags |= InnerLoop; }
  98 
  99   bool range_checks_present() const { return _loop_flags & HasRangeChecks; }
 100   bool is_multiversioned() const { return _loop_flags & IsMultiversioned; }
 101   bool is_vectorized_loop() const { return _loop_flags & VectorizedLoop; }
 102   bool is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
 103   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
 104   bool partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
 105   bool is_strip_mined() const { return _loop_flags & StripMined; }
 106   bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
 107   bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
 108   bool is_loop_nest_inner_loop() const { return _loop_flags & LoopNestInnerLoop; }
 109   bool is_loop_nest_outer_loop() const { return _loop_flags & LoopNestLongOuterLoop; }
 110   bool is_flattened_arrays() const { return _loop_flags & FlattenedArrays; }
 111 
 112   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
 113   void mark_has_reductions() { _loop_flags |= HasReductions; }
 114   void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
 115   void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
 116   void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
 117   void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
 118   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
 119   void mark_has_range_checks() { _loop_flags |= HasRangeChecks; }
 120   void clear_has_range_checks() { _loop_flags &= ~HasRangeChecks; }
 121   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
 122   void mark_strip_mined() { _loop_flags |= StripMined; }
 123   void clear_strip_mined() { _loop_flags &= ~StripMined; }
 124   void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
 125   void mark_subword_loop() { _loop_flags |= SubwordLoop; }
 126   void mark_loop_nest_inner_loop() { _loop_flags |= LoopNestInnerLoop; }
 127   void mark_loop_nest_outer_loop() { _loop_flags |= LoopNestLongOuterLoop; }
 128   void mark_flattened_arrays() { _loop_flags |= FlattenedArrays; }
 129 
 130   int unswitch_max() { return _unswitch_max; }
 131   int unswitch_count() { return _unswitch_count; }
 132 
 133   int has_been_range_checked() const { return _postloop_flags & LoopRCEChecked; }
 134   void set_has_been_range_checked() { _postloop_flags |= LoopRCEChecked; }
 135   int is_rce_post_loop() const { return _postloop_flags & RCEPostLoop; }
 136   void set_is_rce_post_loop() { _postloop_flags |= RCEPostLoop; }
 137 
 138   void set_unswitch_count(int val) {
 139     assert(val <= unswitch_max(), "too many unswitches");
 140     _unswitch_count = val;
 141   }
 142 
 143   void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
 144   float profile_trip_cnt()             { return _profile_trip_cnt; }
 145 
 146   LoopNode(Node *entry, Node *backedge)
 147     : RegionNode(3), _loop_flags(0), _unswitch_count(0),
 148       _postloop_flags(0), _profile_trip_cnt(COUNT_UNKNOWN)  {
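
// Illustrative sketch (not part of the header): how the _loop_flags bitfield
// above is meant to be used. The two low bits encode the Pre/Main/Post kind,
// and the bits above them are independent boolean properties; the names refer
// to the enum shown above.
static bool loop_flags_sketch() {
  uint flags = 0;
  flags |= Main;                           // kind bits, selected by PreMainPostFlagsMask
  flags |= StripMined | FlattenedArrays;   // independent property bits (1<<15, 1<<20)
  uint kind = flags & PreMainPostFlagsMask;          // -> Main (2)
  flags &= ~StripMined;                              // what clear_strip_mined() does
  return kind == Main && (flags & StripMined) == 0;  // -> true
}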

1368     return !has_node(n) || n->is_unreachable(_igvn);
1369   }
1370 
1371   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
1372   int do_range_check( IdealLoopTree *loop, Node_List &old_new );
1373 
1374   // Check to see if do_range_check(...) cleaned the main loop of range-checks
1375   void has_range_checks(IdealLoopTree *loop);
1376 
1377   // Process post loops which have range checks and try to build a multi-version
1378   // guard to safely determine if we can execute the post loop which was RCE'd.
1379   bool multi_version_post_loops(IdealLoopTree *rce_loop, IdealLoopTree *legacy_loop);
1380 
1381   // Cause the RCE'd post loop to be optimized away; this happens if we cannot complete multiversioning
1382   void poison_rce_post_loop(IdealLoopTree *rce_loop);
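
// Hedged, source-level sketch of the multi-versioning idea (illustrative only;
// the real transformation is on the ideal graph and the guard condition here is
// hypothetical). The guard selects the RCE'd, check-free post loop when its
// range-check assumptions hold and falls back to the legacy post loop otherwise;
// poison_rce_post_loop() is the bail-out that lets igvn remove the RCE'd copy
// when no guard can be built.
static void post_loop_multiversion_sketch(int* a, int len, int from, int to) {
  if (from >= 0 && to <= len) {            // hypothetical multi-version guard
    for (int i = from; i < to; i++) {
      a[i] = 0;                            // RCE'd post loop: no per-iteration checks
    }
  } else {
    for (int i = from; i < to; i++) {
      if (i >= 0 && i < len) a[i] = 0;     // legacy post loop: checks still present
    }
  }
}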
1383 
1384   // Create a slow version of the loop by cloning the loop
1385   // and inserting an if that selects between the fast and slow versions.
1386   // Return the inserted if.
1387   IfNode* create_slow_version_of_loop(IdealLoopTree *loop,
1388                                       Node_List &old_new,
1389                                       Node_List &unswitch_iffs,
1390                                       CloneLoopMode mode);
1391 
1392   // Clone a loop and return the clone head (clone_loop_head).
1393   // Added nodes include int(1), int(0) (disconnected), If, IfTrue, IfFalse.
1394   // This routine was created for use in CountedLoopReserveKit.
1395   //
1396   //    int(1) -> If -> IfTrue -> original_loop_head
1397   //              |
1398   //              V
1399   //           IfFalse -> clone_loop_head (the function's return value)
1400   //
1401   LoopNode* create_reserve_version_of_loop(IdealLoopTree *loop, CountedLoopReserveKit* lk);
1402   // Clone loop with an invariant test (that does not exit) and
1403   // insert a clone of the test that selects which version to
1404   // execute.
1405   void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
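
// Hedged, source-level picture of what do_unswitching() achieves (illustrative
// only; the pass operates on the ideal graph). An invariant, non-exiting test
// inside the loop becomes a test outside two loop copies, in each of which the
// test has constant-folded away:
static void unswitch_sketch(int* a, const int* b, int n, bool flag) {
  // before: for (int i = 0; i < n; i++) { if (flag) a[i] = b[i]; else a[i] = 0; }
  if (flag) {
    for (int i = 0; i < n; i++) a[i] = b[i];  // 'flag' known true here
  } else {
    for (int i = 0; i < n; i++) a[i] = 0;     // 'flag' known false here
  }
}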
1406 
1407   // Find candidate "if" for unswitching
1408   IfNode* find_unswitching_candidate(const IdealLoopTree *loop, Node_List& unswitch_iffs) const;
1409 
1410   // Range Check Elimination uses this function!
1411   // Constrain the main loop iterations so the affine function:
1412   //    low_limit <= scale_con * I + offset  <  upper_limit
1413   // always holds true.  That is, either increase the number of iterations in
1414   // the pre-loop or the post-loop until the condition holds true in the main
1415   // loop.  scale_con, offset and the limits are all loop invariant.
1416   void add_constraint(jlong stride_con, jlong scale_con, Node* offset, Node* low_limit, Node* upper_limit, Node* pre_ctrl, Node** pre_limit, Node** main_limit);
1417   // Helper function for add_constraint().
1418   Node* adjust_limit(bool reduce, Node* scale, Node* offset, Node* rc_limit, Node* old_limit, Node* pre_ctrl, bool round);
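
// Worked example for add_constraint() (illustrative numbers, not from the
// source). To enforce 0 <= 1*i + (-2) < len in the main loop with
// stride_con = 1: the lower bound fails while i < 2, so the pre-loop's limit
// is raised to 2; the upper bound fails once i >= len + 2, so the main loop's
// limit is lowered and the post-loop absorbs the remaining iterations:
static void rce_limits_sketch(int init, int old_limit, int len,
                              int* pre_limit, int* main_limit) {
  *pre_limit  = (init > 2) ? init : 2;                         // pre-loop runs [init, 2) with checks
  *main_limit = (old_limit < len + 2) ? old_limit : len + 2;   // main loop is check-free below this
}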
1419 
1420   // Partially peel loop up through last_peel node.
1421   bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
1422 
1423   // Create a scheduled list of nodes control dependent on ctrl set.
1424   void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
1425   // Has a use in the vector set
1426   bool has_use_in_set( Node* n, VectorSet& vset );
1427   // Has a use internal to the vector set (i.e. not in a phi at the loop head)
1428   bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );

1510   bool intrinsify_fill(IdealLoopTree* lpt);
1511   bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
1512                        Node*& shift, Node*& offset);
1513 
1514 private:
1515   // Return a type based on condition control flow
1516   const TypeInt* filtered_type( Node *n, Node* n_ctrl);
1517   const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
1518   // Helpers for filtered type
1519   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
1520 
1521   // Helper functions
1522   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
1523   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
1524   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
1525   bool split_up( Node *n, Node *blk1, Node *blk2 );
1526   void sink_use( Node *use, Node *post_loop );
1527   Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
1528   Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
1529   void try_move_store_after_loop(Node* n);
1530   void move_flat_array_check_out_of_loop(Node* n);
1531   bool identical_backtoback_ifs(Node *n);
1532   bool flatten_array_element_type_check(Node *n);
1533   bool can_split_if(Node *n_ctrl);
1534 
1535   // Determine if a method is too big for a/another round of split-if, based on
1536   // a magic (approximate) ratio derived from the equally magic constant 35000,
1537   // previously used for this purpose (but without relating to the node limit).
1538   bool must_throttle_split_if() {
1539     uint threshold = C->max_node_limit() * 2 / 5;
1540     return C->live_nodes() > threshold;
1541   }
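
// Worked arithmetic for the throttle above, assuming a MaxNodeLimit of 80000
// (an assumption; check c2_globals.hpp for the default in your build):
static bool must_throttle_sketch(uint live_nodes) {
  uint max_node_limit = 80000;              // assumed default node limit
  uint threshold = max_node_limit * 2 / 5;  // = 32000, near the historical 35000
  return live_nodes > threshold;            // e.g. 31000 -> false, 33000 -> true
}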
1542 
1543   // A simplistic node request tracking mechanism, where
1544   //   = UINT_MAX   Request not valid or made final.
1545   //   < UINT_MAX   Nodes currently requested (estimate).
1546   uint _nodes_required;
1547 
1548   enum { REQUIRE_MIN = 70 };
1549 
1550   uint nodes_required() const { return _nodes_required; }
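
// Minimal sketch of the sentinel convention documented above (hypothetical
// helper name; the real bookkeeping lives elsewhere in this class). UINT_MAX
// (from <climits>) means "no valid request"; smaller values are a running
// estimate that accumulates until finalized:
static uint add_node_request(uint required, uint estimate) {
  if (required == UINT_MAX) {
    required = 0;              // first request makes the counter valid
  }
  return required + estimate;  // accumulate the current estimate
}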
1551 
1552   // Given the _currently_  available number of nodes, check  whether there is