< prev index next >

src/hotspot/share/opto/loopopts.cpp

Print this page


   1 /*
   2  * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


1178   // into LoopNodes.
1179   IdealLoopTree *n_loop = get_loop(n_ctrl);
1180   for (uint j = 1; j < n_ctrl->req(); j++) {
1181     if (get_loop(n_ctrl->in(j)) != n_loop) {
1182       return false;
1183     }
1184   }
1185 
1186   // Check for safety of the merge point.
1187   if (!merge_point_safe(n_ctrl)) {
1188     return false;
1189   }
1190 
1191   return true;
1192 }
1193 
1194 //------------------------------split_if_with_blocks_post----------------------
1195 // Do the real work in a non-recursive function.  CFG hackery wants to be
1196 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1197 // info.
1198 void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
1199 
1200   // Cloning Cmp through Phi's involves the split-if transform.
1201   // FastLock is not used by an If
1202   if (n->is_Cmp() && !n->is_FastLock()) {
1203     Node *n_ctrl = get_ctrl(n);
1204     // Determine if the Node has inputs from some local Phi.
1205     // Returns the block to clone thru.
1206     Node *n_blk = has_local_phi_input(n);
1207     if (n_blk != n_ctrl) {
1208       return;
1209     }
1210 
1211     if (!can_split_if(n_ctrl)) {
1212       return;
1213     }
1214 
1215     if (n->outcnt() != 1) {
1216       return; // Multiple bool's from 1 compare?
1217     }
1218     Node *bol = n->unique_out();
1219     assert(bol->is_Bool(), "expect a bool here");
1220     if (bol->outcnt() != 1) {
1221       return;// Multiple branches from 1 compare?
1222     }


1434             // to fold a StoreP and an AddP together (as part of an
1435             // address expression) and the AddP and StoreP have
1436             // different controls.
1437             if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1438           }
1439           _igvn.remove_dead_node(n);
1440         }
1441       }
1442     }
1443   }
1444 
1445   try_move_store_after_loop(n);
1446 
1447   // Check for Opaque2's who's loop has disappeared - who's input is in the
1448   // same loop nest as their output.  Remove 'em, they are no longer useful.
1449   if( n_op == Op_Opaque2 &&
1450       n->in(1) != NULL &&
1451       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1452     _igvn.replace_node( n, n->in(1) );
1453   }






1454 }
1455 
1456 //------------------------------split_if_with_blocks---------------------------
1457 // Check for aggressive application of 'split-if' optimization,
1458 // using basic block level info.
1459 void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack) {
1460   Node* root = C->root();
1461   visited.set(root->_idx); // first, mark root as visited
1462   // Do pre-visit work for root
1463   Node* n   = split_if_with_blocks_pre(root);
1464   uint  cnt = n->outcnt();
1465   uint  i   = 0;
1466 
1467   while (true) {
1468     // Visit all children
1469     if (i < cnt) {
1470       Node* use = n->raw_out(i);
1471       ++i;
1472       if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1473         // Now do pre-visit work for this use
1474         use = split_if_with_blocks_pre(use);
1475         nstack.push(n, i); // Save parent and next use's index.
1476         n   = use;         // Process all children of current use.
1477         cnt = use->outcnt();
1478         i   = 0;
1479       }
1480     }
1481     else {
1482       // All of n's children have been processed, complete post-processing.
1483       if (cnt != 0 && !n->is_Con()) {
1484         assert(has_node(n), "no dead nodes");
1485         split_if_with_blocks_post(n);
1486       }
1487       if (must_throttle_split_if()) {
1488         nstack.clear();
1489       }
1490       if (nstack.is_empty()) {
1491         // Finished all nodes on stack.
1492         break;
1493       }
1494       // Get saved parent node and next use's index. Visit the rest of uses.
1495       n   = nstack.node();
1496       cnt = n->outcnt();
1497       i   = nstack.index();
1498       nstack.pop();
1499     }
1500   }
1501 }
1502 
1503 
1504 //=============================================================================
1505 //


3006 //         :  |   |     |       |
3007 //         :  |   v    stmt2    |
3008 //         :  | exitB:  |       |
3009 //         :  | stmt4   v       |
3010 //         :  |       ifA orig  |
3011 //         :  |      /  \       |
3012 //         :  |     /    \      |
3013 //         :  |    v     v      |
3014 //         :  |  false  true    |
3015 //         :  |  /        \     |
3016 //         :  v  v         -----+
3017 //          RegionA
3018 //             |
3019 //             v
3020 //           exitA
3021 //
3022 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3023 
3024   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3025   if (!loop->_head->is_Loop()) {
3026     return false;
3027   }
3028   LoopNode *head = loop->_head->as_Loop();
3029 
3030   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3031     return false;
3032   }
3033 
3034   // Check for complex exit control
3035   for (uint ii = 0; ii < loop->_body.size(); ii++) {
3036     Node *n = loop->_body.at(ii);
3037     int opc = n->Opcode();
3038     if (n->is_Call()        ||
3039         opc == Op_Catch     ||
3040         opc == Op_CatchProj ||
3041         opc == Op_Jump      ||
3042         opc == Op_JumpProj) {
3043 #ifndef PRODUCT
3044       if (TracePartialPeeling) {
3045         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3046       }
3047 #endif
3048       return false;
3049     }
3050   }
3051 
3052   int dd = dom_depth(head);
3053 
3054   // Step 1: find cut point
3055 
3056   // Walk up dominators to loop head looking for first loop exit
3057   // which is executed on every path thru loop.
3058   IfNode *peel_if = NULL;
3059   IfNode *peel_if_cmpu = NULL;
3060 
3061   Node *iff = loop->tail();
3062   while (iff != head) {
3063     if (iff->is_If()) {
3064       Node *ctrl = get_ctrl(iff->in(1));
3065       if (ctrl->is_top()) return false; // Dead test on live IF.
3066       // If loop-varying exit-test, check for induction variable
3067       if (loop->is_member(get_loop(ctrl)) &&
3068           loop->is_loop_exit(iff) &&
3069           is_possible_iv_test(iff)) {
3070         Node* cmp = iff->in(1)->in(1);
3071         if (cmp->Opcode() == Op_CmpI) {
3072           peel_if = iff->as_If();
3073         } else {
3074           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3075           peel_if_cmpu = iff->as_If();
3076         }
3077       }
3078     }
3079     iff = idom(iff);
3080   }
3081 
3082   // Prefer signed compare over unsigned compare.
3083   IfNode* new_peel_if = NULL;
3084   if (peel_if == NULL) {
3085     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3086       return false;   // No peel point found
3087     }
3088     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3089     if (new_peel_if == NULL) {
3090       return false;   // No peel point found
3091     }
3092     peel_if = new_peel_if;
3093   }
3094   Node* last_peel        = stay_in_loop(peel_if, loop);
3095   Node* first_not_peeled = stay_in_loop(last_peel, loop);
3096   if (first_not_peeled == NULL || first_not_peeled == head) {
3097     return false;
3098   }
3099 
3100 #ifndef PRODUCT
3101   if (TraceLoopOpts) {


3109     Node* t = head->in(2);
3110     while (true) {
3111       wl.push(t);
3112       if (t == head) break;
3113       t = idom(t);
3114     }
3115     while (wl.size() > 0) {
3116       Node* tt = wl.pop();
3117       tt->dump();
3118       if (tt == last_peel) tty->print_cr("-- cut --");
3119     }
3120   }
3121 #endif
3122   ResourceArea *area = Thread::current()->resource_area();
3123   VectorSet peel(area);
3124   VectorSet not_peel(area);
3125   Node_List peel_list(area);
3126   Node_List worklist(area);
3127   Node_List sink_list(area);
3128 
3129   if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
3130     return false;
3131   }
3132 
3133   // Set of cfg nodes to peel are those that are executable from
3134   // the head through last_peel.
3135   assert(worklist.size() == 0, "should be empty");
3136   worklist.push(head);
3137   peel.set(head->_idx);
3138   while (worklist.size() > 0) {
3139     Node *n = worklist.pop();
3140     if (n != last_peel) {
3141       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3142         Node* use = n->fast_out(j);
3143         if (use->is_CFG() &&
3144             loop->is_member(get_loop(use)) &&
3145             !peel.test_set(use->_idx)) {
3146           worklist.push(use);
3147         }
3148       }
3149     }


   1 /*
   2  * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *


1178   // into LoopNodes.
1179   IdealLoopTree *n_loop = get_loop(n_ctrl);
1180   for (uint j = 1; j < n_ctrl->req(); j++) {
1181     if (get_loop(n_ctrl->in(j)) != n_loop) {
1182       return false;
1183     }
1184   }
1185 
1186   // Check for safety of the merge point.
1187   if (!merge_point_safe(n_ctrl)) {
1188     return false;
1189   }
1190 
1191   return true;
1192 }
1193 
1194 //------------------------------split_if_with_blocks_post----------------------
1195 // Do the real work in a non-recursive function.  CFG hackery wants to be
1196 // in the post-order, so it can dirty the I-DOM info and not use the dirtied
1197 // info.
1198 void PhaseIdealLoop::split_if_with_blocks_post(Node *n, bool last_round) {
1199 
1200   // Cloning Cmp through Phi's involves the split-if transform.
1201   // FastLock is not used by an If
1202   if (n->is_Cmp() && !n->is_FastLock() && !last_round) {
1203     Node *n_ctrl = get_ctrl(n);
1204     // Determine if the Node has inputs from some local Phi.
1205     // Returns the block to clone thru.
1206     Node *n_blk = has_local_phi_input(n);
1207     if (n_blk != n_ctrl) {
1208       return;
1209     }
1210 
1211     if (!can_split_if(n_ctrl)) {
1212       return;
1213     }
1214 
1215     if (n->outcnt() != 1) {
1216       return; // Multiple bool's from 1 compare?
1217     }
1218     Node *bol = n->unique_out();
1219     assert(bol->is_Bool(), "expect a bool here");
1220     if (bol->outcnt() != 1) {
1221       return;// Multiple branches from 1 compare?
1222     }


1434             // to fold a StoreP and an AddP together (as part of an
1435             // address expression) and the AddP and StoreP have
1436             // different controls.
1437             if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1438           }
1439           _igvn.remove_dead_node(n);
1440         }
1441       }
1442     }
1443   }
1444 
1445   try_move_store_after_loop(n);
1446 
1447   // Check for Opaque2's who's loop has disappeared - who's input is in the
1448   // same loop nest as their output.  Remove 'em, they are no longer useful.
1449   if( n_op == Op_Opaque2 &&
1450       n->in(1) != NULL &&
1451       get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1452     _igvn.replace_node( n, n->in(1) );
1453   }
1454 
1455 #if INCLUDE_ZGC
1456   if (UseZGC) {
1457     ZBarrierSetC2::loop_optimize_gc_barrier(this, n, last_round);
1458   }
1459 #endif
1460 }
1461 
1462 //------------------------------split_if_with_blocks---------------------------
1463 // Check for aggressive application of 'split-if' optimization,
1464 // using basic block level info.
1465 void PhaseIdealLoop::split_if_with_blocks(VectorSet &visited, Node_Stack &nstack, bool last_round) {
1466   Node* root = C->root();
1467   visited.set(root->_idx); // first, mark root as visited
1468   // Do pre-visit work for root
1469   Node* n   = split_if_with_blocks_pre(root);
1470   uint  cnt = n->outcnt();
1471   uint  i   = 0;
1472 
1473   while (true) {
1474     // Visit all children
1475     if (i < cnt) {
1476       Node* use = n->raw_out(i);
1477       ++i;
1478       if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1479         // Now do pre-visit work for this use
1480         use = split_if_with_blocks_pre(use);
1481         nstack.push(n, i); // Save parent and next use's index.
1482         n   = use;         // Process all children of current use.
1483         cnt = use->outcnt();
1484         i   = 0;
1485       }
1486     }
1487     else {
1488       // All of n's children have been processed, complete post-processing.
1489       if (cnt != 0 && !n->is_Con()) {
1490         assert(has_node(n), "no dead nodes");
1491         split_if_with_blocks_post(n, last_round);
1492       }
1493       if (must_throttle_split_if()) {
1494         nstack.clear();
1495       }
1496       if (nstack.is_empty()) {
1497         // Finished all nodes on stack.
1498         break;
1499       }
1500       // Get saved parent node and next use's index. Visit the rest of uses.
1501       n   = nstack.node();
1502       cnt = n->outcnt();
1503       i   = nstack.index();
1504       nstack.pop();
1505     }
1506   }
1507 }
1508 
1509 
1510 //=============================================================================
1511 //


3012 //         :  |   |     |       |
3013 //         :  |   v    stmt2    |
3014 //         :  | exitB:  |       |
3015 //         :  | stmt4   v       |
3016 //         :  |       ifA orig  |
3017 //         :  |      /  \       |
3018 //         :  |     /    \      |
3019 //         :  |    v     v      |
3020 //         :  |  false  true    |
3021 //         :  |  /        \     |
3022 //         :  v  v         -----+
3023 //          RegionA
3024 //             |
3025 //             v
3026 //           exitA
3027 //
3028 bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
3029 
3030   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
3031   if (!loop->_head->is_Loop()) {
3032     return false;  }
3033 
3034   LoopNode *head  = loop->_head->as_Loop();
3035 
3036   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
3037     return false;
3038   }
3039 
3040   // Check for complex exit control
3041   for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
3042     Node *n = loop->_body.at(ii);
3043     int opc = n->Opcode();
3044     if (n->is_Call()        ||
3045         opc == Op_Catch     ||
3046         opc == Op_CatchProj ||
3047         opc == Op_Jump      ||
3048         opc == Op_JumpProj) {
3049 #ifndef PRODUCT
3050       if (TracePartialPeeling) {
3051         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
3052       }
3053 #endif
3054       return false;
3055     }
3056   }
3057 
3058   int dd = dom_depth(head);
3059 
3060   // Step 1: find cut point
3061 
3062   // Walk up dominators to loop head looking for first loop exit
3063   // which is executed on every path thru loop.
3064   IfNode *peel_if = NULL;
3065   IfNode *peel_if_cmpu = NULL;
3066 
3067   Node *iff = loop->tail();
3068   while( iff != head ) {
3069     if( iff->is_If() ) {
3070       Node *ctrl = get_ctrl(iff->in(1));
3071       if (ctrl->is_top()) return false; // Dead test on live IF.
3072       // If loop-varying exit-test, check for induction variable
3073       if( loop->is_member(get_loop(ctrl)) &&
3074           loop->is_loop_exit(iff) &&
3075           is_possible_iv_test(iff)) {
3076         Node* cmp = iff->in(1)->in(1);
3077         if (cmp->Opcode() == Op_CmpI) {
3078           peel_if = iff->as_If();
3079         } else {
3080           assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
3081           peel_if_cmpu = iff->as_If();
3082         }
3083       }
3084     }
3085     iff = idom(iff);
3086   }

3087   // Prefer signed compare over unsigned compare.
3088   IfNode* new_peel_if = NULL;
3089   if (peel_if == NULL) {
3090     if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
3091       return false;   // No peel point found
3092     }
3093     new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
3094     if (new_peel_if == NULL) {
3095       return false;   // No peel point found
3096     }
3097     peel_if = new_peel_if;
3098   }
3099   Node* last_peel        = stay_in_loop(peel_if, loop);
3100   Node* first_not_peeled = stay_in_loop(last_peel, loop);
3101   if (first_not_peeled == NULL || first_not_peeled == head) {
3102     return false;
3103   }
3104 
3105 #ifndef PRODUCT
3106   if (TraceLoopOpts) {


3114     Node* t = head->in(2);
3115     while (true) {
3116       wl.push(t);
3117       if (t == head) break;
3118       t = idom(t);
3119     }
3120     while (wl.size() > 0) {
3121       Node* tt = wl.pop();
3122       tt->dump();
3123       if (tt == last_peel) tty->print_cr("-- cut --");
3124     }
3125   }
3126 #endif
3127   ResourceArea *area = Thread::current()->resource_area();
3128   VectorSet peel(area);
3129   VectorSet not_peel(area);
3130   Node_List peel_list(area);
3131   Node_List worklist(area);
3132   Node_List sink_list(area);
3133 
3134   if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
3135     return false;
3136   }
3137 
3138   // Set of cfg nodes to peel are those that are executable from
3139   // the head through last_peel.
3140   assert(worklist.size() == 0, "should be empty");
3141   worklist.push(head);
3142   peel.set(head->_idx);
3143   while (worklist.size() > 0) {
3144     Node *n = worklist.pop();
3145     if (n != last_peel) {
3146       for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
3147         Node* use = n->fast_out(j);
3148         if (use->is_CFG() &&
3149             loop->is_member(get_loop(use)) &&
3150             !peel.test_set(use->_idx)) {
3151           worklist.push(use);
3152         }
3153       }
3154     }


< prev index next >