< prev index next >

src/hotspot/share/opto/loopTransform.cpp

Print this page

        

*** 284,296 **** if (n1->is_Add() && n1->in(2)->is_Con()) return NULL; Node* inv1 = n1->in(inv1_idx); Node* n2 = n1->in(3 - inv1_idx); int inv2_idx = is_invariant_addition(n2, phase); if (!inv2_idx) return NULL; - - if (!phase->may_require_nodes(10, 10)) return NULL; - Node* x = n2->in(3 - inv2_idx); Node* inv2 = n2->in(inv2_idx); bool neg_x = n2->is_Sub() && inv2_idx == 1; bool neg_inv2 = n2->is_Sub() && inv2_idx == 2; --- 284,293 ----
*** 338,413 **** Node *n = _body.at(i); for (int j = 0; j < 5; j++) { Node* nn = reassociate_add_sub(n, phase); if (nn == NULL) break; n = nn; // again ! } } } //------------------------------policy_peeling--------------------------------- ! // Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling ! // is applicable if we can make a loop-invariant test (usually a null-check) ! // execute before we enter the loop. When TRUE, the estimated node budget is ! // also requested. ! bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) { ! uint estimate = estimate_peeling(phase); ! ! return estimate == 0 ? false : phase->may_require_nodes(estimate); ! } ! ! // Perform actual policy and size estimate for the loop peeling transform, and ! // return the estimated loop size if peeling is applicable, otherwise return ! // zero. No node budget is allocated. ! uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) { // If nodes are depleted, some transform has miscalculated its needs. assert(!phase->exceeding_node_budget(), "sanity"); ! // Peeling does loop cloning which can result in O(N^2) node construction. ! if (_body.size() > 255) { ! return 0; // Suppress too large body size. ! } ! // Optimistic estimate that approximates loop body complexity via data and ! // control flow fan-out (instead of using the more pessimistic: BodySize^2). ! uint estimate = est_loop_clone_sz(2); ! if (phase->exceeding_node_budget(estimate)) { ! return 0; // Too large to safely clone. } ! // Check for vectorized loops, any peeling done was already applied. if (_head->is_CountedLoop()) { CountedLoopNode* cl = _head->as_CountedLoop(); if (cl->is_unroll_only() || cl->trip_count() == 1) { ! return 0; } } ! Node* test = tail(); ! while (test != _head) { // Scan till run off top of loop ! if (test->is_If()) { // Test? Node *ctrl = phase->get_ctrl(test->in(1)); if (ctrl->is_top()) { ! return 0; // Found dead test on live IF? No peeling! } ! // Standard IF only has one input value to check for loop invariance. assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added"); // Condition is not a member of this loop? if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) { ! return estimate; // Found reason to peel! } } ! // Walk up dominators to loop _head looking for test which is executed on ! // every path through the loop. test = phase->idom(test); } ! return 0; } //------------------------------peeled_dom_test_elim--------------------------- // If we got the effect of peeling, either by actually peeling or by making // a pre-loop which must execute at least once, we can remove all --- 335,399 ---- Node *n = _body.at(i); for (int j = 0; j < 5; j++) { Node* nn = reassociate_add_sub(n, phase); if (nn == NULL) break; n = nn; // again ! }; } } //------------------------------policy_peeling--------------------------------- ! // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can ! // make some loop-invariant test (usually a null-check) happen before the loop. ! bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const { ! IdealLoopTree *loop = (IdealLoopTree*)this; // If nodes are depleted, some transform has miscalculated its needs. assert(!phase->exceeding_node_budget(), "sanity"); ! uint body_size = loop->_body.size(); ! // Peeling does loop cloning which can result in O(N^2) node construction ! if (body_size > 255) { ! return false; // Prevent overflow for large body size ! } ! uint estimate = body_size * body_size; if (phase->exceeding_node_budget(estimate)) { ! return false; // Too large to safely clone } ! // check for vectorized loops, any peeling done was already applied if (_head->is_CountedLoop()) { CountedLoopNode* cl = _head->as_CountedLoop(); if (cl->is_unroll_only() || cl->trip_count() == 1) { ! return false; } } ! Node* test = loop->tail(); ! while (test != _head) { // Scan till run off top of loop ! if (test->is_If()) { // Test? Node *ctrl = phase->get_ctrl(test->in(1)); if (ctrl->is_top()) { ! return false; // Found dead test on live IF? No peeling! } ! // Standard IF only has one input value to check for loop invariance assert(test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd || test->Opcode() == Op_RangeCheck, "Check this code when new subtype is added"); // Condition is not a member of this loop? if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) { ! // Found reason to peel! ! return phase->may_require_nodes(estimate); } } ! // Walk up dominators to loop _head looking for test which is ! // executed on every path thru loop. test = phase->idom(test); } ! return false; } //------------------------------peeled_dom_test_elim--------------------------- // If we got the effect of peeling, either by actually peeling or by making // a pre-loop which must execute at least once, we can remove all
*** 650,661 **** _igvn.hash_delete(use); use->set_req(LoopNode::LoopBackControl, C->top()); } } - // Step 4: Correct dom-depth info. Set to loop-head depth. int dd = dom_depth(head); set_idom(head, head->in(1), dd); for (uint j3 = 0; j3 < loop->_body.size(); j3++) { Node *old = loop->_body.at(j3); Node *nnn = old_new[old->_idx]; --- 636,647 ---- _igvn.hash_delete(use); use->set_req(LoopNode::LoopBackControl, C->top()); } } + // Step 4: Correct dom-depth info. Set to loop-head depth. int dd = dom_depth(head); set_idom(head, head->in(1), dd); for (uint j3 = 0; j3 < loop->_body.size(); j3++) { Node *old = loop->_body.at(j3); Node *nnn = old_new[old->_idx];
*** 669,702 **** peeled_dom_test_elim(loop,old_new); loop->record_for_igvn(); } ! // The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC, ! // where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants, ! // respectively, used to ensure that node usage estimates made are on the safe ! // side, for the most part. This is a simplified version of the loop clone ! // size calculation in est_loop_clone_sz(), defined for unroll factors larger ! // than one (>1), performing an overflow check and returning 'UINT_MAX' in ! // case of an overflow. ! static uint est_loop_unroll_sz(uint factor, uint size) { ! precond(0 < factor); ! ! uint const bc = 5; ! uint const cc = 7; ! uint const sz = size + (size + 15) / 16; ! uint estimate = factor * (sz + bc) + cc; ! ! return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX; ! } ! ! #define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop. //------------------------------policy_maximally_unroll------------------------ ! // Calculate the exact loop trip-count and return TRUE if loop can be fully, ! // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated ! // node budget is also requested. bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop(), ""); if (!cl->is_valid_counted_loop()) { return false; // Malformed counted loop --- 655,669 ---- peeled_dom_test_elim(loop,old_new); loop->record_for_igvn(); } ! #define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop //------------------------------policy_maximally_unroll------------------------ ! // Calculate exact loop trip count and return true if loop can be maximally ! // unrolled. bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop(), ""); if (!cl->is_valid_counted_loop()) { return false; // Malformed counted loop
*** 724,734 **** return false; } // Take into account that after unroll conjoined heads and tails will fold, // otherwise policy_unroll() may allow more unrolling than max unrolling. ! uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE); if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow). return false; } --- 691,701 ---- return false; } // Take into account that after unroll conjoined heads and tails will fold, // otherwise policy_unroll() may allow more unrolling than max unrolling. ! uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE); if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow). return false; }
*** 773,785 **** return phase->may_require_nodes(new_body_size); } //------------------------------policy_unroll---------------------------------- ! // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if ! // the loop is a counted loop and the loop body is small enough. When TRUE, ! // the estimated node budget is also requested. bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop() || cl->is_main_loop(), ""); --- 740,751 ---- return phase->may_require_nodes(new_body_size); } //------------------------------policy_unroll---------------------------------- ! // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if the ! // loop is a CountedLoop and the body is small enough. bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { CountedLoopNode *cl = _head->as_CountedLoop(); assert(cl->is_normal_loop() || cl->is_main_loop(), "");
*** 919,929 **** int slp_max_unroll_factor = cl->slp_max_unroll(); if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) { LoopMaxUnroll = slp_max_unroll_factor; } ! uint estimate = est_loop_clone_sz(2); if (cl->has_passed_slp()) { if (slp_max_unroll_factor >= future_unroll_cnt) { return phase->may_require_nodes(estimate); } --- 885,895 ---- int slp_max_unroll_factor = cl->slp_max_unroll(); if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) { LoopMaxUnroll = slp_max_unroll_factor; } ! uint estimate = est_loop_clone_sz(2, body_size); if (cl->has_passed_slp()) { if (slp_max_unroll_factor >= future_unroll_cnt) { return phase->may_require_nodes(estimate); }
*** 990,1013 **** bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const { return false; } //------------------------------policy_range_check----------------------------- ! // Return TRUE or FALSE if the loop should be range-check-eliminated or not. ! // When TRUE, the estimated node budget is also requested. ! // ! // We will actually perform iteration-splitting, a more powerful form of RCE. bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const { if (!RangeCheckElimination) return false; // If nodes are depleted, some transform has miscalculated its needs. assert(!phase->exceeding_node_budget(), "sanity"); CountedLoopNode *cl = _head->as_CountedLoop(); ! // If we unrolled with no intention of doing RCE and we later changed our ! // minds, we got no pre-loop. Either we need to make a new pre-loop, or we ! // have to disallow RCE. if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. Node *trip_counter = cl->phi(); // check for vectorized loops, some opts are no longer needed if (cl->is_unroll_only()) return false; --- 956,977 ---- bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const { return false; } //------------------------------policy_range_check----------------------------- ! // Return TRUE or FALSE if the loop should be range-check-eliminated. ! // Actually we do iteration-splitting, a more powerful form of RCE. bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const { if (!RangeCheckElimination) return false; // If nodes are depleted, some transform has miscalculated its needs. assert(!phase->exceeding_node_budget(), "sanity"); CountedLoopNode *cl = _head->as_CountedLoop(); ! // If we unrolled with no intention of doing RCE and we later ! // changed our minds, we got no pre-loop. Either we need to ! // make a new pre-loop, or we gotta disallow RCE. if (cl->is_main_no_pre_loop()) return false; // Disallowed for now. Node *trip_counter = cl->phi(); // check for vectorized loops, some opts are no longer needed if (cl->is_unroll_only()) return false;
*** 1050,1066 **** } if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { continue; } ! // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2) ! // projections. If BOTH are in the loop we need loop unswitching instead ! // of iteration splitting. if (is_loop_exit(iff)) { // Found valid reason to split iterations (if there is room). // NOTE: Usually a gross overestimate. ! return phase->may_require_nodes(est_loop_clone_sz(2)); } } // End of is IF } return false; --- 1014,1030 ---- } if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) { continue; } ! // Found a test like 'trip+off vs limit'. Test is an IfNode, has two ! // (2) projections. If BOTH are in the loop we need loop unswitching ! // instead of iteration splitting. if (is_loop_exit(iff)) { // Found valid reason to split iterations (if there is room). // NOTE: Usually a gross overestimate. ! return phase->may_require_nodes(est_loop_clone_sz(2, _body.size())); } } // End of is IF } return false;
*** 1555,1564 **** --- 1519,1531 ---- CountedLoopNode *cl = loop->_head->as_CountedLoop(); // only process vectorized main loops if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return; + if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) { + return; + } int slp_max_unroll_factor = cl->slp_max_unroll(); int cur_unroll = cl->unrolled_count(); if (slp_max_unroll_factor == 0) return;
*** 1566,1579 **** if (cur_unroll != slp_max_unroll_factor) return; // we only ever process this one time if (cl->has_atomic_post_loop()) return; - if (!may_require_nodes(loop->est_loop_clone_sz(2))) { - return; - } - #ifndef PRODUCT if (TraceLoopOpts) { tty->print("PostVector "); loop->dump_head(); } --- 1533,1542 ----
*** 3213,3233 **** return true; // Here we removed an empty loop } AutoNodeBudget node_budget(phase); // Non-counted loops may be peeled; exactly 1 iteration is peeled. // This removes loop-invariant tests (usually null checks). if (!_head->is_CountedLoop()) { // Non-counted loop if (PartialPeelLoop && phase->partial_peel(this, old_new)) { // Partial peel succeeded so terminate this round of loop opts return false; } ! if (policy_peeling(phase)) { // Should we peel? if (PrintOpto) { tty->print_cr("should_peel"); } ! phase->do_peeling(this, old_new); ! } else if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); } return true; } CountedLoopNode *cl = _head->as_CountedLoop(); --- 3176,3199 ---- return true; // Here we removed an empty loop } AutoNodeBudget node_budget(phase); + bool should_peel = policy_peeling(phase); + bool should_unswitch = policy_unswitching(phase); + // Non-counted loops may be peeled; exactly 1 iteration is peeled. // This removes loop-invariant tests (usually null checks). if (!_head->is_CountedLoop()) { // Non-counted loop if (PartialPeelLoop && phase->partial_peel(this, old_new)) { // Partial peel succeeded so terminate this round of loop opts return false; } ! if (should_peel) { // Should we peel? if (PrintOpto) { tty->print_cr("should_peel"); } ! phase->do_peeling(this,old_new); ! } else if (should_unswitch) { phase->do_unswitching(this, old_new); } return true; } CountedLoopNode *cl = _head->as_CountedLoop();
*** 3241,3265 **** compute_profile_trip_cnt(phase); // Before attempting fancy unrolling, RCE or alignment, see if we want // to completely unroll this loop or do loop unswitching. if (cl->is_normal_loop()) { ! if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); return true; } ! if (policy_maximally_unroll(phase)) { // Here we did some unrolling and peeling. Eventually we will // completely unroll this loop and it will no longer be a loop. phase->do_maximally_unroll(this, old_new); return true; } } - uint est_peeling = estimate_peeling(phase); - bool should_peel = 0 < est_peeling; - // Counted loops may be peeled, may need some iterations run up // front for RCE, and may want to align loop refs to a cache // line. Thus we clone a full loop up front whose trip count is // at least 1 (if peeling), but may be several more. --- 3207,3229 ---- compute_profile_trip_cnt(phase); // Before attempting fancy unrolling, RCE or alignment, see if we want // to completely unroll this loop or do loop unswitching. if (cl->is_normal_loop()) { ! if (should_unswitch) { phase->do_unswitching(this, old_new); return true; } ! bool should_maximally_unroll = policy_maximally_unroll(phase); ! if (should_maximally_unroll) { // Here we did some unrolling and peeling. Eventually we will // completely unroll this loop and it will no longer be a loop. phase->do_maximally_unroll(this, old_new); return true; } } // Counted loops may be peeled, may need some iterations run up // front for RCE, and may want to align loop refs to a cache // line. Thus we clone a full loop up front whose trip count is // at least 1 (if peeling), but may be several more.
*** 3286,3304 **** // If we have any of these conditions (RCE, alignment, unrolling) met, then // we switch to the pre-/main-/post-loop model. This model also covers // peeling. if (should_rce || should_align || should_unroll) { if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops ! uint estimate = est_loop_clone_sz(3); ! if (!phase->may_require_nodes(estimate)) { return false; } ! phase->insert_pre_post_loops(this, old_new, !may_rce_align); } ! // Adjust the pre- and main-loop limits to let the pre and post loops run ! // with full checks, but the main-loop with no checks. Remove said checks ! // from the main body. if (should_rce) { if (phase->do_range_check(this, old_new) != 0) { cl->mark_has_range_checks(); } } else if (PostLoopMultiversioning) { --- 3250,3267 ---- // If we have any of these conditions (RCE, alignment, unrolling) met, then // we switch to the pre-/main-/post-loop model. This model also covers // peeling. if (should_rce || should_align || should_unroll) { if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops ! if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) { return false; } ! phase->insert_pre_post_loops(this,old_new, !may_rce_align); } ! // Adjust the pre- and main-loop limits to let the pre and post loops run ! // with full checks, but the main-loop with no checks. Remove said ! // checks from the main body. if (should_rce) { if (phase->do_range_check(this, old_new) != 0) { cl->mark_has_range_checks(); } } else if (PostLoopMultiversioning) {
*** 3328,3340 **** if (should_align) { Unimplemented(); } } else { // Else we have an unchanged counted loop if (should_peel) { // Might want to peel but do nothing else ! if (phase->may_require_nodes(est_peeling)) { ! phase->do_peeling(this, old_new); ! } } } return true; } --- 3291,3301 ---- if (should_align) { Unimplemented(); } } else { // Else we have an unchanged counted loop if (should_peel) { // Might want to peel but do nothing else ! phase->do_peeling(this,old_new); } } return true; }
< prev index next >