
src/hotspot/share/opto/loopTransform.cpp

 269 // (x + inv2) - inv1  =>  (-inv1 + inv2) + x
 270 // (x - inv2) + inv1  =>  ( inv1 - inv2) + x
 271 // (x - inv2) - inv1  =>  (-inv1 - inv2) + x
 272 // inv1 + (inv2 - x)  =>  ( inv1 + inv2) - x
 273 // inv1 - (x - inv2)  =>  ( inv1 + inv2) - x
 274 // (inv2 - x) + inv1  =>  ( inv1 + inv2) - x
 275 // (inv2 - x) - inv1  =>  (-inv1 + inv2) - x
 276 // inv1 - (x + inv2)  =>  ( inv1 - inv2) - x
 277 //
 278 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
 279   if ((!n1->is_Add() && !n1->is_Sub()) || n1->outcnt() == 0) return NULL;
 280   if (is_invariant(n1)) return NULL;
 281   int inv1_idx = is_invariant_addition(n1, phase);
 282   if (!inv1_idx) return NULL;
 283   // Don't mess with adds of constants (igvn moves them to the expression tree root).
 284   if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
 285   Node* inv1 = n1->in(inv1_idx);
 286   Node* n2 = n1->in(3 - inv1_idx);
 287   int inv2_idx = is_invariant_addition(n2, phase);
 288   if (!inv2_idx) return NULL;
 289 
 290   if (!phase->may_require_nodes(10, 10)) return NULL;
 291 
 292   Node* x    = n2->in(3 - inv2_idx);
 293   Node* inv2 = n2->in(inv2_idx);
 294 
 295   bool neg_x    = n2->is_Sub() && inv2_idx == 1;
 296   bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
 297   bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
 298   if (n1->is_Sub() && inv1_idx == 1) {
 299     neg_x    = !neg_x;
 300     neg_inv2 = !neg_inv2;
 301   }
 302   Node* inv1_c = phase->get_ctrl(inv1);
 303   Node* inv2_c = phase->get_ctrl(inv2);
 304   Node* n_inv1;
 305   if (neg_inv1) {
 306     Node *zero = phase->_igvn.intcon(0);
 307     phase->set_ctrl(zero, phase->C->root());
 308     n_inv1 = new SubINode(zero, inv1);
 309     phase->register_new_node(n_inv1, inv1_c);
 310   } else {
 311     n_inv1 = inv1;


 323     addx = new SubINode(inv, x);
 324   } else {
 325     addx = new AddINode(x, inv);
 326   }
 327   phase->register_new_node(addx, phase->get_ctrl(x));
 328   phase->_igvn.replace_node(n1, addx);
 329   assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
 330   _body.yank(n1);
 331   return addx;
 332 }
 333 
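As a source-level illustration of the first identity above (a sketch with made-up names, not actual HotSpot IR): reassociation groups the two invariants together so the invariant subexpression can be hoisted out of the loop.

    // Before: inv1 and inv2 are loop-invariant, but (x + inv2) - inv1
    // interleaves them with the loop-varying x, so the whole expression
    // is recomputed on every iteration.
    for (int i = 0; i < n; i++) {
      r[i] = (x[i] + inv2) - inv1;
    }

    // After rewriting to (-inv1 + inv2) + x, the invariant part is a single
    // subexpression that later passes can hoist above the loop:
    int t = -inv1 + inv2;
    for (int i = 0; i < n; i++) {
      r[i] = t + x[i];
    }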
 334 //---------------------reassociate_invariants-----------------------------
 335 // Reassociate invariant expressions:
 336 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
 337   for (int i = _body.size() - 1; i >= 0; i--) {
 338     Node *n = _body.at(i);
 339     for (int j = 0; j < 5; j++) {
 340       Node* nn = reassociate_add_sub(n, phase);
 341       if (nn == NULL) break;
 342       n = nn; // again
 343     }
 344   }
 345 }
 346 
 347 //------------------------------policy_peeling---------------------------------
 348 // Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
 349 // is applicable if we can make a loop-invariant test (usually a null-check)
 350 // execute before we enter the loop. When TRUE, the estimated node budget is
 351 // also requested.
 352 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
 353   uint estimate = estimate_peeling(phase);
 354 
 355   return estimate == 0 ? false : phase->may_require_nodes(estimate);
 356 }
 357 
 358 // Perform actual policy and size estimate for the loop peeling transform, and
 359 // return the estimated loop size if peeling is applicable, otherwise return
 360 // zero. No node budget is allocated.
 361 uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
 362 
 363   // If nodes are depleted, some transform has miscalculated its needs.
 364   assert(!phase->exceeding_node_budget(), "sanity");
 365 
 366   // Peeling does loop cloning which can result in O(N^2) node construction.
 367   if (_body.size() > 255) {
 368     return 0;   // Suppress too large body size.
 369   }
 370   // Optimistic estimate that approximates loop body complexity via data and
 371   // control flow fan-out (instead of using the more pessimistic: BodySize^2).
 372   uint estimate = est_loop_clone_sz(2);
 373 
 374   if (phase->exceeding_node_budget(estimate)) {
 375     return 0;   // Too large to safely clone.
 376   }
 377 
 378   // Check for vectorized loops; any peeling done was already applied.
 379   if (_head->is_CountedLoop()) {
 380     CountedLoopNode* cl = _head->as_CountedLoop();
 381     if (cl->is_unroll_only() || cl->trip_count() == 1) {
 382       return 0;
 383     }
 384   }
 385 
 386   Node* test = tail();
 387 
 388   while (test != _head) {   // Scan till run off top of loop
 389     if (test->is_If()) {    // Test?
 390       Node *ctrl = phase->get_ctrl(test->in(1));
 391       if (ctrl->is_top()) {
 392         return 0;           // Found dead test on live IF?  No peeling!
 393       }
 394       // Standard IF only has one input value to check for loop invariance.
 395       assert(test->Opcode() == Op_If ||
 396              test->Opcode() == Op_CountedLoopEnd ||
 397              test->Opcode() == Op_RangeCheck,
 398              "Check this code when new subtype is added");
 399       // Condition is not a member of this loop?
 400       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
 401         return estimate;    // Found reason to peel!

 402       }
 403     }
 404     // Walk up dominators to loop _head looking for test which is executed on
 405     // every path through the loop.
 406     test = phase->idom(test);
 407   }
 408   return 0;
 409 }
 410 
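To make the condition concrete, here is a hypothetical source-level loop with the kind of loop-invariant, loop-exiting test the scan above looks for. Peeling one iteration runs the test once up front; the copy remaining in the loop is then dominated by it and removed by peeled_dom_test_elim() below.

    // Before peeling: p is loop-invariant, yet its test runs every iteration.
    while (more(i)) {
      if (p == NULL) break;     // loop-invariant exit test
      consume(p, i++);
    }

    // After peeling one iteration, the hot body no longer needs the test:
    if (more(i)) {
      if (p != NULL) {
        consume(p, i++);
        while (more(i)) {
          consume(p, i++);      // invariant test eliminated here
        }
      }
    }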
 411 //------------------------------peeled_dom_test_elim---------------------------
 412 // If we got the effect of peeling, either by actually peeling or by making
 413 // a pre-loop which must execute at least once, we can remove all
 414 // loop-invariant dominated tests in the main body.
 415 void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree *loop, Node_List &old_new) {
 416   bool progress = true;
 417   while (progress) {
 418     progress = false;           // Reset for next iteration
 419     Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
 420     Node *test = prev->in(0);
 421     while (test != loop->_head) { // Scan till run off top of loop
 422 
 423       int p_op = prev->Opcode();
 424       if ((p_op == Op_IfFalse || p_op == Op_IfTrue) &&
 425           test->is_If() &&      // Test?
 426           !test->in(1)->is_Con() && // And not already obvious?
 427           // Condition is not a member of this loop?
 428           !loop->is_member(get_loop(get_ctrl(test->in(1))))){


 635         new_exit_value = old->in(LoopNode::LoopBackControl);
 636       _igvn.hash_delete(old);
 637       old->set_req(LoopNode::EntryControl, new_exit_value);
 638     }
 639   }
 640 
 641 
 642   // Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
 643   //         extra backedge user.
 644   Node* new_head = old_new[head->_idx];
 645   _igvn.hash_delete(new_head);
 646   new_head->set_req(LoopNode::LoopBackControl, C->top());
 647   for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
 648     Node* use = new_head->fast_out(j2);
 649     if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
 650       _igvn.hash_delete(use);
 651       use->set_req(LoopNode::LoopBackControl, C->top());
 652     }
 653   }
 654 
 655   // Step 4: Correct dom-depth info.  Set to loop-head depth.
 656 

 657   int dd = dom_depth(head);
 658   set_idom(head, head->in(1), dd);
 659   for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
 660     Node *old = loop->_body.at(j3);
 661     Node *nnn = old_new[old->_idx];
 662     if (!has_ctrl(nnn)) {
 663       set_idom(nnn, idom(nnn), dd-1);
 664     }
 665   }
 666 
 667   // Now force out all loop-invariant dominating tests.  The optimizer
 668   // finds some, but we _know_ they are all useless.
 669   peeled_dom_test_elim(loop,old_new);
 670 
 671   loop->record_for_igvn();
 672 }
 673 
 674 // The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
 675 // where BC  and CC are  (totally) ad-hoc/magic "body" and  "clone" constants,
 676 // respectively, used to ensure that node usage estimates made are on the safe
 677 // side, for the  most part.  This is  a simplified version of  the loop clone
 678 // size calculation in est_loop_clone_sz(),  defined for unroll factors larger
 679 // than one  (>1), performing  an overflow check  and returning  'UINT_MAX' in
 680 // case of an overflow.
 681 static uint est_loop_unroll_sz(uint factor, uint size) {
 682   precond(0 < factor);
 683 
 684   uint const bc = 5;
 685   uint const cc = 7;
 686   uint const sz = size + (size + 15) / 16;
 687   uint estimate = factor * (sz + bc) + cc;
 688 
 689   return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
 690 }
 691 
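A worked example with made-up inputs: for factor = 4 and size = 32, sz = 32 + (32 + 15) / 16 = 34 (the roughly 106% of BodySize mentioned above), and estimate = 4 * (34 + 5) + 7 = 163. The return expression detects overflow of the multiplication by dividing the estimate back out and comparing:

    #include <cassert>

    int main() {
      typedef unsigned int uint;
      uint factor = 4, size = 32;
      uint sz = size + (size + 15) / 16;      // 32 + 2 = 34, ~106% of size
      uint estimate = factor * (sz + 5) + 7;  // 4 * (34 + 5) + 7 = 163
      assert((estimate - 7) / factor == sz + 5 && "no overflow for these inputs");
      return 0;
    }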
 692 #define EMPTY_LOOP_SIZE 7   // Number of nodes in an empty loop.
 693 
 694 //------------------------------policy_maximally_unroll------------------------
 695 // Calculate the exact  loop trip-count and return TRUE if loop can be fully,
 696 // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
 697 // node budget is also requested.
 698 bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
 699   CountedLoopNode *cl = _head->as_CountedLoop();
 700   assert(cl->is_normal_loop(), "");
 701   if (!cl->is_valid_counted_loop()) {
 702     return false; // Malformed counted loop
 703   }
 704   if (!cl->has_exact_trip_count()) {
 705     // Trip count is not exact.
 706     return false;
 707   }
 708 
 709   uint trip_count = cl->trip_count();
 710   // Note, max_juint is used to indicate unknown trip count.
 711   assert(trip_count > 1, "one iteration loop should be optimized out already");
 712   assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
 713 
 714   // If nodes are depleted, some transform has miscalculated its needs.
 715   assert(!phase->exceeding_node_budget(), "sanity");
 716 
 717   // Real policy: if we maximally unroll, does it get too big?
 718   // Allow the unrolled mess to get larger than standard loop
 719   // size.  After all, it will no longer be a loop.
 720   uint body_size    = _body.size();
 721   uint unroll_limit = (uint)LoopUnrollLimit * 4;
 722   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
 723   if (trip_count > unroll_limit || body_size > unroll_limit) {
 724     return false;
 725   }
 726 
 727   // Take into account that after unroll conjoined heads and tails will fold,
 728   // otherwise policy_unroll() may allow more unrolling than max unrolling.
 729   uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
 730 
 731   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
 732     return false;
 733   }
 734 
 735   // Fully unroll a loop with few iterations, regardless of the conditions below,
 736   // since subsequent loop optimizations will split such a loop anyway (pre-main-post).
 737   if (trip_count <= 3) {
 738     return phase->may_require_nodes(new_body_size);
 739   }
 740 
 741   if (new_body_size > unroll_limit ||
 742       // Unrolling can result in a large amount of node construction
 743       phase->exceeding_node_budget(new_body_size)) {
 744     return false;
 745   }
 746 
 747   // Do not unroll a loop with String intrinsics code.
 748   // String intrinsics are large and have loops.
 749   for (uint k = 0; k < _body.size(); k++) {


 758       case Op_HasNegatives: {
 759         return false;
 760       }
 761 #if INCLUDE_RTM_OPT
 762       case Op_FastLock:
 763       case Op_FastUnlock: {
 764         // Don't unroll RTM locking code because it is large.
 765         if (UseRTMLocking) {
 766           return false;
 767         }
 768       }
 769 #endif
 770     } // switch
 771   }
 772 
 773   return phase->may_require_nodes(new_body_size);
 774 }
 775 
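For intuition, maximal unrolling turns the loop into straight-line code, which is why the checks above size the fully expanded body (trip_count copies, minus the folded heads and tails) against the budget. A source-level sketch:

    // Exact trip count of 3 (<= 3, so unrolled regardless of the later checks):
    for (int i = 0; i < 3; i++) {
      sum += a[i];
    }

    // After do_maximally_unroll() no loop remains:
    sum += a[0];
    sum += a[1];
    sum += a[2];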
 776 
 777 //------------------------------policy_unroll----------------------------------
 778 // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
 779 // the loop is  a counted loop and  the loop body is small  enough. When TRUE,
 780 // the estimated node budget is also requested.
 781 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 782 
 783   CountedLoopNode *cl = _head->as_CountedLoop();
 784   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 785 
 786   if (!cl->is_valid_counted_loop()) {
 787     return false; // Malformed counted loop
 788   }
 789 
 790   // If nodes are depleted, some transform has miscalculated its needs.
 791   assert(!phase->exceeding_node_budget(), "sanity");
 792 
 793   // Protect against over-unrolling.
 794   // After split at least one iteration will be executed in pre-loop.
 795   if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
 796     return false;
 797   }
 798   _local_loop_unroll_limit  = LoopUnrollLimit;
 799   _local_loop_unroll_factor = 4;
 800   int future_unroll_cnt = cl->unrolled_count() * 2;


 904   if (UseSuperWord) {
 905     if (!cl->is_reduction_loop()) {
 906       phase->mark_reductions(this);
 907     }
 908 
 909     // Only attempt slp analysis when user controls do not prohibit it
 910     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 911       // Once policy_slp_analysis succeeds, mark the loop with the
 912       // maximal unroll factor so that we minimize analysis passes
 913       if (future_unroll_cnt >= _local_loop_unroll_factor) {
 914         policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
 915       }
 916     }
 917   }
 918 
 919   int slp_max_unroll_factor = cl->slp_max_unroll();
 920   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
 921     LoopMaxUnroll = slp_max_unroll_factor;
 922   }
 923 
 924   uint estimate = est_loop_clone_sz(2);
 925 
 926   if (cl->has_passed_slp()) {
 927     if (slp_max_unroll_factor >= future_unroll_cnt) {
 928       return phase->may_require_nodes(estimate);
 929     }
 930     return false; // Loop too big.
 931   }
 932 
 933   // Check for being too big
 934   if (body_size > (uint)_local_loop_unroll_limit) {
 935     if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
 936       return phase->may_require_nodes(estimate);
 937     }
 938     return false; // Loop too big.
 939   }
 940 
 941   if (cl->is_unroll_only()) {
 942     if (TraceSuperWordLoopUnrollAnalysis) {
 943       tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
 944                     slp_max_unroll_factor, future_unroll_cnt);


 975             tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
 976           }
 977           _local_loop_unroll_limit = new_limit;
 978         }
 979       }
 980     }
 981   }
 982 }
 983 
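Each round that policy_unroll() approves doubles the body (future_unroll_cnt above is twice the current unrolled_count()). Roughly, at the source level, with the pre- and post-loops absorbing odd iterations:

    // Before: main loop runs n iterations with stride 1.
    for (int i = 0; i < n; i++) {
      s += a[i];
    }

    // After one unroll step: body doubled, stride 2. Any leftover odd
    // iteration is drained by the post-loop (not shown).
    for (int i = 0; i + 1 < n; i += 2) {
      s += a[i];
      s += a[i + 1];
    }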
 984 //------------------------------policy_align-----------------------------------
 985 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 986 // expression that does the alignment.  Note that only one array base can be
 987 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 988 // if we vectorize short memory ops into longer memory ops, we may want to
 989 // increase alignment.
 990 bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
 991   return false;
 992 }
 993 
 994 //------------------------------policy_range_check-----------------------------
 995 // Return TRUE or FALSE if the loop should be range-check-eliminated or not.
 996 // When TRUE, the estimated node budget is also requested.
 997 //
 998 // We will actually perform iteration-splitting, a more powerful form of RCE.
 999 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
1000   if (!RangeCheckElimination) return false;
1001 
1002   // If nodes are depleted, some transform has miscalculated its needs.
1003   assert(!phase->exceeding_node_budget(), "sanity");
1004 
1005   CountedLoopNode *cl = _head->as_CountedLoop();
1006   // If we unrolled  with no intention of doing RCE and we  later changed our
1007   // minds, we got no pre-loop.  Either we need to make a new pre-loop, or we
1008   // have to disallow RCE.
1009   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
1010   Node *trip_counter = cl->phi();
1011 
1012   // check for vectorized loops; some opts are no longer needed
1013   if (cl->is_unroll_only()) return false;
1014 
1015   // Check loop body for tests of trip-counter plus loop-invariant vs
1016   // loop-invariant.
1017   for (uint i = 0; i < _body.size(); i++) {
1018     Node *iff = _body[i];
1019     if (iff->Opcode() == Op_If ||
1020         iff->Opcode() == Op_RangeCheck) { // Test?
1021 
1022       // Comparing trip+off vs limit
1023       Node *bol = iff->in(1);
1024       if (bol->req() != 2) {
1025         continue; // dead constant test
1026       }
1027       if (!bol->is_Bool()) {
1028         assert(bol->Opcode() == Op_Conv2B, "predicate check only");


1035       Node *rc_exp = cmp->in(1);
1036       Node *limit = cmp->in(2);
1037 
1038       Node *limit_c = phase->get_ctrl(limit);
1039       if (limit_c == phase->C->top()) {
1040         return false;           // Found dead test on live IF?  No RCE!
1041       }
1042       if (is_member(phase->get_loop(limit_c))) {
1043         // Compare might have operands swapped; commute them
1044         rc_exp = cmp->in(2);
1045         limit  = cmp->in(1);
1046         limit_c = phase->get_ctrl(limit);
1047         if (is_member(phase->get_loop(limit_c))) {
1048           continue;             // Both inputs are loop varying; cannot RCE
1049         }
1050       }
1051 
1052       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1053         continue;
1054       }
1055       // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
1056       // projections. If BOTH are in the loop we need loop unswitching instead
1057       // of iteration splitting.
1058       if (is_loop_exit(iff)) {
1059         // Found valid reason to split iterations (if there is room).
1060         // NOTE: Usually a gross overestimate.
1061         return phase->may_require_nodes(est_loop_clone_sz(2));
1062       }
1063     } // End of is IF
1064   }
1065 
1066   return false;
1067 }
1068 
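The shape being matched is a comparison of scale * iv + offset against a loop-invariant limit, where iv is the trip counter. A hypothetical source-level equivalent:

    // Range check: rc_exp = 2*i + off (scaled iv plus offset), len invariant.
    for (int i = 0; i < n; i++) {
      if (2 * i + off >= len) {   // loop-exiting range check
        range_check_failed();     // hypothetical failure path
      }
      b[2 * i + off] = a[i];
    }
    // Iteration splitting solves 2*i + off < len for i, so the main loop
    // covers only the provably in-bounds range and drops the check; the
    // pre- and post-loops keep the full test.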
1069 //------------------------------policy_peel_only-------------------------------
1070 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
1071 // for unrolling loops with NO array accesses.
1072 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
1073 
1074   // If nodes are depleted, some transform has miscalculated its needs.
1075   assert(!phase->exceeding_node_budget(), "sanity");
1076 
1077   // check for vectorized loops; any peeling done was already applied
1078   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1079     return false;
1080   }
1081 


1540 
1541   // Now force out all loop-invariant dominating tests.  The optimizer
1542   // finds some, but we _know_ they are all useless.
1543   peeled_dom_test_elim(loop,old_new);
1544   loop->record_for_igvn();
1545 }
1546 
1547 //------------------------------insert_vector_post_loop------------------------
1548 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1549 // unroll_policy has  already informed  us that more  unrolling is  about to
1550 // happen  to the  main  loop.  The  resultant  post loop  will  serve as  a
1551 // vectorized drain loop.
1552 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1553   if (!loop->_head->is_CountedLoop()) return;
1554 
1555   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1556 
1557   // only process vectorized main loops
1558   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1559 



1560   int slp_max_unroll_factor = cl->slp_max_unroll();
1561   int cur_unroll = cl->unrolled_count();
1562 
1563   if (slp_max_unroll_factor == 0) return;
1564 
1565   // only process atomic unroll vector loops (not super unrolled after vectorization)
1566   if (cur_unroll != slp_max_unroll_factor) return;
1567 
1568   // we only ever process this one time
1569   if (cl->has_atomic_post_loop()) return;
1570 
1571   if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
1572     return;
1573   }
1574 
1575 #ifndef PRODUCT
1576   if (TraceLoopOpts) {
1577     tty->print("PostVector  ");
1578     loop->dump_head();
1579   }
1580 #endif
1581   C->set_major_progress();
1582 
1583   // Find common pieces of the loop being guarded with pre & post loops
1584   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1585   CountedLoopEndNode *main_end = main_head->loopexit();
1586   // diagnostic to catch a loop end that is not properly formed
1587   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1588 
1589   // mark this loop as processed
1590   main_head->mark_has_atomic_post_loop();
1591 
1592   Node *incr = main_end->incr();
1593   Node *limit = main_end->limit();
1594 


3198   return true;
3199 }
3200 
3201 //=============================================================================
3202 //------------------------------iteration_split_impl---------------------------
3203 bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
3204   // Compute loop trip count if possible.
3205   compute_trip_count(phase);
3206 
3207   // Convert one iteration loop into normal code.
3208   if (do_one_iteration_loop(phase)) {
3209     return true;
3210   }
3211   // Check and remove empty loops (spam micro-benchmarks)
3212   if (do_remove_empty_loop(phase)) {
3213     return true;  // Here we removed an empty loop
3214   }
3215 
3216   AutoNodeBudget node_budget(phase);
3217 



3218   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
3219   // This removes loop-invariant tests (usually null checks).
3220   if (!_head->is_CountedLoop()) { // Non-counted loop
3221     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
3222       // Partial peel succeeded so terminate this round of loop opts
3223       return false;
3224     }
3225     if (policy_peeling(phase)) {    // Should we peel?
3226       if (PrintOpto) { tty->print_cr("should_peel"); }
3227       phase->do_peeling(this, old_new);
3228     } else if (policy_unswitching(phase)) {
3229       phase->do_unswitching(this, old_new);
3230     }
3231     return true;
3232   }
3233   CountedLoopNode *cl = _head->as_CountedLoop();
3234 
3235   if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops
3236 
3237   // Do nothing special to pre- and post- loops
3238   if (cl->is_pre_loop() || cl->is_post_loop()) return true;
3239 
3240   // Compute loop trip count from profile data
3241   compute_profile_trip_cnt(phase);
3242 
3243   // Before attempting fancy unrolling, RCE or alignment, see if we want
3244   // to completely unroll this loop or do loop unswitching.
3245   if (cl->is_normal_loop()) {
3246     if (policy_unswitching(phase)) {
3247       phase->do_unswitching(this, old_new);
3248       return true;
3249     }
3250     if (policy_maximally_unroll(phase)) {

3251       // Here we did some unrolling and peeling.  Eventually we will
3252       // completely unroll this loop and it will no longer be a loop.
3253       phase->do_maximally_unroll(this, old_new);
3254       return true;
3255     }
3256   }
3257 
3258   uint est_peeling = estimate_peeling(phase);
3259   bool should_peel = 0 < est_peeling;
3260 
3261   // Counted loops may be peeled, may need some iterations run up
3262   // front for RCE, and may want to align loop refs to a cache
3263   // line.  Thus we clone a full loop up front whose trip count is
3264   // at least 1 (if peeling), but may be several more.
3265 
3266   // The main loop will start cache-line aligned with at least 1
3267   // iteration of the unrolled body (zero-trip test required) and
3268   // will have some range checks removed.
3269 
3270   // A post-loop will finish any odd iterations (leftover after
3271   // unrolling), plus any needed for RCE purposes.
3272 
3273   bool should_unroll = policy_unroll(phase);
3274   bool should_rce    = policy_range_check(phase);
3275   // TODO: Remove align -- not used.
3276   bool should_align  = policy_align(phase);
3277 
3278   // If not RCE'ing  (iteration splitting) or Aligning, then we  do not need a
3279   // pre-loop.  We may still need to peel an initial iteration but we will not
3280   // be needing an unknown number of pre-iterations.
3281   //
3282   // Basically, if may_rce_align reports FALSE first time through, we will not
3283   // be able to later do RCE or Aligning on this loop.
3284   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
3285 
3286   // If we have any of these conditions (RCE, alignment, unrolling) met, then
3287   // we switch to the pre-/main-/post-loop model.  This model also covers
3288   // peeling.
3289   if (should_rce || should_align || should_unroll) {
3290     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
3291       uint estimate = est_loop_clone_sz(3);
3292       if (!phase->may_require_nodes(estimate)) {
3293         return false;
3294       }
3295       phase->insert_pre_post_loops(this, old_new, !may_rce_align);
3296     }
3297     // Adjust the pre- and main-loop limits to let the pre and  post loops run
3298     // with full checks, but the main-loop with no checks.  Remove said checks
3299     // from the main body.
3300     if (should_rce) {
3301       if (phase->do_range_check(this, old_new) != 0) {
3302         cl->mark_has_range_checks();
3303       }
3304     } else if (PostLoopMultiversioning) {
3305       phase->has_range_checks(this);
3306     }
3307 
3308     if (should_unroll && !should_peel && PostLoopMultiversioning) {
3309       // Try to setup multiversioning on main loops before they are unrolled
3310       if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
3311         phase->insert_scalar_rced_post_loop(this, old_new);
3312       }
3313     }
3314 
3315     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
3316     // twice as many iterations as before) and the main body limit (only do
3317     // an even number of trips).  If we are peeling, we might enable some RCE
3318     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
3319     // peeling.
3320     if (should_unroll && !should_peel) {
3321       if (SuperWordLoopUnrollAnalysis) {
3322         phase->insert_vector_post_loop(this, old_new);
3323       }
3324       phase->do_unroll(this, old_new, true);
3325     }
3326 
3327     // Adjust the pre-loop limits to align the main body iterations.
3328     if (should_align) {
3329       Unimplemented();
3330     }
3331   } else {                      // Else we have an unchanged counted loop
3332     if (should_peel) {          // Might want to peel but do nothing else
3333       if (phase->may_require_nodes(est_peeling)) {
3334         phase->do_peeling(this, old_new);
3335       }
3336     }
3337   }
3338   return true;
3339 }
3340 
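Put together, the pre-/main-/post-loop model built here looks roughly like this at the source level (the limits are illustrative and the zero-trip guard tests are omitted):

    // Original: for (int i = init; i < limit; i++) body(i);
    int i = init;
    for (; i < pre_limit; i++)  body(i);  // pre-loop: full checks; runs the few
                                          // iterations needed before RCE holds
    for (; i < main_limit; i++) body(i);  // main loop: range checks removed,
                                          // body possibly unrolled
    for (; i < limit; i++)      body(i);  // post-loop: full checks; drains the
                                          // leftover iterations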
3341 
3342 //=============================================================================
3343 //------------------------------iteration_split--------------------------------
3344 bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3345   // Recursively iteration split nested loops
3346   if (_child && !_child->iteration_split(phase, old_new)) {
3347     return false;
3348   }
3349 
3350   // Clean out prior deadwood
3351   DCE_loop_body();
3352 
3353   // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3354   // Replace with a 1-in-10 exit guess.
3355   if (!is_root() && is_loop()) {




 269 // (x + inv2) - inv1  =>  (-inv1 + inv2) + x
 270 // (x - inv2) + inv1  =>  ( inv1 - inv2) + x
 271 // (x - inv2) - inv1  =>  (-inv1 - inv2) + x
 272 // inv1 + (inv2 - x)  =>  ( inv1 + inv2) - x
 273 // inv1 - (x - inv2)  =>  ( inv1 + inv2) - x
 274 // (inv2 - x) + inv1  =>  ( inv1 + inv2) - x
 275 // (inv2 - x) - inv1  =>  (-inv1 + inv2) - x
 276 // inv1 - (x + inv2)  =>  ( inv1 - inv2) - x
 277 //
 278 Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
 279   if ((!n1->is_Add() && !n1->is_Sub()) || n1->outcnt() == 0) return NULL;
 280   if (is_invariant(n1)) return NULL;
 281   int inv1_idx = is_invariant_addition(n1, phase);
 282   if (!inv1_idx) return NULL;
 283   // Don't mess with adds of constants (igvn moves them to the expression tree root).
 284   if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
 285   Node* inv1 = n1->in(inv1_idx);
 286   Node* n2 = n1->in(3 - inv1_idx);
 287   int inv2_idx = is_invariant_addition(n2, phase);
 288   if (!inv2_idx) return NULL;



 289   Node* x    = n2->in(3 - inv2_idx);
 290   Node* inv2 = n2->in(inv2_idx);
 291 
 292   bool neg_x    = n2->is_Sub() && inv2_idx == 1;
 293   bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
 294   bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
 295   if (n1->is_Sub() && inv1_idx == 1) {
 296     neg_x    = !neg_x;
 297     neg_inv2 = !neg_inv2;
 298   }
 299   Node* inv1_c = phase->get_ctrl(inv1);
 300   Node* inv2_c = phase->get_ctrl(inv2);
 301   Node* n_inv1;
 302   if (neg_inv1) {
 303     Node *zero = phase->_igvn.intcon(0);
 304     phase->set_ctrl(zero, phase->C->root());
 305     n_inv1 = new SubINode(zero, inv1);
 306     phase->register_new_node(n_inv1, inv1_c);
 307   } else {
 308     n_inv1 = inv1;


 320     addx = new SubINode(inv, x);
 321   } else {
 322     addx = new AddINode(x, inv);
 323   }
 324   phase->register_new_node(addx, phase->get_ctrl(x));
 325   phase->_igvn.replace_node(n1, addx);
 326   assert(phase->get_loop(phase->get_ctrl(n1)) == this, "");
 327   _body.yank(n1);
 328   return addx;
 329 }
 330 
 331 //---------------------reassociate_invariants-----------------------------
 332 // Reassociate invariant expressions:
 333 void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
 334   for (int i = _body.size() - 1; i >= 0; i--) {
 335     Node *n = _body.at(i);
 336     for (int j = 0; j < 5; j++) {
 337       Node* nn = reassociate_add_sub(n, phase);
 338       if (nn == NULL) break;
 339       n = nn; // again
 340     };
 341   }
 342 }
 343 
 344 //------------------------------policy_peeling---------------------------------
 345 // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
 346 // make some loop-invariant test (usually a null-check) happen before the loop.
 347 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
 348   IdealLoopTree *loop = (IdealLoopTree*)this;

 349 
 350   // If nodes are depleted, some transform has miscalculated its needs.
 351   assert(!phase->exceeding_node_budget(), "sanity");
 352 
 353   uint body_size = loop->_body.size();
 354   // Peeling does loop cloning which can result in O(N^2) node construction
 355   if (body_size > 255) {
 356     return false;   // Prevent overflow for large body size
 357   }
 358   uint estimate = body_size * body_size;


 359   if (phase->exceeding_node_budget(estimate)) {
 360     return false;   // Too large to safely clone
 361   }
 362 
 363   // check for vectorized loops; any peeling done was already applied
 364   if (_head->is_CountedLoop()) {
 365     CountedLoopNode* cl = _head->as_CountedLoop();
 366     if (cl->is_unroll_only() || cl->trip_count() == 1) {
 367       return false;
 368     }
 369   }
 370 
 371   Node* test = loop->tail();
 372 
 373   while (test != _head) {       // Scan till run off top of loop
 374     if (test->is_If()) {        // Test?
 375       Node *ctrl = phase->get_ctrl(test->in(1));
 376       if (ctrl->is_top()) {
 377         return false;           // Found dead test on live IF?  No peeling!
 378       }
 379       // Standard IF only has one input value to check for loop invariance
 380       assert(test->Opcode() == Op_If ||
 381              test->Opcode() == Op_CountedLoopEnd ||
 382              test->Opcode() == Op_RangeCheck,
 383              "Check this code when new subtype is added");
 384       // Condition is not a member of this loop?
 385       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
 386         // Found reason to peel!
 387         return phase->may_require_nodes(estimate);
 388       }
 389     }
 390     // Walk up dominators to loop _head looking for test which is
 391     // executed on every path thru loop.
 392     test = phase->idom(test);
 393   }
 394   return false;
 395 }
 396 
 397 //------------------------------peeled_dom_test_elim---------------------------
 398 // If we got the effect of peeling, either by actually peeling or by making
 399 // a pre-loop which must execute at least once, we can remove all
 400 // loop-invariant dominated tests in the main body.
 401 void PhaseIdealLoop::peeled_dom_test_elim(IdealLoopTree *loop, Node_List &old_new) {
 402   bool progress = true;
 403   while (progress) {
 404     progress = false;           // Reset for next iteration
 405     Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
 406     Node *test = prev->in(0);
 407     while (test != loop->_head) { // Scan till run off top of loop
 408 
 409       int p_op = prev->Opcode();
 410       if ((p_op == Op_IfFalse || p_op == Op_IfTrue) &&
 411           test->is_If() &&      // Test?
 412           !test->in(1)->is_Con() && // And not already obvious?
 413           // Condition is not a member of this loop?
 414           !loop->is_member(get_loop(get_ctrl(test->in(1))))){


 621         new_exit_value = old->in(LoopNode::LoopBackControl);
 622       _igvn.hash_delete(old);
 623       old->set_req(LoopNode::EntryControl, new_exit_value);
 624     }
 625   }
 626 
 627 
 628   // Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
 629   //         extra backedge user.
 630   Node* new_head = old_new[head->_idx];
 631   _igvn.hash_delete(new_head);
 632   new_head->set_req(LoopNode::LoopBackControl, C->top());
 633   for (DUIterator_Fast j2max, j2 = new_head->fast_outs(j2max); j2 < j2max; j2++) {
 634     Node* use = new_head->fast_out(j2);
 635     if (use->in(0) == new_head && use->req() == 3 && use->is_Phi()) {
 636       _igvn.hash_delete(use);
 637       use->set_req(LoopNode::LoopBackControl, C->top());
 638     }
 639   }
 640 

 641 
 642   // Step 4: Correct dom-depth info.  Set to loop-head depth.
 643   int dd = dom_depth(head);
 644   set_idom(head, head->in(1), dd);
 645   for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
 646     Node *old = loop->_body.at(j3);
 647     Node *nnn = old_new[old->_idx];
 648     if (!has_ctrl(nnn)) {
 649       set_idom(nnn, idom(nnn), dd-1);
 650     }
 651   }
 652 
 653   // Now force out all loop-invariant dominating tests.  The optimizer
 654   // finds some, but we _know_ they are all useless.
 655   peeled_dom_test_elim(loop,old_new);
 656 
 657   loop->record_for_igvn();
 658 }
 659 
 660 #define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop

 661 
 662 //------------------------------policy_maximally_unroll------------------------
 663 // Calculate exact loop trip count and return true if loop can be maximally
 664 // unrolled.

 665 bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
 666   CountedLoopNode *cl = _head->as_CountedLoop();
 667   assert(cl->is_normal_loop(), "");
 668   if (!cl->is_valid_counted_loop()) {
 669     return false; // Malformed counted loop
 670   }
 671   if (!cl->has_exact_trip_count()) {
 672     // Trip count is not exact.
 673     return false;
 674   }
 675 
 676   uint trip_count = cl->trip_count();
 677   // Note, max_juint is used to indicate unknown trip count.
 678   assert(trip_count > 1, "one iteration loop should be optimized out already");
 679   assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
 680 
 681   // If nodes are depleted, some transform has miscalculated its needs.
 682   assert(!phase->exceeding_node_budget(), "sanity");
 683 
 684   // Real policy: if we maximally unroll, does it get too big?
 685   // Allow the unrolled mess to get larger than standard loop
 686   // size.  After all, it will no longer be a loop.
 687   uint body_size    = _body.size();
 688   uint unroll_limit = (uint)LoopUnrollLimit * 4;
 689   assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
 690   if (trip_count > unroll_limit || body_size > unroll_limit) {
 691     return false;
 692   }
 693 
 694   // Take into account that after unroll conjoined heads and tails will fold,
 695   // otherwise policy_unroll() may allow more unrolling than max unrolling.
 696   uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
 697 
 698   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
 699     return false;
 700   }
 701 
 702   // Fully unroll a loop with few iterations, regardless of the conditions below,
 703   // since subsequent loop optimizations will split such a loop anyway (pre-main-post).
 704   if (trip_count <= 3) {
 705     return phase->may_require_nodes(new_body_size);
 706   }
 707 
 708   if (new_body_size > unroll_limit ||
 709       // Unrolling can result in a large amount of node construction
 710       phase->exceeding_node_budget(new_body_size)) {
 711     return false;
 712   }
 713 
 714   // Do not unroll a loop with String intrinsics code.
 715   // String intrinsics are large and have loops.
 716   for (uint k = 0; k < _body.size(); k++) {


 725       case Op_HasNegatives: {
 726         return false;
 727       }
 728 #if INCLUDE_RTM_OPT
 729       case Op_FastLock:
 730       case Op_FastUnlock: {
 731         // Don't unroll RTM locking code because it is large.
 732         if (UseRTMLocking) {
 733           return false;
 734         }
 735       }
 736 #endif
 737     } // switch
 738   }
 739 
 740   return phase->may_require_nodes(new_body_size);
 741 }
 742 
 743 
 744 //------------------------------policy_unroll----------------------------------
 745 // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if the
 746 // loop is a CountedLoop and the body is small enough.

 747 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 748 
 749   CountedLoopNode *cl = _head->as_CountedLoop();
 750   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 751 
 752   if (!cl->is_valid_counted_loop()) {
 753     return false; // Malformed counted loop
 754   }
 755 
 756   // If nodes are depleted, some transform has miscalculated its needs.
 757   assert(!phase->exceeding_node_budget(), "sanity");
 758 
 759   // Protect against over-unrolling.
 760   // After split at least one iteration will be executed in pre-loop.
 761   if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
 762     return false;
 763   }
 764   _local_loop_unroll_limit  = LoopUnrollLimit;
 765   _local_loop_unroll_factor = 4;
 766   int future_unroll_cnt = cl->unrolled_count() * 2;


 870   if (UseSuperWord) {
 871     if (!cl->is_reduction_loop()) {
 872       phase->mark_reductions(this);
 873     }
 874 
 875     // Only attempt slp analysis when user controls do not prohibit it
 876     if (LoopMaxUnroll > _local_loop_unroll_factor) {
 877       // Once policy_slp_analysis succeeds, mark the loop with the
 878       // maximal unroll factor so that we minimize analysis passes
 879       if (future_unroll_cnt >= _local_loop_unroll_factor) {
 880         policy_unroll_slp_analysis(cl, phase, future_unroll_cnt);
 881       }
 882     }
 883   }
 884 
 885   int slp_max_unroll_factor = cl->slp_max_unroll();
 886   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
 887     LoopMaxUnroll = slp_max_unroll_factor;
 888   }
 889 
 890   uint estimate = est_loop_clone_sz(2, body_size);
 891 
 892   if (cl->has_passed_slp()) {
 893     if (slp_max_unroll_factor >= future_unroll_cnt) {
 894       return phase->may_require_nodes(estimate);
 895     }
 896     return false; // Loop too big.
 897   }
 898 
 899   // Check for being too big
 900   if (body_size > (uint)_local_loop_unroll_limit) {
 901     if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
 902       return phase->may_require_nodes(estimate);
 903     }
 904     return false; // Loop too big.
 905   }
 906 
 907   if (cl->is_unroll_only()) {
 908     if (TraceSuperWordLoopUnrollAnalysis) {
 909       tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
 910                     slp_max_unroll_factor, future_unroll_cnt);


 941             tty->print_cr("slp analysis unroll=%d, default limit=%d\n", new_limit, _local_loop_unroll_limit);
 942           }
 943           _local_loop_unroll_limit = new_limit;
 944         }
 945       }
 946     }
 947   }
 948 }
 949 
 950 //------------------------------policy_align-----------------------------------
 951 // Return TRUE or FALSE if the loop should be cache-line aligned.  Gather the
 952 // expression that does the alignment.  Note that only one array base can be
 953 // aligned in a loop (unless the VM guarantees mutual alignment).  Note that
 954 // if we vectorize short memory ops into longer memory ops, we may want to
 955 // increase alignment.
 956 bool IdealLoopTree::policy_align(PhaseIdealLoop *phase) const {
 957   return false;
 958 }
 959 
 960 //------------------------------policy_range_check-----------------------------
 961 // Return TRUE or FALSE if the loop should be range-check-eliminated.
 962 // Actually we do iteration-splitting, a more powerful form of RCE.


 963 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
 964   if (!RangeCheckElimination) return false;
 965 
 966   // If nodes are depleted, some transform has miscalculated its needs.
 967   assert(!phase->exceeding_node_budget(), "sanity");
 968 
 969   CountedLoopNode *cl = _head->as_CountedLoop();
 970   // If we unrolled with no intention of doing RCE and we later
 971   // changed our minds, we got no pre-loop.  Either we need to
 972   // make a new pre-loop, or we gotta disallow RCE.
 973   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
 974   Node *trip_counter = cl->phi();
 975 
 976   // check for vectorized loops; some opts are no longer needed
 977   if (cl->is_unroll_only()) return false;
 978 
 979   // Check loop body for tests of trip-counter plus loop-invariant vs
 980   // loop-invariant.
 981   for (uint i = 0; i < _body.size(); i++) {
 982     Node *iff = _body[i];
 983     if (iff->Opcode() == Op_If ||
 984         iff->Opcode() == Op_RangeCheck) { // Test?
 985 
 986       // Comparing trip+off vs limit
 987       Node *bol = iff->in(1);
 988       if (bol->req() != 2) {
 989         continue; // dead constant test
 990       }
 991       if (!bol->is_Bool()) {
 992         assert(bol->Opcode() == Op_Conv2B, "predicate check only");


 999       Node *rc_exp = cmp->in(1);
1000       Node *limit = cmp->in(2);
1001 
1002       Node *limit_c = phase->get_ctrl(limit);
1003       if (limit_c == phase->C->top()) {
1004         return false;           // Found dead test on live IF?  No RCE!
1005       }
1006       if (is_member(phase->get_loop(limit_c))) {
1007         // Compare might have operands swapped; commute them
1008         rc_exp = cmp->in(2);
1009         limit  = cmp->in(1);
1010         limit_c = phase->get_ctrl(limit);
1011         if (is_member(phase->get_loop(limit_c))) {
1012           continue;             // Both inputs are loop varying; cannot RCE
1013         }
1014       }
1015 
1016       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
1017         continue;
1018       }
1019       // Found a test like 'trip+off vs  limit'.  Test is an IfNode, has two
1020       // (2) projections.  If BOTH are in  the loop we need loop unswitching
1021       // instead of iteration splitting.
1022       if (is_loop_exit(iff)) {
1023         // Found valid reason to split iterations (if there is room).
1024         // NOTE: Usually a gross overestimate.
1025         return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
1026       }
1027     } // End of is IF
1028   }
1029 
1030   return false;
1031 }
1032 
1033 //------------------------------policy_peel_only-------------------------------
1034 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
1035 // for unrolling loops with NO array accesses.
1036 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
1037 
1038   // If nodes are depleted, some transform has miscalculated its needs.
1039   assert(!phase->exceeding_node_budget(), "sanity");
1040 
1041   // check for vectorized loops; any peeling done was already applied
1042   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
1043     return false;
1044   }
1045 


1504 
1505   // Now force out all loop-invariant dominating tests.  The optimizer
1506   // finds some, but we _know_ they are all useless.
1507   peeled_dom_test_elim(loop,old_new);
1508   loop->record_for_igvn();
1509 }
1510 
1511 //------------------------------insert_vector_post_loop------------------------
1512 // Insert a copy of the atomic unrolled vectorized main loop as a post loop,
1513 // unroll_policy has  already informed  us that more  unrolling is  about to
1514 // happen  to the  main  loop.  The  resultant  post loop  will  serve as  a
1515 // vectorized drain loop.
1516 void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
1517   if (!loop->_head->is_CountedLoop()) return;
1518 
1519   CountedLoopNode *cl = loop->_head->as_CountedLoop();
1520 
1521   // only process vectorized main loops
1522   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
1523 
1524   if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
1525     return;
1526   }
1527   int slp_max_unroll_factor = cl->slp_max_unroll();
1528   int cur_unroll = cl->unrolled_count();
1529 
1530   if (slp_max_unroll_factor == 0) return;
1531 
1532   // only process atomic unroll vector loops (not super unrolled after vectorization)
1533   if (cur_unroll != slp_max_unroll_factor) return;
1534 
1535   // we only ever process this one time
1536   if (cl->has_atomic_post_loop()) return;
1537 




1538 #ifndef PRODUCT
1539   if (TraceLoopOpts) {
1540     tty->print("PostVector  ");
1541     loop->dump_head();
1542   }
1543 #endif
1544   C->set_major_progress();
1545 
1546   // Find common pieces of the loop being guarded with pre & post loops
1547   CountedLoopNode *main_head = loop->_head->as_CountedLoop();
1548   CountedLoopEndNode *main_end = main_head->loopexit();
1550   // diagnostic to catch a loop end that is not properly formed
1550   assert(main_end->outcnt() == 2, "1 true, 1 false path only");
1551 
1552   // mark this loop as processed
1553   main_head->mark_has_atomic_post_loop();
1554 
1555   Node *incr = main_end->incr();
1556   Node *limit = main_end->limit();
1557 


3161   return true;
3162 }
3163 
3164 //=============================================================================
3165 //------------------------------iteration_split_impl---------------------------
3166 bool IdealLoopTree::iteration_split_impl(PhaseIdealLoop *phase, Node_List &old_new) {
3167   // Compute loop trip count if possible.
3168   compute_trip_count(phase);
3169 
3170   // Convert one iteration loop into normal code.
3171   if (do_one_iteration_loop(phase)) {
3172     return true;
3173   }
3174   // Check and remove empty loops (spam micro-benchmarks)
3175   if (do_remove_empty_loop(phase)) {
3176     return true;  // Here we removed an empty loop
3177   }
3178 
3179   AutoNodeBudget node_budget(phase);
3180 
3181   bool should_peel     = policy_peeling(phase);
3182   bool should_unswitch = policy_unswitching(phase);
3183 
3184   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
3185   // This removes loop-invariant tests (usually null checks).
3186   if (!_head->is_CountedLoop()) { // Non-counted loop
3187     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
3188       // Partial peel succeeded so terminate this round of loop opts
3189       return false;
3190     }
3191     if (should_peel) {            // Should we peel?
3192       if (PrintOpto) { tty->print_cr("should_peel"); }
3193       phase->do_peeling(this,old_new);
3194     } else if (should_unswitch) {
3195       phase->do_unswitching(this, old_new);
3196     }
3197     return true;
3198   }
3199   CountedLoopNode *cl = _head->as_CountedLoop();
3200 
3201   if (!cl->is_valid_counted_loop()) return true; // Ignore various kinds of broken loops
3202 
3203   // Do nothing special to pre- and post- loops
3204   if (cl->is_pre_loop() || cl->is_post_loop()) return true;
3205 
3206   // Compute loop trip count from profile data
3207   compute_profile_trip_cnt(phase);
3208 
3209   // Before attempting fancy unrolling, RCE or alignment, see if we want
3210   // to completely unroll this loop or do loop unswitching.
3211   if (cl->is_normal_loop()) {
3212     if (should_unswitch) {
3213       phase->do_unswitching(this, old_new);
3214       return true;
3215     }
3216     bool should_maximally_unroll = policy_maximally_unroll(phase);
3217     if (should_maximally_unroll) {
3218       // Here we did some unrolling and peeling.  Eventually we will
3219       // completely unroll this loop and it will no longer be a loop.
3220       phase->do_maximally_unroll(this, old_new);
3221       return true;
3222     }
3223   }
3224 



3225   // Counted loops may be peeled, may need some iterations run up
3226   // front for RCE, and may want to align loop refs to a cache
3227   // line.  Thus we clone a full loop up front whose trip count is
3228   // at least 1 (if peeling), but may be several more.
3229 
3230   // The main loop will start cache-line aligned with at least 1
3231   // iteration of the unrolled body (zero-trip test required) and
3232   // will have some range checks removed.
3233 
3234   // A post-loop will finish any odd iterations (leftover after
3235   // unrolling), plus any needed for RCE purposes.
3236 
3237   bool should_unroll = policy_unroll(phase);
3238   bool should_rce    = policy_range_check(phase);
3239   // TODO: Remove align -- not used.
3240   bool should_align  = policy_align(phase);
3241 
3242   // If not RCE'ing  (iteration splitting) or Aligning, then we  do not need a
3243   // pre-loop.  We may still need to peel an initial iteration but we will not
3244   // be needing an unknown number of pre-iterations.
3245   //
3246   // Basically, if may_rce_align reports FALSE first time through, we will not
3247   // be able to later do RCE or Aligning on this loop.
3248   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
3249 
3250   // If we have any of these conditions (RCE, alignment, unrolling) met, then
3251   // we switch to the pre-/main-/post-loop model.  This model also covers
3252   // peeling.
3253   if (should_rce || should_align || should_unroll) {
3254     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
3255       if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {

3256         return false;
3257       }
3258       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
3259     }
3260     // Adjust the pre- and main-loop limits to let the pre and post loops run
3261     // with full checks, but the main-loop with no checks.  Remove said
3262     // checks from the main body.
3263     if (should_rce) {
3264       if (phase->do_range_check(this, old_new) != 0) {
3265         cl->mark_has_range_checks();
3266       }
3267     } else if (PostLoopMultiversioning) {
3268       phase->has_range_checks(this);
3269     }
3270 
3271     if (should_unroll && !should_peel && PostLoopMultiversioning) {
3272       // Try to setup multiversioning on main loops before they are unrolled
3273       if (cl->is_main_loop() && (cl->unrolled_count() == 1)) {
3274         phase->insert_scalar_rced_post_loop(this, old_new);
3275       }
3276     }
3277 
3278     // Double loop body for unrolling.  Adjust the minimum-trip test (will do
3279     // twice as many iterations as before) and the main body limit (only do
3280     // an even number of trips).  If we are peeling, we might enable some RCE
3281     // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
3282     // peeling.
3283     if (should_unroll && !should_peel) {
3284       if (SuperWordLoopUnrollAnalysis) {
3285         phase->insert_vector_post_loop(this, old_new);
3286       }
3287       phase->do_unroll(this, old_new, true);
3288     }
3289 
3290     // Adjust the pre-loop limits to align the main body iterations.
3291     if (should_align) {
3292       Unimplemented();
3293     }
3294   } else {                      // Else we have an unchanged counted loop
3295     if (should_peel) {          // Might want to peel but do nothing else
3296       phase->do_peeling(this,old_new);


3297     }
3298   }
3299   return true;
3300 }
3301 
3302 
3303 //=============================================================================
3304 //------------------------------iteration_split--------------------------------
3305 bool IdealLoopTree::iteration_split(PhaseIdealLoop* phase, Node_List &old_new) {
3306   // Recursively iteration split nested loops
3307   if (_child && !_child->iteration_split(phase, old_new)) {
3308     return false;
3309   }
3310 
3311   // Clean out prior deadwood
3312   DCE_loop_body();
3313 
3314   // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
3315   // Replace with a 1-in-10 exit guess.
3316   if (!is_root() && is_loop()) {

