
src/share/vm/opto/gcm.cpp

  24 
  25 #include "precompiled.hpp"
  26 #include "libadt/vectset.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/block.hpp"
  29 #include "opto/c2compiler.hpp"
  30 #include "opto/callnode.hpp"
  31 #include "opto/cfgnode.hpp"
  32 #include "opto/machnode.hpp"
  33 #include "opto/opcodes.hpp"
  34 #include "opto/phaseX.hpp"
  35 #include "opto/rootnode.hpp"
  36 #include "opto/runtime.hpp"
  37 #include "runtime/deoptimization.hpp"
  38 #if defined AD_MD_HPP
  39 # include AD_MD_HPP
  40 #elif defined TARGET_ARCH_MODEL_x86_32
  41 # include "adfiles/ad_x86_32.hpp"
  42 #elif defined TARGET_ARCH_MODEL_x86_64
  43 # include "adfiles/ad_x86_64.hpp"


  44 #elif defined TARGET_ARCH_MODEL_sparc
  45 # include "adfiles/ad_sparc.hpp"
  46 #elif defined TARGET_ARCH_MODEL_zero
  47 # include "adfiles/ad_zero.hpp"
  48 #elif defined TARGET_ARCH_MODEL_ppc_64
  49 # include "adfiles/ad_ppc_64.hpp"
  50 #endif
  51 
  52 
  53 // Portions of code courtesy of Clifford Click
  54 
  55 // Optimization - Graph Style
  56 
  57 // To avoid float value underflow
  58 #define MIN_BLOCK_FREQUENCY 1.e-35f
  59 
  60 //----------------------------schedule_node_into_block-------------------------
  61 // Insert node n into block b. Look for projections of n and make sure they
  62 // are in b also.
  63 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {


  97     Block *pb = get_block_for_node(in0); // Block-projection already has basic block
  98     uint j = 0;
  99     if (pb->_num_succs != 1) {  // More than 1 successor?
 100       // Search for successor
 101       uint max = pb->number_of_nodes();
 102       assert( max > 1, "" );
 103       uint start = max - pb->_num_succs;
 104       // Find which output path belongs to projection
 105       for (j = start; j < max; j++) {
 106         if( pb->get_node(j) == in0 )
 107           break;
 108       }
 109       assert( j < max, "must find" );
 110       // Change control to match head of successor basic block
 111       j -= start;
 112     }
 113     n->set_req(0, pb->_succs[j]->head());
 114   }
 115 }
 116 
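The tail of schedule_node_into_block shown above rewires a node whose control input is a block projection so that it points at the head of the matching successor block: the last _num_succs nodes of the projection's block are the branch projections, and the projection's position among them is exactly the successor index. A minimal standalone sketch of that mapping, using hypothetical Blk/Nd structs rather than the real C2 Block and Node classes:

// Minimal standalone sketch -- hypothetical Blk/Nd structs, not the C2 Block
// and Node classes -- of the projection-to-successor mapping used above.
#include <cassert>
#include <vector>

struct Nd { };                          // stand-in for a node

struct Blk {                            // stand-in for a basic block
  std::vector<Nd*>  nodes;              // block body; projections sit at the tail
  std::vector<Blk*> succs;              // successor blocks, in projection order
  Nd* head() { return nodes.front(); }  // the node that starts this block
};

// Return the head node of the successor block selected by projection 'proj',
// mirroring the search over the last succs.size() nodes in the code above.
static Nd* successor_head_for_projection(Blk* pb, Nd* proj) {
  size_t j = 0;
  if (pb->succs.size() != 1) {               // more than one successor?
    size_t max   = pb->nodes.size();
    size_t start = max - pb->succs.size();   // index of the first projection
    for (j = start; j < max; j++) {
      if (pb->nodes[j] == proj) break;       // which output path is this?
    }
    assert(j < max && "projection must live in its block");
    j -= start;                              // convert to a successor index
  }
  return pb->succs[j]->head();
}

int main() {
  Nd h0, h1, h2, branch, proj_true, proj_false;
  Blk then_blk{{&h1}, {}};
  Blk else_blk{{&h2}, {}};
  Blk test_blk{{&h0, &branch, &proj_true, &proj_false}, {&then_blk, &else_blk}};
  assert(successor_head_for_projection(&test_blk, &proj_false) == &h2);
}

When the block has a single successor the search is skipped and j stays 0, which matches the _num_succs != 1 guard in the real code.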



 117 
 118 //------------------------------schedule_pinned_nodes--------------------------
 119 // Set the basic block for Nodes pinned into blocks
 120 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
 121   // Allocate node stack of size C->live_nodes()+8 to avoid frequent realloc
 122   GrowableArray <Node *> spstack(C->live_nodes() + 8);
 123   spstack.push(_root);
 124   while (spstack.is_nonempty()) {
 125     Node* node = spstack.pop();
 126     if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
 127       if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
 128         assert(node->in(0), "pinned Node must have Control");
 129         // Before setting block replace block_proj control edge
 130         replace_block_proj_ctrl(node);
 131         Node* input = node->in(0);
 132         while (!input->is_block_start()) {
 133           input = input->in(0);
 134         }
 135         Block* block = get_block_for_node(input); // Basic block of controlling input
 136         schedule_node_into_block(node, block);
 137       }
  138 
  139       // Process all inputs that are non-NULL
 140       for (int i = node->req() - 1; i >= 0; --i) {
 141         if (node->in(i) != NULL) {
 142           spstack.push(node->in(i));
 143         }
 144       }
 145     }
 146   }
 147 }
 148 
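schedule_pinned_nodes above walks the whole graph without recursion: it seeds an explicit stack with the root, pre-sizes it to roughly the live node count to avoid reallocation, and uses a test-and-set visited check so each node is handled once. A self-contained sketch of that traversal pattern, with a hypothetical Node struct and pin callback standing in for the C2 types:

// Self-contained sketch -- hypothetical Node type and pin callback, not the C2
// classes -- of the traversal pattern above: a depth-first walk over inputs
// driven by an explicit, pre-sized stack instead of recursion.
#include <cstdio>
#include <vector>

struct Node {
  int idx;                    // unique node index, like Node::_idx
  bool pinned;                // node must stay in its controlling block
  std::vector<Node*> inputs;  // def inputs, like Node::in(i)
};

// Visit every node reachable from 'root' exactly once; 'live_nodes' is used to
// pre-size the stack, mirroring the C->live_nodes() + 8 sizing in the real code.
template <typename PinFn>
void schedule_pinned(Node* root, size_t live_nodes, PinFn pin_to_block) {
  std::vector<bool> visited(live_nodes, false);
  std::vector<Node*> stack;
  stack.reserve(live_nodes + 8);           // avoid repeated reallocation
  stack.push_back(root);
  while (!stack.empty()) {
    Node* n = stack.back();
    stack.pop_back();
    if (visited[n->idx]) continue;         // test-and-set, like visited.test_set()
    visited[n->idx] = true;
    if (n->pinned) pin_to_block(n);        // "Pinned?  Nail it down!"
    for (auto it = n->inputs.rbegin(); it != n->inputs.rend(); ++it) {
      if (*it != nullptr) stack.push_back(*it);  // push only non-NULL inputs
    }
  }
}

int main() {
  Node a{0, true, {}}, b{1, false, {&a}}, c{2, true, {&a, &b}};
  schedule_pinned(&c, 3, [](Node* n) { std::printf("pin node %d\n", n->idx); });
}

Pushing the inputs in reverse keeps the visit order close to what a recursive walk over in(0..req) would produce, although any order works for the pinning itself.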
 149 #ifdef ASSERT
 150 // Assert that new input b2 is dominated by all previous inputs.
  151 // Check this by seeing that it is dominated by b1, the deepest
  152 // input observed before b2.
 153 static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
 154   if (b1 == NULL)  return;
 155   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
 156   Block* tmp = b2;
 157   while (tmp != b1 && tmp != NULL) {
 158     tmp = tmp->_idom;


 231         }
 232       }
 233 
 234       // First, visit all inputs and force them to get a block.  If an
 235       // input is already in a block we quit following inputs (to avoid
 236       // cycles). Instead we put that Node on a worklist to be handled
  237       // later (since ITS inputs may not have a block yet).
 238 
 239       // Assume all n's inputs will be processed
 240       bool done = true;
 241 
 242       while (input_index < parent_node->len()) {
 243         Node* in = parent_node->in(input_index++);
 244         if (in == NULL) {
 245           continue;
 246         }
 247 
 248         int is_visited = visited.test_set(in->_idx);
 249         if (!has_block(in)) {
 250           if (is_visited) {

 251             return false;
 252           }
 253           // Save parent node and next input's index.
 254           nstack.push(parent_node, input_index);
 255           // Process current input now.
 256           parent_node = in;
 257           input_index = 0;
 258           // Not all n's inputs processed.
 259           done = false;
 260           break;
 261         } else if (!is_visited) {
  262           // Visit this node later, using the worklist
 263           roots.push(in);
 264         }
 265       }
 266 
 267       if (done) {
 268         // All of n's inputs have been processed, complete post-processing.
 269 
 270         // Some instructions are pinned into a block.  These include Region,


1045     self->dump();
1046     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1047       LCA->_pre_order,
1048       LCA->head()->_idx,
1049       start_latency,
1050       LCA->get_node(LCA->end_idx())->_idx,
1051       end_latency,
1052       least_freq);
1053   }
1054 #endif
1055 
1056   int cand_cnt = 0;  // number of candidates tried
1057 
1058   // Walk up the dominator tree from LCA (Lowest common ancestor) to
1059   // the earliest legal location.  Capture the least execution frequency.
1060   while (LCA != early) {
1061     LCA = LCA->_idom;         // Follow up the dominator tree
1062 
1063     if (LCA == NULL) {
1064       // Bailout without retry

1065       C->record_method_not_compilable("late schedule failed: LCA == NULL");
1066       return least;
1067     }
1068 
1069     // Don't hoist machine instructions to the root basic block
1070     if (mach && LCA == root_block)
1071       break;
1072 
1073     uint start_lat = get_latency_for_node(LCA->head());
1074     uint end_idx   = LCA->end_idx();
1075     uint end_lat   = get_latency_for_node(LCA->get_node(end_idx));
1076     double LCA_freq = LCA->_freq;
1077 #ifndef PRODUCT
1078     if (trace_opto_pipelining()) {
1079       tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1080         LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq);
1081     }
1082 #endif
1083     cand_cnt++;
1084     if (LCA_freq < least_freq              || // Better Frequency


1199       continue;
1200     }
1201 
1202     // Check if 'self' could be anti-dependent on memory
1203     if (self->needs_anti_dependence_check()) {
1204       // Hoist LCA above possible-defs and insert anti-dependences to
1205       // defs in new LCA block.
1206       LCA = insert_anti_dependences(LCA, self);
1207     }
1208 
1209     if (early->_dom_depth > LCA->_dom_depth) {
1210       // Somehow the LCA has moved above the earliest legal point.
1211       // (One way this can happen is via memory_early_block.)
1212       if (C->subsume_loads() == true && !C->failing()) {
1213         // Retry with subsume_loads == false
1214         // If this is the first failure, the sentinel string will "stick"
1215         // to the Compile object, and the C2Compiler will see it and retry.
1216         C->record_failure(C2Compiler::retry_no_subsuming_loads());
1217       } else {
1218         // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)

1219         C->record_method_not_compilable("late schedule failed: incorrect graph");
1220       }
1221       return;
1222     }
1223 
1224     // If there is no opportunity to hoist, then we're done.
1225     // In stress mode, try to hoist even the single operations.
1226     bool try_to_hoist = StressGCM || (LCA != early);
1227 
 1228     // Must-clone nodes must stay next to their use; no hoisting allowed.
 1229     // Also cannot hoist nodes that alter memory or are otherwise not
1230     // allocatable (hoisting can make a value live longer, leading to
1231     // anti and output dependency problems which are normally resolved
1232     // by the register allocator giving everyone a different register).
1233     if (mach != NULL && must_clone[mach->ideal_Opcode()])
1234       try_to_hoist = false;
1235 
1236     Block* late = NULL;
1237     if (try_to_hoist) {
1238       // Now find the block with the least execution frequency.




  24 
  25 #include "precompiled.hpp"
  26 #include "libadt/vectset.hpp"
  27 #include "memory/allocation.inline.hpp"
  28 #include "opto/block.hpp"
  29 #include "opto/c2compiler.hpp"
  30 #include "opto/callnode.hpp"
  31 #include "opto/cfgnode.hpp"
  32 #include "opto/machnode.hpp"
  33 #include "opto/opcodes.hpp"
  34 #include "opto/phaseX.hpp"
  35 #include "opto/rootnode.hpp"
  36 #include "opto/runtime.hpp"
  37 #include "runtime/deoptimization.hpp"
  38 #if defined AD_MD_HPP
  39 # include AD_MD_HPP
  40 #elif defined TARGET_ARCH_MODEL_x86_32
  41 # include "adfiles/ad_x86_32.hpp"
  42 #elif defined TARGET_ARCH_MODEL_x86_64
  43 # include "adfiles/ad_x86_64.hpp"
  44 #elif defined TARGET_ARCH_MODEL_aarch64
  45 # include "adfiles/ad_aarch64.hpp"
  46 #elif defined TARGET_ARCH_MODEL_sparc
  47 # include "adfiles/ad_sparc.hpp"
  48 #elif defined TARGET_ARCH_MODEL_zero
  49 # include "adfiles/ad_zero.hpp"
  50 #elif defined TARGET_ARCH_MODEL_ppc_64
  51 # include "adfiles/ad_ppc_64.hpp"
  52 #endif
  53 
  54 
  55 // Portions of code courtesy of Clifford Click
  56 
  57 // Optimization - Graph Style
  58 
  59 // To avoid float value underflow
  60 #define MIN_BLOCK_FREQUENCY 1.e-35f
  61 
  62 //----------------------------schedule_node_into_block-------------------------
  63 // Insert node n into block b. Look for projections of n and make sure they
  64 // are in b also.
  65 void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {


  99     Block *pb = get_block_for_node(in0); // Block-projection already has basic block
 100     uint j = 0;
  101     if (pb->_num_succs != 1) {  // More than 1 successor?
 102       // Search for successor
 103       uint max = pb->number_of_nodes();
 104       assert( max > 1, "" );
 105       uint start = max - pb->_num_succs;
 106       // Find which output path belongs to projection
 107       for (j = start; j < max; j++) {
 108         if( pb->get_node(j) == in0 )
 109           break;
 110       }
 111       assert( j < max, "must find" );
 112       // Change control to match head of successor basic block
 113       j -= start;
 114     }
 115     n->set_req(0, pb->_succs[j]->head());
 116   }
 117 }
 118 
 119 static bool is_dominator(Block* d, Block* n) {
 120   return d->dom_lca(n) == d;
 121 }
 122 
 123 //------------------------------schedule_pinned_nodes--------------------------
 124 // Set the basic block for Nodes pinned into blocks
 125 void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
 126   // Allocate node stack of size C->live_nodes()+8 to avoid frequent realloc
 127   GrowableArray <Node *> spstack(C->live_nodes() + 8);
 128   spstack.push(_root);
 129   while (spstack.is_nonempty()) {
 130     Node* node = spstack.pop();
 131     if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
 132       if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
 133         assert(node->in(0), "pinned Node must have Control");
 134         // Before setting block replace block_proj control edge
 135         replace_block_proj_ctrl(node);
 136         Node* input = node->in(0);
 137         while (!input->is_block_start()) {
 138           input = input->in(0);
 139         }
 140         Block* block = get_block_for_node(input); // Basic block of controlling input
 141         schedule_node_into_block(node, block);
 142       }
 143 
 144       // If the node has precedence edges (added when CastPP nodes are
 145       // removed in final_graph_reshaping), fix the control of the
 146       // node to cover the precedence edges and remove the
 147       // dependencies.
 148       Node* n = NULL;
 149       for (uint i = node->len()-1; i >= node->req(); i--) {
 150         Node* m = node->in(i);
 151         if (m == NULL) continue;
 152         // Skip the precedence edge if the test that guarded a CastPP:
 153         // - was optimized out during escape analysis
 154         // (OptimizePtrCompare): the CastPP's control isn't an end of
 155         // block.
 156         // - is moved in the branch of a dominating If: the control of
 157         // the CastPP is then a Region.
 158         if (m->is_block_proj() || m->is_block_start()) {
 159           node->rm_prec(i);
 160           if (n == NULL) {
 161             n = m;
 162           } else {
 163             Block* bn = get_block_for_node(n);
 164             Block* bm = get_block_for_node(m);
 165             assert(is_dominator(bn, bm) || is_dominator(bm, bn), "one must dominate the other");
 166             n = is_dominator(bn, bm) ? m : n;
 167           }
 168         }
 169       }
 170       if (n != NULL) {
 171         assert(node->in(0), "control should have been set");
 172         Block* bn = get_block_for_node(n);
 173         Block* bnode = get_block_for_node(node->in(0));
 174         assert(is_dominator(bn, bnode) || is_dominator(bnode, bn), "one must dominate the other");
 175         if (!is_dominator(bn, bnode)) {
 176           node->set_req(0, n);
 177         }
 178       }
 179 
  180       // Process all inputs that are non-NULL
 181       for (int i = node->req() - 1; i >= 0; --i) {
 182         if (node->in(i) != NULL) {
 183           spstack.push(node->in(i));
 184         }
 185       }
 186     }
 187   }
 188 }
 189 
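The block added to schedule_pinned_nodes in this version folds precedence edges left behind by removed CastPP tests into the control edge: every candidate that is a block projection or block start is dropped, the deepest candidate block (which, per the assert, lies on one dominator chain with the others) is remembered, and the node's control is moved down to that candidate unless the candidate's block already dominates the current control block. A hypothetical, self-contained sketch of that selection logic (not the real Block/Node API):

// Hypothetical, self-contained sketch (not the real Block/Node API) of the
// selection logic: keep the deepest candidate block and move the node's
// control down to it unless the current control is already dominated by it.
#include <cassert>
#include <vector>

struct Blk {
  Blk* idom;       // immediate dominator, like Block::_idom
  int  dom_depth;  // depth in the dominator tree, like Block::_dom_depth
};

// d dominates n iff walking n up the dominator tree reaches d.
static bool is_dominator(Blk* d, Blk* n) {
  while (n != nullptr && n->dom_depth > d->dom_depth) n = n->idom;
  return n == d;
}

struct NodeSketch {
  Blk* control;            // block of the node's current control input, in(0)
  std::vector<Blk*> prec;  // blocks of the precedence-edge guards
};

// Fold the precedence blocks into the control edge.
static void cover_precedences(NodeSketch& node) {
  Blk* deepest = nullptr;
  for (Blk* b : node.prec) {
    if (deepest == nullptr) { deepest = b; continue; }
    // As in the real code, candidates must lie on one dominator chain.
    assert(is_dominator(deepest, b) || is_dominator(b, deepest));
    if (is_dominator(deepest, b)) deepest = b;   // keep the deeper block
  }
  node.prec.clear();                             // the edges are consumed
  if (deepest != nullptr && !is_dominator(deepest, node.control)) {
    node.control = deepest;                      // strengthen the control edge
  }
}

int main() {
  Blk root{nullptr, 0}, mid{&root, 1}, deep{&mid, 2};
  NodeSketch node{&mid, {&root, &deep}};  // control currently sits in 'mid'
  cover_precedences(node);
  assert(node.control == &deep);          // the deepest guard wins
}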
 190 #ifdef ASSERT
 191 // Assert that new input b2 is dominated by all previous inputs.
  192 // Check this by seeing that it is dominated by b1, the deepest
  193 // input observed before b2.
 194 static void assert_dom(Block* b1, Block* b2, Node* n, const PhaseCFG* cfg) {
 195   if (b1 == NULL)  return;
 196   assert(b1->_dom_depth < b2->_dom_depth, "sanity");
 197   Block* tmp = b2;
 198   while (tmp != b1 && tmp != NULL) {
 199     tmp = tmp->_idom;


 272         }
 273       }
 274 
 275       // First, visit all inputs and force them to get a block.  If an
 276       // input is already in a block we quit following inputs (to avoid
 277       // cycles). Instead we put that Node on a worklist to be handled
  278       // later (since ITS inputs may not have a block yet).
 279 
 280       // Assume all n's inputs will be processed
 281       bool done = true;
 282 
 283       while (input_index < parent_node->len()) {
 284         Node* in = parent_node->in(input_index++);
 285         if (in == NULL) {
 286           continue;
 287         }
 288 
 289         int is_visited = visited.test_set(in->_idx);
 290         if (!has_block(in)) {
 291           if (is_visited) {
 292             assert(false, "graph should be schedulable");
 293             return false;
 294           }
 295           // Save parent node and next input's index.
 296           nstack.push(parent_node, input_index);
 297           // Process current input now.
 298           parent_node = in;
 299           input_index = 0;
 300           // Not all n's inputs processed.
 301           done = false;
 302           break;
 303         } else if (!is_visited) {
 304           // Visit this guy later, using worklist
 305           roots.push(in);
 306         }
 307       }
 308 
 309       if (done) {
 310         // All of n's inputs have been processed, complete post-processing.
 311 
 312         // Some instructions are pinned into a block.  These include Region,


1087     self->dump();
1088     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1089       LCA->_pre_order,
1090       LCA->head()->_idx,
1091       start_latency,
1092       LCA->get_node(LCA->end_idx())->_idx,
1093       end_latency,
1094       least_freq);
1095   }
1096 #endif
1097 
1098   int cand_cnt = 0;  // number of candidates tried
1099 
1100   // Walk up the dominator tree from LCA (Lowest common ancestor) to
1101   // the earliest legal location.  Capture the least execution frequency.
1102   while (LCA != early) {
1103     LCA = LCA->_idom;         // Follow up the dominator tree
1104 
1105     if (LCA == NULL) {
1106       // Bailout without retry
1107       assert(false, "graph should be schedulable");
1108       C->record_method_not_compilable("late schedule failed: LCA == NULL");
1109       return least;
1110     }
1111 
1112     // Don't hoist machine instructions to the root basic block
1113     if (mach && LCA == root_block)
1114       break;
1115 
1116     uint start_lat = get_latency_for_node(LCA->head());
1117     uint end_idx   = LCA->end_idx();
1118     uint end_lat   = get_latency_for_node(LCA->get_node(end_idx));
1119     double LCA_freq = LCA->_freq;
1120 #ifndef PRODUCT
1121     if (trace_opto_pipelining()) {
1122       tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
1123         LCA->_pre_order, LCA->head()->_idx, start_lat, end_idx, end_lat, LCA_freq);
1124     }
1125 #endif
1126     cand_cnt++;
1127     if (LCA_freq < least_freq              || // Better Frequency


1242       continue;
1243     }
1244 
1245     // Check if 'self' could be anti-dependent on memory
1246     if (self->needs_anti_dependence_check()) {
1247       // Hoist LCA above possible-defs and insert anti-dependences to
1248       // defs in new LCA block.
1249       LCA = insert_anti_dependences(LCA, self);
1250     }
1251 
1252     if (early->_dom_depth > LCA->_dom_depth) {
1253       // Somehow the LCA has moved above the earliest legal point.
1254       // (One way this can happen is via memory_early_block.)
1255       if (C->subsume_loads() == true && !C->failing()) {
1256         // Retry with subsume_loads == false
1257         // If this is the first failure, the sentinel string will "stick"
1258         // to the Compile object, and the C2Compiler will see it and retry.
1259         C->record_failure(C2Compiler::retry_no_subsuming_loads());
1260       } else {
1261         // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
1262         assert(false, "graph should be schedulable");
1263         C->record_method_not_compilable("late schedule failed: incorrect graph");
1264       }
1265       return;
1266     }
1267 
1268     // If there is no opportunity to hoist, then we're done.
1269     // In stress mode, try to hoist even the single operations.
1270     bool try_to_hoist = StressGCM || (LCA != early);
1271 
 1272     // Must-clone nodes must stay next to their use; no hoisting allowed.
 1273     // Also cannot hoist nodes that alter memory or are otherwise not
1274     // allocatable (hoisting can make a value live longer, leading to
1275     // anti and output dependency problems which are normally resolved
1276     // by the register allocator giving everyone a different register).
1277     if (mach != NULL && must_clone[mach->ideal_Opcode()])
1278       try_to_hoist = false;
1279 
1280     Block* late = NULL;
1281     if (try_to_hoist) {
1282       // Now find the block with the least execution frequency.
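The search this comment introduces, together with the dominator walk shown earlier (the while (LCA != early) loop), picks the cheapest legal placement: starting from the LCA of all uses it follows _idom links up to the earliest legal block and remembers the block with the lowest execution frequency; the real code also weighs node latencies and the StressGCM randomization, and refuses to hoist machine nodes into the root block. A simplified, frequency-only sketch with hypothetical types:

// Simplified, frequency-only sketch with hypothetical types (the real code
// also weighs node latencies, the StressGCM randomization, and skips the root
// block for machine nodes).
#include <cstdio>

struct Blk {
  Blk*   idom;  // immediate dominator, like Block::_idom
  double freq;  // estimated execution frequency, like Block::_freq
};

// Return the cheapest block on the dominator chain from 'lca' up to 'early'.
// If the walk runs off the tree the graph is broken and we give up, mirroring
// the LCA == NULL bailout above.
static Blk* cheapest_between(Blk* lca, Blk* early) {
  Blk* least = lca;
  while (lca != early) {
    lca = lca->idom;                   // hoist one dominator level
    if (lca == nullptr) return least;  // malformed graph: bail out
    if (lca->freq < least->freq) least = lca;
  }
  return least;
}

int main() {
  Blk root{nullptr, 1.0};
  Blk preheader{&root, 1.0};
  Blk loop_body{&preheader, 1000.0};   // hot block: a bad place to schedule
  Blk* best = cheapest_between(&loop_body, &root);
  std::printf("best freq = %g\n", best->freq);  // prints 1
}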

