/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_OPTO_BLOCK_HPP
#define SHARE_OPTO_BLOCK_HPP

#include "opto/multnode.hpp"
#include "opto/node.hpp"
#include "opto/phase.hpp"
#include "utilities/powerOfTwo.hpp"

// Optimization - Graph Style

class Block;
class CFGLoop;
class MachCallNode;
class Matcher;
class RootNode;
class VectorSet;
class PhaseChaitin;
struct Tarjan;

//------------------------------Block_Array------------------------------------
// Map dense integer indices to Blocks.  Uses classic doubling-array trick.
// Abstractly provides an infinite array of Block*'s, initialized to null.
// Note that the constructor just zeros things, and since I use Arena
// allocation I do not need a destructor to reclaim storage.
class Block_Array : public ArenaObj {
  uint _size;                   // allocated size, as opposed to formal limit
  DEBUG_ONLY(uint _limit;)      // limit to formal domain
  Arena *_arena;                // Arena to allocate in
  ReallocMark _nesting;         // Safety checks for arena reallocation
protected:
  Block **_blocks;
  void maybe_grow(uint i) {
    _nesting.check(_arena); // Check if a potential reallocation in the arena is safe
    if (i >= Max()) {
      grow(i);
    }
  }
  void grow(uint i);            // Grow array node to fit

public:
  Block_Array(Arena *a) : _size(OptoBlockListSize), _arena(a) {
    DEBUG_ONLY(_limit=0);
    _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
    for( int i = 0; i < OptoBlockListSize; i++ ) {
      _blocks[i] = nullptr;
    }
  }
  Block *lookup( uint i ) const // Lookup, or null for not mapped
  { return (i<Max()) ? _blocks[i] : (Block*)nullptr; }
  Block *operator[] ( uint i ) const // Lookup, or assert for not mapped
  { assert( i < Max(), "oob" ); return _blocks[i]; }
  // Extend the mapping: index i maps to Block *n.
  void map( uint i, Block *n ) { maybe_grow(i); _blocks[i] = n; }
  uint Max() const { DEBUG_ONLY(return _limit); return _size; }
};
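// Usage sketch (illustrative only; 'arena' and 'blk' are hypothetical locals):
//   Block_Array bmap(arena);
//   bmap.map(100, blk);            // grows the backing array on demand
//   Block* b = bmap.lookup(100);   // blk
//   Block* c = bmap.lookup(200);   // nullptr - index not mapped
//   Block* d = bmap[100];          // like lookup(), but asserts on unmapped indices in debug builds
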
class Block_List : public Block_Array {
public:
  uint _cnt;
  Block_List() : Block_List(Thread::current()->resource_area()) { }
  Block_List(Arena* a) : Block_Array(a), _cnt(0) { }

  void push( Block *b ) { map(_cnt++, b); }
  Block *pop() { return _blocks[--_cnt]; }
  Block *rpop() { Block *b = _blocks[0]; _blocks[0] = _blocks[--_cnt]; return b; }
  void remove( uint i );
  void insert( uint i, Block *n );
  uint size() const { return _cnt; }
  void reset() { _cnt = 0; }
  void print();
};


class CFGElement : public AnyObj {
public:
  double _freq; // Execution frequency (estimate)

  CFGElement() : _freq(0.0) {}
  virtual bool is_block() { return false; }
  virtual bool is_loop()  { return false; }
  Block*   as_Block()   { assert(is_block(), "must be block"); return (Block*)this; }
  CFGLoop* as_CFGLoop() { assert(is_loop(),  "must be loop");  return (CFGLoop*)this; }
};

//------------------------------Block------------------------------------------
// This class defines a Basic Block.
// Basic blocks are used during the output routines, and are not used during
// any optimization pass.  They are created late in the game.
class Block : public CFGElement {

 private:
  // Nodes in this block, in order
  Node_List _nodes;

 public:

  // Get the node at index 'at_index'; returns null if 'at_index' is out of bounds
  Node* get_node(uint at_index) const {
    return _nodes[at_index];
  }

  // Get the number of nodes in this block
  uint number_of_nodes() const {
    return _nodes.size();
  }

  // Map a node 'node' to index 'to_index' in the block; if the index is out of
  // bounds, the size of the node list is increased
  void map_node(Node* node, uint to_index) {
    _nodes.map(to_index, node);
  }

  // Insert a node 'node' at index 'at_index', moving all nodes at a higher
  // index one step; crashes if 'at_index' is out of bounds
  void insert_node(Node* node, uint at_index) {
    _nodes.insert(at_index, node);
  }

  // Remove a node at index 'at_index'
  void remove_node(uint at_index) {
    _nodes.remove(at_index);
  }

  // Push a node 'node' onto the node list
  void push_node(Node* node) {
    _nodes.push(node);
  }

  // Pop the last node off the node list
  Node* pop_node() {
    return _nodes.pop();
  }

  // Basic blocks have a Node which defines Control for all Nodes pinned in
  // this block.  This Node is a RegionNode.  Exception-causing Nodes
  // (division, subroutines) and Phi functions are always pinned.  Later,
  // every Node will get pinned to some block.
  Node *head() const { return get_node(0); }

  // CAUTION: num_preds() is ONE based, so that predecessor numbers match
  // input edges to Regions and Phis.
  uint num_preds() const { return head()->req(); }
  Node *pred(uint i) const { return head()->in(i); }
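  // Example (illustrative): if head() is a Region merging two control paths,
  // head()->req() == 3, so num_preds() == 3 and the real predecessors are
  // pred(1) and pred(2); slot 0 is not a control predecessor, which is why
  // predecessor numbering is one-based and matches Phi input numbering.
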
  // Array of successor blocks, same size as projs array
  Block_Array _succs;

  // Basic blocks have some number of Nodes which split control to all
  // following blocks.  These Nodes are always Projections.  The field in
  // the Projection and the block-ending Node determine which Block follows.
  uint _num_succs;

  // Basic blocks also carry all sorts of good old fashioned DFS information
  // used to find loops, loop nesting depth, dominators, etc.
  uint _pre_order;              // Pre-order DFS number

  // Dominator tree
  uint _dom_depth;              // Depth in dominator tree for fast LCA
  Block* _idom;                 // Immediate dominator block

  CFGLoop *_loop;               // Loop to which this block belongs
  uint _rpo;                    // Number in reverse post order walk

  virtual bool is_block() { return true; }
  float succ_prob(uint i);                // return probability of i'th successor
  int num_fall_throughs();                // How many fall-through candidates this block has
  void update_uncommon_branch(Block* un); // Lower branch prob to uncommon code
  bool succ_fall_through(uint i);         // Is successor "i" a fall-through candidate
  Block* lone_fall_through();             // Return lone fall-through Block or null

  Block* dom_lca(Block* that);            // Compute LCA in dominator tree.

  bool dominates(Block* that) {
    int dom_diff = this->_dom_depth - that->_dom_depth;
    if (dom_diff > 0) return false;
    for (; dom_diff < 0; dom_diff++) that = that->_idom;
    return this == that;
  }

  // Report the alignment required by this block.  Must be a power of 2.
  // The previous block will insert nops to get this alignment.
  uint code_alignment() const;
  uint compute_loop_alignment();

  // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
  // It is currently also used to scale such frequencies relative to
  // FreqCountInvocations as compared to the old default value of 1500.
#define BLOCK_FREQUENCY(f) ((f * (double) 1500) / FreqCountInvocations)

  // Register Pressure (estimate) for Splitting heuristic
  uint _reg_pressure;
  uint _ihrp_index;
  uint _freg_pressure;
  uint _fhrp_index;

  // Mark and visited bits for an LCA calculation in raise_above_anti_dependences.
  // Since they hold unique node indexes, they do not need reinitialization.
  node_idx_t _raise_LCA_mark;
  void set_raise_LCA_mark(node_idx_t x)    { _raise_LCA_mark = x; }
  node_idx_t raise_LCA_mark() const        { return _raise_LCA_mark; }
  node_idx_t _raise_LCA_visited;
  void set_raise_LCA_visited(node_idx_t x) { _raise_LCA_visited = x; }
  node_idx_t raise_LCA_visited() const     { return _raise_LCA_visited; }

  // Estimated size in bytes of first instructions in a loop.
  uint _first_inst_size;
  uint first_inst_size() const     { return _first_inst_size; }
  void set_first_inst_size(uint s) { _first_inst_size = s; }

  // Compute the size of first instructions in this block.
  uint compute_first_inst_size(uint& sum_size, uint inst_cnt, PhaseRegAlloc* ra);

  // Compute alignment padding if the block needs it.
  // Align a loop if the loop's padding is less than or equal to the padding limit
  // or the size of the first instructions in the loop > padding.
  uint alignment_padding(int current_offset) {
    int block_alignment = code_alignment();
    int max_pad = block_alignment-relocInfo::addr_unit();
    if( max_pad > 0 ) {
      assert(is_power_of_2(max_pad+relocInfo::addr_unit()), "");
      int current_alignment = current_offset & max_pad;
      if( current_alignment != 0 ) {
        uint padding = (block_alignment-current_alignment) & max_pad;
        if( has_loop_alignment() &&
            padding > (uint)MaxLoopPad &&
            first_inst_size() <= padding ) {
          return 0;
        }
        return padding;
      }
    }
    return 0;
  }
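  // Worked example (illustrative, assuming relocInfo::addr_unit() == 1): with
  // code_alignment() == 16 and current_offset == 0x43, max_pad == 15,
  // current_alignment == 3 and padding == (16 - 3) & 15 == 13 nop bytes.
  // If this block has loop alignment, MaxLoopPad < 13, and the loop's first
  // instructions fit within those 13 bytes, the padding is skipped (returns 0).
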
  // Connector blocks.  Connector blocks are basic blocks devoid of
  // instructions, but may have relevant non-instruction Nodes, such as
  // Phis or MergeMems.  Such blocks are discovered and marked during the
  // RemoveEmpty phase, and elided during Output.
  bool _connector;
  void set_connector() { _connector = true; }
  bool is_connector() const { return _connector; }

  // Loop_alignment will be set for blocks which are at the top of loops.
  // The block layout pass may rotate loops such that the loop head may not
  // be the sequentially first block of the loop encountered in the linear
  // list of blocks.  If the layout pass is not run, loop alignment is set
  // for each block which is the head of a loop.
  uint _loop_alignment;
  void set_loop_alignment(Block *loop_top) {
    uint new_alignment = loop_top->compute_loop_alignment();
    if (new_alignment > _loop_alignment) {
      _loop_alignment = new_alignment;
    }
  }
  uint loop_alignment() const { return _loop_alignment; }
  bool has_loop_alignment() const { return loop_alignment() > 0; }

  // Create a new Block with given head Node.
  // Creates the (empty) predecessor arrays.
  Block( Arena *a, Node *headnode )
    : CFGElement(),
      _nodes(a),
      _succs(a),
      _num_succs(0),
      _pre_order(0),
      _idom(nullptr),
      _loop(nullptr),
      _reg_pressure(0),
      _ihrp_index(1),
      _freg_pressure(0),
      _fhrp_index(1),
      _raise_LCA_mark(0),
      _raise_LCA_visited(0),
      _first_inst_size(999999),
      _connector(false),
      _loop_alignment(0) {
    _nodes.push(headnode);
  }

  // Index of 'end' Node
  uint end_idx() const {
    // %%%%% add a proj after every goto
    // so (last->is_block_proj() != last) always, then simplify this code
    // This will not give correct end_idx for block 0 when it only contains root.
    int last_idx = _nodes.size() - 1;
    Node *last = _nodes[last_idx];
    assert(last->is_block_proj() == last || last->is_block_proj() == _nodes[last_idx - _num_succs], "");
    return (last->is_block_proj() == last) ? last_idx : (last_idx - _num_succs);
  }

  // Basic blocks have a Node which ends them.  This Node determines which
  // basic block follows this one in the program flow.  This Node is either an
  // IfNode, a GotoNode, a JmpNode, or a ReturnNode.
  Node *end() const { return _nodes[end_idx()]; }
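  // Example (illustrative): a block ending in a Return has the Return as its
  // last node, so end() is the Return itself.  A block ending in an If is laid
  // out as [..., If, IfTrue, IfFalse] with _num_succs == 2, so end_idx() steps
  // back over the two trailing projections and end() is the If.
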
  // Add an instruction to an existing block.  It must go after the head
  // instruction and before the end instruction.
  void add_inst( Node *n ) { insert_node(n, end_idx()); }
  // Find node in block.  Fails if node not in block.
  uint find_node( const Node *n ) const;
  // Find and remove n from block list
  void find_remove( const Node *n );
  // Check whether the node is in the block.
  bool contains (const Node *n) const;

  // Whether the block is not root-like and does not have any predecessors.
  bool is_trivially_unreachable() const;

  // Return the empty status of a block
  enum { not_empty, empty_with_goto, completely_empty };
  int is_Empty() const;

  // Forward through connectors
  Block* non_connector() {
    Block* s = this;
    while (s->is_connector()) {
      s = s->_succs[0];
    }
    return s;
  }

  // Return true if b is a successor of this block
  bool has_successor(Block* b) const {
    for (uint i = 0; i < _num_succs; i++ ) {
      if (non_connector_successor(i) == b) {
        return true;
      }
    }
    return false;
  }

  // Successor block, after forwarding through connectors
  Block* non_connector_successor(int i) const {
    return _succs[i]->non_connector();
  }

  // Examine block's code shape to predict if it is not commonly executed.
  bool has_uncommon_code() const;

#ifndef PRODUCT
  // Debugging print of basic block
  void dump_bidx(const Block* orig, outputStream* st = tty) const;
  void dump_pred(const PhaseCFG* cfg, Block* orig, outputStream* st = tty) const;
  void dump_head(const PhaseCFG* cfg, outputStream* st = tty) const;
  void dump() const;
  void dump(const PhaseCFG* cfg) const;
#endif
};


//------------------------------PhaseCFG---------------------------------------
// Build an array of Basic Block pointers, one per Node.
class PhaseCFG : public Phase {
 private:
  // Root of whole program
  RootNode* _root;

  // The block containing the root node
  Block* _root_block;

  // List of basic blocks that are created during CFG creation
  Block_List _blocks;

  // Count of basic blocks
  uint _number_of_blocks;

  // Arena for the blocks to be stored in
  Arena* _block_arena;

  // Info used for scheduling
  PhaseChaitin* _regalloc;

  // Register pressure heuristic used?
  bool _scheduling_for_pressure;

  // The matcher for this compilation
  Matcher& _matcher;

  // Map nodes to owning basic block
  Block_Array _node_to_block_mapping;

  // Loop from the root
  CFGLoop* _root_loop;

  // Outermost loop frequency
  double _outer_loop_frequency;

  // Per node latency estimation, valid only during GCM
  GrowableArray<uint>* _node_latency;

  // Build a proper looking cfg.  Return count of basic blocks
  uint build_cfg();

  // Build the dominator tree so that we know where we can move instructions
  void build_dominator_tree();

  // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
  void estimate_block_frequency();

  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
  // Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
  void global_code_motion();

  // Schedule Nodes early in their basic blocks.
  bool schedule_early(VectorSet &visited, Node_Stack &roots);

  // For each node, find the latest block it can be scheduled into
  // and then select the cheapest block between the latest and earliest
  // block to place the node.
  void schedule_late(VectorSet &visited, Node_Stack &stack);

  // Compute the (backwards) latency of a node from a single use
  int latency_from_use(Node *n, const Node *def, Node *use);

  // Compute the (backwards) latency of a node from the uses of this instruction
  void partial_latency_of_defs(Node *n);

  // Compute the instruction global latency with a backwards walk
  void compute_latencies_backwards(VectorSet &visited, Node_Stack &stack);

  // Check if a block between early and LCA block of uses is cheaper by
  // frequency-based policy, latency-based policy and random-based policy
  bool is_cheaper_block(Block* LCA, Node* self, uint target_latency,
                        uint end_latency, double least_freq,
                        int cand_cnt, bool in_latency);

  // Pick a block between early and late that is a cheaper alternative
  // to late.  Helper for schedule_late.
  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);

  bool schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call, intptr_t* recalc_pressure_nodes);
  void set_next_call(const Block* block, Node* n, VectorSet& next_call) const;
  void needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call);

  // Perform basic-block local scheduling
  Node* select(Block* block, Node_List& worklist, GrowableArray<int>& ready_cnt, VectorSet& next_call, uint sched_slot,
               intptr_t* recalc_pressure_nodes);
  void adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_pressure_nodes, bool finalize_mode);

  // Schedule a call next in the block
  uint sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call);

  // Cleanup if any code lands between a Call and its Catch
  void call_catch_cleanup(Block* block);

  Node* catch_cleanup_find_cloned_def(Block* use_blk, Node* def, Block* def_blk, int n_clone_idx);
  void  catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx);

  // Detect implicit-null-check opportunities.  Basically, find null checks
  // with suitable memory ops nearby.  Use the memory op to do the null check.
  // I can generate a memory op if there is not one nearby.
  void implicit_null_check(Block* block, Node *proj, Node *val, int allowed_reasons);

  // Perform a Depth First Search (DFS).
  // Setup 'vertex' as DFS to vertex mapping.
  // Setup 'semi' as vertex to DFS mapping.
  // Set 'parent' to DFS parent.
  uint do_DFS(Tarjan* tarjan, uint rpo_counter);

  // Helper function to insert a node into a block
  void schedule_node_into_block( Node *n, Block *b );

  void replace_block_proj_ctrl( Node *n );

  // Set the basic block for pinned Nodes
  void schedule_pinned_nodes( VectorSet &visited );

  // I'll need a few machine-specific GotoNodes.  Clone from this one.
  // Used when building the CFG and creating end nodes for blocks.
  MachNode* _goto;

  Block* raise_above_anti_dependences(Block* LCA, Node* load, bool verify = false);
  void verify_anti_dependences(Block* LCA, Node* load) const {
    assert(LCA == get_block_for_node(load), "should already be scheduled");
    const_cast<PhaseCFG*>(this)->raise_above_anti_dependences(LCA, load, true);
  }

  bool move_to_next(Block* bx, uint b_index);
  void move_to_end(Block* bx, uint b_index);

  void insert_goto_at(uint block_no, uint succ_no);

  // Check for NeverBranch at block end.  This needs to become a GOTO to the
  // true target.  NeverBranch are treated as a conditional branch that always
  // goes the same direction for most of the optimizer and are used to give a
  // fake exit path to infinite loops.  At this late stage they need to turn
  // into Goto's so that when you enter the infinite loop you indeed hang.
  void convert_NeverBranch_to_Goto(Block *b);

  CFGLoop* create_loop_tree();
  bool is_dominator(Node* dom_node, Node* node);
  bool is_CFG(Node* n);
  bool is_control_proj_or_safepoint(Node* n) const;
  Block* find_block_for_node(Node* n) const;
  bool is_dominating_control(Node* dom_ctrl, Node* n);

#ifndef PRODUCT
  bool _trace_opto_pipelining;  // tracing flag
#endif

 public:
  PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);

  void set_latency_for_node(Node* node, int latency) {
    _node_latency->at_put_grow(node->_idx, latency);
  }

  uint get_latency_for_node(Node* node) {
    return _node_latency->at_grow(node->_idx);
  }

  // Get the outermost loop frequency
  double get_outer_loop_frequency() const {
    return _outer_loop_frequency;
  }

  // Get the root node of the CFG
  RootNode* get_root_node() const {
    return _root;
  }

  // Get the block of the root node
  Block* get_root_block() const {
    return _root_block;
  }

  // Add a block at a position and move the later ones one step
  void add_block_at(uint pos, Block* block) {
    _blocks.insert(pos, block);
    _number_of_blocks++;
  }

  // Adds a block to the top of the block list
  void add_block(Block* block) {
    _blocks.push(block);
    _number_of_blocks++;
  }

  // Clear the list of blocks
  void clear_blocks() {
    _blocks.reset();
    _number_of_blocks = 0;
  }

  // Get the block at position pos in _blocks
  Block* get_block(uint pos) const {
    return _blocks[pos];
  }

  // Number of blocks
  uint number_of_blocks() const {
    return _number_of_blocks;
  }

  // set which block this node should reside in
  void map_node_to_block(const Node* node, Block* block) {
    _node_to_block_mapping.map(node->_idx, block);
  }

  // removes the mapping from a node to a block
  void unmap_node_from_block(const Node* node) {
    _node_to_block_mapping.map(node->_idx, nullptr);
  }

  // get the block in which this node resides
  Block* get_block_for_node(const Node* node) const {
    return _node_to_block_mapping[node->_idx];
  }

  // does this node reside in a block; returns true if it does
  bool has_block(const Node* node) const {
    return (_node_to_block_mapping.lookup(node->_idx) != nullptr);
  }
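  // Usage sketch (illustrative; 'cfg', 'n' and 'b' are hypothetical locals):
  //   cfg.map_node_to_block(n, b);                // record that n is placed in b
  //   Block* owner = cfg.get_block_for_node(n);   // == b
  //   bool placed  = cfg.has_block(n);            // true until unmap_node_from_block(n)
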
  // Use frequency calculations and code shape to predict if the block
  // is uncommon.
  bool is_uncommon(const Block* block);

#ifdef ASSERT
  Unique_Node_List _raw_oops;
#endif

  // Do global code motion by first building the dominator tree and estimating block frequencies.
  // Returns true on success.
  bool do_global_code_motion();

  // Compute the (backwards) latency of a node from the uses
  void latency_from_uses(Node *n);

  // Set loop alignment
  void set_loop_alignment();

  // Remove empty basic blocks
  void remove_empty_blocks();
  Block *fixup_trap_based_check(Node *branch, Block *block, int block_pos, Block *bnext);
  void fixup_flow();
  // Remove all blocks that are transitively unreachable.  Such blocks can be
  // found e.g. after PhaseCFG::convert_NeverBranch_to_Goto().  This function
  // assumes post-fixup_flow() block indices (Block::_pre_order, Block::_rpo).
  void remove_unreachable_blocks();

  // Insert a node into a block at index and map the node to the block
  void insert(Block *b, uint idx, Node *n) {
    b->insert_node(n, idx);
    map_node_to_block(n, b);
  }

  // Check all nodes and postalloc_expand them if necessary.
  void postalloc_expand(PhaseRegAlloc* _ra);

#ifndef PRODUCT
  bool trace_opto_pipelining() const { return _trace_opto_pipelining; }

  // Debugging print of CFG
  void dump( ) const;           // CFG only
  void _dump_cfg( const Node *end, VectorSet &visited ) const;
  void dump_headers();
#else
  bool trace_opto_pipelining() const { return false; }
#endif

  bool unrelated_load_in_store_null_block(Node* store, Node* load);

  // Check that block b is in the home loop (or an ancestor) of n, if n is a
  // memory writer.
  void verify_memory_writer_placement(const Block* b, const Node* n) const NOT_DEBUG_RETURN;
  // Check local dominator tree invariants.
  void verify_dominator_tree() const NOT_DEBUG_RETURN;
  void verify() const NOT_DEBUG_RETURN;
};


//------------------------------UnionFind--------------------------------------
// Map Block indices to a block-index for a cfg-cover.
// Array lookup in the optimized case.
class UnionFind : public ResourceObj {
  uint _cnt, _max;
  uint* _indices;
  ReallocMark _nesting; // Safety checks for arena reallocation
public:
  UnionFind( uint max );
  void reset( uint max );  // Reset to identity map for [0..max]

  uint lookup( uint nidx ) const {
    return _indices[nidx];
  }
  uint operator[] (uint nidx) const { return lookup(nidx); }

  void map( uint from_idx, uint to_idx ) {
    assert( from_idx < _cnt, "oob" );
    _indices[from_idx] = to_idx;
  }
  void extend( uint from_idx, uint to_idx );

  uint Size() const { return _cnt; }

  uint Find( uint idx ) {
    assert( idx < 65536, "Must fit into uint");
    uint uf_idx = lookup(idx);
    return (uf_idx == idx) ? uf_idx : Find_compress(idx);
  }
  uint Find_compress( uint idx );
  uint Find_const( uint idx ) const;
  void Union( uint idx1, uint idx2 );

};
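// Usage sketch (illustrative; 'nblocks' is a hypothetical block count): after
// reset() every index is its own root; Union merges two sets and Find returns
// the current root, compressing paths as it goes.
//   UnionFind uf(nblocks);
//   uf.reset(nblocks);
//   uf.Union(3, 7);
//   bool same = (uf.Find(3) == uf.Find(7));  // true
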
//----------------------------BlockProbPair---------------------------
// Ordered pair of a successor Block and the probability of the edge to it.
class BlockProbPair {
protected:
  Block* _target;      // block target
  double _prob;        // probability of edge to block
public:
  BlockProbPair() : _target(nullptr), _prob(0.0) {}
  BlockProbPair(Block* b, double p) : _target(b), _prob(p) {}

  Block* get_target() const { return _target; }
  double get_prob() const { return _prob; }
};

//------------------------------CFGLoop-------------------------------------------
class CFGLoop : public CFGElement {
  int _id;
  int _depth;
  CFGLoop *_parent;      // root of loop tree is the method level "pseudo" loop, its parent is null
  CFGLoop *_sibling;     // null terminated list
  CFGLoop *_child;       // first child, use child's sibling to visit all immediately nested loops
  GrowableArray<CFGElement*> _members; // list of members of loop
  GrowableArray<BlockProbPair> _exits; // list of successor blocks and their probabilities
  double _exit_prob;     // probability any loop exit is taken on a single loop iteration
  void update_succ_freq(Block* b, double freq);

 public:
  CFGLoop(int id) :
    CFGElement(),
    _id(id),
    _depth(0),
    _parent(nullptr),
    _sibling(nullptr),
    _child(nullptr),
    _exit_prob(1.0f) {}
  CFGLoop* parent() { return _parent; }
  void push_pred(Block* blk, int i, Block_List& worklist, PhaseCFG* cfg);
  void add_member(CFGElement *s) { _members.push(s); }
  void add_nested_loop(CFGLoop* cl);
  Block* head() {
    assert(_members.at(0)->is_block(), "head must be a block");
    Block* hd = _members.at(0)->as_Block();
    assert(hd->_loop == this, "just checking");
    assert(hd->head()->is_Loop(), "must begin with loop head node");
    return hd;
  }
  Block* backedge_block(); // Return the block on the backedge of the loop (else null)
  void compute_loop_depth(int depth);
  void compute_freq(); // compute frequency with loop assuming head freq 1.0f
  void scale_freq();   // scale frequency by loop trip count (including outer loops)
  double outer_loop_freq() const; // frequency of outer loop
  bool in_loop_nest(Block* b);
  double trip_count() const { return 1.0 / _exit_prob; }
  virtual bool is_loop() { return true; }
  int id() { return _id; }
  int depth() { return _depth; }

#ifndef PRODUCT
  void dump( ) const;
  void dump_tree() const;
#endif
};
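// Note (illustrative): _exit_prob is the probability that some exit is taken
// on a given iteration, so with _exit_prob == 0.1 the expected trip_count()
// is 1.0 / 0.1 == 10 iterations.
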
//----------------------------------CFGEdge------------------------------------
// An edge between two basic blocks that will be embodied by a branch or a
// fall-through.
class CFGEdge : public ResourceObj {
 private:
  Block * _from;        // Source basic block
  Block * _to;          // Destination basic block
  double _freq;         // Execution frequency (estimate)
  int    _state;
  bool   _infrequent;
  int    _from_pct;
  int    _to_pct;

  // Private accessors
  int  from_pct() const { return _from_pct; }
  int  to_pct()   const { return _to_pct; }
  int  from_infrequent() const { return from_pct() < BlockLayoutMinDiamondPercentage; }
  int  to_infrequent()   const { return to_pct()   < BlockLayoutMinDiamondPercentage; }

 public:
  enum {
    open,             // initial edge state; unprocessed
    connected,        // edge used to connect two traces together
    interior          // edge is interior to trace (could be backedge)
  };

  CFGEdge(Block *from, Block *to, double freq, int from_pct, int to_pct) :
    _from(from), _to(to), _freq(freq),
    _state(open), _from_pct(from_pct), _to_pct(to_pct) {
    _infrequent = from_infrequent() || to_infrequent();
  }

  double freq() const { return _freq; }
  Block* from() const { return _from; }
  Block* to  () const { return _to; }
  int  infrequent() const { return _infrequent; }
  int  state() const { return _state; }

  void set_state(int state) { _state = state; }

#ifndef PRODUCT
  void dump( ) const;
#endif
};


//-----------------------------------Trace-------------------------------------
// An ordered list of basic blocks.
class Trace : public ResourceObj {
 private:
  uint _id;             // Unique Trace id (derived from initial block)
  Block ** _next_list;  // Array mapping index to next block
  Block ** _prev_list;  // Array mapping index to previous block
  Block * _first;       // First block in the trace
  Block * _last;        // Last block in the trace

  void set_next(Block *b, Block *n) const { _next_list[b->_pre_order] = n; }

  // Return the block that precedes "b" in the trace.
  Block * prev(Block *b) const { return _prev_list[b->_pre_order]; }
  void set_prev(Block *b, Block *p) const { _prev_list[b->_pre_order] = p; }

  // We've discovered a loop in this trace.  Reset last to be "b", and first to
  // be the block following "b".
  void break_loop_after(Block *b) {
    _last = b;
    _first = next(b);
    set_prev(_first, nullptr);
    set_next(_last, nullptr);
  }

 public:

  Trace(Block *b, Block **next_list, Block **prev_list) :
    _id(b->_pre_order),
    _next_list(next_list),
    _prev_list(prev_list),
    _first(b),
    _last(b) {
    set_next(b, nullptr);
    set_prev(b, nullptr);
  }

  // Return the id number
  uint id() const { return _id; }
  void set_id(uint id) { _id = id; }

  // Return the first block in the trace
  Block * first_block() const { return _first; }

  // Return the last block in the trace
  Block * last_block() const { return _last; }

  // Return the block that follows "b" in the trace.
  Block * next(Block *b) const { return _next_list[b->_pre_order]; }
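  // Representation sketch (illustrative): a trace B3 -> B7 -> B5 is kept in the
  // shared side arrays, indexed by each block's _pre_order number:
  //   _next_list[3] == B7, _next_list[7] == B5, _next_list[5] == nullptr
  //   _prev_list[3] == nullptr, _prev_list[7] == B3, _prev_list[5] == B7
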
  // Insert a trace in the middle of this one after b
  void insert_after(Block *b, Trace *tr) {
    set_next(tr->last_block(), next(b));
    if (next(b) != nullptr) {
      set_prev(next(b), tr->last_block());
    }

    set_next(b, tr->first_block());
    set_prev(tr->first_block(), b);

    if (b == _last) {
      _last = tr->last_block();
    }
  }

  void insert_before(Block *b, Trace *tr) {
    Block *p = prev(b);
    assert(p != nullptr, "use append instead");
    insert_after(p, tr);
  }

  // Append another trace to this one.
  void append(Trace *tr) {
    insert_after(_last, tr);
  }

  // Append a block at the end of this trace
  void append(Block *b) {
    set_next(_last, b);
    set_prev(b, _last);
    _last = b;
  }

  bool backedge(CFGEdge *e);

#ifndef PRODUCT
  void dump( ) const;
#endif
};

//------------------------------PhaseBlockLayout-------------------------------
// Rearrange blocks into some canonical order, based on edges and their frequencies
class PhaseBlockLayout : public Phase {
  PhaseCFG &_cfg;               // Control flow graph

  GrowableArray<CFGEdge *> *edges;
  Trace **traces;
  Block **next;
  Block **prev;
  UnionFind *uf;

  // Given a block, find its encompassing Trace
  Trace * trace(Block *b) {
    return traces[uf->Find_compress(b->_pre_order)];
  }
 public:
  PhaseBlockLayout(PhaseCFG &cfg);

  void find_edges();
  void grow_traces();
  void merge_traces(bool loose_connections);
  void reorder_traces(int count);
  void union_traces(Trace* from, Trace* to);
};

#endif // SHARE_OPTO_BLOCK_HPP