diff -r afbe18ae0905 -r adb9a7d94cb5 src/share/vm/opto/chaitin.cpp --- a/src/share/vm/opto/chaitin.cpp Thu Aug 15 11:59:19 2013 -0700 +++ b/src/share/vm/opto/chaitin.cpp Fri Aug 16 10:23:55 2013 +0200 @@ -40,10 +40,8 @@ #include "opto/opcodes.hpp" #include "opto/rootnode.hpp" -//============================================================================= - #ifndef PRODUCT -void LRG::dump( ) const { +void LRG::dump() const { ttyLocker ttyl; tty->print("%d ",num_regs()); _mask.dump(); @@ -94,7 +92,6 @@ } #endif -//------------------------------score------------------------------------------ // Compute score from cost and area. Low score is best to spill. static double raw_score( double cost, double area ) { return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; @@ -125,7 +122,6 @@ return score; } -//------------------------------LRG_List--------------------------------------- LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) { memset( _lidxs, 0, sizeof(uint)*max ); } @@ -211,7 +207,6 @@ return next; } -//------------------------------Chaitin---------------------------------------- PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) : PhaseRegAlloc(unique, cfg, matcher, #ifndef PRODUCT @@ -232,31 +227,31 @@ { NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) - _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); + _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency()); // Build a list of basic blocks, sorted by frequency - _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); + _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); // Experiment with sorting strategies to speed compilation double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket Block **buckets[NUMBUCKS]; // Array of buckets uint buckcnt[NUMBUCKS]; // Array of bucket counters double buckval[NUMBUCKS]; // Array of bucket value cutoffs for (uint i = 0; i < NUMBUCKS; i++) { - buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg._num_blocks); + buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks()); buckcnt[i] = 0; // Bump by three orders of magnitude each time cutoff *= 0.001; buckval[i] = cutoff; - for (uint j = 0; j < _cfg._num_blocks; j++) { + for (uint j = 0; j < _cfg.number_of_blocks(); j++) { buckets[i][j] = NULL; } } // Sort blocks into buckets - for (uint i = 0; i < _cfg._num_blocks; i++) { + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { for (uint j = 0; j < NUMBUCKS; j++) { - if ((j == NUMBUCKS - 1) || (_cfg._blocks[i]->_freq > buckval[j])) { + if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) { // Assign block to end of list for appropriate bucket - buckets[j][buckcnt[j]++] = _cfg._blocks[i]; + buckets[j][buckcnt[j]++] = _cfg.get_block(i); break; // kick out of inner loop } } @@ -269,10 +264,9 @@ } } - assert(blkcnt == _cfg._num_blocks, "Block array not totally filled"); + assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled"); } -//------------------------------Union------------------------------------------ // union 2 sets together. void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) { uint src = _lrg_map.find(src_n); @@ -285,7 +279,6 @@ _lrg_map.uf_map(dst, src); } -//------------------------------new_lrg---------------------------------------- void PhaseChaitin::new_lrg(const Node *x, uint lrg) { // Make the Node->LRG mapping _lrg_map.extend(x->_idx,lrg); @@ -311,7 +304,6 @@ return true; } -//------------------------------compact---------------------------------------- // Renumber the live ranges to compact them. Makes the IFG smaller. void PhaseChaitin::compact() { // Current the _uf_map contains a series of short chains which are headed @@ -677,20 +669,19 @@ C->set_indexSet_arena(NULL); // ResourceArea is at end of scope } -//------------------------------de_ssa----------------------------------------- void PhaseChaitin::de_ssa() { // Set initial Names for all Nodes. Most Nodes get the virtual register // number. A few get the ZERO live range number. These do not // get allocated, but instead rely on correct scheduling to ensure that // only one instance is simultaneously live at a time. uint lr_counter = 1; - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - uint cnt = b->_nodes.size(); + for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) { + Block* block = _cfg.get_block(i); + uint cnt = block->_nodes.size(); // Handle all the normal Nodes in the block for( uint j = 0; j < cnt; j++ ) { - Node *n = b->_nodes[j]; + Node *n = block->_nodes[j]; // Pre-color to the zero live range, or pick virtual register const RegMask &rm = n->out_RegMask(); _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0); @@ -701,52 +692,55 @@ } -//------------------------------gather_lrg_masks------------------------------- // Gather LiveRanGe information, including register masks. Modification of // cisc spillable in_RegMasks should not be done before AggressiveCoalesce. void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // Nail down the frame pointer live range - uint fp_lrg = _lrg_map.live_range_id(_cfg._root->in(1)->in(TypeFunc::FramePtr)); + uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr)); lrgs(fp_lrg)._cost += 1e12; // Cost is infinite // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions - for( uint j = 1; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for (uint j = 1; j < block->_nodes.size(); j++) { + Node* n = block->_nodes[j]; uint input_edge_start =1; // Skip control most nodes - if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base(); + if (n->is_Mach()) { + input_edge_start = n->as_Mach()->oper_input_base(); + } uint idx = n->is_Copy(); // Get virtual register number, same as LiveRanGe index uint vreg = _lrg_map.live_range_id(n); - LRG &lrg = lrgs(vreg); - if( vreg ) { // No vreg means un-allocable (e.g. memory) + LRG& lrg = lrgs(vreg); + if (vreg) { // No vreg means un-allocable (e.g. memory) // Collect has-copy bit - if( idx ) { + if (idx) { lrg._has_copy = 1; uint clidx = _lrg_map.live_range_id(n->in(idx)); - LRG ©_src = lrgs(clidx); + LRG& copy_src = lrgs(clidx); copy_src._has_copy = 1; } // Check for float-vs-int live range (used in register-pressure // calculations) const Type *n_type = n->bottom_type(); - if (n_type->is_floatingpoint()) + if (n_type->is_floatingpoint()) { lrg._is_float = 1; + } // Check for twice prior spilling. Once prior spilling might have // spilled 'soft', 2nd prior spill should have spilled 'hard' and // further spilling is unlikely to make progress. - if( _spilled_once.test(n->_idx) ) { + if (_spilled_once.test(n->_idx)) { lrg._was_spilled1 = 1; - if( _spilled_twice.test(n->_idx) ) + if (_spilled_twice.test(n->_idx)) { lrg._was_spilled2 = 1; + } } #ifndef PRODUCT @@ -783,16 +777,18 @@ // Check for bound register masks const RegMask &lrgmask = lrg.mask(); - if (lrgmask.is_bound(ireg)) + if (lrgmask.is_bound(ireg)) { lrg._is_bound = 1; + } // Check for maximum frequency value - if (lrg._maxfreq < b->_freq) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } // Check for oop-iness, or long/double // Check for multi-kill projection - switch( ireg ) { + switch (ireg) { case MachProjNode::fat_proj: // Fat projections have size equal to number of registers killed lrg.set_num_regs(rm.Size()); @@ -962,7 +958,7 @@ // AggressiveCoalesce. This effectively pre-virtual-splits // around uncommon uses of common defs. const RegMask &rm = n->in_RegMask(k); - if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * b->_freq) { + if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) { // Since we are BEFORE aggressive coalesce, leave the register // mask untrimmed by the call. This encourages more coalescing. // Later, AFTER aggressive, this live range will have to spill @@ -1006,8 +1002,9 @@ } // Check for maximum frequency value - if( lrg._maxfreq < b->_freq ) - lrg._maxfreq = b->_freq; + if (lrg._maxfreq < block->_freq) { + lrg._maxfreq = block->_freq; + } } // End for all allocated inputs } // end for all instructions @@ -1029,7 +1026,6 @@ } } -//------------------------------set_was_low------------------------------------ // Set the was-lo-degree bit. Conservative coalescing should not change the // colorability of the graph. If any live range was of low-degree before // coalescing, it should Simplify. This call sets the was-lo-degree bit. @@ -1066,7 +1062,6 @@ #define REGISTER_CONSTRAINED 16 -//------------------------------cache_lrg_info--------------------------------- // Compute cost/area ratio, in case we spill. Build the lo-degree list. void PhaseChaitin::cache_lrg_info( ) { @@ -1100,7 +1095,6 @@ } } -//------------------------------Pre-Simplify----------------------------------- // Simplify the IFG by removing LRGs of low degree that have NO copies void PhaseChaitin::Pre_Simplify( ) { @@ -1151,7 +1145,6 @@ // No more lo-degree no-copy live ranges to simplify } -//------------------------------Simplify--------------------------------------- // Simplify the IFG by removing LRGs of low degree. void PhaseChaitin::Simplify( ) { @@ -1288,7 +1281,6 @@ } -//------------------------------is_legal_reg----------------------------------- // Is 'reg' register legal for 'lrg'? static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) && @@ -1315,7 +1307,6 @@ return false; } -//------------------------------bias_color------------------------------------- // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1377,7 +1368,6 @@ return OptoReg::add( reg, chunk ); } -//------------------------------choose_color----------------------------------- // Choose a color in the current chunk OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) { assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)"); @@ -1399,7 +1389,6 @@ return lrg.mask().find_last_elem(); } -//------------------------------Select----------------------------------------- // Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted // in reverse order of removal. As long as nothing of hi-degree was yanked, // everything going back is guaranteed a color. Select that color. If some @@ -1574,8 +1563,6 @@ return spill_reg-LRG::SPILL_REG; // Return number of spills } - -//------------------------------copy_was_spilled------------------------------- // Copy 'was_spilled'-edness from the source Node to the dst Node. void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { if( _spilled_once.test(src->_idx) ) { @@ -1588,14 +1575,12 @@ } } -//------------------------------set_was_spilled-------------------------------- // Set the 'spilled_once' or 'spilled_twice' flag on a node. void PhaseChaitin::set_was_spilled( Node *n ) { if( _spilled_once.test_set(n->_idx) ) _spilled_twice.set(n->_idx); } -//------------------------------fixup_spills----------------------------------- // Convert Ideal spill instructions into proper FramePtr + offset Loads and // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. void PhaseChaitin::fixup_spills() { @@ -1605,16 +1590,16 @@ NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) // Grab the Frame Pointer - Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); + Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // For all instructions in block - uint last_inst = b->end_idx(); - for( uint j = 1; j <= last_inst; j++ ) { - Node *n = b->_nodes[j]; + uint last_inst = block->end_idx(); + for (uint j = 1; j <= last_inst; j++) { + Node* n = block->_nodes[j]; // Dead instruction??? assert( n->outcnt() != 0 ||// Nothing dead after post alloc @@ -1651,7 +1636,7 @@ assert( cisc->oper_input_base() == 2, "Only adding one edge"); cisc->ins_req(1,src); // Requires a memory edge } - b->_nodes.map(j,cisc); // Insert into basic block + block->_nodes.map(j,cisc); // Insert into basic block n->subsume_by(cisc, C); // Correct graph // ++_used_cisc_instructions; @@ -1677,7 +1662,6 @@ } // End of for all blocks } -//------------------------------find_base_for_derived-------------------------- // Helper to stretch above; recursively discover the base Node for a // given derived Node. Easy for AddP-related machine nodes, but needs // to be recursive for derived Phis. @@ -1707,7 +1691,7 @@ // Initialize it once and make it shared: // set control to _root and place it into Start block // (where top() node is placed). - base->init_req(0, _cfg._root); + base->init_req(0, _cfg.get_root_node()); Block *startb = _cfg.get_block_for_node(C->top()); startb->_nodes.insert(startb->find_node(C->top()), base ); _cfg.map_node_to_block(base, startb); @@ -1716,7 +1700,7 @@ if (_lrg_map.live_range_id(base) == 0) { new_lrg(base, maxlrg++); } - assert(base->in(0) == _cfg._root && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared"); + assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared"); derived_base_map[derived->_idx] = base; return base; } @@ -1779,8 +1763,6 @@ return base; } - -//------------------------------stretch_base_pointer_live_ranges--------------- // At each Safepoint, insert extra debug edges for each pair of derived value/ // base pointer that is live across the Safepoint for oopmap building. The // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the @@ -1792,14 +1774,14 @@ memset( derived_base_map, 0, sizeof(Node*)*C->unique() ); // For all blocks in RPO do... - for( uint i=0; i<_cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); // Note use of deep-copy constructor. I cannot hammer the original // liveout bits, because they are needed by the following coalesce pass. - IndexSet liveout(_live->live(b)); + IndexSet liveout(_live->live(block)); - for( uint j = b->end_idx() + 1; j > 1; j-- ) { - Node *n = b->_nodes[j-1]; + for (uint j = block->end_idx() + 1; j > 1; j--) { + Node* n = block->_nodes[j - 1]; // Pre-split compares of loop-phis. Loop-phis form a cycle we would // like to see in the same register. Compare uses the loop-phi and so @@ -1814,7 +1796,7 @@ Node *phi = n->in(1); if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) { Block *phi_block = _cfg.get_block_for_node(phi); - if (_cfg.get_block_for_node(phi_block->pred(2)) == b) { + if (_cfg.get_block_for_node(phi_block->pred(2)) == block) { const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI]; Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask ); insert_proj( phi_block, 1, spill, maxlrg++ ); @@ -1868,7 +1850,7 @@ if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND (_lrg_map.live_range_id(base) > 0) && // not a constant - _cfg.get_block_for_node(base) != b) { // base not def'd in blk) + _cfg.get_block_for_node(base) != block) { // base not def'd in blk) // Base pointer is not currently live. Since I stretched // the base pointer to here and it crosses basic-block // boundaries, the global live info is now incorrect. @@ -1903,15 +1885,12 @@ return must_recompute_live != 0; } - -//------------------------------add_reference---------------------------------- // Extend the node to LRG mapping void PhaseChaitin::add_reference(const Node *node, const Node *old_node) { _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node)); } -//------------------------------dump------------------------------------------- #ifndef PRODUCT void PhaseChaitin::dump(const Node *n) const { uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0; @@ -2017,8 +1996,9 @@ _matcher._new_SP, _framesize ); // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) - dump(_cfg._blocks[i]); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + dump(_cfg.get_block(i)); + } // End of per-block dump tty->print("\n"); @@ -2059,7 +2039,6 @@ tty->print_cr(""); } -//------------------------------dump_degree_lists------------------------------ void PhaseChaitin::dump_degree_lists() const { // Dump lo-degree list tty->print("Lo degree: "); @@ -2080,7 +2059,6 @@ tty->print_cr(""); } -//------------------------------dump_simplified-------------------------------- void PhaseChaitin::dump_simplified() const { tty->print("Simplified: "); for( uint i = _simplified; i; i = lrgs(i)._next ) @@ -2099,7 +2077,6 @@ return buf+strlen(buf); } -//------------------------------dump_register---------------------------------- // Dump a register name into a buffer. Be intelligent if we get called // before allocation is complete. char *PhaseChaitin::dump_register( const Node *n, char *buf ) const { @@ -2133,7 +2110,6 @@ return buf+strlen(buf); } -//----------------------dump_for_spill_split_recycle-------------------------- void PhaseChaitin::dump_for_spill_split_recycle() const { if( WizardMode && (PrintCompilation || PrintOpto) ) { // Display which live ranges need to be split and the allocator's state @@ -2149,7 +2125,6 @@ } } -//------------------------------dump_frame------------------------------------ void PhaseChaitin::dump_frame() const { const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); const TypeTuple *domain = C->tf()->domain(); @@ -2255,17 +2230,16 @@ tty->print_cr("#"); } -//------------------------------dump_bb---------------------------------------- void PhaseChaitin::dump_bb( uint pre_order ) const { tty->print_cr("---dump of B%d---",pre_order); - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; - if( b->_pre_order == pre_order ) - dump(b); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); + if (block->_pre_order == pre_order) { + dump(block); + } } } -//------------------------------dump_lrg--------------------------------------- void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const { tty->print_cr("---dump of L%d---",lidx); @@ -2287,17 +2261,17 @@ tty->cr(); } // For all blocks - for( uint i = 0; i < _cfg._num_blocks; i++ ) { - Block *b = _cfg._blocks[i]; + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { + Block* block = _cfg.get_block(i); int dump_once = 0; // For all instructions - for( uint j = 0; j < b->_nodes.size(); j++ ) { - Node *n = b->_nodes[j]; + for( uint j = 0; j < block->_nodes.size(); j++ ) { + Node *n = block->_nodes[j]; if (_lrg_map.find_const(n) == lidx) { if (!dump_once++) { tty->cr(); - b->dump_head(&_cfg); + block->dump_head(&_cfg); } dump(n); continue; @@ -2312,7 +2286,7 @@ if (_lrg_map.find_const(m) == lidx) { if (!dump_once++) { tty->cr(); - b->dump_head(&_cfg); + block->dump_head(&_cfg); } dump(n); } @@ -2324,7 +2298,6 @@ } #endif // not PRODUCT -//------------------------------print_chaitin_statistics------------------------------- int PhaseChaitin::_final_loads = 0; int PhaseChaitin::_final_stores = 0; int PhaseChaitin::_final_memoves= 0;