/*
 * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/oopMap.hpp"
#include "memory/allocation.inline.hpp"
#include "opto/addnode.hpp"
#include "opto/block.hpp"
#include "opto/callnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/chaitin.hpp"
#include "opto/coalesce.hpp"
#include "opto/connode.hpp"
#include "opto/idealGraphPrinter.hpp"
#include "opto/indexSet.hpp"
#include "opto/machnode.hpp"
#include "opto/memnode.hpp"
#include "opto/opcodes.hpp"
#include "opto/rootnode.hpp"

//=============================================================================

#ifndef PRODUCT
void LRG::dump( ) const {
  ttyLocker ttyl;
  tty->print("%d ",num_regs());
  _mask.dump();
  if( _msize_valid ) {
    if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);
    else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
  } else {
    tty->print(", #?(%d) ",_mask.Size());
  }

  tty->print("EffDeg: ");
  if( _degree_valid ) tty->print( "%d ", _eff_degree );
  else tty->print("? ");
"); duke@435: never@730: if( is_multidef() ) { duke@435: tty->print("MultiDef "); duke@435: if (_defs != NULL) { duke@435: tty->print("("); duke@435: for (int i = 0; i < _defs->length(); i++) { duke@435: tty->print("N%d ", _defs->at(i)->_idx); duke@435: } duke@435: tty->print(") "); duke@435: } duke@435: } duke@435: else if( _def == 0 ) tty->print("Dead "); duke@435: else tty->print("Def: N%d ",_def->_idx); duke@435: duke@435: tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score()); duke@435: // Flags duke@435: if( _is_oop ) tty->print("Oop "); duke@435: if( _is_float ) tty->print("Float "); kvn@3882: if( _is_vector ) tty->print("Vector "); duke@435: if( _was_spilled1 ) tty->print("Spilled "); duke@435: if( _was_spilled2 ) tty->print("Spilled2 "); duke@435: if( _direct_conflict ) tty->print("Direct_conflict "); duke@435: if( _fat_proj ) tty->print("Fat "); duke@435: if( _was_lo ) tty->print("Lo "); duke@435: if( _has_copy ) tty->print("Copy "); duke@435: if( _at_risk ) tty->print("Risk "); duke@435: duke@435: if( _must_spill ) tty->print("Must_spill "); duke@435: if( _is_bound ) tty->print("Bound "); duke@435: if( _msize_valid ) { duke@435: if( _degree_valid && lo_degree() ) tty->print("Trivial "); duke@435: } duke@435: duke@435: tty->cr(); duke@435: } duke@435: #endif duke@435: duke@435: //------------------------------score------------------------------------------ duke@435: // Compute score from cost and area. Low score is best to spill. duke@435: static double raw_score( double cost, double area ) { duke@435: return cost - (area*RegisterCostAreaRatio) * 1.52588e-5; duke@435: } duke@435: duke@435: double LRG::score() const { duke@435: // Scale _area by RegisterCostAreaRatio/64K then subtract from cost. duke@435: // Bigger area lowers score, encourages spilling this live range. duke@435: // Bigger cost raise score, prevents spilling this live range. duke@435: // (Note: 1/65536 is the magic constant below; I dont trust the C optimizer duke@435: // to turn a divide by a constant into a multiply by the reciprical). duke@435: double score = raw_score( _cost, _area); duke@435: duke@435: // Account for area. Basically, LRGs covering large areas are better duke@435: // to spill because more other LRGs get freed up. duke@435: if( _area == 0.0 ) // No area? Then no progress to spill duke@435: return 1e35; duke@435: duke@435: if( _was_spilled2 ) // If spilled once before, we are unlikely duke@435: return score + 1e30; // to make progress again. 
double LRG::score() const {
  // Scale _area by RegisterCostAreaRatio/64K then subtract from cost.
  // Bigger area lowers score, encourages spilling this live range.
  // Bigger cost raises score, prevents spilling this live range.
  // (Note: 1/65536 is the magic constant below; I don't trust the C optimizer
  // to turn a divide by a constant into a multiply by the reciprocal).
  double score = raw_score( _cost, _area);

  // Account for area.  Basically, LRGs covering large areas are better
  // to spill because more other LRGs get freed up.
  if( _area == 0.0 )            // No area?  Then no progress to spill
    return 1e35;

  if( _was_spilled2 )           // If spilled once before, we are unlikely
    return score + 1e30;        // to make progress again.

  if( _cost >= _area*3.0 )      // Tiny area relative to cost
    return score + 1e17;        // Probably no progress to spill

  if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost
    return score + 1e10;        // Likely no progress to spill

  return score;
}

//------------------------------LRG_List---------------------------------------
LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
  memset( _lidxs, 0, sizeof(uint)*max );
}

void LRG_List::extend( uint nidx, uint lidx ) {
  _nesting.check();
  if( nidx >= _max ) {
    uint size = 16;
    while( size <= nidx ) size <<=1;
    _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size );
    _max = size;
  }
  while( _cnt <= nidx )
    _lidxs[_cnt++] = 0;
  _lidxs[nidx] = lidx;
}

#define NUMBUCKS 3

//------------------------------Chaitin----------------------------------------
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
  : PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT
       print_chaitin_statistics
#else
       NULL
#endif
       ),
    _names(unique), _uf_map(unique),
    _maxlrg(0), _live(0),
    _spilled_once(Thread::current()->resource_area()),
    _spilled_twice(Thread::current()->resource_area()),
    _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0),
    _oldphi(unique)
#ifndef PRODUCT
  , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif
{
  NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )

  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);

  uint i,j;
  // Build a list of basic blocks, sorted by frequency
  _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
  // Experiment with sorting strategies to speed compilation
  double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
  Block **buckets[NUMBUCKS];            // Array of buckets
  uint    buckcnt[NUMBUCKS];            // Array of bucket counters
  double  buckval[NUMBUCKS];            // Array of bucket value cutoffs
  for( i = 0; i < NUMBUCKS; i++ ) {
    buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
    buckcnt[i] = 0;
    // Bump by three orders of magnitude each time
    cutoff *= 0.001;
    buckval[i] = cutoff;
    for( j = 0; j < _cfg._num_blocks; j++ ) {
      buckets[i][j] = NULL;
    }
  }
  // Sort blocks into buckets
  for( i = 0; i < _cfg._num_blocks; i++ ) {
    for( j = 0; j < NUMBUCKS; j++ ) {
      if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) {
        // Assign block to end of list for appropriate bucket
        buckets[j][buckcnt[j]++] = _cfg._blocks[i];
        break;                  // kick out of inner loop
      }
    }
  }
  // Dump buckets into final block array
  uint blkcnt = 0;
  for( i = 0; i < NUMBUCKS; i++ ) {
    for( j = 0; j < buckcnt[i]; j++ ) {
      _blks[blkcnt++] = buckets[i][j];
    }
  }

  assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
}
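// Illustrative note: with NUMBUCKS == 3 the constructor above bands blocks
// at BLOCK_FREQUENCY(1.0)*1e-3 and *1e-6, with the last bucket catching
// everything colder.  Concatenating the buckets leaves _blks ordered
// hottest band first, so passes that walk _blks visit high-frequency
// blocks before cold ones.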

void PhaseChaitin::Register_Allocate() {

  // Above the OLD FP (and in registers) are the incoming arguments.  Stack
  // slots in this area are called "arg_slots".  Above the NEW FP (and in
  // registers) is the outgoing argument area; above that is the spill/temp
  // area.  These are all "frame_slots".  Arg_slots start at zero
  // stack_slots and count up to the known arg_size.  Frame_slots start at
  // the stack_slot #arg_size and go up.  After allocation I map stack
  // slots to actual offsets.  Stack-slots in the arg_slot area are biased
  // by the frame_size; stack-slots in the frame_slot area are biased by 0.

  _trip_cnt = 0;
  _alternate = 0;
  _matcher._allocation_started = true;

  ResourceArea split_arena;     // Arena for Split local resources
  ResourceArea live_arena;      // Arena for liveness & IFG info
  ResourceMark rm(&live_arena);

  // Need live-ness for the IFG; need the IFG for coalescing.  If the
  // liveness is JUST for coalescing, then I can get some mileage by renaming
  // all copy-related live ranges low and then using the max copy-related
  // live range as a cut-off for LIVE and the IFG.  In other words, I can
  // build a subset of LIVE and IFG just for copies.
  PhaseLive live(_cfg,_names,&live_arena);

  // Need IFG for coalescing and coloring
  PhaseIFG ifg( &live_arena );
  _ifg = &ifg;

  if (C->unique() > _names.Size())  _names.extend(C->unique()-1, 0);

  // Come out of SSA world to the Named world.  Assign (virtual) registers to
  // Nodes.  Use the same register for all inputs and the output of PhiNodes
  // - effectively ending SSA form.  This requires either coalescing live
  // ranges or inserting copies.  For the moment, we insert "virtual copies"
  // - we pretend there is a copy prior to each Phi in predecessor blocks.
  // We will attempt to coalesce such "virtual copies" before we manifest
  // them for real.
  de_ssa();

#ifdef ASSERT
  // Verify the graph before RA.
  verify(&live_arena);
#endif

  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;               // Mark live as being not available
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);          // Empty IFG
    gather_lrg_masks( false );  // Collect LRG masks
    live.compute( _maxlrg );    // Compute liveness
    _live = &live;              // Mark LIVE as being available
  }

  // Base pointers are currently "used" by instructions which define new
  // derived pointers.  This makes base pointers live up to where the
  // derived pointer is made, but not beyond.  Really, they need to be live
  // across any GC point where the derived value is live.  So this code looks
  // at all the GC points, and "stretches" the live range of any base pointer
  // to the GC point.
  if( stretch_base_pointer_live_ranges(&live_arena) ) {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
    // Since some live range stretched, I need to recompute live
    _live = NULL;
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( false );
    live.compute( _maxlrg );
    _live = &live;
  }
  // Create the interference graph using virtual copies
  build_ifg_virtual( );  // Include stack slots this time

  // Aggressive (but pessimistic) copy coalescing.
  // This pass works on virtual copies.  Any virtual copies which are not
  // coalesced get manifested as actual copies
  {
    // The IFG is/was triangular.  I am 'squaring it up' so Union can run
    // faster.  Union requires a 'for all' operation which is slow on the
    // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
    // meaning I can visit all the Nodes neighbors less than a Node in time
    // O(# of neighbors), but I have to visit all the Nodes greater than a
    // given Node and search them for an instance, i.e., time O(#MaxLRG)).
    _ifg->SquareUp();

    PhaseAggressiveCoalesce coalesce( *this );
    coalesce.coalesce_driver( );
    // Insert un-coalesced copies.  Visit all Phis.  Where inputs to a Phi do
    // not match the Phi itself, insert a copy.
    coalesce.insert_copies(_matcher);
  }
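  // Illustrative note: in the triangular form, enumerating all neighbors of
  // live range i means scanning row i (neighbors numbered below i) and then
  // probing every row above i -- O(#MaxLRG) work per query.  After
  // SquareUp() each row lists all of its neighbors, so the Union-Find
  // driven coalescing touches only O(degree) entries per live range.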

  // After aggressive coalesce, attempt a first cut at coloring.
  // To color, we need the IFG and for that we need LIVE.
  {
    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
    _live = NULL;
    rm.reset_to_mark();         // Reclaim working storage
    IndexSet::reset_memory(C, &live_arena);
    ifg.init(_maxlrg);
    gather_lrg_masks( true );
    live.compute( _maxlrg );
    _live = &live;
  }

  // Build physical interference graph
  uint must_spill = 0;
  must_spill = build_ifg_physical( &live_arena );
  // If we have a guaranteed spill, might as well spill now
  if( must_spill ) {
    if( !_maxlrg ) return;
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
    C->check_node_count(10*must_spill, "out of nodes before split");
    if (C->failing())  return;
    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    // or we failed to split
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
    if (C->failing())  return;

    NOT_PRODUCT( C->verify_graph_edges(); )

    compact();                  // Compact LRGs; return new lower max lrg

    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();       // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);        // Build a new interference graph
      gather_lrg_masks( true ); // Collect intersect mask
      live.compute( _maxlrg );  // Compute LIVE
      _live = &live;
    }
    build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();
    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing of those spills
      PhaseConservativeCoalesce coalesce( *this );
      // If max live ranges greater than cutoff, don't color the stack.
      // This cutoff can be larger than below since it is only done once.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();

#ifdef ASSERT
    verify(&live_arena, true);
#endif
  } else {
    ifg.SquareUp();
    ifg.Compute_Effective_Degree();
#ifdef ASSERT
    set_was_low();
#endif
  }

  // Prepare for Simplify & Select
  cache_lrg_info();           // Count degree of LRGs

  // Simplify the InterFerence Graph by removing LRGs of low degree.
  // LRGs of low degree are trivially colorable.
  Simplify();
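  // Illustrative note: Simplify() and Select() follow the classic
  // Chaitin-Briggs stack discipline -- Simplify() repeatedly removes a
  // trivially colorable (lo-degree) live range from the IFG and pushes it
  // on the _simplified list; Select() pops in LIFO order, so every
  // neighbor that can constrain a choice is already colored when a live
  // range is re-inserted.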

  // Select colors by re-inserting LRGs back into the IFG in reverse order.
  // Return whether or not something spills.
  uint spills = Select( );

  // If we spill, split and recycle the entire thing
  while( spills ) {
    if( _trip_cnt++ > 24 ) {
      DEBUG_ONLY( dump_for_spill_split_recycle(); )
      if( _trip_cnt > 27 ) {
        C->record_method_not_compilable("failed spill-split-recycle sanity check");
        return;
      }
    }

    if( !_maxlrg ) return;
    _maxlrg = Split(_maxlrg, &split_arena);  // Split spilling LRG everywhere
    // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
    C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
    if (C->failing())  return;

    compact();                  // Compact LRGs; return new lower max lrg

    // Nuke the live-ness and interference graph and LiveRanGe info
    {
      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
      _live = NULL;
      rm.reset_to_mark();       // Reclaim working storage
      IndexSet::reset_memory(C, &live_arena);
      ifg.init(_maxlrg);

      // Create LiveRanGe array.
      // Intersect register masks for all USEs and DEFs
      gather_lrg_masks( true );
      live.compute( _maxlrg );
      _live = &live;
    }
    must_spill = build_ifg_physical( &live_arena );
    _ifg->SquareUp();
    _ifg->Compute_Effective_Degree();

    // Only do conservative coalescing if requested
    if( OptoCoalesce ) {
      // Conservative (and pessimistic) copy coalescing
      PhaseConservativeCoalesce coalesce( *this );
      // A check for few live ranges determines how aggressive the coalesce is.
      coalesce.coalesce_driver( );
    }
    compress_uf_map_for_nodes();
#ifdef ASSERT
    verify(&live_arena, true);
#endif
    cache_lrg_info();           // Count degree of LRGs

    // Simplify the InterFerence Graph by removing LRGs of low degree.
    // LRGs of low degree are trivially colorable.
    Simplify();

    // Select colors by re-inserting LRGs back into the IFG in reverse order.
    // Return whether or not something spills.
    spills = Select( );
  }

  // Count number of Simplify-Select trips per coloring success.
  _allocator_attempts += _trip_cnt + 1;
  _allocator_successes += 1;

  // Peephole remove copies
  post_allocate_copy_removal();

#ifdef ASSERT
  // Verify the graph after RA.
  verify(&live_arena);
#endif

  // max_reg is past the largest *register* used.
  // Convert that to a frame_slot number.
  if( _max_reg <= _matcher._new_SP )
    _framesize = C->out_preserve_stack_slots();
  else _framesize = _max_reg -_matcher._new_SP;
  assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");

  // This frame must preserve the required fp alignment
  _framesize = round_to(_framesize, Matcher::stack_alignment_in_slots());
  assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
#ifndef PRODUCT
  _total_framesize += _framesize;
  if( (int)_framesize > _max_framesize )
    _max_framesize = _framesize;
#endif

  // Convert CISC spills
  fixup_spills();

  // Log regalloc results
  CompileLog* log = Compile::current()->log();
  if (log != NULL) {
    log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
  }

  if (C->failing())  return;

  NOT_PRODUCT( C->verify_graph_edges(); )

  // Move important info out of the live_arena to longer lasting storage.
  alloc_node_regs(_names.Size());
  for (uint i=0; i < _names.Size(); i++) {
    if (_names[i]) {           // Live range associated with Node?
      LRG &lrg = lrgs(_names[i]);
      if (!lrg.alive()) {
        set_bad(i);
      } else if (lrg.num_regs() == 1) {
        set1(i, lrg.reg());
      } else {                   // Must be a register-set
        if (!lrg._fat_proj) {    // Must be aligned adjacent register set
          // Live ranges record the highest register in their mask.
          // We want the low register for the AD file writer's convenience.
          OptoReg::Name hi = lrg.reg(); // Get hi register
          OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
          // We have to use pair [lo,lo+1] even for wide vectors because
          // the rest of code generation works only with pairs. It is safe
          // since for registers encoding only 'lo' is used.
          // Second reg from pair is used in ScheduleAndBundle on SPARC where
          // vector max size is 8 which corresponds to registers pair.
          // It is also used in BuildOopMaps but oop operations are not
          // vectorized.
          set2(i, lo);
        } else {                 // Misaligned; extract 2 bits
          OptoReg::Name hi = lrg.reg(); // Get hi register
          lrg.Remove(hi);        // Yank from mask
          int lo = lrg.mask().find_first_elem(); // Find lo
          set_pair(i, hi, lo);
        }
      }
      if( lrg._is_oop ) _node_oops.set(i);
    } else {
      set_bad(i);
    }
  }

  // Done!
  _live = NULL;
  _ifg = NULL;
  C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
}
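// Illustrative note: de_ssa() below hands out one "Name" (virtual register,
// i.e. live range index) per value-producing Node.  For example, a block
// whose three nodes all have a non-empty out_RegMask gets names 1, 2 and 3,
// while nodes that produce no allocatable value (memory, control) share
// name 0 and are never colored.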
//------------------------------de_ssa-----------------------------------------
void PhaseChaitin::de_ssa() {
  // Set initial Names for all Nodes.  Most Nodes get the virtual register
  // number.  A few get the ZERO live range number.  These do not
  // get allocated, but instead rely on correct scheduling to ensure that
  // only one instance is simultaneously live at a time.
  uint lr_counter = 1;
  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];
    uint cnt = b->_nodes.size();

    // Handle all the normal Nodes in the block
    for( uint j = 0; j < cnt; j++ ) {
      Node *n = b->_nodes[j];
      // Pre-color to the zero live range, or pick virtual register
      const RegMask &rm = n->out_RegMask();
      _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 );
    }
  }
  // Reset the Union-Find mapping to be identity
  reset_uf_map(lr_counter);
}


//------------------------------gather_lrg_masks-------------------------------
// Gather LiveRanGe information, including register masks.  Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {

  // Nail down the frame pointer live range
  uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr));
  lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite

  // For all blocks
  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];

    // For all instructions
    for( uint j = 1; j < b->_nodes.size(); j++ ) {
      Node *n = b->_nodes[j];
      uint input_edge_start =1; // Skip control edge on most nodes
      if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
      uint idx = n->is_Copy();

      // Get virtual register number, same as LiveRanGe index
      uint vreg = n2lidx(n);
      LRG &lrg = lrgs(vreg);
      if( vreg ) {              // No vreg means un-allocable (e.g. memory)

        // Collect has-copy bit
        if( idx ) {
          lrg._has_copy = 1;
          uint clidx = n2lidx(n->in(idx));
          LRG &copy_src = lrgs(clidx);
          copy_src._has_copy = 1;
        }

        // Check for float-vs-int live range (used in register-pressure
        // calculations)
        const Type *n_type = n->bottom_type();
        if (n_type->is_floatingpoint())
          lrg._is_float = 1;

        // Check for twice prior spilling.  Once prior spilling might have
        // spilled 'soft', 2nd prior spill should have spilled 'hard' and
        // further spilling is unlikely to make progress.
        if( _spilled_once.test(n->_idx) ) {
          lrg._was_spilled1 = 1;
          if( _spilled_twice.test(n->_idx) )
            lrg._was_spilled2 = 1;
        }

#ifndef PRODUCT
        if (trace_spilling() && lrg._def != NULL) {
          // collect defs for MultiDef printing
          if (lrg._defs == NULL) {
            lrg._defs = new (_ifg->_arena) GrowableArray<Node*>(_ifg->_arena, 2, 0, NULL);
            lrg._defs->append(lrg._def);
          }
          lrg._defs->append(n);
        }
#endif

        // Check for a single def LRG; these can spill nicely
        // via rematerialization.  Flag as NULL for no def found
        // yet, or 'n' for single def or -1 for many defs.
        lrg._def = lrg._def ? NodeSentinel : n;
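        // Illustrative note: after the update above lrg._def is a tri-state:
        //   NULL         -- no def seen yet,
        //   n            -- exactly one def (a rematerialization candidate),
        //   NodeSentinel -- more than one def, i.e. is_multidef()
        //                   (NodeSentinel is the (Node*)-1 named "-1" above).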

        // Limit result register mask to acceptable registers
        const RegMask &rm = n->out_RegMask();
        lrg.AND( rm );

        int ireg = n->ideal_reg();
        assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
                "oops must be in Op_RegP's" );

        // Check for vector live range (only if vector register is used).
        // On SPARC vector uses RegD which could be misaligned so it is not
        // processed as a vector in RA.
        if (RegMask::is_vector(ireg))
          lrg._is_vector = 1;
        assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD,
               "vector must be in vector registers");

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        if (lrgmask.is_bound(ireg))
          lrg._is_bound = 1;

        // Check for maximum frequency value
        if (lrg._maxfreq < b->_freq)
          lrg._maxfreq = b->_freq;

        // Check for oop-iness, or long/double
        // Check for multi-kill projection
        switch( ireg ) {
        case MachProjNode::fat_proj:
          // Fat projections have size equal to number of registers killed
          lrg.set_num_regs(rm.Size());
          lrg.set_reg_pressure(lrg.num_regs());
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
          break;
        case Op_RegP:
#ifdef _LP64
          lrg.set_num_regs(2);  // Size is 2 stack words
#else
          lrg.set_num_regs(1);  // Size is 1 stack word
#endif
          // Register pressure is tracked relative to the maximum values
          // suggested for that platform, INTPRESSURE and FLOATPRESSURE,
          // and relative to other types which compete for the same regs.
          //
          // The following table contains suggested values based on the
          // architectures as defined in each .ad file.
          // INTPRESSURE and FLOATPRESSURE may be tuned differently for
          // compile-speed or performance.
          // Note1:
          // SPARC and SPARCV9 reg_pressures are at 2 instead of 1
          // since .ad registers are defined as high and low halves.
          // These reg_pressure values remain compatible with the code
          // in is_high_pressure() which relates get_invalid_mask_size(),
          // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
          // Note2:
          // SPARC -d32 has 24 registers available for integral values,
          // but only 10 of these are safe for 64-bit longs.
          // Using set_reg_pressure(2) for both int and long means
          // the allocator will believe it can fit 26 longs into
          // registers.  Using 2 for longs and 1 for ints means the
          // allocator will attempt to put 52 integers into registers.
          // The settings below limit this problem to methods with
          // many long values which are being run on 32-bit SPARC.
          //
          // ------------------- reg_pressure --------------------
          // Each entry is reg_pressure_per_value,number_of_regs
          //         RegL  RegI  RegFlags   RegF RegD    INTPRESSURE  FLOATPRESSURE
          // IA32     2     1     1          1    1          6           6
          // IA64     1     1     1          1    1         50          41
          // SPARC    2     2     2          2    2         48 (24)     52 (26)
          // SPARCV9  2     2     2          2    2         48 (24)     52 (26)
          // AMD64    1     1     1          1    1         14          15
          // -----------------------------------------------------
#if defined(SPARC)
          lrg.set_reg_pressure(2);  // use for v9 as well
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          if( n_type->isa_oop_ptr() ) {
            lrg._is_oop = 1;
          }
          break;
        case Op_RegL:           // Check for long or double
        case Op_RegD:
          lrg.set_num_regs(2);
          // Define platform specific register pressure
#if defined(SPARC) || defined(ARM)
          lrg.set_reg_pressure(2);
#elif defined(IA32)
          if( ireg == Op_RegL ) {
            lrg.set_reg_pressure(2);
          } else {
            lrg.set_reg_pressure(1);
          }
#else
          lrg.set_reg_pressure(1);  // normally one value per register
#endif
          // If this def of a double forces a mis-aligned double,
          // flag as '_fat_proj' - really flag as allowing misalignment
          // AND changes how we count interferences.  A mis-aligned
          // double can interfere with TWO aligned pairs, or effectively
          // FOUR registers!
          if (rm.is_misaligned_pair()) {
            lrg._fat_proj = 1;
            lrg._is_bound = 1;
          }
          break;
        case Op_RegF:
        case Op_RegI:
        case Op_RegN:
        case Op_RegFlags:
        case 0:                 // not an ideal register
          lrg.set_num_regs(1);
#ifdef SPARC
          lrg.set_reg_pressure(2);
#else
          lrg.set_reg_pressure(1);
#endif
          break;
        case Op_VecS:
          assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
          assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
          lrg.set_num_regs(RegMask::SlotsPerVecS);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecD:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecD), "sanity");
          assert(RegMask::num_registers(Op_VecD) == RegMask::SlotsPerVecD, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecD), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecD);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecX:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecX), "sanity");
          assert(RegMask::num_registers(Op_VecX) == RegMask::SlotsPerVecX, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecX), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecX);
          lrg.set_reg_pressure(1);
          break;
        case Op_VecY:
          assert(Matcher::vector_size_supported(T_FLOAT,RegMask::SlotsPerVecY), "sanity");
          assert(RegMask::num_registers(Op_VecY) == RegMask::SlotsPerVecY, "sanity");
          assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecY), "vector should be aligned");
          lrg.set_num_regs(RegMask::SlotsPerVecY);
          lrg.set_reg_pressure(1);
          break;
        default:
          ShouldNotReachHere();
        }
      }
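      // Illustrative note (assuming the SlotsPerVec* values in regmask.hpp
      // at this revision): the constants count 32-bit mask slots, so the
      // vector cases above assign 1, 2, 4 and 8 slots for 32-, 64-, 128-
      // and 256-bit vectors respectively.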

      // Now do the same for inputs
      uint cnt = n->req();
      // Setup for CISC SPILLING
      uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
      if( UseCISCSpill && after_aggressive ) {
        inp = n->cisc_operand();
        if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
          // Convert operand number to edge index number
          inp = n->as_Mach()->operand_index(inp);
      }
      // Prepare register mask for each input
      for( uint k = input_edge_start; k < cnt; k++ ) {
        uint vreg = n2lidx(n->in(k));
        if( !vreg ) continue;

        // If this instruction is CISC Spillable, add the flags
        // bit to its appropriate input
        if( UseCISCSpill && after_aggressive && inp == k ) {
#ifndef PRODUCT
          if( TraceCISCSpill ) {
            tty->print("  use_cisc_RegMask: ");
            n->dump();
          }
#endif
          n->as_Mach()->use_cisc_RegMask();
        }

        LRG &lrg = lrgs(vreg);
        // // Testing for floating point code shape
        // Node *test = n->in(k);
        // if( test->is_Mach() ) {
        //   MachNode *m = test->as_Mach();
        //   int  op = m->ideal_Opcode();
        //   if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
        //     int zzz = 1;
        //   }
        // }

        // Limit result register mask to acceptable registers.
        // Do not limit registers from uncommon uses before
        // AggressiveCoalesce.  This effectively pre-virtual-splits
        // around uncommon uses of common defs.
        const RegMask &rm = n->in_RegMask(k);
        if( !after_aggressive &&
            _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) {
          // Since we are BEFORE aggressive coalesce, leave the register
          // mask untrimmed by the call.  This encourages more coalescing.
          // Later, AFTER aggressive, this live range will have to spill
          // but the spiller handles slow-path calls very nicely.
        } else {
          lrg.AND( rm );
        }

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
        int kreg = n->in(k)->ideal_reg();
        bool is_vect = RegMask::is_vector(kreg);
        assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
               is_vect || kreg == Op_RegD,
               "vector must be in vector registers");
        if (lrgmask.is_bound(kreg))
          lrg._is_bound = 1;

        // If this use of a double forces a mis-aligned double,
        // flag as '_fat_proj' - really flag as allowing misalignment
        // AND changes how we count interferences.  A mis-aligned
        // double can interfere with TWO aligned pairs, or effectively
        // FOUR registers!
#ifdef ASSERT
        if (is_vect) {
          assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
          assert(!lrg._fat_proj, "sanity");
          assert(RegMask::num_registers(kreg) == lrg.num_regs(), "sanity");
        }
#endif
        if (!is_vect && lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_pair()) {
          lrg._fat_proj = 1;
          lrg._is_bound = 1;
        }
        // if the LRG is an unaligned pair, we will have to spill
        // so clear the LRG's register mask if it is not already spilled
        if (!is_vect && !n->is_SpillCopy() &&
            (lrg._def == NULL || lrg.is_multidef() || !lrg._def->is_SpillCopy()) &&
            lrgmask.is_misaligned_pair()) {
          lrg.Clear();
        }

        // Check for maximum frequency value
        if( lrg._maxfreq < b->_freq )
          lrg._maxfreq = b->_freq;

      } // End for all allocated inputs
    } // end for all instructions
  } // end for all blocks

  // Final per-liverange setup
  for (uint i2=0; i2<_maxlrg; i2++) {
    LRG &lrg = lrgs(i2);
    assert(!lrg._is_vector || !lrg._fat_proj, "sanity");
    if (lrg.num_regs() > 1 && !lrg._fat_proj) {
      lrg.clear_to_sets();
    }
    lrg.compute_set_mask_size();
    if (lrg.not_free()) {      // Handle case where we lose from the start
      lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
      lrg._direct_conflict = 1;
    }
    lrg.set_degree(0);          // no neighbors in IFG yet
  }
}
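// Worked example (added for illustration) of the Briggs test in
// set_was_low() below: suppose a live range L needs one register and has
// twelve neighbors, ten of them lo-degree.  Only the two hi-degree
// neighbors contribute to briggs_degree, so as long as that sum stays
// below L's degrees_of_freedom(), L is still guaranteed a color even
// though its raw degree of twelve looks high.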
//------------------------------set_was_low------------------------------------
// Set the was-lo-degree bit.  Conservative coalescing should not change the
// colorability of the graph.  If any live range was of low-degree before
// coalescing, it should Simplify.  This call sets the was-lo-degree bit.
// The bit is checked in Simplify.
void PhaseChaitin::set_was_low() {
#ifdef ASSERT
  for( uint i = 1; i < _maxlrg; i++ ) {
    int size = lrgs(i).num_regs();
    uint old_was_lo = lrgs(i)._was_lo;
    lrgs(i)._was_lo = 0;
    if( lrgs(i).lo_degree() ) {
      lrgs(i)._was_lo = 1;      // Trivially of low degree
    } else {                    // Else check the Briggs assertion
      // Briggs' observation is that the lo-degree neighbors of a
      // hi-degree live range will not interfere with the color choices
      // of said hi-degree live range.  The Simplify reverse-stack-coloring
      // order takes care of the details.  Hence you do not have to count
      // low-degree neighbors when determining if this guy colors.
      int briggs_degree = 0;
      IndexSet *s = _ifg->neighbors(i);
      IndexSetIterator elements(s);
      uint lidx;
      while((lidx = elements.next()) != 0) {
        if( !lrgs(lidx).lo_degree() )
          briggs_degree += MAX2(size,lrgs(lidx).num_regs());
      }
      if( briggs_degree < lrgs(i).degrees_of_freedom() )
        lrgs(i)._was_lo = 1;    // Low degree via the Briggs assertion
    }
    assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
  }
#endif
}

#define REGISTER_CONSTRAINED 16

//------------------------------cache_lrg_info---------------------------------
// Compute cost/area ratio, in case we spill.  Build the lo-degree list.
void PhaseChaitin::cache_lrg_info( ) {

  for( uint i = 1; i < _maxlrg; i++ ) {
    LRG &lrg = lrgs(i);

    // Check for being of low degree: means we can be trivially colored.
    // Low degree, dead or must-spill guys just get to simplify right away
    if( lrg.lo_degree() ||
       !lrg.alive() ||
        lrg._must_spill ) {
      // Split low degree list into those guys that must get a
      // register and those that can go to register or stack.
      // The idea is LRGs that can go register or stack color first when
      // they have a good chance of getting a register.  The register-only
      // lo-degree live ranges always get a register.
      OptoReg::Name hi_reg = lrg.mask().find_last_elem();
      if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
        lrg._next = _lo_stk_degree;
        _lo_stk_degree = i;
      } else {
        lrg._next = _lo_degree;
        _lo_degree = i;
      }
    } else {                    // Else high degree
      lrgs(_hi_degree)._prev = i;
      lrg._next = _hi_degree;
      lrg._prev = 0;
      _hi_degree = i;
    }
  }
}
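// Illustrative note: the worklists built above are intrusive lists threaded
// through the LRG records themselves.  _lo_degree and _lo_stk_degree are
// singly linked through LRG::_next, while the _hi_degree list is doubly
// linked (LRG::_prev and LRG::_next) because Simplify() must unlink
// arbitrary elements when a neighbor drops to lo-degree.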
//------------------------------Pre-Simplify-----------------------------------
// Simplify the IFG by removing LRGs of low degree that have NO copies
void PhaseChaitin::Pre_Simplify( ) {

  // Warm up the lo-degree no-copy list
  int lo_no_copy = 0;
  for( uint i = 1; i < _maxlrg; i++ ) {
    if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
        !lrgs(i).alive() ||
        lrgs(i)._must_spill ) {
      lrgs(i)._next = lo_no_copy;
      lo_no_copy = i;
    }
  }

  while( lo_no_copy ) {
    uint lo = lo_no_copy;
    lo_no_copy = lrgs(lo)._next;
    int size = lrgs(lo).num_regs();

    // Put the simplified guy on the simplified list.
    lrgs(lo)._next = _simplified;
    _simplified = lo;

    // Yank this guy from the IFG.
    IndexSet *adj = _ifg->remove_node( lo );

    // If any neighbors' degrees fall below their number of
    // allowed registers, then put that neighbor on the low degree
    // list.  Note that 'degree' can only fall and 'numregs' is
    // unchanged by this action.  Thus the two are equal at most once,
    // so LRGs hit the lo-degree worklists at most once.
    IndexSetIterator elements(adj);
    uint neighbor;
    while ((neighbor = elements.next()) != 0) {
      LRG *n = &lrgs(neighbor);
      assert( _ifg->effective_degree(neighbor) == n->degree(), "" );

      // Check for just becoming of-low-degree
      if( n->just_lo_degree() && !n->_has_copy ) {
        assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
        // Put on lo-degree list
        n->_next = lo_no_copy;
        lo_no_copy = neighbor;
      }
    }
  } // End of while lo-degree no_copy worklist not empty

  // No more lo-degree no-copy live ranges to simplify
}

//------------------------------Simplify---------------------------------------
// Simplify the IFG by removing LRGs of low degree.
void PhaseChaitin::Simplify( ) {

  while( 1 ) {                  // Repeat till simplified it all
    // May want to explore simplifying lo_degree before _lo_stk_degree.
    // This might result in more spills coloring into registers during
    // Select().
    while( _lo_degree || _lo_stk_degree ) {
      // Pull from the register-capable lo_degree list first if possible
      uint lo;
      if( _lo_degree ) {
        lo = _lo_degree;
        _lo_degree = lrgs(lo)._next;
      } else {
        lo = _lo_stk_degree;
        _lo_stk_degree = lrgs(lo)._next;
      }

      // Put the simplified guy on the simplified list.
      lrgs(lo)._next = _simplified;
      _simplified = lo;
      // If this guy is "at risk" then mark his current neighbors
      if( lrgs(lo)._at_risk ) {
        IndexSetIterator elements(_ifg->neighbors(lo));
        uint datum;
        while ((datum = elements.next()) != 0) {
          lrgs(datum)._risk_bias = lo;
        }
      }

      // Yank this guy from the IFG.
      IndexSet *adj = _ifg->remove_node( lo );

      // If any neighbors' degrees fall below their number of
      // allowed registers, then put that neighbor on the low degree
      // list.  Note that 'degree' can only fall and 'numregs' is
      // unchanged by this action.  Thus the two are equal at most once,
      // so LRGs hit the lo-degree worklist at most once.
      IndexSetIterator elements(adj);
      uint neighbor;
      while ((neighbor = elements.next()) != 0) {
        LRG *n = &lrgs(neighbor);
#ifdef ASSERT
        if( VerifyOpto || VerifyRegisterAllocator ) {
          assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
        }
#endif

        // Check for just becoming of-low-degree just counting registers.
        // _must_spill live ranges are already on the low degree list.
        if( n->just_lo_degree() && !n->_must_spill ) {
          assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
          // Pull from hi-degree list
          uint prev = n->_prev;
          uint next = n->_next;
          if( prev ) lrgs(prev)._next = next;
          else _hi_degree = next;
          lrgs(next)._prev = prev;
          n->_next = _lo_degree;
          _lo_degree = neighbor;
        }
      }
    } // End of while lo-degree/lo_stk_degree worklist not empty
    // Check for got everything: is hi-degree list empty?
    if( !_hi_degree ) break;

    // Time to pick a potential spill guy
    uint lo_score = _hi_degree;
    double score = lrgs(lo_score).score();
    double area = lrgs(lo_score)._area;
    double cost = lrgs(lo_score)._cost;
    bool bound = lrgs(lo_score)._is_bound;

    // Find cheapest guy
    debug_only( int lo_no_simplify=0; );
    for( uint i = _hi_degree; i; i = lrgs(i)._next ) {
      assert( !(*_ifg->_yanked)[i], "" );
      // It's just vaguely possible to move hi-degree to lo-degree without
      // going through a just-lo-degree stage: If you remove a double from
      // a float live range its degree will drop by 2 and you can skip the
      // just-lo-degree stage.  It's very rare (shows up after 5000+ methods
      // in -Xcomp of Java2Demo).  So just choose this guy to simplify next.
      if( lrgs(i).lo_degree() ) {
        lo_score = i;
        break;
      }
      debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );
      double iscore = lrgs(i).score();
      double iarea = lrgs(i)._area;
      double icost = lrgs(i)._cost;
      bool ibound = lrgs(i)._is_bound;

      // Compare cost/area of i vs cost/area of lo_score.  Smaller cost/area
      // wins.  Ties happen because all live ranges in question have spilled
      // a few times before and the spill-score adds a huge number which
      // washes out the low order bits.  We are choosing the lesser of 2
      // evils; in this case pick largest area to spill.
      // Ties also happen when live ranges are defined and used only inside
      // one block.  In which case their area is 0 and score set to max.
      // In such case choose bound live range over unbound to free registers
      // or with smaller cost to spill.
      if( iscore < score ||
          (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ||
          (iscore == score && iarea == area &&
           ( (ibound && !bound) || (ibound == bound && icost < cost) )) ) {
        lo_score = i;
        score = iscore;
        area = iarea;
        cost = icost;
        bound = ibound;
      }
    }
    LRG *lo_lrg = &lrgs(lo_score);
    // The live range we choose for spilling is either hi-degree, or very
    // rarely it can be low-degree.  If we choose a hi-degree live range
    // there better not be any lo-degree choices.
    assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );

    // Pull from hi-degree list
    uint prev = lo_lrg->_prev;
    uint next = lo_lrg->_next;
    if( prev ) lrgs(prev)._next = next;
    else _hi_degree = next;
    lrgs(next)._prev = prev;
    // Jam him on the lo-degree list, despite his high degree.
    // Maybe he'll get a color, and maybe he'll spill.
    // Only Select() will know.
    lrgs(lo_score)._at_risk = true;
    _lo_degree = lo_score;
    lo_lrg->_next = 0;

  } // End of while not simplified everything

}

//------------------------------is_legal_reg-----------------------------------
// Is 'reg' register legal for 'lrg'?
static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
  if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
      lrg.mask().Member(OptoReg::add(reg,-chunk))) {
    // RA uses OptoReg which represents the highest element of a register set.
    // For example, vectorX (128bit) on x86 uses [XMM,XMMb,XMMc,XMMd] set
    // in which XMMd is used by RA to represent such vectors.  A double value
    // uses [XMM,XMMb] pairs and XMMb is used by RA for it.
    // The register mask uses the largest bits set of overlapping register sets.
    // On x86 with AVX it uses 8 bits for each XMM registers set.
    //
    // The 'lrg' already has a cleared-to-set register mask (done in Select()
    // before calling choose_color()).  Passing the mask.Member(reg) check above
    // indicates that the size (num_regs) of the 'reg' set is less than or
    // equal to the 'lrg' set size.
    // For set size 1 any register which is a member of the 'lrg' mask is legal.
    if (lrg.num_regs()==1)
      return true;
    // For larger sets only an aligned register with the same set size is legal.
    int mask = lrg.num_regs()-1;
    if ((reg&mask) == mask)
      return true;
  }
  return false;
}
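// Worked example (added for illustration) of the alignment test above: a
// 128-bit vector has lrg.num_regs() == 4, so mask == 3 and only register
// indices with the low two bits set -- the highest slot of an aligned
// quad -- pass ((reg & mask) == mask).  For a pair (num_regs() == 2) only
// odd indices, the high half of an aligned pair, are legal.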

//------------------------------bias_color-------------------------------------
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {

  // Check for "at_risk" LRG's
  uint risk_lrg = Find(lrg._risk_bias);
  if( risk_lrg != 0 ) {
    // Walk the colored neighbors of the "at_risk" candidate
    // Choose a color which is both legal and already taken by a neighbor
    // of the "at_risk" candidate in order to improve the chances of the
    // "at_risk" candidate coloring
    IndexSetIterator elements(_ifg->neighbors(risk_lrg));
    uint datum;
    while ((datum = elements.next()) != 0) {
      OptoReg::Name reg = lrgs(datum).reg();
      // If this LRG's register is legal for us, choose it
      if (is_legal_reg(lrg, reg, chunk))
        return reg;
    }
  }

  uint copy_lrg = Find(lrg._copy_bias);
  if( copy_lrg != 0 ) {
    // If he has a color,
    if( !(*(_ifg->_yanked))[copy_lrg] ) {
      OptoReg::Name reg = lrgs(copy_lrg).reg();
      // And it is legal for you,
      if (is_legal_reg(lrg, reg, chunk))
        return reg;
    } else if( chunk == 0 ) {
      // Choose a color which is legal for him
      RegMask tempmask = lrg.mask();
      tempmask.AND(lrgs(copy_lrg).mask());
      tempmask.clear_to_sets(lrg.num_regs());
      OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
      if (OptoReg::is_valid(reg))
        return reg;
    }
  }

  // If no bias info exists, just go with the register selection ordering
  if (lrg._is_vector || lrg.num_regs() == 2) {
    // Find an aligned set
    return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
  }

  // CNC - Fun hack.  Alternate 1st and 2nd selection.  Enables post-allocate
  // copy removal to remove many more copies, by preventing a just-assigned
  // register from being repeatedly assigned.
  OptoReg::Name reg = lrg.mask().find_first_elem();
  if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
    // This 'Remove; find; Insert' idiom is an expensive way to find the
    // SECOND element in the mask.
    lrg.Remove(reg);
    OptoReg::Name reg2 = lrg.mask().find_first_elem();
    lrg.Insert(reg);
    if( OptoReg::is_reg(reg2))
      reg = reg2;
  }
  return OptoReg::add( reg, chunk );
}

//------------------------------choose_color-----------------------------------
// Choose a color in the current chunk
OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
  assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
  assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");

  if( lrg.num_regs() == 1 ||    // Common Case
      !lrg._fat_proj )          // Aligned+adjacent pairs ok
    // Use a heuristic to "bias" the color choice
    return bias_color(lrg, chunk);

  assert(!lrg._is_vector, "should not be a vector here" );
  assert( lrg.num_regs() >= 2, "dead live ranges do not color" );

  // Fat-proj case or misaligned double argument.
  assert(lrg.compute_mask_size() == lrg.num_regs() ||
         lrg.num_regs() == 2,"fat projs exactly color" );
  assert( !chunk, "always color in 1st chunk" );
  // Return the highest element in the set.
  return lrg.mask().find_last_elem();
}
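// Illustrative note: colors are handed out in chunks of RegMask::CHUNK_SIZE.
// Chunk 0 covers the machine registers plus the first stack slots; a live
// range that fails to color in chunk 0 and is marked AllStack rolls over
// to the next all-stack chunk inside Select() below (see the
// retry_next_chunk label).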

//------------------------------Select-----------------------------------------
// Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
// in reverse order of removal.  As long as nothing of hi-degree was yanked,
// everything going back is guaranteed a color.  Select that color.  If some
// hi-degree LRG cannot get a color then we record that we must spill.
uint PhaseChaitin::Select( ) {
  uint spill_reg = LRG::SPILL_REG;
  _max_reg = OptoReg::Name(0);  // Past max register used
  while( _simplified ) {
    // Pull next LRG from the simplified list - in reverse order of removal
    uint lidx = _simplified;
    LRG *lrg = &lrgs(lidx);
    _simplified = lrg->_next;

#ifndef PRODUCT
    if (trace_spilling()) {
      ttyLocker ttyl;
      tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
                    lrg->degrees_of_freedom());
      lrg->dump();
    }
#endif

    // Re-insert into the IFG
    _ifg->re_insert(lidx);
    if( !lrg->alive() ) continue;
    // capture allstackedness flag before mask is hacked
    const int is_allstack = lrg->mask().is_AllStack();

    // Yeah, yeah, yeah, I know, I know.  I can refactor this
    // to avoid the GOTO, although the refactored code will not
    // be much clearer.  We arrive here IFF we have a stack-based
    // live range that cannot color in the current chunk, and it
    // has to move into the next free stack chunk.
    int chunk = 0;              // Current chunk is first chunk
    retry_next_chunk:

    // Remove neighbor colors
    IndexSet *s = _ifg->neighbors(lidx);

    debug_only(RegMask orig_mask = lrg->mask();)
    IndexSetIterator elements(s);
    uint neighbor;
    while ((neighbor = elements.next()) != 0) {
      // Note that neighbor might be a spill_reg.  In this case, exclusion
      // of its color will be a no-op, since the spill_reg chunk is in outer
      // space.  Also, if neighbor is in a different chunk, this exclusion
      // will be a no-op.  (Later on, if lrg runs out of possible colors in
      // its chunk, a new chunk of color may be tried, in which case
      // examination of neighbors is started again, at retry_next_chunk.)
      LRG &nlrg = lrgs(neighbor);
      OptoReg::Name nreg = nlrg.reg();
      // Only subtract masks in the same chunk
      if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {
#ifndef PRODUCT
        uint size = lrg->mask().Size();
        RegMask rm = lrg->mask();
#endif
        lrg->SUBTRACT(nlrg.mask());
#ifndef PRODUCT
        if (trace_spilling() && lrg->mask().Size() != size) {
          ttyLocker ttyl;
          tty->print("L%d ", lidx);
          rm.dump();
          tty->print(" intersected L%d ", neighbor);
          nlrg.mask().dump();
          tty->print(" removed ");
          rm.SUBTRACT(lrg->mask());
          rm.dump();
          tty->print(" leaving ");
          lrg->mask().dump();
          tty->cr();
        }
#endif
      }
    }
    //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
    // Aligned pairs need aligned masks
    assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
    if (lrg->num_regs() > 1 && !lrg->_fat_proj) {
      lrg->clear_to_sets();
    }

    // Check if a color is available and if so pick the color
    OptoReg::Name reg = choose_color( *lrg, chunk );
#ifdef SPARC
    debug_only(lrg->compute_set_mask_size());
    assert(lrg->num_regs() < 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
#endif

    //---------------
    // If we fail to color and the AllStack flag is set, trigger
    // a chunk-rollover event
    if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
      // Bump register mask up to next stack chunk
      chunk += RegMask::CHUNK_SIZE;
      lrg->Set_All();

      goto retry_next_chunk;
    }

    //---------------
    // Did we get a color?
    else if( OptoReg::is_valid(reg)) {
#ifndef PRODUCT
      RegMask avail_rm = lrg->mask();
#endif

      // Record selected register
      lrg->set_reg(reg);

      if( reg >= _max_reg )     // Compute max register limit
        _max_reg = OptoReg::add(reg,1);
      // Fold reg back into normal space
      reg = OptoReg::add(reg,-chunk);
In this case, the mask has more bits in it than the colors twisti@1040: // chosen. Restrict the mask to just what was picked. kvn@3882: int n_regs = lrg->num_regs(); kvn@3882: assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); kvn@3882: if (n_regs == 1 || !lrg->_fat_proj) { kvn@3882: assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecY, "sanity"); duke@435: lrg->Clear(); // Clear the mask duke@435: lrg->Insert(reg); // Set regmask to match selected reg kvn@3882: // For vectors and pairs, also insert the low bit of the pair kvn@3882: for (int i = 1; i < n_regs; i++) kvn@3882: lrg->Insert(OptoReg::add(reg,-i)); kvn@3882: lrg->set_mask_size(n_regs); duke@435: } else { // Else fatproj duke@435: // mask must be equal to fatproj bits, by definition duke@435: } duke@435: #ifndef PRODUCT duke@435: if (trace_spilling()) { duke@435: ttyLocker ttyl; duke@435: tty->print("L%d selected ", lidx); duke@435: lrg->mask().dump(); duke@435: tty->print(" from "); duke@435: avail_rm.dump(); duke@435: tty->cr(); duke@435: } duke@435: #endif duke@435: // Note that reg is the highest-numbered register in the newly-bound mask. duke@435: } // end color available case duke@435: duke@435: //--------------- duke@435: // Live range is live and no colors available duke@435: else { duke@435: assert( lrg->alive(), "" ); never@730: assert( !lrg->_fat_proj || lrg->is_multidef() || duke@435: lrg->_def->outcnt() > 0, "fat_proj cannot spill"); duke@435: assert( !orig_mask.is_AllStack(), "All Stack does not spill" ); duke@435: duke@435: // Assign the special spillreg register duke@435: lrg->set_reg(OptoReg::Name(spill_reg++)); duke@435: // Do not empty the regmask; leave mask_size lying around duke@435: // for use during Spilling duke@435: #ifndef PRODUCT duke@435: if( trace_spilling() ) { duke@435: ttyLocker ttyl; duke@435: tty->print("L%d spilling with neighbors: ", lidx); duke@435: s->dump(); duke@435: debug_only(tty->print(" original mask: ")); duke@435: debug_only(orig_mask.dump()); duke@435: dump_lrg(lidx); duke@435: } duke@435: #endif duke@435: } // end spill case duke@435: duke@435: } duke@435: duke@435: return spill_reg-LRG::SPILL_REG; // Return number of spills duke@435: } duke@435: duke@435: duke@435: //------------------------------copy_was_spilled------------------------------- duke@435: // Copy 'was_spilled'-edness from the source Node to the dst Node. duke@435: void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) { duke@435: if( _spilled_once.test(src->_idx) ) { duke@435: _spilled_once.set(dst->_idx); duke@435: lrgs(Find(dst))._was_spilled1 = 1; duke@435: if( _spilled_twice.test(src->_idx) ) { duke@435: _spilled_twice.set(dst->_idx); duke@435: lrgs(Find(dst))._was_spilled2 = 1; duke@435: } duke@435: } duke@435: } duke@435: duke@435: //------------------------------set_was_spilled-------------------------------- duke@435: // Set the 'spilled_once' or 'spilled_twice' flag on a node. duke@435: void PhaseChaitin::set_was_spilled( Node *n ) { duke@435: if( _spilled_once.test_set(n->_idx) ) duke@435: _spilled_twice.set(n->_idx); duke@435: } duke@435: duke@435: //------------------------------fixup_spills----------------------------------- duke@435: // Convert Ideal spill instructions into proper FramePtr + offset Loads and duke@435: // Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are. duke@435: void PhaseChaitin::fixup_spills() { duke@435: // This function does only cisc spill work. 
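// Illustrative sketch (not part of the original source): on a CISC target
// the reload of a spilled value,
//
//     val = LoadI [FP + #16]      // reload from the spill slot
//     sum = AddI  val, x
//
// can instead be folded into the user as a memory operand,
//
//     sum = AddI [FP + #16], x    // cisc_version() of the AddI
//
// saving both the load and the temporary register.  The loop below performs
// this rewrite, in outline, for every operand the ADLC marked as
// cisc-spillable whose input actually landed on the stack.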
duke@435: if( !UseCISCSpill ) return; duke@435: duke@435: NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); ) duke@435: duke@435: // Grab the Frame Pointer duke@435: Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr); duke@435: duke@435: // For all blocks duke@435: for( uint i = 0; i < _cfg._num_blocks; i++ ) { duke@435: Block *b = _cfg._blocks[i]; duke@435: duke@435: // For all instructions in block duke@435: uint last_inst = b->end_idx(); duke@435: for( uint j = 1; j <= last_inst; j++ ) { duke@435: Node *n = b->_nodes[j]; duke@435: duke@435: // Dead instruction??? duke@435: assert( n->outcnt() != 0 ||// Nothing dead after post alloc duke@435: C->top() == n || // Or the random TOP node duke@435: n->is_Proj(), // Or a fat-proj kill node duke@435: "No dead instructions after post-alloc" ); duke@435: duke@435: int inp = n->cisc_operand(); duke@435: if( inp != AdlcVMDeps::Not_cisc_spillable ) { duke@435: // Convert operand number to edge index number duke@435: MachNode *mach = n->as_Mach(); duke@435: inp = mach->operand_index(inp); duke@435: Node *src = n->in(inp); // Value to load or store duke@435: LRG &lrg_cisc = lrgs( Find_const(src) ); duke@435: OptoReg::Name src_reg = lrg_cisc.reg(); duke@435: // Doubles record the HIGH register of an adjacent pair. duke@435: src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs()); duke@435: if( OptoReg::is_stack(src_reg) ) { // If input is on stack duke@435: // This is a CISC Spill, get stack offset and construct new node duke@435: #ifndef PRODUCT duke@435: if( TraceCISCSpill ) { duke@435: tty->print(" reg-instr: "); duke@435: n->dump(); duke@435: } duke@435: #endif duke@435: int stk_offset = reg2offset(src_reg); duke@435: // Bailout if we might exceed node limit when spilling this instruction duke@435: C->check_node_count(0, "out of nodes fixing spills"); duke@435: if (C->failing()) return; duke@435: // Transform node duke@435: MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach(); duke@435: cisc->set_req(inp,fp); // Base register is frame pointer duke@435: if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) { duke@435: assert( cisc->oper_input_base() == 2, "Only adding one edge"); duke@435: cisc->ins_req(1,src); // Requires a memory edge duke@435: } duke@435: b->_nodes.map(j,cisc); // Insert into basic block kvn@603: n->subsume_by(cisc); // Correct graph duke@435: // duke@435: ++_used_cisc_instructions; duke@435: #ifndef PRODUCT duke@435: if( TraceCISCSpill ) { duke@435: tty->print(" cisc-instr: "); duke@435: cisc->dump(); duke@435: } duke@435: #endif duke@435: } else { duke@435: #ifndef PRODUCT duke@435: if( TraceCISCSpill ) { duke@435: tty->print(" using reg-instr: "); duke@435: n->dump(); duke@435: } duke@435: #endif duke@435: ++_unused_cisc_instructions; // input can be on stack duke@435: } duke@435: } duke@435: duke@435: } // End of for all instructions duke@435: duke@435: } // End of for all blocks duke@435: } duke@435: duke@435: //------------------------------find_base_for_derived-------------------------- duke@435: // Helper to stretch above; recursively discover the base Node for a duke@435: // given derived Node. Easy for AddP-related machine nodes, but needs duke@435: // to be recursive for derived Phis. 
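// Illustrative sketch (not part of the original source): for a matched AddP
// the base is simply an input edge,
//
//     ptr = AddP( base, base, #16 )           // derived = base + 16
//     find_base_for_derived(..., ptr, ...)    // -> ptr->in(AddPNode::Base)
//
// but for  ptr = Phi( region, ptr1, ptr2 )  the bases of ptr1 and ptr2 may
// differ, so a parallel base Phi must be found or built recursively, which
// is what the function below does.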
duke@435: Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
duke@435:   // See if already computed; if so return it
duke@435:   if( derived_base_map[derived->_idx] )
duke@435:     return derived_base_map[derived->_idx];
duke@435: 
duke@435:   // See if this happens to be a base.
duke@435:   // NOTE: we use TypePtr instead of TypeOopPtr because we can have
duke@435:   // pointers derived from NULL!  These are always along paths that
duke@435:   // can't happen at run-time but the optimizer cannot deduce it so
duke@435:   // we have to handle it gracefully.
kvn@1164:   assert(!derived->bottom_type()->isa_narrowoop() ||
kvn@1164:          derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
duke@435:   const TypePtr *tj = derived->bottom_type()->isa_ptr();
duke@435:   // If it's an OOP with a non-zero offset, then it is derived.
kvn@1164:   if( tj == NULL || tj->_offset == 0 ) {
duke@435:     derived_base_map[derived->_idx] = derived;
duke@435:     return derived;
duke@435:   }
duke@435:   // Derived is NULL+offset?  Base is NULL!
duke@435:   if( derived->is_Con() ) {
kvn@1164:     Node *base = _matcher.mach_null();
kvn@1164:     assert(base != NULL, "sanity");
kvn@1164:     if (base->in(0) == NULL) {
kvn@1164:       // Initialize it once and make it shared:
kvn@1164:       // set control to _root and place it into Start block
kvn@1164:       // (where top() node is placed).
kvn@1164:       base->init_req(0, _cfg._root);
kvn@1164:       Block *startb = _cfg._bbs[C->top()->_idx];
kvn@1164:       startb->_nodes.insert(startb->find_node(C->top()), base );
kvn@1164:       _cfg._bbs.map( base->_idx, startb );
kvn@1164:       assert (n2lidx(base) == 0, "should not have LRG yet");
kvn@1164:     }
kvn@1164:     if (n2lidx(base) == 0) {
kvn@1164:       new_lrg(base, maxlrg++);
kvn@1164:     }
kvn@1164:     assert(base->in(0) == _cfg._root &&
kvn@1164:            _cfg._bbs[base->_idx] == _cfg._bbs[C->top()->_idx], "base NULL should be shared");
duke@435:     derived_base_map[derived->_idx] = base;
duke@435:     return base;
duke@435:   }
duke@435: 
duke@435:   // Check for AddP-related opcodes
duke@435:   if( !derived->is_Phi() ) {
kvn@3971:     assert(derived->as_Mach()->ideal_Opcode() == Op_AddP, err_msg_res("but is: %s", derived->Name()));
duke@435:     Node *base = derived->in(AddPNode::Base);
duke@435:     derived_base_map[derived->_idx] = base;
duke@435:     return base;
duke@435:   }
duke@435: 
duke@435:   // Recursively find bases for Phis.
duke@435:   // First check to see if we can avoid a base Phi here.
duke@435:   Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
duke@435:   uint i;
duke@435:   for( i = 2; i < derived->req(); i++ )
duke@435:     if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
duke@435:       break;
duke@435:   // Went to the end without finding any different bases?
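// (Worked example, not from the original source: for
//  Phi( region, AddP(b,b,#8), AddP(b,b,#24) ) every input derives from the
//  same base b, so the Phi maps straight to b and no base Phi is built.)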
duke@435: if( i == derived->req() ) { // No need for a base Phi here duke@435: derived_base_map[derived->_idx] = base; duke@435: return base; duke@435: } duke@435: duke@435: // Now we see we need a base-Phi here to merge the bases kvn@1164: const Type *t = base->bottom_type(); kvn@1164: base = new (C, derived->req()) PhiNode( derived->in(0), t ); kvn@1164: for( i = 1; i < derived->req(); i++ ) { duke@435: base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg)); kvn@1164: t = t->meet(base->in(i)->bottom_type()); kvn@1164: } kvn@1164: base->as_Phi()->set_type(t); duke@435: duke@435: // Search the current block for an existing base-Phi duke@435: Block *b = _cfg._bbs[derived->_idx]; duke@435: for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi duke@435: Node *phi = b->_nodes[i]; duke@435: if( !phi->is_Phi() ) { // Found end of Phis with no match? duke@435: b->_nodes.insert( i, base ); // Must insert created Phi here as base duke@435: _cfg._bbs.map( base->_idx, b ); duke@435: new_lrg(base,maxlrg++); duke@435: break; duke@435: } duke@435: // See if Phi matches. duke@435: uint j; duke@435: for( j = 1; j < base->req(); j++ ) duke@435: if( phi->in(j) != base->in(j) && duke@435: !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs duke@435: break; duke@435: if( j == base->req() ) { // All inputs match? duke@435: base = phi; // Then use existing 'phi' and drop 'base' duke@435: break; duke@435: } duke@435: } duke@435: duke@435: duke@435: // Cache info for later passes duke@435: derived_base_map[derived->_idx] = base; duke@435: return base; duke@435: } duke@435: duke@435: duke@435: //------------------------------stretch_base_pointer_live_ranges--------------- duke@435: // At each Safepoint, insert extra debug edges for each pair of derived value/ duke@435: // base pointer that is live across the Safepoint for oopmap building. The duke@435: // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the duke@435: // required edge set. duke@435: bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) { duke@435: int must_recompute_live = false; duke@435: uint maxlrg = _maxlrg; duke@435: Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique()); duke@435: memset( derived_base_map, 0, sizeof(Node*)*C->unique() ); duke@435: duke@435: // For all blocks in RPO do... duke@435: for( uint i=0; i<_cfg._num_blocks; i++ ) { duke@435: Block *b = _cfg._blocks[i]; duke@435: // Note use of deep-copy constructor. I cannot hammer the original duke@435: // liveout bits, because they are needed by the following coalesce pass. duke@435: IndexSet liveout(_live->live(b)); duke@435: duke@435: for( uint j = b->end_idx() + 1; j > 1; j-- ) { duke@435: Node *n = b->_nodes[j-1]; duke@435: duke@435: // Pre-split compares of loop-phis. Loop-phis form a cycle we would duke@435: // like to see in the same register. Compare uses the loop-phi and so duke@435: // extends its live range BUT cannot be part of the cycle. If this duke@435: // extended live range overlaps with the update of the loop-phi value duke@435: // we need both alive at the same time -- which requires at least 1 duke@435: // copy. But because Intel has only 2-address registers we end up with duke@435: // at least 2 copies, one before the loop-phi update instruction and duke@435: // one after. Instead we split the input to the compare just after the duke@435: // phi. 
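// Illustrative sketch (not part of the original source):
//
//     loop:  phi  = Phi( init, next )
//            cmp  = CmpI( phi, limit )    // use stretches phi's live range
//            next = AddI( phi, #1 )       // the cycle's update of phi
//
// On a 2-address machine cmp and next would force two copies around phi;
// feeding cmp from one MachSpillCopy of phi, inserted right after the phi,
// costs a single copy instead.  That is the split performed just below for
// CmpI users of loop Phis.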
duke@435:       if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
duke@435:         Node *phi = n->in(1);
duke@435:         if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
duke@435:           Block *phi_block = _cfg._bbs[phi->_idx];
duke@435:           if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
duke@435:             const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
duke@435:             Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
duke@435:             insert_proj( phi_block, 1, spill, maxlrg++ );
duke@435:             n->set_req(1,spill);
duke@435:             must_recompute_live = true;
duke@435:           }
duke@435:         }
duke@435:       }
duke@435: 
duke@435:       // Get value being defined
duke@435:       uint lidx = n2lidx(n);
duke@435:       if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
duke@435:         // Remove from live-out set
duke@435:         liveout.remove(lidx);
duke@435: 
duke@435:         // Copies do not define a new value and so do not interfere.
duke@435:         // Remove the copy's source from the liveout set before interfering.
duke@435:         uint idx = n->is_Copy();
duke@435:         if( idx ) liveout.remove( n2lidx(n->in(idx)) );
duke@435:       }
duke@435: 
duke@435:       // Found a safepoint?
duke@435:       JVMState *jvms = n->jvms();
duke@435:       if( jvms ) {
duke@435:         // Now scan for a live derived pointer
duke@435:         IndexSetIterator elements(&liveout);
duke@435:         uint neighbor;
duke@435:         while ((neighbor = elements.next()) != 0) {
duke@435:           // Find reaching DEF for base and derived values
duke@435:           // This works because we are still in SSA during this call.
duke@435:           Node *derived = lrgs(neighbor)._def;
duke@435:           const TypePtr *tj = derived->bottom_type()->isa_ptr();
kvn@1164:           assert(!derived->bottom_type()->isa_narrowoop() ||
kvn@1164:                  derived->bottom_type()->make_ptr()->is_ptr()->_offset == 0, "sanity");
duke@435:           // If it's an OOP with a non-zero offset, then it is derived.
duke@435:           if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
duke@435:             Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
duke@435:             assert( base->_idx < _names.Size(), "" );
duke@435:             // Add reaching DEFs of derived pointer and base pointer as a
duke@435:             // pair of inputs
duke@435:             n->add_req( derived );
duke@435:             n->add_req( base );
duke@435: 
duke@435:             // See if the base pointer is already live to this point.
duke@435:             // Since I'm working on the SSA form, liveness amounts to
duke@435:             // reaching defs.  So if I find the base's live range then
duke@435:             // I know the base's def reaches here.
duke@435:             if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or
duke@435:                   !liveout.member( n2lidx(base) ) ) && // not live) AND
duke@435:                  (n2lidx(base) > 0)                && // not a constant
duke@435:                  _cfg._bbs[base->_idx] != b ) {     // base not def'd in blk)
duke@435:               // Base pointer is not currently live.  Since I stretched
duke@435:               // the base pointer to here and it crosses basic-block
duke@435:               // boundaries, the global live info is now incorrect.
duke@435:               // Recompute live.
duke@435: must_recompute_live = true; duke@435: } // End of if base pointer is not live to debug info duke@435: } duke@435: } // End of scan all live data for derived ptrs crossing GC point duke@435: } // End of if found a GC point duke@435: duke@435: // Make all inputs live duke@435: if( !n->is_Phi() ) { // Phi function uses come from prior block duke@435: for( uint k = 1; k < n->req(); k++ ) { duke@435: uint lidx = n2lidx(n->in(k)); duke@435: if( lidx < _maxlrg ) duke@435: liveout.insert( lidx ); duke@435: } duke@435: } duke@435: duke@435: } // End of forall instructions in block duke@435: liveout.clear(); // Free the memory used by liveout. duke@435: duke@435: } // End of forall blocks duke@435: _maxlrg = maxlrg; duke@435: duke@435: // If I created a new live range I need to recompute live duke@435: if( maxlrg != _ifg->_maxlrg ) duke@435: must_recompute_live = true; duke@435: duke@435: return must_recompute_live != 0; duke@435: } duke@435: duke@435: duke@435: //------------------------------add_reference---------------------------------- duke@435: // Extend the node to LRG mapping duke@435: void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) { duke@435: _names.extend( node->_idx, n2lidx(old_node) ); duke@435: } duke@435: duke@435: //------------------------------dump------------------------------------------- duke@435: #ifndef PRODUCT duke@435: void PhaseChaitin::dump( const Node *n ) const { duke@435: uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0; duke@435: tty->print("L%d",r); duke@435: if( r && n->Opcode() != Op_Phi ) { duke@435: if( _node_regs ) { // Got a post-allocation copy of allocation? duke@435: tty->print("["); duke@435: OptoReg::Name second = get_reg_second(n); duke@435: if( OptoReg::is_valid(second) ) { duke@435: if( OptoReg::is_reg(second) ) duke@435: tty->print("%s:",Matcher::regName[second]); duke@435: else duke@435: tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second)); duke@435: } duke@435: OptoReg::Name first = get_reg_first(n); duke@435: if( OptoReg::is_reg(first) ) duke@435: tty->print("%s]",Matcher::regName[first]); duke@435: else duke@435: tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first)); duke@435: } else duke@435: n->out_RegMask().dump(); duke@435: } duke@435: tty->print("/N%d\t",n->_idx); duke@435: tty->print("%s === ", n->Name()); duke@435: uint k; duke@435: for( k = 0; k < n->req(); k++) { duke@435: Node *m = n->in(k); duke@435: if( !m ) tty->print("_ "); duke@435: else { duke@435: uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0; duke@435: tty->print("L%d",r); duke@435: // Data MultiNode's can have projections with no real registers. duke@435: // Don't die while dumping them. 
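// (Example, assumed: the SCMemProj hanging off a load-store node such as a
//  compare-and-swap carries only memory state, so it has no register to
//  print; hence the Op_Proj / Op_SCMemProj exclusions just below.)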
duke@435:       int op = n->Opcode();
duke@435:       if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
duke@435:         if( _node_regs ) {
duke@435:           tty->print("[");
duke@435:           OptoReg::Name second = get_reg_second(n->in(k));
duke@435:           if( OptoReg::is_valid(second) ) {
duke@435:             if( OptoReg::is_reg(second) )
duke@435:               tty->print("%s:",Matcher::regName[second]);
duke@435:             else
duke@435:               tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
duke@435:                          reg2offset_unchecked(second));
duke@435:           }
duke@435:           OptoReg::Name first = get_reg_first(n->in(k));
duke@435:           if( OptoReg::is_reg(first) )
duke@435:             tty->print("%s]",Matcher::regName[first]);
duke@435:           else
duke@435:             tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
duke@435:                        reg2offset_unchecked(first));
duke@435:         } else
duke@435:           n->in_RegMask(k).dump();
duke@435:       }
duke@435:       tty->print("/N%d ",m->_idx);
duke@435:     }
duke@435:   }
duke@435:   if( k < n->len() && n->in(k) ) tty->print("| ");
duke@435:   for( ; k < n->len(); k++ ) {
duke@435:     Node *m = n->in(k);
duke@435:     if( !m ) break;
duke@435:     uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
duke@435:     tty->print("L%d",r);
duke@435:     tty->print("/N%d ",m->_idx);
duke@435:   }
duke@435:   if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
duke@435:   else n->dump_spec(tty);
duke@435:   if( _spilled_once.test(n->_idx ) ) {
duke@435:     tty->print(" Spill_1");
duke@435:     if( _spilled_twice.test(n->_idx ) )
duke@435:       tty->print(" Spill_2");
duke@435:   }
duke@435:   tty->print("\n");
duke@435: }
duke@435: 
duke@435: void PhaseChaitin::dump( const Block * b ) const {
duke@435:   b->dump_head( &_cfg._bbs );
duke@435: 
duke@435:   // For all instructions
duke@435:   for( uint j = 0; j < b->_nodes.size(); j++ )
duke@435:     dump(b->_nodes[j]);
duke@435:   // Print live-out info at end of block
duke@435:   if( _live ) {
duke@435:     tty->print("Liveout: ");
duke@435:     IndexSet *live = _live->live(b);
duke@435:     IndexSetIterator elements(live);
duke@435:     tty->print("{");
duke@435:     uint i;
duke@435:     while ((i = elements.next()) != 0) {
duke@435:       tty->print("L%d ", Find_const(i));
duke@435:     }
duke@435:     tty->print_cr("}");
duke@435:   }
duke@435:   tty->print("\n");
duke@435: }
duke@435: 
duke@435: void PhaseChaitin::dump() const {
duke@435:   tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n",
duke@435:               _matcher._new_SP, _framesize );
duke@435: 
duke@435:   // For all blocks
duke@435:   for( uint i = 0; i < _cfg._num_blocks; i++ )
duke@435:     dump(_cfg._blocks[i]);
duke@435:   // End of per-block dump
duke@435:   tty->print("\n");
duke@435: 
duke@435:   if (!_ifg) {
duke@435:     tty->print("(No IFG.)\n");
duke@435:     return;
duke@435:   }
duke@435: 
duke@435:   // Dump LRG array
duke@435:   tty->print("--- Live RanGe Array ---\n");
duke@435:   for(uint i2 = 1; i2 < _maxlrg; i2++ ) {
duke@435:     tty->print("L%d: ",i2);
duke@435:     if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( );
never@2358:     else tty->print_cr("new LRG");
duke@435:   }
duke@435:   tty->print_cr("");
duke@435: 
duke@435:   // Dump lo-degree list
duke@435:   tty->print("Lo degree: ");
duke@435:   for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )
duke@435:     tty->print("L%d ",i3);
duke@435:   tty->print_cr("");
duke@435: 
duke@435:   // Dump lo-stk-degree list
duke@435:   tty->print("Lo stk degree: ");
duke@435:   for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )
duke@435:     tty->print("L%d ",i4);
duke@435:   tty->print_cr("");
duke@435: 
duke@435:   // Dump hi-degree list
duke@435:   tty->print("Hi degree: ");
duke@435:   for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )
duke@435:     tty->print("L%d ",i5);
duke@435: 
duke@435:   tty->print_cr("");
duke@435: }
duke@435: 
duke@435: //------------------------------dump_degree_lists------------------------------
duke@435: void PhaseChaitin::dump_degree_lists() const {
duke@435:   // Dump lo-degree list
duke@435:   tty->print("Lo degree: ");
duke@435:   for( uint i = _lo_degree; i; i = lrgs(i)._next )
duke@435:     tty->print("L%d ",i);
duke@435:   tty->print_cr("");
duke@435: 
duke@435:   // Dump lo-stk-degree list
duke@435:   tty->print("Lo stk degree: ");
duke@435:   for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )
duke@435:     tty->print("L%d ",i2);
duke@435:   tty->print_cr("");
duke@435: 
duke@435:   // Dump hi-degree list
duke@435:   tty->print("Hi degree: ");
duke@435:   for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )
duke@435:     tty->print("L%d ",i3);
duke@435:   tty->print_cr("");
duke@435: }
duke@435: 
duke@435: //------------------------------dump_simplified--------------------------------
duke@435: void PhaseChaitin::dump_simplified() const {
duke@435:   tty->print("Simplified: ");
duke@435:   for( uint i = _simplified; i; i = lrgs(i)._next )
duke@435:     tty->print("L%d ",i);
duke@435:   tty->print_cr("");
duke@435: }
duke@435: 
duke@435: static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
duke@435:   if ((int)reg < 0)
duke@435:     sprintf(buf, "<OptoReg::%d>", (int)reg);
duke@435:   else if (OptoReg::is_reg(reg))
duke@435:     strcpy(buf, Matcher::regName[reg]);
duke@435:   else
duke@435:     sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer),
duke@435:             pc->reg2offset(reg));
duke@435:   return buf+strlen(buf);
duke@435: }
duke@435: 
duke@435: //------------------------------dump_register----------------------------------
duke@435: // Dump a register name into a buffer.  Be intelligent if we get called
duke@435: // before allocation is complete.
duke@435: char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
duke@435:   if( !this ) {                 // Not got anything?
duke@435:     sprintf(buf,"N%d",n->_idx); // Then use Node index
duke@435:   } else if( _node_regs ) {
duke@435:     // Post allocation, use direct mappings, no LRG info available
duke@435:     print_reg( get_reg_first(n), this, buf );
duke@435:   } else {
duke@435:     uint lidx = Find_const(n); // Grab LRG number
duke@435:     if( !_ifg ) {
duke@435:       sprintf(buf,"L%d",lidx);  // No register binding yet
duke@435:     } else if( !lidx ) {        // Special, not allocated value
duke@435:       strcpy(buf,"Special");
kvn@3882:     } else {
kvn@3882:       if (lrgs(lidx)._is_vector) {
kvn@3882:         if (lrgs(lidx).mask().is_bound_set(lrgs(lidx).num_regs()))
kvn@3882:           print_reg( lrgs(lidx).reg(), this, buf ); // a bound machine register
kvn@3882:         else
kvn@3882:           sprintf(buf,"L%d",lidx); // No register binding yet
kvn@3882:       } else if( (lrgs(lidx).num_regs() == 1)
kvn@3882:                   ? lrgs(lidx).mask().is_bound1()
kvn@3882:                   : lrgs(lidx).mask().is_bound_pair() ) {
kvn@3882:         // Hah!
We have a bound machine register kvn@3882: print_reg( lrgs(lidx).reg(), this, buf ); kvn@3882: } else { kvn@3882: sprintf(buf,"L%d",lidx); // No register binding yet kvn@3882: } duke@435: } duke@435: } duke@435: return buf+strlen(buf); duke@435: } duke@435: duke@435: //----------------------dump_for_spill_split_recycle-------------------------- duke@435: void PhaseChaitin::dump_for_spill_split_recycle() const { duke@435: if( WizardMode && (PrintCompilation || PrintOpto) ) { duke@435: // Display which live ranges need to be split and the allocator's state duke@435: tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt); duke@435: for( uint bidx = 1; bidx < _maxlrg; bidx++ ) { duke@435: if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) { duke@435: tty->print("L%d: ", bidx); duke@435: lrgs(bidx).dump(); duke@435: } duke@435: } duke@435: tty->cr(); duke@435: dump(); duke@435: } duke@435: } duke@435: duke@435: //------------------------------dump_frame------------------------------------ duke@435: void PhaseChaitin::dump_frame() const { duke@435: const char *fp = OptoReg::regname(OptoReg::c_frame_pointer); duke@435: const TypeTuple *domain = C->tf()->domain(); duke@435: const int argcnt = domain->cnt() - TypeFunc::Parms; duke@435: duke@435: // Incoming arguments in registers dump duke@435: for( int k = 0; k < argcnt; k++ ) { duke@435: OptoReg::Name parmreg = _matcher._parm_regs[k].first(); duke@435: if( OptoReg::is_reg(parmreg)) { duke@435: const char *reg_name = OptoReg::regname(parmreg); duke@435: tty->print("#r%3.3d %s", parmreg, reg_name); duke@435: parmreg = _matcher._parm_regs[k].second(); duke@435: if( OptoReg::is_reg(parmreg)) { duke@435: tty->print(":%s", OptoReg::regname(parmreg)); duke@435: } duke@435: tty->print(" : parm %d: ", k); duke@435: domain->field_at(k + TypeFunc::Parms)->dump(); duke@435: tty->print_cr(""); duke@435: } duke@435: } duke@435: duke@435: // Check for un-owned padding above incoming args duke@435: OptoReg::Name reg = _matcher._new_SP; duke@435: if( reg > _matcher._in_arg_limit ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg)); duke@435: } duke@435: duke@435: // Incoming argument area dump duke@435: OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()); duke@435: while( reg > begin_in_arg ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg)); duke@435: int j; duke@435: for( j = 0; j < argcnt; j++) { duke@435: if( _matcher._parm_regs[j].first() == reg || duke@435: _matcher._parm_regs[j].second() == reg ) { duke@435: tty->print("parm %d: ",j); duke@435: domain->field_at(j + TypeFunc::Parms)->dump(); duke@435: tty->print_cr(""); duke@435: break; duke@435: } duke@435: } duke@435: if( j >= argcnt ) duke@435: tty->print_cr("HOLE, owned by SELF"); duke@435: } duke@435: duke@435: // Old outgoing preserve area duke@435: while( reg > _matcher._old_SP ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg)); duke@435: } duke@435: duke@435: // Old SP duke@435: tty->print_cr("# -- Old %s -- Framesize: %d --",fp, duke@435: reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize); duke@435: duke@435: // Preserve area dump kvn@3577: int fixed_slots = C->fixed_slots(); kvn@3577: OptoReg::Name begin_in_preserve = 
OptoReg::add(_matcher._old_SP, -(int)C->in_preserve_stack_slots()); kvn@3577: OptoReg::Name return_addr = _matcher.return_addr(); kvn@3577: duke@435: reg = OptoReg::add(reg, -1); kvn@3577: while (OptoReg::is_stack(reg)) { duke@435: tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg)); kvn@3577: if (return_addr == reg) { duke@435: tty->print_cr("return address"); kvn@3577: } else if (reg >= begin_in_preserve) { kvn@3577: // Preserved slots are present on x86 kvn@3577: if (return_addr == OptoReg::add(reg, VMRegImpl::slots_per_word)) kvn@3577: tty->print_cr("saved fp register"); kvn@3577: else if (return_addr == OptoReg::add(reg, 2*VMRegImpl::slots_per_word) && kvn@3577: VerifyStackAtCalls) kvn@3577: tty->print_cr("0xBADB100D +VerifyStackAtCalls"); kvn@3577: else kvn@3577: tty->print_cr("in_preserve"); kvn@3577: } else if ((int)OptoReg::reg2stack(reg) < fixed_slots) { duke@435: tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg)); kvn@3577: } else { kvn@3577: tty->print_cr("pad2, stack alignment"); kvn@3577: } duke@435: reg = OptoReg::add(reg, -1); duke@435: } duke@435: duke@435: // Spill area dump duke@435: reg = OptoReg::add(_matcher._new_SP, _framesize ); duke@435: while( reg > _matcher._out_arg_limit ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg)); duke@435: } duke@435: duke@435: // Outgoing argument area dump duke@435: while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg)); duke@435: } duke@435: duke@435: // Outgoing new preserve area duke@435: while( reg > _matcher._new_SP ) { duke@435: reg = OptoReg::add(reg, -1); duke@435: tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg)); duke@435: } duke@435: tty->print_cr("#"); duke@435: } duke@435: duke@435: //------------------------------dump_bb---------------------------------------- duke@435: void PhaseChaitin::dump_bb( uint pre_order ) const { duke@435: tty->print_cr("---dump of B%d---",pre_order); duke@435: for( uint i = 0; i < _cfg._num_blocks; i++ ) { duke@435: Block *b = _cfg._blocks[i]; duke@435: if( b->_pre_order == pre_order ) duke@435: dump(b); duke@435: } duke@435: } duke@435: duke@435: //------------------------------dump_lrg--------------------------------------- never@2358: void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const { duke@435: tty->print_cr("---dump of L%d---",lidx); duke@435: duke@435: if( _ifg ) { duke@435: if( lidx >= _maxlrg ) { duke@435: tty->print("Attempt to print live range index beyond max live range.\n"); duke@435: return; duke@435: } duke@435: tty->print("L%d: ",lidx); never@2358: if( lidx < _ifg->_maxlrg ) lrgs(lidx).dump( ); never@2358: else tty->print_cr("new LRG"); duke@435: } never@2358: if( _ifg && lidx < _ifg->_maxlrg) { never@2358: tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx)); duke@435: _ifg->neighbors(lidx)->dump(); duke@435: tty->cr(); duke@435: } duke@435: // For all blocks duke@435: for( uint i = 0; i < _cfg._num_blocks; i++ ) { duke@435: Block *b = _cfg._blocks[i]; duke@435: int dump_once = 0; duke@435: duke@435: // For all instructions duke@435: for( uint j = 0; j < b->_nodes.size(); j++ ) { duke@435: Node *n = b->_nodes[j]; duke@435: if( Find_const(n) == lidx ) { duke@435: if( !dump_once++ ) { duke@435: tty->cr(); duke@435: b->dump_head( &_cfg._bbs ); duke@435: } duke@435: dump(n); duke@435: 
continue; duke@435: } never@2358: if (!defs_only) { never@2358: uint cnt = n->req(); never@2358: for( uint k = 1; k < cnt; k++ ) { never@2358: Node *m = n->in(k); never@2358: if (!m) continue; // be robust in the dumper never@2358: if( Find_const(m) == lidx ) { never@2358: if( !dump_once++ ) { never@2358: tty->cr(); never@2358: b->dump_head( &_cfg._bbs ); never@2358: } never@2358: dump(n); duke@435: } duke@435: } duke@435: } duke@435: } duke@435: } // End of per-block dump duke@435: tty->cr(); duke@435: } duke@435: #endif // not PRODUCT duke@435: duke@435: //------------------------------print_chaitin_statistics------------------------------- duke@435: int PhaseChaitin::_final_loads = 0; duke@435: int PhaseChaitin::_final_stores = 0; duke@435: int PhaseChaitin::_final_memoves= 0; duke@435: int PhaseChaitin::_final_copies = 0; duke@435: double PhaseChaitin::_final_load_cost = 0; duke@435: double PhaseChaitin::_final_store_cost = 0; duke@435: double PhaseChaitin::_final_memove_cost= 0; duke@435: double PhaseChaitin::_final_copy_cost = 0; duke@435: int PhaseChaitin::_conserv_coalesce = 0; duke@435: int PhaseChaitin::_conserv_coalesce_pair = 0; duke@435: int PhaseChaitin::_conserv_coalesce_trie = 0; duke@435: int PhaseChaitin::_conserv_coalesce_quad = 0; duke@435: int PhaseChaitin::_post_alloc = 0; duke@435: int PhaseChaitin::_lost_opp_pp_coalesce = 0; duke@435: int PhaseChaitin::_lost_opp_cflow_coalesce = 0; duke@435: int PhaseChaitin::_used_cisc_instructions = 0; duke@435: int PhaseChaitin::_unused_cisc_instructions = 0; duke@435: int PhaseChaitin::_allocator_attempts = 0; duke@435: int PhaseChaitin::_allocator_successes = 0; duke@435: duke@435: #ifndef PRODUCT duke@435: uint PhaseChaitin::_high_pressure = 0; duke@435: uint PhaseChaitin::_low_pressure = 0; duke@435: duke@435: void PhaseChaitin::print_chaitin_statistics() { duke@435: tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies); duke@435: tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost); duke@435: tty->print_cr("Adjusted spill cost = %7.0f.", duke@435: _final_load_cost*4.0 + _final_store_cost * 2.0 + duke@435: _final_copy_cost*1.0 + _final_memove_cost*12.0); duke@435: tty->print("Conservatively coalesced %d copies, %d pairs", duke@435: _conserv_coalesce, _conserv_coalesce_pair); duke@435: if( _conserv_coalesce_trie || _conserv_coalesce_quad ) duke@435: tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad); duke@435: tty->print_cr(", %d post alloc.", _post_alloc); duke@435: if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce ) duke@435: tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.", duke@435: _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce ); duke@435: if( _used_cisc_instructions || _unused_cisc_instructions ) duke@435: tty->print_cr("Used cisc instruction %d, remained in register %d", duke@435: _used_cisc_instructions, _unused_cisc_instructions); duke@435: if( _allocator_successes != 0 ) duke@435: tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes); duke@435: tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure); duke@435: } duke@435: #endif // not PRODUCT
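// Worked example (figures assumed, not from the original source): with
// _final_load_cost == 100, _final_store_cost == 50, _final_copy_cost == 10
// and _final_memove_cost == 2, print_chaitin_statistics() above prints
//
//     Adjusted spill cost = 100*4.0 + 50*2.0 + 10*1.0 + 2*12.0 = 534
//
// i.e. spill loads are weighted 4x, spill stores 2x, copies 1x, and
// memory-to-memory moves 12x.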