Wed, 27 Apr 2016 01:25:04 +0800
Initial load
http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/
changeset: 6782:28b50d07f6f8
tag: jdk8u25-b17
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. |
aoqi@0 | 8 | * |
aoqi@0 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 13 | * accompanied this code). |
aoqi@0 | 14 | * |
aoqi@0 | 15 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 18 | * |
aoqi@0 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 20 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 21 | * questions. |
aoqi@0 | 22 | * |
aoqi@0 | 23 | */ |
aoqi@0 | 24 | |
aoqi@0 | 25 | #include "precompiled.hpp" |
aoqi@0 | 26 | #include "compiler/oopMap.hpp" |
aoqi@0 | 27 | #include "memory/allocation.inline.hpp" |
aoqi@0 | 28 | #include "opto/addnode.hpp" |
aoqi@0 | 29 | #include "opto/block.hpp" |
aoqi@0 | 30 | #include "opto/callnode.hpp" |
aoqi@0 | 31 | #include "opto/cfgnode.hpp" |
aoqi@0 | 32 | #include "opto/chaitin.hpp" |
aoqi@0 | 33 | #include "opto/coalesce.hpp" |
aoqi@0 | 34 | #include "opto/connode.hpp" |
aoqi@0 | 35 | #include "opto/indexSet.hpp" |
aoqi@0 | 36 | #include "opto/machnode.hpp" |
aoqi@0 | 37 | #include "opto/memnode.hpp" |
aoqi@0 | 38 | #include "opto/opcodes.hpp" |
aoqi@0 | 39 | |
aoqi@0 | 40 | PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) { |
aoqi@0 | 41 | } |
aoqi@0 | 42 | |
aoqi@0 | 43 | void PhaseIFG::init( uint maxlrg ) { |
aoqi@0 | 44 | _maxlrg = maxlrg; |
aoqi@0 | 45 | _yanked = new (_arena) VectorSet(_arena); |
aoqi@0 | 46 | _is_square = false; |
aoqi@0 | 47 | // Make uninitialized adjacency lists |
aoqi@0 | 48 | _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg); |
aoqi@0 | 49 | // Also make empty live range structures |
aoqi@0 | 50 | _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) ); |
aoqi@0 | 51 | memset(_lrgs,0,sizeof(LRG)*maxlrg); |
aoqi@0 | 52 | // Init all to empty |
aoqi@0 | 53 | for( uint i = 0; i < maxlrg; i++ ) { |
aoqi@0 | 54 | _adjs[i].initialize(maxlrg); |
aoqi@0 | 55 | _lrgs[i].Set_All(); |
aoqi@0 | 56 | } |
aoqi@0 | 57 | } |
aoqi@0 | 58 | |
aoqi@0 | 59 | // Add edge between vertices a & b. These are sorted (triangular matrix), |
aoqi@0 | 60 | // then the smaller number is inserted in the larger numbered array. |
aoqi@0 | 61 | int PhaseIFG::add_edge( uint a, uint b ) { |
aoqi@0 | 62 | lrgs(a).invalid_degree(); |
aoqi@0 | 63 | lrgs(b).invalid_degree(); |
aoqi@0 | 64 | // Sort a and b, so that a is bigger |
aoqi@0 | 65 | assert( !_is_square, "only on triangular" ); |
aoqi@0 | 66 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
aoqi@0 | 67 | return _adjs[a].insert( b ); |
aoqi@0 | 68 | } |
aoqi@0 | 69 | |
aoqi@0 | 70 | // Add an edge between 'a' and everything in the vector. |
aoqi@0 | 71 | void PhaseIFG::add_vector( uint a, IndexSet *vec ) { |
aoqi@0 | 72 | // IFG is triangular, so do the inserts where 'a' < 'b'. |
aoqi@0 | 73 | assert( !_is_square, "only on triangular" ); |
aoqi@0 | 74 | IndexSet *adjs_a = &_adjs[a]; |
aoqi@0 | 75 | if( !vec->count() ) return; |
aoqi@0 | 76 | |
aoqi@0 | 77 | IndexSetIterator elements(vec); |
aoqi@0 | 78 | uint neighbor; |
aoqi@0 | 79 | while ((neighbor = elements.next()) != 0) { |
aoqi@0 | 80 | add_edge( a, neighbor ); |
aoqi@0 | 81 | } |
aoqi@0 | 82 | } |
aoqi@0 | 83 | |
aoqi@0 | 84 | // Is there an edge between a and b? |
aoqi@0 | 85 | int PhaseIFG::test_edge( uint a, uint b ) const { |
aoqi@0 | 86 | // Sort a and b, so that a is larger |
aoqi@0 | 87 | assert( !_is_square, "only on triangular" ); |
aoqi@0 | 88 | if( a < b ) { uint tmp = a; a = b; b = tmp; } |
aoqi@0 | 89 | return _adjs[a].member(b); |
aoqi@0 | 90 | } |
aoqi@0 | 91 | |
aoqi@0 | 92 | // Convert triangular matrix to square matrix |
aoqi@0 | 93 | void PhaseIFG::SquareUp() { |
aoqi@0 | 94 | assert( !_is_square, "only on triangular" ); |
aoqi@0 | 95 | |
aoqi@0 | 96 | // Simple transpose |
aoqi@0 | 97 | for( uint i = 0; i < _maxlrg; i++ ) { |
aoqi@0 | 98 | IndexSetIterator elements(&_adjs[i]); |
aoqi@0 | 99 | uint datum; |
aoqi@0 | 100 | while ((datum = elements.next()) != 0) { |
aoqi@0 | 101 | _adjs[datum].insert( i ); |
aoqi@0 | 102 | } |
aoqi@0 | 103 | } |
aoqi@0 | 104 | _is_square = true; |
aoqi@0 | 105 | } |
aoqi@0 | 106 | |
aoqi@0 | 107 | // Compute effective degree in bulk |
aoqi@0 | 108 | void PhaseIFG::Compute_Effective_Degree() { |
aoqi@0 | 109 | assert( _is_square, "only on square" ); |
aoqi@0 | 110 | |
aoqi@0 | 111 | for( uint i = 0; i < _maxlrg; i++ ) |
aoqi@0 | 112 | lrgs(i).set_degree(effective_degree(i)); |
aoqi@0 | 113 | } |
aoqi@0 | 114 | |
aoqi@0 | 115 | int PhaseIFG::test_edge_sq( uint a, uint b ) const { |
aoqi@0 | 116 | assert( _is_square, "only on square" ); |
aoqi@0 | 117 | // Swap, so that 'a' has the lesser count. Then binary search is on |
aoqi@0 | 118 | // the smaller of a's list and b's list. |
aoqi@0 | 119 | if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; } |
aoqi@0 | 120 | //return _adjs[a].unordered_member(b); |
aoqi@0 | 121 | return _adjs[a].member(b); |
aoqi@0 | 122 | } |
aoqi@0 | 123 | |
aoqi@0 | 124 | // Union edges of B into A |
aoqi@0 | 125 | void PhaseIFG::Union( uint a, uint b ) { |
aoqi@0 | 126 | assert( _is_square, "only on square" ); |
aoqi@0 | 127 | IndexSet *A = &_adjs[a]; |
aoqi@0 | 128 | IndexSetIterator b_elements(&_adjs[b]); |
aoqi@0 | 129 | uint datum; |
aoqi@0 | 130 | while ((datum = b_elements.next()) != 0) { |
aoqi@0 | 131 | if(A->insert(datum)) { |
aoqi@0 | 132 | _adjs[datum].insert(a); |
aoqi@0 | 133 | lrgs(a).invalid_degree(); |
aoqi@0 | 134 | lrgs(datum).invalid_degree(); |
aoqi@0 | 135 | } |
aoqi@0 | 136 | } |
aoqi@0 | 137 | } |
aoqi@0 | 138 | |
aoqi@0 | 139 | // Yank a Node and all connected edges from the IFG. Return a |
aoqi@0 | 140 | // list of neighbors (edges) yanked. |
aoqi@0 | 141 | IndexSet *PhaseIFG::remove_node( uint a ) { |
aoqi@0 | 142 | assert( _is_square, "only on square" ); |
aoqi@0 | 143 | assert( !_yanked->test(a), "" ); |
aoqi@0 | 144 | _yanked->set(a); |
aoqi@0 | 145 | |
aoqi@0 | 146 | // I remove the LRG from all neighbors. |
aoqi@0 | 147 | IndexSetIterator elements(&_adjs[a]); |
aoqi@0 | 148 | LRG &lrg_a = lrgs(a); |
aoqi@0 | 149 | uint datum; |
aoqi@0 | 150 | while ((datum = elements.next()) != 0) { |
aoqi@0 | 151 | _adjs[datum].remove(a); |
aoqi@0 | 152 | lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); |
aoqi@0 | 153 | } |
aoqi@0 | 154 | return neighbors(a); |
aoqi@0 | 155 | } |
aoqi@0 | 156 | |
aoqi@0 | 157 | // Re-insert a yanked Node. |
aoqi@0 | 158 | void PhaseIFG::re_insert( uint a ) { |
aoqi@0 | 159 | assert( _is_square, "only on square" ); |
aoqi@0 | 160 | assert( _yanked->test(a), "" ); |
aoqi@0 | 161 | (*_yanked) >>= a; |
aoqi@0 | 162 | |
aoqi@0 | 163 | IndexSetIterator elements(&_adjs[a]); |
aoqi@0 | 164 | uint datum; |
aoqi@0 | 165 | while ((datum = elements.next()) != 0) { |
aoqi@0 | 166 | _adjs[datum].insert(a); |
aoqi@0 | 167 | lrgs(datum).invalid_degree(); |
aoqi@0 | 168 | } |
aoqi@0 | 169 | } |
aoqi@0 | 170 | |
aoqi@0 | 171 | // Compute the degree between 2 live ranges. If both live ranges are |
aoqi@0 | 172 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
aoqi@0 | 173 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
aoqi@0 | 174 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
aoqi@0 | 175 | // this is so. |
aoqi@0 | 176 | int LRG::compute_degree( LRG &l ) const { |
aoqi@0 | 177 | int tmp; |
aoqi@0 | 178 | int num_regs = _num_regs; |
aoqi@0 | 179 | int nregs = l.num_regs(); |
aoqi@0 | 180 | tmp = (_fat_proj || l._fat_proj) // either is a fat-proj? |
aoqi@0 | 181 | ? (num_regs * nregs) // then use product |
aoqi@0 | 182 | : MAX2(num_regs,nregs); // else use max |
aoqi@0 | 183 | return tmp; |
aoqi@0 | 184 | } |
aoqi@0 | 185 | |
aoqi@0 | 186 | // Compute effective degree for this live range. If both live ranges are |
aoqi@0 | 187 | // aligned-adjacent powers-of-2 then we use the MAX size. If either is |
aoqi@0 | 188 | // mis-aligned (or for Fat-Projections, not-adjacent) then we have to |
aoqi@0 | 189 | // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why |
aoqi@0 | 190 | // this is so. |
aoqi@0 | 191 | int PhaseIFG::effective_degree( uint lidx ) const { |
aoqi@0 | 192 | int eff = 0; |
aoqi@0 | 193 | int num_regs = lrgs(lidx).num_regs(); |
aoqi@0 | 194 | int fat_proj = lrgs(lidx)._fat_proj; |
aoqi@0 | 195 | IndexSet *s = neighbors(lidx); |
aoqi@0 | 196 | IndexSetIterator elements(s); |
aoqi@0 | 197 | uint nidx; |
aoqi@0 | 198 | while((nidx = elements.next()) != 0) { |
aoqi@0 | 199 | LRG &lrgn = lrgs(nidx); |
aoqi@0 | 200 | int nregs = lrgn.num_regs(); |
aoqi@0 | 201 | eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? |
aoqi@0 | 202 | ? (num_regs * nregs) // then use product |
aoqi@0 | 203 | : MAX2(num_regs,nregs); // else use max |
aoqi@0 | 204 | } |
aoqi@0 | 205 | return eff; |
aoqi@0 | 206 | } |
aoqi@0 | 207 | |
aoqi@0 | 208 | |
aoqi@0 | 209 | #ifndef PRODUCT |
aoqi@0 | 210 | void PhaseIFG::dump() const { |
aoqi@0 | 211 | tty->print_cr("-- Interference Graph --%s--", |
aoqi@0 | 212 | _is_square ? "square" : "triangular" ); |
aoqi@0 | 213 | if( _is_square ) { |
aoqi@0 | 214 | for( uint i = 0; i < _maxlrg; i++ ) { |
aoqi@0 | 215 | tty->print( (*_yanked)[i] ? "XX " : " "); |
aoqi@0 | 216 | tty->print("L%d: { ",i); |
aoqi@0 | 217 | IndexSetIterator elements(&_adjs[i]); |
aoqi@0 | 218 | uint datum; |
aoqi@0 | 219 | while ((datum = elements.next()) != 0) { |
aoqi@0 | 220 | tty->print("L%d ", datum); |
aoqi@0 | 221 | } |
aoqi@0 | 222 | tty->print_cr("}"); |
aoqi@0 | 223 | |
aoqi@0 | 224 | } |
aoqi@0 | 225 | return; |
aoqi@0 | 226 | } |
aoqi@0 | 227 | |
aoqi@0 | 228 | // Triangular |
aoqi@0 | 229 | for( uint i = 0; i < _maxlrg; i++ ) { |
aoqi@0 | 230 | uint j; |
aoqi@0 | 231 | tty->print( (*_yanked)[i] ? "XX " : " "); |
aoqi@0 | 232 | tty->print("L%d: { ",i); |
aoqi@0 | 233 | for( j = _maxlrg; j > i; j-- ) |
aoqi@0 | 234 | if( test_edge(j - 1,i) ) { |
aoqi@0 | 235 | tty->print("L%d ",j - 1); |
aoqi@0 | 236 | } |
aoqi@0 | 237 | tty->print("| "); |
aoqi@0 | 238 | IndexSetIterator elements(&_adjs[i]); |
aoqi@0 | 239 | uint datum; |
aoqi@0 | 240 | while ((datum = elements.next()) != 0) { |
aoqi@0 | 241 | tty->print("L%d ", datum); |
aoqi@0 | 242 | } |
aoqi@0 | 243 | tty->print("}\n"); |
aoqi@0 | 244 | } |
aoqi@0 | 245 | tty->print("\n"); |
aoqi@0 | 246 | } |
aoqi@0 | 247 | |
aoqi@0 | 248 | void PhaseIFG::stats() const { |
aoqi@0 | 249 | ResourceMark rm; |
aoqi@0 | 250 | int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2); |
aoqi@0 | 251 | memset( h_cnt, 0, sizeof(int)*_maxlrg*2 ); |
aoqi@0 | 252 | uint i; |
aoqi@0 | 253 | for( i = 0; i < _maxlrg; i++ ) { |
aoqi@0 | 254 | h_cnt[neighbor_cnt(i)]++; |
aoqi@0 | 255 | } |
aoqi@0 | 256 | tty->print_cr("--Histogram of counts--"); |
aoqi@0 | 257 | for( i = 0; i < _maxlrg*2; i++ ) |
aoqi@0 | 258 | if( h_cnt[i] ) |
aoqi@0 | 259 | tty->print("%d/%d ",i,h_cnt[i]); |
aoqi@0 | 260 | tty->cr(); |
aoqi@0 | 261 | } |
aoqi@0 | 262 | |
aoqi@0 | 263 | void PhaseIFG::verify( const PhaseChaitin *pc ) const { |
aoqi@0 | 264 | // IFG is square, sorted and no need for Find |
aoqi@0 | 265 | for( uint i = 0; i < _maxlrg; i++ ) { |
aoqi@0 | 266 | assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" ); |
aoqi@0 | 267 | IndexSet *set = &_adjs[i]; |
aoqi@0 | 268 | IndexSetIterator elements(set); |
aoqi@0 | 269 | uint idx; |
aoqi@0 | 270 | uint last = 0; |
aoqi@0 | 271 | while ((idx = elements.next()) != 0) { |
aoqi@0 | 272 | assert(idx != i, "Must have empty diagonal"); |
aoqi@0 | 273 | assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find"); |
aoqi@0 | 274 | assert(_adjs[idx].member(i), "IFG not square"); |
aoqi@0 | 275 | assert(!(*_yanked)[idx], "No yanked neighbors"); |
aoqi@0 | 276 | assert(last < idx, "not sorted increasing"); |
aoqi@0 | 277 | last = idx; |
aoqi@0 | 278 | } |
aoqi@0 | 279 | assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong"); |
aoqi@0 | 280 | } |
aoqi@0 | 281 | } |
aoqi@0 | 282 | #endif |
aoqi@0 | 283 | |
aoqi@0 | 284 | // Interfere this register with everything currently live. Use the RegMasks |
aoqi@0 | 285 | // to trim the set of possible interferences. Return a count of register-only |
aoqi@0 | 286 | // interferences as an estimate of register pressure. |
aoqi@0 | 287 | void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) { |
aoqi@0 | 288 | uint retval = 0; |
aoqi@0 | 289 | // Interfere with everything live. |
aoqi@0 | 290 | const RegMask &rm = lrgs(r).mask(); |
aoqi@0 | 291 | // Check for interference by checking overlap of regmasks. |
aoqi@0 | 292 | // Only interfere if acceptable register masks overlap. |
aoqi@0 | 293 | IndexSetIterator elements(liveout); |
aoqi@0 | 294 | uint l; |
aoqi@0 | 295 | while( (l = elements.next()) != 0 ) |
aoqi@0 | 296 | if( rm.overlap( lrgs(l).mask() ) ) |
aoqi@0 | 297 | _ifg->add_edge( r, l ); |
aoqi@0 | 298 | } |
aoqi@0 | 299 | |
aoqi@0 | 300 | // Actually build the interference graph. Uses virtual registers only, no |
aoqi@0 | 301 | // physical register masks. This allows me to be very aggressive when |
aoqi@0 | 302 | // coalescing copies. Some of this aggressiveness will have to be undone |
aoqi@0 | 303 | // later, but I'd rather get all the copies I can now (since unremoved copies |
aoqi@0 | 304 | // at this point can end up in bad places). Copies I re-insert later I have |
aoqi@0 | 305 | // more opportunity to insert them in low-frequency locations. |
aoqi@0 | 306 | void PhaseChaitin::build_ifg_virtual( ) { |
aoqi@0 | 307 | |
aoqi@0 | 308 | // For all blocks (in any order) do... |
aoqi@0 | 309 | for (uint i = 0; i < _cfg.number_of_blocks(); i++) { |
aoqi@0 | 310 | Block* block = _cfg.get_block(i); |
aoqi@0 | 311 | IndexSet* liveout = _live->live(block); |
aoqi@0 | 312 | |
aoqi@0 | 313 | // The IFG is built by a single reverse pass over each basic block. |
aoqi@0 | 314 | // Starting with the known live-out set, we remove things that get |
aoqi@0 | 315 | // defined and add things that become live (essentially executing one |
aoqi@0 | 316 | // pass of a standard LIVE analysis). Just before a Node defines a value |
aoqi@0 | 317 | // (and removes it from the live-ness set) that value is certainly live. |
aoqi@0 | 318 | // The defined value interferes with everything currently live. The |
aoqi@0 | 319 | // value is then removed from the live-ness set and it's inputs are |
aoqi@0 | 320 | // added to the live-ness set. |
aoqi@0 | 321 | for (uint j = block->end_idx() + 1; j > 1; j--) { |
aoqi@0 | 322 | Node* n = block->get_node(j - 1); |
aoqi@0 | 323 | |
aoqi@0 | 324 | // Get value being defined |
aoqi@0 | 325 | uint r = _lrg_map.live_range_id(n); |
aoqi@0 | 326 | |
aoqi@0 | 327 | // Some special values do not allocate |
aoqi@0 | 328 | if (r) { |
aoqi@0 | 329 | |
aoqi@0 | 330 | // Remove from live-out set |
aoqi@0 | 331 | liveout->remove(r); |
aoqi@0 | 332 | |
aoqi@0 | 333 | // Copies do not define a new value and so do not interfere. |
aoqi@0 | 334 | // Remove the copies source from the liveout set before interfering. |
aoqi@0 | 335 | uint idx = n->is_Copy(); |
aoqi@0 | 336 | if (idx) { |
aoqi@0 | 337 | liveout->remove(_lrg_map.live_range_id(n->in(idx))); |
aoqi@0 | 338 | } |
aoqi@0 | 339 | |
aoqi@0 | 340 | // Interfere with everything live |
aoqi@0 | 341 | interfere_with_live(r, liveout); |
aoqi@0 | 342 | } |
aoqi@0 | 343 | |
aoqi@0 | 344 | // Make all inputs live |
aoqi@0 | 345 | if (!n->is_Phi()) { // Phi function uses come from prior block |
aoqi@0 | 346 | for(uint k = 1; k < n->req(); k++) { |
aoqi@0 | 347 | liveout->insert(_lrg_map.live_range_id(n->in(k))); |
aoqi@0 | 348 | } |
aoqi@0 | 349 | } |
aoqi@0 | 350 | |
aoqi@0 | 351 | // 2-address instructions always have the defined value live |
aoqi@0 | 352 | // on entry to the instruction, even though it is being defined |
aoqi@0 | 353 | // by the instruction. We pretend a virtual copy sits just prior |
aoqi@0 | 354 | // to the instruction and kills the src-def'd register. |
aoqi@0 | 355 | // In other words, for 2-address instructions the defined value |
aoqi@0 | 356 | // interferes with all inputs. |
aoqi@0 | 357 | uint idx; |
aoqi@0 | 358 | if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) { |
aoqi@0 | 359 | const MachNode *mach = n->as_Mach(); |
aoqi@0 | 360 | // Sometimes my 2-address ADDs are commuted in a bad way. |
aoqi@0 | 361 | // We generally want the USE-DEF register to refer to the |
aoqi@0 | 362 | // loop-varying quantity, to avoid a copy. |
aoqi@0 | 363 | uint op = mach->ideal_Opcode(); |
aoqi@0 | 364 | // Check that mach->num_opnds() == 3 to ensure instruction is |
aoqi@0 | 365 | // not subsuming constants, effectively excludes addI_cin_imm |
aoqi@0 | 366 | // Can NOT swap for instructions like addI_cin_imm since it |
aoqi@0 | 367 | // is adding zero to yhi + carry and the second ideal-input |
aoqi@0 | 368 | // points to the result of adding low-halves. |
aoqi@0 | 369 | // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm |
aoqi@0 | 370 | if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) && |
aoqi@0 | 371 | n->in(1)->bottom_type()->base() == Type::Int && |
aoqi@0 | 372 | // See if the ADD is involved in a tight data loop the wrong way |
aoqi@0 | 373 | n->in(2)->is_Phi() && |
aoqi@0 | 374 | n->in(2)->in(2) == n ) { |
aoqi@0 | 375 | Node *tmp = n->in(1); |
aoqi@0 | 376 | n->set_req( 1, n->in(2) ); |
aoqi@0 | 377 | n->set_req( 2, tmp ); |
aoqi@0 | 378 | } |
aoqi@0 | 379 | // Defined value interferes with all inputs |
aoqi@0 | 380 | uint lidx = _lrg_map.live_range_id(n->in(idx)); |
aoqi@0 | 381 | for (uint k = 1; k < n->req(); k++) { |
aoqi@0 | 382 | uint kidx = _lrg_map.live_range_id(n->in(k)); |
aoqi@0 | 383 | if (kidx != lidx) { |
aoqi@0 | 384 | _ifg->add_edge(r, kidx); |
aoqi@0 | 385 | } |
aoqi@0 | 386 | } |
aoqi@0 | 387 | } |
aoqi@0 | 388 | } // End of forall instructions in block |
aoqi@0 | 389 | } // End of forall blocks |
aoqi@0 | 390 | } |
aoqi@0 | 391 | |
aoqi@0 | 392 | uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { |
aoqi@0 | 393 | IndexSetIterator elements(liveout); |
aoqi@0 | 394 | uint lidx; |
aoqi@0 | 395 | uint cnt = 0; |
aoqi@0 | 396 | while ((lidx = elements.next()) != 0) { |
aoqi@0 | 397 | if( lrgs(lidx).mask().is_UP() && |
aoqi@0 | 398 | lrgs(lidx).mask_size() && |
aoqi@0 | 399 | !lrgs(lidx)._is_float && |
aoqi@0 | 400 | !lrgs(lidx)._is_vector && |
aoqi@0 | 401 | lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) |
aoqi@0 | 402 | cnt += lrgs(lidx).reg_pressure(); |
aoqi@0 | 403 | } |
aoqi@0 | 404 | return cnt; |
aoqi@0 | 405 | } |
aoqi@0 | 406 | |
aoqi@0 | 407 | uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) { |
aoqi@0 | 408 | IndexSetIterator elements(liveout); |
aoqi@0 | 409 | uint lidx; |
aoqi@0 | 410 | uint cnt = 0; |
aoqi@0 | 411 | while ((lidx = elements.next()) != 0) { |
aoqi@0 | 412 | if( lrgs(lidx).mask().is_UP() && |
aoqi@0 | 413 | lrgs(lidx).mask_size() && |
aoqi@0 | 414 | (lrgs(lidx)._is_float || lrgs(lidx)._is_vector)) |
aoqi@0 | 415 | cnt += lrgs(lidx).reg_pressure(); |
aoqi@0 | 416 | } |
aoqi@0 | 417 | return cnt; |
aoqi@0 | 418 | } |
aoqi@0 | 419 | |
aoqi@0 | 420 | // Adjust register pressure down by 1. Capture last hi-to-low transition, |
aoqi@0 | 421 | static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) { |
aoqi@0 | 422 | if (lrg->mask().is_UP() && lrg->mask_size()) { |
aoqi@0 | 423 | if (lrg->_is_float || lrg->_is_vector) { |
aoqi@0 | 424 | pressure[1] -= lrg->reg_pressure(); |
aoqi@0 | 425 | if( pressure[1] == (uint)FLOATPRESSURE ) { |
aoqi@0 | 426 | hrp_index[1] = where; |
aoqi@0 | 427 | if( pressure[1] > b->_freg_pressure ) |
aoqi@0 | 428 | b->_freg_pressure = pressure[1]+1; |
aoqi@0 | 429 | } |
aoqi@0 | 430 | } else if( lrg->mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { |
aoqi@0 | 431 | pressure[0] -= lrg->reg_pressure(); |
aoqi@0 | 432 | if( pressure[0] == (uint)INTPRESSURE ) { |
aoqi@0 | 433 | hrp_index[0] = where; |
aoqi@0 | 434 | if( pressure[0] > b->_reg_pressure ) |
aoqi@0 | 435 | b->_reg_pressure = pressure[0]+1; |
aoqi@0 | 436 | } |
aoqi@0 | 437 | } |
aoqi@0 | 438 | } |
aoqi@0 | 439 | } |
aoqi@0 | 440 | |
aoqi@0 | 441 | // Build the interference graph using physical registers when available. |
aoqi@0 | 442 | // That is, if 2 live ranges are simultaneously alive but in their acceptable |
aoqi@0 | 443 | // register sets do not overlap, then they do not interfere. |
aoqi@0 | 444 | uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { |
aoqi@0 | 445 | NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); ) |
aoqi@0 | 446 | |
aoqi@0 | 447 | uint must_spill = 0; |
aoqi@0 | 448 | |
aoqi@0 | 449 | // For all blocks (in any order) do... |
aoqi@0 | 450 | for (uint i = 0; i < _cfg.number_of_blocks(); i++) { |
aoqi@0 | 451 | Block* block = _cfg.get_block(i); |
aoqi@0 | 452 | // Clone (rather than smash in place) the liveout info, so it is alive |
aoqi@0 | 453 | // for the "collect_gc_info" phase later. |
aoqi@0 | 454 | IndexSet liveout(_live->live(block)); |
aoqi@0 | 455 | uint last_inst = block->end_idx(); |
aoqi@0 | 456 | // Compute first nonphi node index |
aoqi@0 | 457 | uint first_inst; |
aoqi@0 | 458 | for (first_inst = 1; first_inst < last_inst; first_inst++) { |
aoqi@0 | 459 | if (!block->get_node(first_inst)->is_Phi()) { |
aoqi@0 | 460 | break; |
aoqi@0 | 461 | } |
aoqi@0 | 462 | } |
aoqi@0 | 463 | |
aoqi@0 | 464 | // Spills could be inserted before CreateEx node which should be |
aoqi@0 | 465 | // first instruction in block after Phis. Move CreateEx up. |
aoqi@0 | 466 | for (uint insidx = first_inst; insidx < last_inst; insidx++) { |
aoqi@0 | 467 | Node *ex = block->get_node(insidx); |
aoqi@0 | 468 | if (ex->is_SpillCopy()) { |
aoqi@0 | 469 | continue; |
aoqi@0 | 470 | } |
aoqi@0 | 471 | if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) { |
aoqi@0 | 472 | // If the CreateEx isn't above all the MachSpillCopies |
aoqi@0 | 473 | // then move it to the top. |
aoqi@0 | 474 | block->remove_node(insidx); |
aoqi@0 | 475 | block->insert_node(ex, first_inst); |
aoqi@0 | 476 | } |
aoqi@0 | 477 | // Stop once a CreateEx or any other node is found |
aoqi@0 | 478 | break; |
aoqi@0 | 479 | } |
aoqi@0 | 480 | |
aoqi@0 | 481 | // Reset block's register pressure values for each ifg construction |
aoqi@0 | 482 | uint pressure[2], hrp_index[2]; |
aoqi@0 | 483 | pressure[0] = pressure[1] = 0; |
aoqi@0 | 484 | hrp_index[0] = hrp_index[1] = last_inst+1; |
aoqi@0 | 485 | block->_reg_pressure = block->_freg_pressure = 0; |
aoqi@0 | 486 | // Liveout things are presumed live for the whole block. We accumulate |
aoqi@0 | 487 | // 'area' accordingly. If they get killed in the block, we'll subtract |
aoqi@0 | 488 | // the unused part of the block from the area. |
aoqi@0 | 489 | int inst_count = last_inst - first_inst; |
aoqi@0 | 490 | double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); |
aoqi@0 | 491 | assert(!(cost < 0.0), "negative spill cost" ); |
aoqi@0 | 492 | IndexSetIterator elements(&liveout); |
aoqi@0 | 493 | uint lidx; |
aoqi@0 | 494 | while ((lidx = elements.next()) != 0) { |
aoqi@0 | 495 | LRG &lrg = lrgs(lidx); |
aoqi@0 | 496 | lrg._area += cost; |
aoqi@0 | 497 | // Compute initial register pressure |
aoqi@0 | 498 | if (lrg.mask().is_UP() && lrg.mask_size()) { |
aoqi@0 | 499 | if (lrg._is_float || lrg._is_vector) { // Count float pressure |
aoqi@0 | 500 | pressure[1] += lrg.reg_pressure(); |
aoqi@0 | 501 | if (pressure[1] > block->_freg_pressure) { |
aoqi@0 | 502 | block->_freg_pressure = pressure[1]; |
aoqi@0 | 503 | } |
aoqi@0 | 504 | // Count int pressure, but do not count the SP, flags |
aoqi@0 | 505 | } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { |
aoqi@0 | 506 | pressure[0] += lrg.reg_pressure(); |
aoqi@0 | 507 | if (pressure[0] > block->_reg_pressure) { |
aoqi@0 | 508 | block->_reg_pressure = pressure[0]; |
aoqi@0 | 509 | } |
aoqi@0 | 510 | } |
aoqi@0 | 511 | } |
aoqi@0 | 512 | } |
aoqi@0 | 513 | assert( pressure[0] == count_int_pressure (&liveout), "" ); |
aoqi@0 | 514 | assert( pressure[1] == count_float_pressure(&liveout), "" ); |
aoqi@0 | 515 | |
aoqi@0 | 516 | // The IFG is built by a single reverse pass over each basic block. |
aoqi@0 | 517 | // Starting with the known live-out set, we remove things that get |
aoqi@0 | 518 | // defined and add things that become live (essentially executing one |
aoqi@0 | 519 | // pass of a standard LIVE analysis). Just before a Node defines a value |
aoqi@0 | 520 | // (and removes it from the live-ness set) that value is certainly live. |
aoqi@0 | 521 | // The defined value interferes with everything currently live. The |
aoqi@0 | 522 | // value is then removed from the live-ness set and it's inputs are added |
aoqi@0 | 523 | // to the live-ness set. |
aoqi@0 | 524 | uint j; |
aoqi@0 | 525 | for (j = last_inst + 1; j > 1; j--) { |
aoqi@0 | 526 | Node* n = block->get_node(j - 1); |
aoqi@0 | 527 | |
aoqi@0 | 528 | // Get value being defined |
aoqi@0 | 529 | uint r = _lrg_map.live_range_id(n); |
aoqi@0 | 530 | |
aoqi@0 | 531 | // Some special values do not allocate |
aoqi@0 | 532 | if(r) { |
aoqi@0 | 533 | // A DEF normally costs block frequency; rematerialized values are |
aoqi@0 | 534 | // removed from the DEF sight, so LOWER costs here. |
aoqi@0 | 535 | lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq; |
aoqi@0 | 536 | |
aoqi@0 | 537 | // If it is not live, then this instruction is dead. Probably caused |
aoqi@0 | 538 | // by spilling and rematerialization. Who cares why, yank this baby. |
aoqi@0 | 539 | if( !liveout.member(r) && n->Opcode() != Op_SafePoint ) { |
aoqi@0 | 540 | Node *def = n->in(0); |
aoqi@0 | 541 | if( !n->is_Proj() || |
aoqi@0 | 542 | // Could also be a flags-projection of a dead ADD or such. |
aoqi@0 | 543 | (_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) { |
aoqi@0 | 544 | block->remove_node(j - 1); |
aoqi@0 | 545 | if (lrgs(r)._def == n) { |
aoqi@0 | 546 | lrgs(r)._def = 0; |
aoqi@0 | 547 | } |
aoqi@0 | 548 | n->disconnect_inputs(NULL, C); |
aoqi@0 | 549 | _cfg.unmap_node_from_block(n); |
aoqi@0 | 550 | n->replace_by(C->top()); |
aoqi@0 | 551 | // Since yanking a Node from block, high pressure moves up one |
aoqi@0 | 552 | hrp_index[0]--; |
aoqi@0 | 553 | hrp_index[1]--; |
aoqi@0 | 554 | continue; |
aoqi@0 | 555 | } |
aoqi@0 | 556 | |
aoqi@0 | 557 | // Fat-projections kill many registers which cannot be used to |
aoqi@0 | 558 | // hold live ranges. |
aoqi@0 | 559 | if (lrgs(r)._fat_proj) { |
aoqi@0 | 560 | // Count the int-only registers |
aoqi@0 | 561 | RegMask itmp = lrgs(r).mask(); |
aoqi@0 | 562 | itmp.AND(*Matcher::idealreg2regmask[Op_RegI]); |
aoqi@0 | 563 | int iregs = itmp.Size(); |
aoqi@0 | 564 | if (pressure[0]+iregs > block->_reg_pressure) { |
aoqi@0 | 565 | block->_reg_pressure = pressure[0] + iregs; |
aoqi@0 | 566 | } |
aoqi@0 | 567 | if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) { |
aoqi@0 | 568 | hrp_index[0] = j - 1; |
aoqi@0 | 569 | } |
aoqi@0 | 570 | // Count the float-only registers |
aoqi@0 | 571 | RegMask ftmp = lrgs(r).mask(); |
aoqi@0 | 572 | ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]); |
aoqi@0 | 573 | int fregs = ftmp.Size(); |
aoqi@0 | 574 | if (pressure[1] + fregs > block->_freg_pressure) { |
aoqi@0 | 575 | block->_freg_pressure = pressure[1] + fregs; |
aoqi@0 | 576 | } |
aoqi@0 | 577 | if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) { |
aoqi@0 | 578 | hrp_index[1] = j - 1; |
aoqi@0 | 579 | } |
aoqi@0 | 580 | } |
aoqi@0 | 581 | |
aoqi@0 | 582 | } else { // Else it is live |
aoqi@0 | 583 | // A DEF also ends 'area' partway through the block. |
aoqi@0 | 584 | lrgs(r)._area -= cost; |
aoqi@0 | 585 | assert(!(lrgs(r)._area < 0.0), "negative spill area" ); |
aoqi@0 | 586 | |
aoqi@0 | 587 | // Insure high score for immediate-use spill copies so they get a color |
aoqi@0 | 588 | if( n->is_SpillCopy() |
aoqi@0 | 589 | && lrgs(r).is_singledef() // MultiDef live range can still split |
aoqi@0 | 590 | && n->outcnt() == 1 // and use must be in this block |
aoqi@0 | 591 | && _cfg.get_block_for_node(n->unique_out()) == block) { |
aoqi@0 | 592 | // All single-use MachSpillCopy(s) that immediately precede their |
aoqi@0 | 593 | // use must color early. If a longer live range steals their |
aoqi@0 | 594 | // color, the spill copy will split and may push another spill copy |
aoqi@0 | 595 | // further away resulting in an infinite spill-split-retry cycle. |
aoqi@0 | 596 | // Assigning a zero area results in a high score() and a good |
aoqi@0 | 597 | // location in the simplify list. |
aoqi@0 | 598 | // |
aoqi@0 | 599 | |
aoqi@0 | 600 | Node *single_use = n->unique_out(); |
aoqi@0 | 601 | assert(block->find_node(single_use) >= j, "Use must be later in block"); |
aoqi@0 | 602 | // Use can be earlier in block if it is a Phi, but then I should be a MultiDef |
aoqi@0 | 603 | |
aoqi@0 | 604 | // Find first non SpillCopy 'm' that follows the current instruction |
aoqi@0 | 605 | // (j - 1) is index for current instruction 'n' |
aoqi@0 | 606 | Node *m = n; |
aoqi@0 | 607 | for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) { |
aoqi@0 | 608 | m = block->get_node(i); |
aoqi@0 | 609 | } |
aoqi@0 | 610 | if (m == single_use) { |
aoqi@0 | 611 | lrgs(r)._area = 0.0; |
aoqi@0 | 612 | } |
aoqi@0 | 613 | } |
aoqi@0 | 614 | |
aoqi@0 | 615 | // Remove from live-out set |
aoqi@0 | 616 | if( liveout.remove(r) ) { |
aoqi@0 | 617 | // Adjust register pressure. |
aoqi@0 | 618 | // Capture last hi-to-lo pressure transition |
aoqi@0 | 619 | lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index); |
aoqi@0 | 620 | assert( pressure[0] == count_int_pressure (&liveout), "" ); |
aoqi@0 | 621 | assert( pressure[1] == count_float_pressure(&liveout), "" ); |
aoqi@0 | 622 | } |
aoqi@0 | 623 | |
aoqi@0 | 624 | // Copies do not define a new value and so do not interfere. |
aoqi@0 | 625 | // Remove the copies source from the liveout set before interfering. |
aoqi@0 | 626 | uint idx = n->is_Copy(); |
aoqi@0 | 627 | if (idx) { |
aoqi@0 | 628 | uint x = _lrg_map.live_range_id(n->in(idx)); |
aoqi@0 | 629 | if (liveout.remove(x)) { |
aoqi@0 | 630 | lrgs(x)._area -= cost; |
aoqi@0 | 631 | // Adjust register pressure. |
aoqi@0 | 632 | lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index); |
aoqi@0 | 633 | assert( pressure[0] == count_int_pressure (&liveout), "" ); |
aoqi@0 | 634 | assert( pressure[1] == count_float_pressure(&liveout), "" ); |
aoqi@0 | 635 | } |
aoqi@0 | 636 | } |
aoqi@0 | 637 | } // End of if live or not |
aoqi@0 | 638 | |
aoqi@0 | 639 | // Interfere with everything live. If the defined value must |
aoqi@0 | 640 | // go in a particular register, just remove that register from |
aoqi@0 | 641 | // all conflicting parties and avoid the interference. |
aoqi@0 | 642 | |
aoqi@0 | 643 | // Make exclusions for rematerializable defs. Since rematerializable |
aoqi@0 | 644 | // DEFs are not bound but the live range is, some uses must be bound. |
aoqi@0 | 645 | // If we spill live range 'r', it can rematerialize at each use site |
aoqi@0 | 646 | // according to its bindings. |
aoqi@0 | 647 | const RegMask &rmask = lrgs(r).mask(); |
aoqi@0 | 648 | if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) { |
aoqi@0 | 649 | // Check for common case |
aoqi@0 | 650 | int r_size = lrgs(r).num_regs(); |
aoqi@0 | 651 | OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical; |
aoqi@0 | 652 | // Smear odd bits |
aoqi@0 | 653 | IndexSetIterator elements(&liveout); |
aoqi@0 | 654 | uint l; |
aoqi@0 | 655 | while ((l = elements.next()) != 0) { |
aoqi@0 | 656 | LRG &lrg = lrgs(l); |
aoqi@0 | 657 | // If 'l' must spill already, do not further hack his bits. |
aoqi@0 | 658 | // He'll get some interferences and be forced to spill later. |
aoqi@0 | 659 | if( lrg._must_spill ) continue; |
aoqi@0 | 660 | // Remove bound register(s) from 'l's choices |
aoqi@0 | 661 | RegMask old = lrg.mask(); |
aoqi@0 | 662 | uint old_size = lrg.mask_size(); |
aoqi@0 | 663 | // Remove the bits from LRG 'r' from LRG 'l' so 'l' no |
aoqi@0 | 664 | // longer interferes with 'r'. If 'l' requires aligned |
aoqi@0 | 665 | // adjacent pairs, subtract out bit pairs. |
aoqi@0 | 666 | assert(!lrg._is_vector || !lrg._fat_proj, "sanity"); |
aoqi@0 | 667 | if (lrg.num_regs() > 1 && !lrg._fat_proj) { |
aoqi@0 | 668 | RegMask r2mask = rmask; |
aoqi@0 | 669 | // Leave only aligned set of bits. |
aoqi@0 | 670 | r2mask.smear_to_sets(lrg.num_regs()); |
aoqi@0 | 671 | // It includes vector case. |
aoqi@0 | 672 | lrg.SUBTRACT( r2mask ); |
aoqi@0 | 673 | lrg.compute_set_mask_size(); |
aoqi@0 | 674 | } else if( r_size != 1 ) { // fat proj |
aoqi@0 | 675 | lrg.SUBTRACT( rmask ); |
aoqi@0 | 676 | lrg.compute_set_mask_size(); |
aoqi@0 | 677 | } else { // Common case: size 1 bound removal |
aoqi@0 | 678 | if( lrg.mask().Member(r_reg) ) { |
aoqi@0 | 679 | lrg.Remove(r_reg); |
aoqi@0 | 680 | lrg.set_mask_size(lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1); |
aoqi@0 | 681 | } |
aoqi@0 | 682 | } |
aoqi@0 | 683 | // If 'l' goes completely dry, it must spill. |
aoqi@0 | 684 | if( lrg.not_free() ) { |
aoqi@0 | 685 | // Give 'l' some kind of reasonable mask, so he picks up |
aoqi@0 | 686 | // interferences (and will spill later). |
aoqi@0 | 687 | lrg.set_mask( old ); |
aoqi@0 | 688 | lrg.set_mask_size(old_size); |
aoqi@0 | 689 | must_spill++; |
aoqi@0 | 690 | lrg._must_spill = 1; |
aoqi@0 | 691 | lrg.set_reg(OptoReg::Name(LRG::SPILL_REG)); |
aoqi@0 | 692 | } |
aoqi@0 | 693 | } |
aoqi@0 | 694 | } // End of if bound |
aoqi@0 | 695 | |
aoqi@0 | 696 | // Now interference with everything that is live and has |
aoqi@0 | 697 | // compatible register sets. |
aoqi@0 | 698 | interfere_with_live(r,&liveout); |
aoqi@0 | 699 | |
aoqi@0 | 700 | } // End of if normal register-allocated value |
aoqi@0 | 701 | |
aoqi@0 | 702 | // Area remaining in the block |
aoqi@0 | 703 | inst_count--; |
aoqi@0 | 704 | cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count); |
aoqi@0 | 705 | |
aoqi@0 | 706 | // Make all inputs live |
aoqi@0 | 707 | if( !n->is_Phi() ) { // Phi function uses come from prior block |
aoqi@0 | 708 | JVMState* jvms = n->jvms(); |
aoqi@0 | 709 | uint debug_start = jvms ? jvms->debug_start() : 999999; |
aoqi@0 | 710 | // Start loop at 1 (skip control edge) for most Nodes. |
aoqi@0 | 711 | // SCMemProj's might be the sole use of a StoreLConditional. |
aoqi@0 | 712 | // While StoreLConditionals set memory (the SCMemProj use) |
aoqi@0 | 713 | // they also def flags; if that flag def is unused the |
aoqi@0 | 714 | // allocator sees a flag-setting instruction with no use of |
aoqi@0 | 715 | // the flags and assumes it's dead. This keeps the (useless) |
aoqi@0 | 716 | // flag-setting behavior alive while also keeping the (useful) |
aoqi@0 | 717 | // memory update effect. |
aoqi@0 | 718 | for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) { |
aoqi@0 | 719 | Node *def = n->in(k); |
aoqi@0 | 720 | uint x = _lrg_map.live_range_id(def); |
aoqi@0 | 721 | if (!x) { |
aoqi@0 | 722 | continue; |
aoqi@0 | 723 | } |
aoqi@0 | 724 | LRG &lrg = lrgs(x); |
aoqi@0 | 725 | // No use-side cost for spilling debug info |
aoqi@0 | 726 | if (k < debug_start) { |
aoqi@0 | 727 | // A USE costs twice block frequency (once for the Load, once |
aoqi@0 | 728 | // for a Load-delay). Rematerialized uses only cost once. |
aoqi@0 | 729 | lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq)); |
aoqi@0 | 730 | } |
aoqi@0 | 731 | // It is live now |
aoqi@0 | 732 | if (liveout.insert(x)) { |
aoqi@0 | 733 | // Newly live things assumed live from here to top of block |
aoqi@0 | 734 | lrg._area += cost; |
aoqi@0 | 735 | // Adjust register pressure |
aoqi@0 | 736 | if (lrg.mask().is_UP() && lrg.mask_size()) { |
aoqi@0 | 737 | if (lrg._is_float || lrg._is_vector) { |
aoqi@0 | 738 | pressure[1] += lrg.reg_pressure(); |
aoqi@0 | 739 | if (pressure[1] > block->_freg_pressure) { |
aoqi@0 | 740 | block->_freg_pressure = pressure[1]; |
aoqi@0 | 741 | } |
aoqi@0 | 742 | } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) { |
aoqi@0 | 743 | pressure[0] += lrg.reg_pressure(); |
aoqi@0 | 744 | if (pressure[0] > block->_reg_pressure) { |
aoqi@0 | 745 | block->_reg_pressure = pressure[0]; |
aoqi@0 | 746 | } |
aoqi@0 | 747 | } |
aoqi@0 | 748 | } |
aoqi@0 | 749 | assert( pressure[0] == count_int_pressure (&liveout), "" ); |
aoqi@0 | 750 | assert( pressure[1] == count_float_pressure(&liveout), "" ); |
aoqi@0 | 751 | } |
aoqi@0 | 752 | assert(!(lrg._area < 0.0), "negative spill area" ); |
aoqi@0 | 753 | } |
aoqi@0 | 754 | } |
aoqi@0 | 755 | } // End of reverse pass over all instructions in block |
aoqi@0 | 756 | |
aoqi@0 | 757 | // If we run off the top of the block with high pressure and |
aoqi@0 | 758 | // never see a hi-to-low pressure transition, just record that |
aoqi@0 | 759 | // the whole block is high pressure. |
aoqi@0 | 760 | if (pressure[0] > (uint)INTPRESSURE) { |
aoqi@0 | 761 | hrp_index[0] = 0; |
aoqi@0 | 762 | if (pressure[0] > block->_reg_pressure) { |
aoqi@0 | 763 | block->_reg_pressure = pressure[0]; |
aoqi@0 | 764 | } |
aoqi@0 | 765 | } |
aoqi@0 | 766 | if (pressure[1] > (uint)FLOATPRESSURE) { |
aoqi@0 | 767 | hrp_index[1] = 0; |
aoqi@0 | 768 | if (pressure[1] > block->_freg_pressure) { |
aoqi@0 | 769 | block->_freg_pressure = pressure[1]; |
aoqi@0 | 770 | } |
aoqi@0 | 771 | } |
aoqi@0 | 772 | |
aoqi@0 | 773 | // Compute high pressure indice; avoid landing in the middle of projnodes |
aoqi@0 | 774 | j = hrp_index[0]; |
aoqi@0 | 775 | if (j < block->number_of_nodes() && j < block->end_idx() + 1) { |
aoqi@0 | 776 | Node* cur = block->get_node(j); |
aoqi@0 | 777 | while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { |
aoqi@0 | 778 | j--; |
aoqi@0 | 779 | cur = block->get_node(j); |
aoqi@0 | 780 | } |
aoqi@0 | 781 | } |
aoqi@0 | 782 | block->_ihrp_index = j; |
aoqi@0 | 783 | j = hrp_index[1]; |
aoqi@0 | 784 | if (j < block->number_of_nodes() && j < block->end_idx() + 1) { |
aoqi@0 | 785 | Node* cur = block->get_node(j); |
aoqi@0 | 786 | while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) { |
aoqi@0 | 787 | j--; |
aoqi@0 | 788 | cur = block->get_node(j); |
aoqi@0 | 789 | } |
aoqi@0 | 790 | } |
aoqi@0 | 791 | block->_fhrp_index = j; |
aoqi@0 | 792 | |
aoqi@0 | 793 | #ifndef PRODUCT |
aoqi@0 | 794 | // Gather Register Pressure Statistics |
aoqi@0 | 795 | if( PrintOptoStatistics ) { |
aoqi@0 | 796 | if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) { |
aoqi@0 | 797 | _high_pressure++; |
aoqi@0 | 798 | } else { |
aoqi@0 | 799 | _low_pressure++; |
aoqi@0 | 800 | } |
aoqi@0 | 801 | } |
aoqi@0 | 802 | #endif |
aoqi@0 | 803 | } // End of for all blocks |
aoqi@0 | 804 | |
aoqi@0 | 805 | return must_spill; |
aoqi@0 | 806 | } |