1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/opto/memnode.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,3222 @@ 1.4 +/* 1.5 + * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +// Portions of code courtesy of Clifford Click 1.29 + 1.30 +// Optimization - Graph Style 1.31 + 1.32 +#include "incls/_precompiled.incl" 1.33 +#include "incls/_memnode.cpp.incl" 1.34 + 1.35 +//============================================================================= 1.36 +uint MemNode::size_of() const { return sizeof(*this); } 1.37 + 1.38 +const TypePtr *MemNode::adr_type() const { 1.39 + Node* adr = in(Address); 1.40 + const TypePtr* cross_check = NULL; 1.41 + DEBUG_ONLY(cross_check = _adr_type); 1.42 + return calculate_adr_type(adr->bottom_type(), cross_check); 1.43 +} 1.44 + 1.45 +#ifndef PRODUCT 1.46 +void MemNode::dump_spec(outputStream *st) const { 1.47 + if (in(Address) == NULL) return; // node is dead 1.48 +#ifndef ASSERT 1.49 + // fake the missing field 1.50 + const TypePtr* _adr_type = NULL; 1.51 + if (in(Address) != NULL) 1.52 + _adr_type = in(Address)->bottom_type()->isa_ptr(); 1.53 +#endif 1.54 + dump_adr_type(this, _adr_type, st); 1.55 + 1.56 + Compile* C = Compile::current(); 1.57 + if( C->alias_type(_adr_type)->is_volatile() ) 1.58 + st->print(" Volatile!"); 1.59 +} 1.60 + 1.61 +void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) { 1.62 + st->print(" @"); 1.63 + if (adr_type == NULL) { 1.64 + st->print("NULL"); 1.65 + } else { 1.66 + adr_type->dump_on(st); 1.67 + Compile* C = Compile::current(); 1.68 + Compile::AliasType* atp = NULL; 1.69 + if (C->have_alias_type(adr_type)) atp = C->alias_type(adr_type); 1.70 + if (atp == NULL) 1.71 + st->print(", idx=?\?;"); 1.72 + else if (atp->index() == Compile::AliasIdxBot) 1.73 + st->print(", idx=Bot;"); 1.74 + else if (atp->index() == Compile::AliasIdxTop) 1.75 + st->print(", idx=Top;"); 1.76 + else if (atp->index() == Compile::AliasIdxRaw) 1.77 + st->print(", idx=Raw;"); 1.78 + else { 1.79 + ciField* field = atp->field(); 1.80 + if (field) { 1.81 + st->print(", name="); 1.82 + field->print_name_on(st); 1.83 + } 1.84 + st->print(", idx=%d;", atp->index()); 1.85 + } 1.86 + } 1.87 +} 1.88 + 1.89 +extern void print_alias_types(); 1.90 + 1.91 +#endif 1.92 + 1.93 +//--------------------------Ideal_common--------------------------------------- 1.94 +// Look for degenerate control 
and memory inputs. Bypass MergeMem inputs. 1.95 +// Unhook non-raw memories from complete (macro-expanded) initializations. 1.96 +Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) { 1.97 + // If our control input is a dead region, kill all below the region 1.98 + Node *ctl = in(MemNode::Control); 1.99 + if (ctl && remove_dead_region(phase, can_reshape)) 1.100 + return this; 1.101 + 1.102 + // Ignore if memory is dead, or self-loop 1.103 + Node *mem = in(MemNode::Memory); 1.104 + if( phase->type( mem ) == Type::TOP ) return NodeSentinel; // caller will return NULL 1.105 + assert( mem != this, "dead loop in MemNode::Ideal" ); 1.106 + 1.107 + Node *address = in(MemNode::Address); 1.108 + const Type *t_adr = phase->type( address ); 1.109 + if( t_adr == Type::TOP ) return NodeSentinel; // caller will return NULL 1.110 + 1.111 + // Avoid independent memory operations 1.112 + Node* old_mem = mem; 1.113 + 1.114 + if (mem->is_Proj() && mem->in(0)->is_Initialize()) { 1.115 + InitializeNode* init = mem->in(0)->as_Initialize(); 1.116 + if (init->is_complete()) { // i.e., after macro expansion 1.117 + const TypePtr* tp = t_adr->is_ptr(); 1.118 + uint alias_idx = phase->C->get_alias_index(tp); 1.119 + // Free this slice from the init. It was hooked, temporarily, 1.120 + // by GraphKit::set_output_for_allocation. 1.121 + if (alias_idx > Compile::AliasIdxRaw) { 1.122 + mem = init->memory(alias_idx); 1.123 + // ...but not with the raw-pointer slice. 1.124 + } 1.125 + } 1.126 + } 1.127 + 1.128 + if (mem->is_MergeMem()) { 1.129 + MergeMemNode* mmem = mem->as_MergeMem(); 1.130 + const TypePtr *tp = t_adr->is_ptr(); 1.131 + uint alias_idx = phase->C->get_alias_index(tp); 1.132 +#ifdef ASSERT 1.133 + { 1.134 + // Check that current type is consistent with the alias index used during graph construction 1.135 + assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx"); 1.136 + const TypePtr *adr_t = adr_type(); 1.137 + bool consistent = adr_t == NULL || adr_t->empty() || phase->C->must_alias(adr_t, alias_idx ); 1.138 + // Sometimes dead array references collapse to a[-1], a[-2], or a[-3] 1.139 + if( !consistent && adr_t != NULL && !adr_t->empty() && 1.140 + tp->isa_aryptr() && tp->offset() == Type::OffsetBot && 1.141 + adr_t->isa_aryptr() && adr_t->offset() != Type::OffsetBot && 1.142 + ( adr_t->offset() == arrayOopDesc::length_offset_in_bytes() || 1.143 + adr_t->offset() == oopDesc::klass_offset_in_bytes() || 1.144 + adr_t->offset() == oopDesc::mark_offset_in_bytes() ) ) { 1.145 + // don't assert if it is dead code. 1.146 + consistent = true; 1.147 + } 1.148 + if( !consistent ) { 1.149 + tty->print("alias_idx==%d, adr_type()==", alias_idx); if( adr_t == NULL ) { tty->print("NULL"); } else { adr_t->dump(); } 1.150 + tty->cr(); 1.151 + print_alias_types(); 1.152 + assert(consistent, "adr_type must match alias idx"); 1.153 + } 1.154 + } 1.155 +#endif 1.156 + // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally 1.157 + // means an array I have not precisely typed yet. Do not do any 1.158 + // alias stuff with it any time soon. 
1.159 + const TypeInstPtr *tinst = tp->isa_instptr(); 1.160 + if( tp->base() != Type::AnyPtr && 1.161 + !(tinst && 1.162 + tinst->klass()->is_java_lang_Object() && 1.163 + tinst->offset() == Type::OffsetBot) ) { 1.164 + // compress paths and change unreachable cycles to TOP 1.165 + // If not, we can update the input infinitely along a MergeMem cycle 1.166 + // Equivalent code in PhiNode::Ideal 1.167 + Node* m = phase->transform(mmem); 1.168 + // If transformed to a MergeMem, get the desired slice 1.169 + // Otherwise the returned node represents memory for every slice 1.170 + mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; 1.171 + // Update input if it is progress over what we have now 1.172 + } 1.173 + } 1.174 + 1.175 + if (mem != old_mem) { 1.176 + set_req(MemNode::Memory, mem); 1.177 + return this; 1.178 + } 1.179 + 1.180 + // let the subclass continue analyzing... 1.181 + return NULL; 1.182 +} 1.183 + 1.184 +// Helper function for proving some simple control dominations. 1.185 +// Attempt to prove that control input 'dom' dominates (or equals) 'sub'. 1.186 +// Already assumes that 'dom' is available at 'sub', and that 'sub' 1.187 +// is not a constant (dominated by the method's StartNode). 1.188 +// Used by MemNode::find_previous_store to prove that the 1.189 +// control input of a memory operation predates (dominates) 1.190 +// an allocation it wants to look past. 1.191 +bool MemNode::detect_dominating_control(Node* dom, Node* sub) { 1.192 + if (dom == NULL) return false; 1.193 + if (dom->is_Proj()) dom = dom->in(0); 1.194 + if (dom->is_Start()) return true; // anything inside the method 1.195 + if (dom->is_Root()) return true; // dom 'controls' a constant 1.196 + int cnt = 20; // detect cycle or too much effort 1.197 + while (sub != NULL) { // walk 'sub' up the chain to 'dom' 1.198 + if (--cnt < 0) return false; // in a cycle or too complex 1.199 + if (sub == dom) return true; 1.200 + if (sub->is_Start()) return false; 1.201 + if (sub->is_Root()) return false; 1.202 + Node* up = sub->in(0); 1.203 + if (sub == up && sub->is_Region()) { 1.204 + for (uint i = 1; i < sub->req(); i++) { 1.205 + Node* in = sub->in(i); 1.206 + if (in != NULL && !in->is_top() && in != sub) { 1.207 + up = in; break; // take any path on the way up to 'dom' 1.208 + } 1.209 + } 1.210 + } 1.211 + if (sub == up) return false; // some kind of tight cycle 1.212 + sub = up; 1.213 + } 1.214 + return false; 1.215 +} 1.216 + 1.217 +//---------------------detect_ptr_independence--------------------------------- 1.218 +// Used by MemNode::find_previous_store to prove that two base 1.219 +// pointers are never equal. 1.220 +// The pointers are accompanied by their associated allocations, 1.221 +// if any, which have been previously discovered by the caller. 1.222 +bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1, 1.223 + Node* p2, AllocateNode* a2, 1.224 + PhaseTransform* phase) { 1.225 + // Attempt to prove that these two pointers cannot be aliased. 1.226 + // They may both manifestly be allocations, and they should differ. 1.227 + // Or, if they are not both allocations, they can be distinct constants. 1.228 + // Otherwise, one is an allocation and the other a pre-existing value.
1.229 + if (a1 == NULL && a2 == NULL) { // neither an allocation 1.230 + return (p1 != p2) && p1->is_Con() && p2->is_Con(); 1.231 + } else if (a1 != NULL && a2 != NULL) { // both allocations 1.232 + return (a1 != a2); 1.233 + } else if (a1 != NULL) { // one allocation a1 1.234 + // (Note: p2->is_Con implies p2->in(0)->is_Root, which dominates.) 1.235 + return detect_dominating_control(p2->in(0), a1->in(0)); 1.236 + } else { //(a2 != NULL) // one allocation a2 1.237 + return detect_dominating_control(p1->in(0), a2->in(0)); 1.238 + } 1.239 + return false; 1.240 +} 1.241 + 1.242 + 1.243 +// The logic for reordering loads and stores uses four steps: 1.244 +// (a) Walk carefully past stores and initializations which we 1.245 +// can prove are independent of this load. 1.246 +// (b) Observe that the next memory state makes an exact match 1.247 +// with self (load or store), and locate the relevant store. 1.248 +// (c) Ensure that, if we were to wire self directly to the store, 1.249 +// the optimizer would fold it up somehow. 1.250 +// (d) Do the rewiring, and return, depending on some other part of 1.251 +// the optimizer to fold up the load. 1.252 +// This routine handles steps (a) and (b). Steps (c) and (d) are 1.253 +// specific to loads and stores, so they are handled by the callers. 1.254 +// (Currently, only LoadNode::Ideal has steps (c), (d). More later.) 1.255 +// 1.256 +Node* MemNode::find_previous_store(PhaseTransform* phase) { 1.257 + Node* ctrl = in(MemNode::Control); 1.258 + Node* adr = in(MemNode::Address); 1.259 + intptr_t offset = 0; 1.260 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); 1.261 + AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); 1.262 + 1.263 + if (offset == Type::OffsetBot) 1.264 + return NULL; // cannot unalias unless there are precise offsets 1.265 + 1.266 + intptr_t size_in_bytes = memory_size(); 1.267 + 1.268 + Node* mem = in(MemNode::Memory); // start searching here... 1.269 + 1.270 + int cnt = 50; // Cycle limiter 1.271 + for (;;) { // While we can dance past unrelated stores... 1.272 + if (--cnt < 0) break; // Caught in cycle or a complicated dance? 1.273 + 1.274 + if (mem->is_Store()) { 1.275 + Node* st_adr = mem->in(MemNode::Address); 1.276 + intptr_t st_offset = 0; 1.277 + Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); 1.278 + if (st_base == NULL) 1.279 + break; // inscrutable pointer 1.280 + if (st_offset != offset && st_offset != Type::OffsetBot) { 1.281 + const int MAX_STORE = BytesPerLong; 1.282 + if (st_offset >= offset + size_in_bytes || 1.283 + st_offset <= offset - MAX_STORE || 1.284 + st_offset <= offset - mem->as_Store()->memory_size()) { 1.285 + // Success: The offsets are provably independent. 1.286 + // (You may ask, why not just test st_offset != offset and be done? 1.287 + // The answer is that stores of different sizes can co-exist 1.288 + // in the same sequence of RawMem effects. We sometimes initialize 1.289 + // a whole 'tile' of array elements with a single jint or jlong.) 1.290 + mem = mem->in(MemNode::Memory); 1.291 + continue; // (a) advance through independent store memory 1.292 + } 1.293 + } 1.294 + if (st_base != base && 1.295 + detect_ptr_independence(base, alloc, 1.296 + st_base, 1.297 + AllocateNode::Ideal_allocation(st_base, phase), 1.298 + phase)) { 1.299 + // Success: The bases are provably independent. 
1.300 + mem = mem->in(MemNode::Memory); 1.301 + continue; // (a) advance through independent store memory 1.302 + } 1.303 + 1.304 + // (b) At this point, if the bases or offsets do not agree, we lose, 1.305 + // since we have not managed to prove 'this' and 'mem' independent. 1.306 + if (st_base == base && st_offset == offset) { 1.307 + return mem; // let caller handle steps (c), (d) 1.308 + } 1.309 + 1.310 + } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { 1.311 + InitializeNode* st_init = mem->in(0)->as_Initialize(); 1.312 + AllocateNode* st_alloc = st_init->allocation(); 1.313 + if (st_alloc == NULL) 1.314 + break; // something degenerated 1.315 + bool known_identical = false; 1.316 + bool known_independent = false; 1.317 + if (alloc == st_alloc) 1.318 + known_identical = true; 1.319 + else if (alloc != NULL) 1.320 + known_independent = true; 1.321 + else if (ctrl != NULL && 1.322 + detect_dominating_control(ctrl, st_alloc->in(0))) 1.323 + known_independent = true; 1.324 + 1.325 + if (known_independent) { 1.326 + // The bases are provably independent: Either they are 1.327 + // manifestly distinct allocations, or else the control 1.328 + // of this load dominates the store's allocation. 1.329 + int alias_idx = phase->C->get_alias_index(adr_type()); 1.330 + if (alias_idx == Compile::AliasIdxRaw) { 1.331 + mem = st_alloc->in(TypeFunc::Memory); 1.332 + } else { 1.333 + mem = st_init->memory(alias_idx); 1.334 + } 1.335 + continue; // (a) advance through independent store memory 1.336 + } 1.337 + 1.338 + // (b) at this point, if we are not looking at a store initializing 1.339 + // the same allocation we are loading from, we lose. 1.340 + if (known_identical) { 1.341 + // From caller, can_see_stored_value will consult find_captured_store. 1.342 + return mem; // let caller handle steps (c), (d) 1.343 + } 1.344 + 1.345 + } 1.346 + 1.347 + // Unless there is an explicit 'continue', we must bail out here, 1.348 + // because 'mem' is an inscrutable memory state (e.g., a call). 1.349 + break; 1.350 + } 1.351 + 1.352 + return NULL; // bail out 1.353 +} 1.354 + 1.355 +//----------------------calculate_adr_type------------------------------------- 1.356 +// Helper function. Notices when the given type of address hits top or bottom. 1.357 +// Also, asserts a cross-check of the type against the expected address type. 1.358 +const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) { 1.359 + if (t == Type::TOP) return NULL; // does not touch memory any more? 1.360 + #ifdef PRODUCT 1.361 + cross_check = NULL; 1.362 + #else 1.363 + if (!VerifyAliases || is_error_reported() || Node::in_dump()) cross_check = NULL; 1.364 + #endif 1.365 + const TypePtr* tp = t->isa_ptr(); 1.366 + if (tp == NULL) { 1.367 + assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide"); 1.368 + return TypePtr::BOTTOM; // touches lots of memory 1.369 + } else { 1.370 + #ifdef ASSERT 1.371 + // %%%% [phh] We don't check the alias index if cross_check is 1.372 + // TypeRawPtr::BOTTOM. Needs to be investigated. 1.373 + if (cross_check != NULL && 1.374 + cross_check != TypePtr::BOTTOM && 1.375 + cross_check != TypeRawPtr::BOTTOM) { 1.376 + // Recheck the alias index, to see if it has changed (due to a bug). 
1.377 + Compile* C = Compile::current(); 1.378 + assert(C->get_alias_index(cross_check) == C->get_alias_index(tp), 1.379 + "must stay in the original alias category"); 1.380 + // The type of the address must be contained in the adr_type, 1.381 + // disregarding "null"-ness. 1.382 + // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.) 1.383 + const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr(); 1.384 + assert(cross_check->meet(tp_notnull) == cross_check, 1.385 + "real address must not escape from expected memory type"); 1.386 + } 1.387 + #endif 1.388 + return tp; 1.389 + } 1.390 +} 1.391 + 1.392 +//------------------------adr_phi_is_loop_invariant---------------------------- 1.393 +// A helper function for Ideal_DU_postCCP to check if a Phi in a counted 1.394 +// loop is loop invariant. Make a quick traversal of Phi and associated 1.395 +// CastPP nodes, looking to see if they are a closed group within the loop. 1.396 +bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) { 1.397 + // The idea is that the phi-nest must boil down to only CastPP nodes 1.398 + // with the same data. This implies that any path into the loop already 1.399 + // includes such a CastPP, and so the original cast, whatever its input, 1.400 + // must be covered by an equivalent cast, with an earlier control input. 1.401 + ResourceMark rm; 1.402 + 1.403 + // The loop entry input of the phi should be the unique dominating 1.404 + // node for every Phi/CastPP in the loop. 1.405 + Unique_Node_List closure; 1.406 + closure.push(adr_phi->in(LoopNode::EntryControl)); 1.407 + 1.408 + // Add the phi node and the cast to the worklist. 1.409 + Unique_Node_List worklist; 1.410 + worklist.push(adr_phi); 1.411 + if( cast != NULL ){ 1.412 + if( !cast->is_ConstraintCast() ) return false; 1.413 + worklist.push(cast); 1.414 + } 1.415 + 1.416 + // Begin recursive walk of phi nodes. 1.417 + while( worklist.size() ){ 1.418 + // Take a node off the worklist 1.419 + Node *n = worklist.pop(); 1.420 + if( !closure.member(n) ){ 1.421 + // Add it to the closure. 1.422 + closure.push(n); 1.423 + // Make a sanity check to ensure we don't waste too much time here. 1.424 + if( closure.size() > 20) return false; 1.425 + // This node is OK if: 1.426 + // - it is a cast of an identical value 1.427 + // - or it is a phi node (then we add its inputs to the worklist) 1.428 + // Otherwise, the node is not OK, and we presume the cast is not invariant 1.429 + if( n->is_ConstraintCast() ){ 1.430 + worklist.push(n->in(1)); 1.431 + } else if( n->is_Phi() ) { 1.432 + for( uint i = 1; i < n->req(); i++ ) { 1.433 + worklist.push(n->in(i)); 1.434 + } 1.435 + } else { 1.436 + return false; 1.437 + } 1.438 + } 1.439 + } 1.440 + 1.441 + // Quit when the worklist is empty, and we've found no offending nodes. 1.442 + return true; 1.443 +} 1.444 + 1.445 +//------------------------------Ideal_DU_postCCP------------------------------- 1.446 +// Find any cast-away of null-ness and keep its control. Null cast-aways are 1.447 +// going away in this pass and we need to make this memory op depend on the 1.448 +// gating null check. 1.449 + 1.450 +// I tried to leave the CastPP's in. This makes the graph more accurate in 1.451 +// some sense; we get to keep around the knowledge that an oop is not-null 1.452 +// after some test. Alas, the CastPP's interfere with GVN (some values are 1.453 +// the regular oop, some are the CastPP of the oop, all merge at Phi's which 1.454 +// cannot collapse, etc). 
This cost us 10% on SpecJVM, even when I removed 1.455 +// some of the more trivial cases in the optimizer. Removing more useless 1.456 +// Phi's started allowing Loads to illegally float above null checks. I gave 1.457 +// up on this approach. CNC 10/20/2000 1.458 +Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { 1.459 + Node *ctr = in(MemNode::Control); 1.460 + Node *mem = in(MemNode::Memory); 1.461 + Node *adr = in(MemNode::Address); 1.462 + Node *skipped_cast = NULL; 1.463 + // Need a null check? Regular static accesses do not because they are 1.464 + // from constant addresses. Array ops are gated by the range check (which 1.465 + // always includes a NULL check). Just check field ops. 1.466 + if( !ctr ) { 1.467 + // Scan upwards for the highest location we can place this memory op. 1.468 + while( true ) { 1.469 + switch( adr->Opcode() ) { 1.470 + 1.471 + case Op_AddP: // No change to NULL-ness, so peek thru AddP's 1.472 + adr = adr->in(AddPNode::Base); 1.473 + continue; 1.474 + 1.475 + case Op_CastPP: 1.476 + // If the CastPP is useless, just peek on through it. 1.477 + if( ccp->type(adr) == ccp->type(adr->in(1)) ) { 1.478 + // Remember the cast that we've peeked through. If we peek 1.479 + // through more than one, then we end up remembering the highest 1.480 + // one, that is, if in a loop, the one closest to the top. 1.481 + skipped_cast = adr; 1.482 + adr = adr->in(1); 1.483 + continue; 1.484 + } 1.485 + // CastPP is going away in this pass! We need this memory op to be 1.486 + // control-dependent on the test that is guarding the CastPP. 1.487 + ccp->hash_delete(this); 1.488 + set_req(MemNode::Control, adr->in(0)); 1.489 + ccp->hash_insert(this); 1.490 + return this; 1.491 + 1.492 + case Op_Phi: 1.493 + // Attempt to float above a Phi to some dominating point. 1.494 + if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) { 1.495 + // If we've already peeked through a Cast (which could have set the 1.496 + // control), we can't float above a Phi, because the skipped Cast 1.497 + // may not be loop invariant. 1.498 + if (adr_phi_is_loop_invariant(adr, skipped_cast)) { 1.499 + adr = adr->in(1); 1.500 + continue; 1.501 + } 1.502 + } 1.503 + 1.504 + // Intentional fallthrough! 1.505 + 1.506 + // No obvious dominating point. The mem op is pinned below the Phi 1.507 + // by the Phi itself. If the Phi goes away (no true value is merged) 1.508 + // then the mem op can float, but not indefinitely. It must be pinned 1.509 + // behind the controls leading to the Phi. 1.510 + case Op_CheckCastPP: 1.511 + // These usually stick around to change address type, however a 1.512 + // useless one can be elided and we still need to pick up a control edge 1.513 + if (adr->in(0) == NULL) { 1.514 + // This CheckCastPP node has NO control and is likely useless. But we 1.515 + // need to check further up the ancestor chain for a control input to keep 1.516 + // the node in place. 4959717. 1.517 + skipped_cast = adr; 1.518 + adr = adr->in(1); 1.519 + continue; 1.520 + } 1.521 + ccp->hash_delete(this); 1.522 + set_req(MemNode::Control, adr->in(0)); 1.523 + ccp->hash_insert(this); 1.524 + return this; 1.525 + 1.526 + // List of "safe" opcodes; those that implicitly block the memory 1.527 + // op below any null check.
1.528 + case Op_CastX2P: // no null checks on native pointers 1.529 + case Op_Parm: // 'this' pointer is not null 1.530 + case Op_LoadP: // Loading from within a klass 1.531 + case Op_LoadKlass: // Loading from within a klass 1.532 + case Op_ConP: // Loading from a klass 1.533 + case Op_CreateEx: // Sucking up the guts of an exception oop 1.534 + case Op_Con: // Reading from TLS 1.535 + case Op_CMoveP: // CMoveP is pinned 1.536 + break; // No progress 1.537 + 1.538 + case Op_Proj: // Direct call to an allocation routine 1.539 + case Op_SCMemProj: // Memory state from store conditional ops 1.540 +#ifdef ASSERT 1.541 + { 1.542 + assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value"); 1.543 + const Node* call = adr->in(0); 1.544 + if (call->is_CallStaticJava()) { 1.545 + const CallStaticJavaNode* call_java = call->as_CallStaticJava(); 1.546 + assert(call_java && call_java->method() == NULL, "must be runtime call"); 1.547 + // We further presume that this is one of 1.548 + // new_instance_Java, new_array_Java, or 1.549 + // the like, but do not assert for this. 1.550 + } else if (call->is_Allocate()) { 1.551 + // similar case to new_instance_Java, etc. 1.552 + } else if (!call->is_CallLeaf()) { 1.553 + // Projections from fetch_oop (OSR) are allowed as well. 1.554 + ShouldNotReachHere(); 1.555 + } 1.556 + } 1.557 +#endif 1.558 + break; 1.559 + default: 1.560 + ShouldNotReachHere(); 1.561 + } 1.562 + break; 1.563 + } 1.564 + } 1.565 + 1.566 + return NULL; // No progress 1.567 +} 1.568 + 1.569 + 1.570 +//============================================================================= 1.571 +uint LoadNode::size_of() const { return sizeof(*this); } 1.572 +uint LoadNode::cmp( const Node &n ) const 1.573 +{ return !Type::cmp( _type, ((LoadNode&)n)._type ); } 1.574 +const Type *LoadNode::bottom_type() const { return _type; } 1.575 +uint LoadNode::ideal_reg() const { 1.576 + return Matcher::base2reg[_type->base()]; 1.577 +} 1.578 + 1.579 +#ifndef PRODUCT 1.580 +void LoadNode::dump_spec(outputStream *st) const { 1.581 + MemNode::dump_spec(st); 1.582 + if( !Verbose && !WizardMode ) { 1.583 + // standard dump does this in Verbose and WizardMode 1.584 + st->print(" #"); _type->dump_on(st); 1.585 + } 1.586 +} 1.587 +#endif 1.588 + 1.589 + 1.590 +//----------------------------LoadNode::make----------------------------------- 1.591 +// Polymorphic factory method: 1.592 +LoadNode *LoadNode::make( Compile *C, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) { 1.593 + // sanity check the alias category against the created node type 1.594 + assert(!(adr_type->isa_oopptr() && 1.595 + adr_type->offset() == oopDesc::klass_offset_in_bytes()), 1.596 + "use LoadKlassNode instead"); 1.597 + assert(!(adr_type->isa_aryptr() && 1.598 + adr_type->offset() == arrayOopDesc::length_offset_in_bytes()), 1.599 + "use LoadRangeNode instead"); 1.600 + switch (bt) { 1.601 + case T_BOOLEAN: 1.602 + case T_BYTE: return new (C, 3) LoadBNode(ctl, mem, adr, adr_type, rt->is_int() ); 1.603 + case T_INT: return new (C, 3) LoadINode(ctl, mem, adr, adr_type, rt->is_int() ); 1.604 + case T_CHAR: return new (C, 3) LoadCNode(ctl, mem, adr, adr_type, rt->is_int() ); 1.605 + case T_SHORT: return new (C, 3) LoadSNode(ctl, mem, adr, adr_type, rt->is_int() ); 1.606 + case T_LONG: return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long() ); 1.607 + case T_FLOAT: return new (C, 3) LoadFNode(ctl, mem, adr, adr_type, rt ); 1.608 + case T_DOUBLE: return new (C, 3) LoadDNode(ctl, mem, adr, 
adr_type, rt ); 1.609 + case T_ADDRESS: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr() ); 1.610 + case T_OBJECT: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr()); 1.611 + } 1.612 + ShouldNotReachHere(); 1.613 + return (LoadNode*)NULL; 1.614 +} 1.615 + 1.616 +LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) { 1.617 + bool require_atomic = true; 1.618 + return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic); 1.619 +} 1.620 + 1.621 + 1.622 + 1.623 + 1.624 +//------------------------------hash------------------------------------------- 1.625 +uint LoadNode::hash() const { 1.626 + // unroll addition of interesting fields 1.627 + return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address); 1.628 +} 1.629 + 1.630 +//---------------------------can_see_stored_value------------------------------ 1.631 +// This routine exists to make sure this set of tests is done the same 1.632 +// everywhere. We need to make a coordinated change: first LoadNode::Ideal 1.633 +// will change the graph shape in a way which makes memory alive twice at the 1.634 +// same time (uses the Oracle model of aliasing), then some 1.635 +// LoadXNode::Identity will fold things back to the equivalence-class model 1.636 +// of aliasing. 1.637 +Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const { 1.638 + Node* ld_adr = in(MemNode::Address); 1.639 + 1.640 + // Loop around twice in the case Load -> Initialize -> Store. 1.641 + // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.) 1.642 + for (int trip = 0; trip <= 1; trip++) { 1.643 + 1.644 + if (st->is_Store()) { 1.645 + Node* st_adr = st->in(MemNode::Address); 1.646 + if (!phase->eqv(st_adr, ld_adr)) { 1.647 + // Try harder before giving up... Match raw and non-raw pointers. 1.648 + intptr_t st_off = 0; 1.649 + AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off); 1.650 + if (alloc == NULL) return NULL; 1.651 + intptr_t ld_off = 0; 1.652 + AllocateNode* allo2 = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off); 1.653 + if (alloc != allo2) return NULL; 1.654 + if (ld_off != st_off) return NULL; 1.655 + // At this point we have proven something like this setup: 1.656 + // A = Allocate(...) 1.657 + // L = LoadQ(, AddP(CastPP(, A.Parm),, #Off)) 1.658 + // S = StoreQ(, AddP(, A.Parm , #Off), V) 1.659 + // (Actually, we haven't yet proven the Q's are the same.) 1.660 + // In other words, we are loading from a casted version of 1.661 + // the same pointer-and-offset that we stored to. 1.662 + // Thus, we are able to replace L by V. 1.663 + } 1.664 + // Now prove that we have a LoadQ matched to a StoreQ, for some Q. 1.665 + if (store_Opcode() != st->Opcode()) 1.666 + return NULL; 1.667 + return st->in(MemNode::ValueIn); 1.668 + } 1.669 + 1.670 + intptr_t offset = 0; // scratch 1.671 + 1.672 + // A load from a freshly-created object always returns zero. 1.673 + // (This can happen after LoadNode::Ideal resets the load's memory input 1.674 + // to find_captured_store, which returned InitializeNode::zero_memory.) 
1.675 + if (st->is_Proj() && st->in(0)->is_Allocate() && 1.676 + st->in(0) == AllocateNode::Ideal_allocation(ld_adr, phase, offset) && 1.677 + offset >= st->in(0)->as_Allocate()->minimum_header_size()) { 1.678 + // return a zero value for the load's basic type 1.679 + // (This is one of the few places where a generic PhaseTransform 1.680 + // can create new nodes. Think of it as lazily manifesting 1.681 + // virtually pre-existing constants.) 1.682 + return phase->zerocon(memory_type()); 1.683 + } 1.684 + 1.685 + // A load from an initialization barrier can match a captured store. 1.686 + if (st->is_Proj() && st->in(0)->is_Initialize()) { 1.687 + InitializeNode* init = st->in(0)->as_Initialize(); 1.688 + AllocateNode* alloc = init->allocation(); 1.689 + if (alloc != NULL && 1.690 + alloc == AllocateNode::Ideal_allocation(ld_adr, phase, offset)) { 1.691 + // examine a captured store value 1.692 + st = init->find_captured_store(offset, memory_size(), phase); 1.693 + if (st != NULL) 1.694 + continue; // take one more trip around 1.695 + } 1.696 + } 1.697 + 1.698 + break; 1.699 + } 1.700 + 1.701 + return NULL; 1.702 +} 1.703 + 1.704 +//------------------------------Identity--------------------------------------- 1.705 +// Loads are identity if previous store is to same address 1.706 +Node *LoadNode::Identity( PhaseTransform *phase ) { 1.707 + // If the previous store-maker is the right kind of Store, and the store is 1.708 + // to the same address, then we are equal to the value stored. 1.709 + Node* mem = in(MemNode::Memory); 1.710 + Node* value = can_see_stored_value(mem, phase); 1.711 + if( value ) { 1.712 + // byte, short & char stores truncate naturally. 1.713 + // A load has to load the truncated value which requires 1.714 + // some sort of masking operation and that requires an 1.715 + // Ideal call instead of an Identity call. 1.716 + if (memory_size() < BytesPerInt) { 1.717 + // If the input to the store does not fit with the load's result type, 1.718 + // it must be truncated via an Ideal call. 1.719 + if (!phase->type(value)->higher_equal(phase->type(this))) 1.720 + return this; 1.721 + } 1.722 + // (This works even when value is a Con, but LoadNode::Value 1.723 + // usually runs first, producing the singleton type of the Con.) 1.724 + return value; 1.725 + } 1.726 + return this; 1.727 +} 1.728 + 1.729 +//------------------------------Ideal------------------------------------------ 1.730 +// If the load is from Field memory and the pointer is non-null, we can 1.731 +// zero out the control input. 1.732 +// If the offset is constant and the base is an object allocation, 1.733 +// try to hook me up to the exact initializing store. 1.734 +Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.735 + Node* p = MemNode::Ideal_common(phase, can_reshape); 1.736 + if (p) return (p == NodeSentinel) ? NULL : p; 1.737 + 1.738 + Node* ctrl = in(MemNode::Control); 1.739 + Node* address = in(MemNode::Address); 1.740 + 1.741 + // Skip up past a SafePoint control. Cannot do this for Stores because 1.742 + // pointer stores & cardmarks must stay on the same side of a SafePoint. 
1.743 + if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint && 1.744 + phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) { 1.745 + ctrl = ctrl->in(0); 1.746 + set_req(MemNode::Control,ctrl); 1.747 + } 1.748 + 1.749 + // Check for useless control edge in some common special cases 1.750 + if (in(MemNode::Control) != NULL) { 1.751 + intptr_t ignore = 0; 1.752 + Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore); 1.753 + if (base != NULL 1.754 + && phase->type(base)->higher_equal(TypePtr::NOTNULL) 1.755 + && detect_dominating_control(base->in(0), phase->C->start())) { 1.756 + // A method-invariant, non-null address (constant or 'this' argument). 1.757 + set_req(MemNode::Control, NULL); 1.758 + } 1.759 + } 1.760 + 1.761 + // Check for prior store with a different base or offset; make Load 1.762 + // independent. Skip through any number of them. Bail out if the stores 1.763 + // are in an endless dead cycle and report no progress. This is a key 1.764 + // transform for Reflection. However, if after skipping through the Stores 1.765 + // we can't then fold up against a prior store do NOT do the transform as 1.766 + // this amounts to using the 'Oracle' model of aliasing. It leaves the same 1.767 + // array memory alive twice: once for the hoisted Load and again after the 1.768 + // bypassed Store. This situation only works if EVERYBODY who does 1.769 + // anti-dependence work knows how to bypass. I.e. we need all 1.770 + // anti-dependence checks to ask the same Oracle. Right now, that Oracle is 1.771 + // the alias index stuff. So instead, peek through Stores and IFF we can 1.772 + // fold up, do so. 1.773 + Node* prev_mem = find_previous_store(phase); 1.774 + // Steps (a), (b): Walk past independent stores to find an exact match. 1.775 + if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) { 1.776 + // (c) See if we can fold up on the spot, but don't fold up here. 1.777 + // Fold-up might require truncation (for LoadB/LoadS/LoadC) or 1.778 + // just return a prior value, which is done by Identity calls. 1.779 + if (can_see_stored_value(prev_mem, phase)) { 1.780 + // Make ready for step (d): 1.781 + set_req(MemNode::Memory, prev_mem); 1.782 + return this; 1.783 + } 1.784 + } 1.785 + 1.786 + return NULL; // No further progress 1.787 +} 1.788 + 1.789 +// Helper to recognize certain Klass fields which are invariant across 1.790 +// some group of array types (e.g., int[] or all T[] where T < Object). 1.791 +const Type* 1.792 +LoadNode::load_array_final_field(const TypeKlassPtr *tkls, 1.793 + ciKlass* klass) const { 1.794 + if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { 1.795 + // The field is Klass::_modifier_flags. Return its (constant) value. 1.796 + // (Folds up the 2nd indirection in aClassConstant.getModifiers().) 1.797 + assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags"); 1.798 + return TypeInt::make(klass->modifier_flags()); 1.799 + } 1.800 + if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) { 1.801 + // The field is Klass::_access_flags. Return its (constant) value. 1.802 + // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).) 1.803 + assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags"); 1.804 + return TypeInt::make(klass->access_flags()); 1.805 + } 1.806 + if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) { 1.807 + // The field is Klass::_layout_helper. 
Return its constant value if known. 1.808 + assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper"); 1.809 + return TypeInt::make(klass->layout_helper()); 1.810 + } 1.811 + 1.812 + // No match. 1.813 + return NULL; 1.814 +} 1.815 + 1.816 +//------------------------------Value----------------------------------------- 1.817 +const Type *LoadNode::Value( PhaseTransform *phase ) const { 1.818 + // Either input is TOP ==> the result is TOP 1.819 + Node* mem = in(MemNode::Memory); 1.820 + const Type *t1 = phase->type(mem); 1.821 + if (t1 == Type::TOP) return Type::TOP; 1.822 + Node* adr = in(MemNode::Address); 1.823 + const TypePtr* tp = phase->type(adr)->isa_ptr(); 1.824 + if (tp == NULL || tp->empty()) return Type::TOP; 1.825 + int off = tp->offset(); 1.826 + assert(off != Type::OffsetTop, "case covered by TypePtr::empty"); 1.827 + 1.828 + // Try to guess loaded type from pointer type 1.829 + if (tp->base() == Type::AryPtr) { 1.830 + const Type *t = tp->is_aryptr()->elem(); 1.831 + // Don't do this for integer types. There is only potential profit if 1.832 + // the element type t is lower than _type; that is, for int types, if _type is 1.833 + // more restrictive than t. This only happens here if one is short and the other 1.834 + // char (both 16 bits), and in those cases we've made an intentional decision 1.835 + // to use one kind of load over the other. See AndINode::Ideal and 4965907. 1.836 + // Also, do not try to narrow the type for a LoadKlass, regardless of offset. 1.837 + // 1.838 + // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8)) 1.839 + // where the _gvn.type of the AddP is wider than 8. This occurs when an earlier 1.840 + // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been 1.841 + // subsumed by p1. If p1 is on the worklist but has not yet been re-transformed, 1.842 + // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any. 1.843 + // In fact, that could have been the original type of p1, and p1 could have 1.844 + // had an original form like p1:(AddP x x (LShiftL quux 3)), where the 1.845 + // expression (LShiftL quux 3) independently optimized to the constant 8. 1.846 + if ((t->isa_int() == NULL) && (t->isa_long() == NULL) 1.847 + && Opcode() != Op_LoadKlass) { 1.848 + // t might actually be lower than _type, if _type is a unique 1.849 + // concrete subclass of abstract class t. 1.850 + // Make sure the reference is not into the header, by comparing 1.851 + // the offset against the offset of the start of the array's data. 1.852 + // Different array types begin at slightly different offsets (12 vs. 16). 1.853 + // We choose T_BYTE as an example base type that is least restrictive 1.854 + // as to alignment, which will therefore produce the smallest 1.855 + // possible base offset. 1.856 + const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE); 1.857 + if ((uint)off >= (uint)min_base_off) { // is the offset beyond the header? 1.858 + const Type* jt = t->join(_type); 1.859 + // In any case, do not allow the join, per se, to empty out the type. 1.860 + if (jt->empty() && !t->empty()) { 1.861 + // This can happen if an interface-typed array narrows to a class type.
1.862 + jt = _type; 1.863 + } 1.864 + return jt; 1.865 + } 1.866 + } 1.867 + } else if (tp->base() == Type::InstPtr) { 1.868 + assert( off != Type::OffsetBot || 1.869 + // arrays can be cast to Objects 1.870 + tp->is_oopptr()->klass()->is_java_lang_Object() || 1.871 + // unsafe field access may not have a constant offset 1.872 + phase->C->has_unsafe_access(), 1.873 + "Field accesses must be precise" ); 1.874 + // For oop loads, we expect the _type to be precise 1.875 + } else if (tp->base() == Type::KlassPtr) { 1.876 + assert( off != Type::OffsetBot || 1.877 + // arrays can be cast to Objects 1.878 + tp->is_klassptr()->klass()->is_java_lang_Object() || 1.879 + // also allow array-loading from the primary supertype 1.880 + // array during subtype checks 1.881 + Opcode() == Op_LoadKlass, 1.882 + "Field accesses must be precise" ); 1.883 + // For klass/static loads, we expect the _type to be precise 1.884 + } 1.885 + 1.886 + const TypeKlassPtr *tkls = tp->isa_klassptr(); 1.887 + if (tkls != NULL && !StressReflectiveCode) { 1.888 + ciKlass* klass = tkls->klass(); 1.889 + if (klass->is_loaded() && tkls->klass_is_exact()) { 1.890 + // We are loading a field from a Klass metaobject whose identity 1.891 + // is known at compile time (the type is "exact" or "precise"). 1.892 + // Check for fields we know are maintained as constants by the VM. 1.893 + if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) { 1.894 + // The field is Klass::_super_check_offset. Return its (constant) value. 1.895 + // (Folds up type checking code.) 1.896 + assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset"); 1.897 + return TypeInt::make(klass->super_check_offset()); 1.898 + } 1.899 + // Compute index into primary_supers array 1.900 + juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); 1.901 + // Check for overflowing; use unsigned compare to handle the negative case. 1.902 + if( depth < ciKlass::primary_super_limit() ) { 1.903 + // The field is an element of Klass::_primary_supers. Return its (constant) value. 1.904 + // (Folds up type checking code.) 1.905 + assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers"); 1.906 + ciKlass *ss = klass->super_of_depth(depth); 1.907 + return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; 1.908 + } 1.909 + const Type* aift = load_array_final_field(tkls, klass); 1.910 + if (aift != NULL) return aift; 1.911 + if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc) 1.912 + && klass->is_array_klass()) { 1.913 + // The field is arrayKlass::_component_mirror. Return its (constant) value. 1.914 + // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.) 1.915 + assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror"); 1.916 + return TypeInstPtr::make(klass->as_array_klass()->component_mirror()); 1.917 + } 1.918 + if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) { 1.919 + // The field is Klass::_java_mirror. Return its (constant) value. 1.920 + // (Folds up the 2nd indirection in anObjConstant.getClass().) 1.921 + assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror"); 1.922 + return TypeInstPtr::make(klass->java_mirror()); 1.923 + } 1.924 + } 1.925 + 1.926 + // We can still check if we are loading from the primary_supers array at a 1.927 + // shallow enough depth. 
Even though the klass is not exact, entries less 1.928 + // than or equal to its super depth are correct. 1.929 + if (klass->is_loaded() ) { 1.930 + ciType *inner = klass->klass(); 1.931 + while( inner->is_obj_array_klass() ) 1.932 + inner = inner->as_obj_array_klass()->base_element_type(); 1.933 + if( inner->is_instance_klass() && 1.934 + !inner->as_instance_klass()->flags().is_interface() ) { 1.935 + // Compute index into primary_supers array 1.936 + juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop); 1.937 + // Check for overflowing; use unsigned compare to handle the negative case. 1.938 + if( depth < ciKlass::primary_super_limit() && 1.939 + depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case 1.940 + // The field is an element of Klass::_primary_supers. Return its (constant) value. 1.941 + // (Folds up type checking code.) 1.942 + assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers"); 1.943 + ciKlass *ss = klass->super_of_depth(depth); 1.944 + return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; 1.945 + } 1.946 + } 1.947 + } 1.948 + 1.949 + // If the type is enough to determine that the thing is not an array, 1.950 + // we can give the layout_helper a positive interval type. 1.951 + // This will help short-circuit some reflective code. 1.952 + if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc) 1.953 + && !klass->is_array_klass() // not directly typed as an array 1.954 + && !klass->is_interface() // specifically not Serializable & Cloneable 1.955 + && !klass->is_java_lang_Object() // not the supertype of all T[] 1.956 + ) { 1.957 + // Note: When interfaces are reliable, we can narrow the interface 1.958 + // test to (klass != Serializable && klass != Cloneable). 1.959 + assert(Opcode() == Op_LoadI, "must load an int from _layout_helper"); 1.960 + jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false); 1.961 + // The key property of this type is that it folds up tests 1.962 + // for array-ness, since it proves that the layout_helper is positive. 1.963 + // Thus, a generic value like the basic object layout helper works fine. 1.964 + return TypeInt::make(min_size, max_jint, Type::WidenMin); 1.965 + } 1.966 + } 1.967 + 1.968 + // If we are loading from a freshly-allocated object, produce a zero, 1.969 + // if the load is provably beyond the header of the object. 1.970 + // (Also allow a variable load from a fresh array to produce zero.) 1.971 + if (ReduceFieldZeroing) { 1.972 + Node* value = can_see_stored_value(mem,phase); 1.973 + if (value != NULL && value->is_Con()) 1.974 + return value->bottom_type(); 1.975 + } 1.976 + 1.977 + return _type; 1.978 +} 1.979 + 1.980 +//------------------------------match_edge------------------------------------- 1.981 +// Do we Match on this edge index or not? Match only the address. 1.982 +uint LoadNode::match_edge(uint idx) const { 1.983 + return idx == MemNode::Address; 1.984 +} 1.985 + 1.986 +//--------------------------LoadBNode::Ideal-------------------------------------- 1.987 +// 1.988 +// If the previous store is to the same address as this load, 1.989 +// and the value stored was larger than a byte, replace this load 1.990 +// with the value stored truncated to a byte. If no truncation is 1.991 +// needed, the replacement is done in LoadNode::Identity(). 
1.992 +// 1.993 +Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.994 + Node* mem = in(MemNode::Memory); 1.995 + Node* value = can_see_stored_value(mem,phase); 1.996 + if( value && !phase->type(value)->higher_equal( _type ) ) { 1.997 + Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(24)) ); 1.998 + return new (phase->C, 3) RShiftINode(result, phase->intcon(24)); 1.999 + } 1.1000 + // Identity call will handle the case where truncation is not needed. 1.1001 + return LoadNode::Ideal(phase, can_reshape); 1.1002 +} 1.1003 + 1.1004 +//--------------------------LoadCNode::Ideal-------------------------------------- 1.1005 +// 1.1006 +// If the previous store is to the same address as this load, 1.1007 +// and the value stored was larger than a char, replace this load 1.1008 +// with the value stored truncated to a char. If no truncation is 1.1009 +// needed, the replacement is done in LoadNode::Identity(). 1.1010 +// 1.1011 +Node *LoadCNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1012 + Node* mem = in(MemNode::Memory); 1.1013 + Node* value = can_see_stored_value(mem,phase); 1.1014 + if( value && !phase->type(value)->higher_equal( _type ) ) 1.1015 + return new (phase->C, 3) AndINode(value,phase->intcon(0xFFFF)); 1.1016 + // Identity call will handle the case where truncation is not needed. 1.1017 + return LoadNode::Ideal(phase, can_reshape); 1.1018 +} 1.1019 + 1.1020 +//--------------------------LoadSNode::Ideal-------------------------------------- 1.1021 +// 1.1022 +// If the previous store is to the same address as this load, 1.1023 +// and the value stored was larger than a short, replace this load 1.1024 +// with the value stored truncated to a short. If no truncation is 1.1025 +// needed, the replacement is done in LoadNode::Identity(). 1.1026 +// 1.1027 +Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1028 + Node* mem = in(MemNode::Memory); 1.1029 + Node* value = can_see_stored_value(mem,phase); 1.1030 + if( value && !phase->type(value)->higher_equal( _type ) ) { 1.1031 + Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(16)) ); 1.1032 + return new (phase->C, 3) RShiftINode(result, phase->intcon(16)); 1.1033 + } 1.1034 + // Identity call will handle the case where truncation is not needed. 
1.1035 + return LoadNode::Ideal(phase, can_reshape); 1.1036 +} 1.1037 + 1.1038 +//============================================================================= 1.1039 +//------------------------------Value------------------------------------------ 1.1040 +const Type *LoadKlassNode::Value( PhaseTransform *phase ) const { 1.1041 + // Either input is TOP ==> the result is TOP 1.1042 + const Type *t1 = phase->type( in(MemNode::Memory) ); 1.1043 + if (t1 == Type::TOP) return Type::TOP; 1.1044 + Node *adr = in(MemNode::Address); 1.1045 + const Type *t2 = phase->type( adr ); 1.1046 + if (t2 == Type::TOP) return Type::TOP; 1.1047 + const TypePtr *tp = t2->is_ptr(); 1.1048 + if (TypePtr::above_centerline(tp->ptr()) || 1.1049 + tp->ptr() == TypePtr::Null) return Type::TOP; 1.1050 + 1.1051 + // Return a more precise klass, if possible 1.1052 + const TypeInstPtr *tinst = tp->isa_instptr(); 1.1053 + if (tinst != NULL) { 1.1054 + ciInstanceKlass* ik = tinst->klass()->as_instance_klass(); 1.1055 + int offset = tinst->offset(); 1.1056 + if (ik == phase->C->env()->Class_klass() 1.1057 + && (offset == java_lang_Class::klass_offset_in_bytes() || 1.1058 + offset == java_lang_Class::array_klass_offset_in_bytes())) { 1.1059 + // We are loading a special hidden field from a Class mirror object, 1.1060 + // the field which points to the VM's Klass metaobject. 1.1061 + ciType* t = tinst->java_mirror_type(); 1.1062 + // java_mirror_type returns non-null for compile-time Class constants. 1.1063 + if (t != NULL) { 1.1064 + // constant oop => constant klass 1.1065 + if (offset == java_lang_Class::array_klass_offset_in_bytes()) { 1.1066 + return TypeKlassPtr::make(ciArrayKlass::make(t)); 1.1067 + } 1.1068 + if (!t->is_klass()) { 1.1069 + // a primitive Class (e.g., int.class) has NULL for a klass field 1.1070 + return TypePtr::NULL_PTR; 1.1071 + } 1.1072 + // (Folds up the 1st indirection in aClassConstant.getModifiers().) 1.1073 + return TypeKlassPtr::make(t->as_klass()); 1.1074 + } 1.1075 + // non-constant mirror, so we can't tell what's going on 1.1076 + } 1.1077 + if( !ik->is_loaded() ) 1.1078 + return _type; // Bail out if not loaded 1.1079 + if (offset == oopDesc::klass_offset_in_bytes()) { 1.1080 + if (tinst->klass_is_exact()) { 1.1081 + return TypeKlassPtr::make(ik); 1.1082 + } 1.1083 + // See if we can become precise: no subklasses and no interface 1.1084 + // (Note: We need to support verified interfaces.) 
1.1085 + if (!ik->is_interface() && !ik->has_subklass()) { 1.1086 + //assert(!UseExactTypes, "this code should be useless with exact types"); 1.1087 + // Add a dependence; if any subclass added we need to recompile 1.1088 + if (!ik->is_final()) { 1.1089 + // %%% should use stronger assert_unique_concrete_subtype instead 1.1090 + phase->C->dependencies()->assert_leaf_type(ik); 1.1091 + } 1.1092 + // Return precise klass 1.1093 + return TypeKlassPtr::make(ik); 1.1094 + } 1.1095 + 1.1096 + // Return root of possible klass 1.1097 + return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/); 1.1098 + } 1.1099 + } 1.1100 + 1.1101 + // Check for loading klass from an array 1.1102 + const TypeAryPtr *tary = tp->isa_aryptr(); 1.1103 + if( tary != NULL ) { 1.1104 + ciKlass *tary_klass = tary->klass(); 1.1105 + if (tary_klass != NULL // can be NULL when at BOTTOM or TOP 1.1106 + && tary->offset() == oopDesc::klass_offset_in_bytes()) { 1.1107 + if (tary->klass_is_exact()) { 1.1108 + return TypeKlassPtr::make(tary_klass); 1.1109 + } 1.1110 + ciArrayKlass *ak = tary->klass()->as_array_klass(); 1.1111 + // If the klass is an object array, we defer the question to the 1.1112 + // array component klass. 1.1113 + if( ak->is_obj_array_klass() ) { 1.1114 + assert( ak->is_loaded(), "" ); 1.1115 + ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass(); 1.1116 + if( base_k->is_loaded() && base_k->is_instance_klass() ) { 1.1117 + ciInstanceKlass* ik = base_k->as_instance_klass(); 1.1118 + // See if we can become precise: no subklasses and no interface 1.1119 + if (!ik->is_interface() && !ik->has_subklass()) { 1.1120 + //assert(!UseExactTypes, "this code should be useless with exact types"); 1.1121 + // Add a dependence; if any subclass added we need to recompile 1.1122 + if (!ik->is_final()) { 1.1123 + phase->C->dependencies()->assert_leaf_type(ik); 1.1124 + } 1.1125 + // Return precise array klass 1.1126 + return TypeKlassPtr::make(ak); 1.1127 + } 1.1128 + } 1.1129 + return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/); 1.1130 + } else { // Found a type-array? 1.1131 + //assert(!UseExactTypes, "this code should be useless with exact types"); 1.1132 + assert( ak->is_type_array_klass(), "" ); 1.1133 + return TypeKlassPtr::make(ak); // These are always precise 1.1134 + } 1.1135 + } 1.1136 + } 1.1137 + 1.1138 + // Check for loading klass from an array klass 1.1139 + const TypeKlassPtr *tkls = tp->isa_klassptr(); 1.1140 + if (tkls != NULL && !StressReflectiveCode) { 1.1141 + ciKlass* klass = tkls->klass(); 1.1142 + if( !klass->is_loaded() ) 1.1143 + return _type; // Bail out if not loaded 1.1144 + if( klass->is_obj_array_klass() && 1.1145 + (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) { 1.1146 + ciKlass* elem = klass->as_obj_array_klass()->element_klass(); 1.1147 + // // Always returning precise element type is incorrect, 1.1148 + // // e.g., element type could be object and array may contain strings 1.1149 + // return TypeKlassPtr::make(TypePtr::Constant, elem, 0); 1.1150 + 1.1151 + // The array's TypeKlassPtr was declared 'precise' or 'not precise' 1.1152 + // according to the element type's subclassing. 1.1153 + return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/); 1.1154 + } 1.1155 + if( klass->is_instance_klass() && tkls->klass_is_exact() && 1.1156 + (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) { 1.1157 + ciKlass* sup = klass->as_instance_klass()->super(); 1.1158 + // The field is Klass::_super. 
Return its (constant) value. 1.1159 + // (Folds up the 2nd indirection in aClassConstant.getSuperClass().) 1.1160 + return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR; 1.1161 + } 1.1162 + } 1.1163 + 1.1164 + // Bailout case 1.1165 + return LoadNode::Value(phase); 1.1166 +} 1.1167 + 1.1168 +//------------------------------Identity--------------------------------------- 1.1169 +// To clean up reflective code, simplify k.java_mirror.as_klass to plain k. 1.1170 +// Also feed through the klass in Allocate(...klass...)._klass. 1.1171 +Node* LoadKlassNode::Identity( PhaseTransform *phase ) { 1.1172 + Node* x = LoadNode::Identity(phase); 1.1173 + if (x != this) return x; 1.1174 + 1.1175 + // Take apart the address into an oop and an offset. 1.1176 + // Return 'this' if we cannot. 1.1177 + Node* adr = in(MemNode::Address); 1.1178 + intptr_t offset = 0; 1.1179 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); 1.1180 + if (base == NULL) return this; 1.1181 + const TypeOopPtr* toop = phase->type(adr)->isa_oopptr(); 1.1182 + if (toop == NULL) return this; 1.1183 + 1.1184 + // We can fetch the klass directly through an AllocateNode. 1.1185 + // This works even if the klass is not constant (clone or newArray). 1.1186 + if (offset == oopDesc::klass_offset_in_bytes()) { 1.1187 + Node* allocated_klass = AllocateNode::Ideal_klass(base, phase); 1.1188 + if (allocated_klass != NULL) { 1.1189 + return allocated_klass; 1.1190 + } 1.1191 + } 1.1192 + 1.1193 + // Simplify k.java_mirror.as_klass to plain k, where k is a klassOop. 1.1194 + // Simplify ak.component_mirror.array_klass to plain ak, ak an arrayKlass. 1.1195 + // See inline_native_Class_query for occurrences of these patterns. 1.1196 + // Java Example: x.getClass().isAssignableFrom(y) 1.1197 + // Java Example: Array.newInstance(x.getClass().getComponentType(), n) 1.1198 + // 1.1199 + // This improves reflective code, often making the Class 1.1200 + // mirror go completely dead. (Current exception: Class 1.1201 + // mirrors may appear in debug info, but we could clean them out by 1.1202 + // introducing a new debug info operator for klassOop.java_mirror). 1.1203 + if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass() 1.1204 + && (offset == java_lang_Class::klass_offset_in_bytes() || 1.1205 + offset == java_lang_Class::array_klass_offset_in_bytes())) { 1.1206 + // We are loading a special hidden field from a Class mirror, 1.1207 + // the field which points to its Klass or arrayKlass metaobject.
1.1208 + if (base->is_Load()) { 1.1209 + Node* adr2 = base->in(MemNode::Address); 1.1210 + const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr(); 1.1211 + if (tkls != NULL && !tkls->empty() 1.1212 + && (tkls->klass()->is_instance_klass() || 1.1213 + tkls->klass()->is_array_klass()) 1.1214 + && adr2->is_AddP() 1.1215 + ) { 1.1216 + int mirror_field = Klass::java_mirror_offset_in_bytes(); 1.1217 + if (offset == java_lang_Class::array_klass_offset_in_bytes()) { 1.1218 + mirror_field = in_bytes(arrayKlass::component_mirror_offset()); 1.1219 + } 1.1220 + if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) { 1.1221 + return adr2->in(AddPNode::Base); 1.1222 + } 1.1223 + } 1.1224 + } 1.1225 + } 1.1226 + 1.1227 + return this; 1.1228 +} 1.1229 + 1.1230 +//------------------------------Value----------------------------------------- 1.1231 +const Type *LoadRangeNode::Value( PhaseTransform *phase ) const { 1.1232 + // Either input is TOP ==> the result is TOP 1.1233 + const Type *t1 = phase->type( in(MemNode::Memory) ); 1.1234 + if( t1 == Type::TOP ) return Type::TOP; 1.1235 + Node *adr = in(MemNode::Address); 1.1236 + const Type *t2 = phase->type( adr ); 1.1237 + if( t2 == Type::TOP ) return Type::TOP; 1.1238 + const TypePtr *tp = t2->is_ptr(); 1.1239 + if (TypePtr::above_centerline(tp->ptr())) return Type::TOP; 1.1240 + const TypeAryPtr *tap = tp->isa_aryptr(); 1.1241 + if( !tap ) return _type; 1.1242 + return tap->size(); 1.1243 +} 1.1244 + 1.1245 +//------------------------------Identity--------------------------------------- 1.1246 +// Feed through the length in AllocateArray(...length...)._length. 1.1247 +Node* LoadRangeNode::Identity( PhaseTransform *phase ) { 1.1248 + Node* x = LoadINode::Identity(phase); 1.1249 + if (x != this) return x; 1.1250 + 1.1251 + // Take apart the address into an oop and and offset. 1.1252 + // Return 'this' if we cannot. 1.1253 + Node* adr = in(MemNode::Address); 1.1254 + intptr_t offset = 0; 1.1255 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); 1.1256 + if (base == NULL) return this; 1.1257 + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr(); 1.1258 + if (tary == NULL) return this; 1.1259 + 1.1260 + // We can fetch the length directly through an AllocateArrayNode. 1.1261 + // This works even if the length is not constant (clone or newArray). 
1.1262 + if (offset == arrayOopDesc::length_offset_in_bytes()) { 1.1263 + Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase); 1.1264 + if (allocated_length != NULL) { 1.1265 + return allocated_length; 1.1266 + } 1.1267 + } 1.1268 + 1.1269 + return this; 1.1270 + 1.1271 +} 1.1272 +//============================================================================= 1.1273 +//---------------------------StoreNode::make----------------------------------- 1.1274 +// Polymorphic factory method: 1.1275 +StoreNode* StoreNode::make( Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) { 1.1276 + switch (bt) { 1.1277 + case T_BOOLEAN: 1.1278 + case T_BYTE: return new (C, 4) StoreBNode(ctl, mem, adr, adr_type, val); 1.1279 + case T_INT: return new (C, 4) StoreINode(ctl, mem, adr, adr_type, val); 1.1280 + case T_CHAR: 1.1281 + case T_SHORT: return new (C, 4) StoreCNode(ctl, mem, adr, adr_type, val); 1.1282 + case T_LONG: return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val); 1.1283 + case T_FLOAT: return new (C, 4) StoreFNode(ctl, mem, adr, adr_type, val); 1.1284 + case T_DOUBLE: return new (C, 4) StoreDNode(ctl, mem, adr, adr_type, val); 1.1285 + case T_ADDRESS: 1.1286 + case T_OBJECT: return new (C, 4) StorePNode(ctl, mem, adr, adr_type, val); 1.1287 + } 1.1288 + ShouldNotReachHere(); 1.1289 + return (StoreNode*)NULL; 1.1290 +} 1.1291 + 1.1292 +StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) { 1.1293 + bool require_atomic = true; 1.1294 + return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic); 1.1295 +} 1.1296 + 1.1297 + 1.1298 +//--------------------------bottom_type---------------------------------------- 1.1299 +const Type *StoreNode::bottom_type() const { 1.1300 + return Type::MEMORY; 1.1301 +} 1.1302 + 1.1303 +//------------------------------hash------------------------------------------- 1.1304 +uint StoreNode::hash() const { 1.1305 + // unroll addition of interesting fields 1.1306 + //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn); 1.1307 + 1.1308 + // Since they are not commoned, do not hash them: 1.1309 + return NO_HASH; 1.1310 +} 1.1311 + 1.1312 +//------------------------------Ideal------------------------------------------ 1.1313 +// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x). 1.1314 +// When a store immediately follows a relevant allocation/initialization, 1.1315 +// try to capture it into the initialization, or hoist it above. 1.1316 +Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1317 + Node* p = MemNode::Ideal_common(phase, can_reshape); 1.1318 + if (p) return (p == NodeSentinel) ? NULL : p; 1.1319 + 1.1320 + Node* mem = in(MemNode::Memory); 1.1321 + Node* address = in(MemNode::Address); 1.1322 + 1.1323 + // Back-to-back stores to same address? Fold em up. 1.1324 + // Generally unsafe if I have intervening uses... 1.1325 + if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) { 1.1326 + // Looking at a dead closed cycle of memory? 
1.1327 + assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); 1.1328 + 1.1329 + assert(Opcode() == mem->Opcode() || 1.1330 + phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw, 1.1331 + "no mismatched stores, except on raw memory"); 1.1332 + 1.1333 + if (mem->outcnt() == 1 && // check for intervening uses 1.1334 + mem->as_Store()->memory_size() <= this->memory_size()) { 1.1335 + // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away. 1.1336 + // For example, 'mem' might be the final state at a conditional return. 1.1337 + // Or, 'mem' might be used by some node which is live at the same time 1.1338 + // 'this' is live, which might be unschedulable. So, require exactly 1.1339 + // ONE user, the 'this' store, until such time as we clone 'mem' for 1.1340 + // each of 'mem's uses (thus making the exactly-1-user-rule hold true). 1.1341 + if (can_reshape) { // (%%% is this an anachronism?) 1.1342 + set_req_X(MemNode::Memory, mem->in(MemNode::Memory), 1.1343 + phase->is_IterGVN()); 1.1344 + } else { 1.1345 + // It's OK to do this in the parser, since DU info is always accurate, 1.1346 + // and the parser always refers to nodes via SafePointNode maps. 1.1347 + set_req(MemNode::Memory, mem->in(MemNode::Memory)); 1.1348 + } 1.1349 + return this; 1.1350 + } 1.1351 + } 1.1352 + 1.1353 + // Capture an unaliased, unconditional, simple store into an initializer. 1.1354 + // Or, if it is independent of the allocation, hoist it above the allocation. 1.1355 + if (ReduceFieldZeroing && /*can_reshape &&*/ 1.1356 + mem->is_Proj() && mem->in(0)->is_Initialize()) { 1.1357 + InitializeNode* init = mem->in(0)->as_Initialize(); 1.1358 + intptr_t offset = init->can_capture_store(this, phase); 1.1359 + if (offset > 0) { 1.1360 + Node* moved = init->capture_store(this, offset, phase); 1.1361 + // If the InitializeNode captured me, it made a raw copy of me, 1.1362 + // and I need to disappear. 1.1363 + if (moved != NULL) { 1.1364 + // %%% hack to ensure that Ideal returns a new node: 1.1365 + mem = MergeMemNode::make(phase->C, mem); 1.1366 + return mem; // fold me away 1.1367 + } 1.1368 + } 1.1369 + } 1.1370 + 1.1371 + return NULL; // No further progress 1.1372 +} 1.1373 + 1.1374 +//------------------------------Value----------------------------------------- 1.1375 +const Type *StoreNode::Value( PhaseTransform *phase ) const { 1.1376 + // Either input is TOP ==> the result is TOP 1.1377 + const Type *t1 = phase->type( in(MemNode::Memory) ); 1.1378 + if( t1 == Type::TOP ) return Type::TOP; 1.1379 + const Type *t2 = phase->type( in(MemNode::Address) ); 1.1380 + if( t2 == Type::TOP ) return Type::TOP; 1.1381 + const Type *t3 = phase->type( in(MemNode::ValueIn) ); 1.1382 + if( t3 == Type::TOP ) return Type::TOP; 1.1383 + return Type::MEMORY; 1.1384 +} 1.1385 + 1.1386 +//------------------------------Identity--------------------------------------- 1.1387 +// Remove redundant stores: 1.1388 +// Store(m, p, Load(m, p)) changes to m. 1.1389 +// Store(, p, x) -> Store(m, p, x) changes to Store(m, p, x). 1.1390 +Node *StoreNode::Identity( PhaseTransform *phase ) { 1.1391 + Node* mem = in(MemNode::Memory); 1.1392 + Node* adr = in(MemNode::Address); 1.1393 + Node* val = in(MemNode::ValueIn); 1.1394 + 1.1395 + // Load then Store? 
Then the Store is useless 1.1396 + if (val->is_Load() && 1.1397 + phase->eqv_uncast( val->in(MemNode::Address), adr ) && 1.1398 + phase->eqv_uncast( val->in(MemNode::Memory ), mem ) && 1.1399 + val->as_Load()->store_Opcode() == Opcode()) { 1.1400 + return mem; 1.1401 + } 1.1402 + 1.1403 + // Two stores in a row of the same value? 1.1404 + if (mem->is_Store() && 1.1405 + phase->eqv_uncast( mem->in(MemNode::Address), adr ) && 1.1406 + phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) && 1.1407 + mem->Opcode() == Opcode()) { 1.1408 + return mem; 1.1409 + } 1.1410 + 1.1411 + // Store of zero anywhere into a freshly-allocated object? 1.1412 + // Then the store is useless. 1.1413 + // (It must already have been captured by the InitializeNode.) 1.1414 + if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) { 1.1415 + // a newly allocated object is already all-zeroes everywhere 1.1416 + if (mem->is_Proj() && mem->in(0)->is_Allocate()) { 1.1417 + return mem; 1.1418 + } 1.1419 + 1.1420 + // the store may also apply to zero-bits in an earlier object 1.1421 + Node* prev_mem = find_previous_store(phase); 1.1422 + // Steps (a), (b): Walk past independent stores to find an exact match. 1.1423 + if (prev_mem != NULL) { 1.1424 + Node* prev_val = can_see_stored_value(prev_mem, phase); 1.1425 + if (prev_val != NULL && phase->eqv(prev_val, val)) { 1.1426 + // prev_val and val might differ by a cast; it would be good 1.1427 + // to keep the more informative of the two. 1.1428 + return mem; 1.1429 + } 1.1430 + } 1.1431 + } 1.1432 + 1.1433 + return this; 1.1434 +} 1.1435 + 1.1436 +//------------------------------match_edge------------------------------------- 1.1437 +// Do we Match on this edge index or not? Match only memory & value 1.1438 +uint StoreNode::match_edge(uint idx) const { 1.1439 + return idx == MemNode::Address || idx == MemNode::ValueIn; 1.1440 +} 1.1441 + 1.1442 +//------------------------------cmp-------------------------------------------- 1.1443 +// Do not common stores up together. They generally have to be split 1.1444 +// back up anyways, so do not bother. 1.1445 +uint StoreNode::cmp( const Node &n ) const { 1.1446 + return (&n == this); // Always fail except on self 1.1447 +} 1.1448 + 1.1449 +//------------------------------Ideal_masked_input----------------------------- 1.1450 +// Check for a useless mask before a partial-word store 1.1451 +// (StoreB ... (AndI valIn conIa) ) 1.1452 +// If (conIa & mask == mask) this simplifies to 1.1453 +// (StoreB ... (valIn) ) 1.1454 +Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) { 1.1455 + Node *val = in(MemNode::ValueIn); 1.1456 + if( val->Opcode() == Op_AndI ) { 1.1457 + const TypeInt *t = phase->type( val->in(2) )->isa_int(); 1.1458 + if( t && t->is_con() && (t->get_con() & mask) == mask ) { 1.1459 + set_req(MemNode::ValueIn, val->in(1)); 1.1460 + return this; 1.1461 + } 1.1462 + } 1.1463 + return NULL; 1.1464 +} 1.1465 + 1.1466 + 1.1467 +//------------------------------Ideal_sign_extended_input---------------------- 1.1468 +// Check for useless sign-extension before a partial-word store 1.1469 +// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) ) 1.1470 +// If (conIL == conIR && conIR <= num_bits) this simplifies to 1.1471 +// (StoreB ... 
(valIn) ) 1.1472 +Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) { 1.1473 + Node *val = in(MemNode::ValueIn); 1.1474 + if( val->Opcode() == Op_RShiftI ) { 1.1475 + const TypeInt *t = phase->type( val->in(2) )->isa_int(); 1.1476 + if( t && t->is_con() && (t->get_con() <= num_bits) ) { 1.1477 + Node *shl = val->in(1); 1.1478 + if( shl->Opcode() == Op_LShiftI ) { 1.1479 + const TypeInt *t2 = phase->type( shl->in(2) )->isa_int(); 1.1480 + if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) { 1.1481 + set_req(MemNode::ValueIn, shl->in(1)); 1.1482 + return this; 1.1483 + } 1.1484 + } 1.1485 + } 1.1486 + } 1.1487 + return NULL; 1.1488 +} 1.1489 + 1.1490 +//------------------------------value_never_loaded----------------------------------- 1.1491 +// Determine whether there are any possible loads of the value stored. 1.1492 +// For simplicity, we actually check if there are any loads from the 1.1493 +// address stored to, not just for loads of the value stored by this node. 1.1494 +// 1.1495 +bool StoreNode::value_never_loaded( PhaseTransform *phase) const { 1.1496 + Node *adr = in(Address); 1.1497 + const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr(); 1.1498 + if (adr_oop == NULL) 1.1499 + return false; 1.1500 + if (!adr_oop->is_instance()) 1.1501 + return false; // if not a distinct instance, there may be aliases of the address 1.1502 + for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) { 1.1503 + Node *use = adr->fast_out(i); 1.1504 + int opc = use->Opcode(); 1.1505 + if (use->is_Load() || use->is_LoadStore()) { 1.1506 + return false; 1.1507 + } 1.1508 + } 1.1509 + return true; 1.1510 +} 1.1511 + 1.1512 +//============================================================================= 1.1513 +//------------------------------Ideal------------------------------------------ 1.1514 +// If the store is from an AND mask that leaves the low bits untouched, then 1.1515 +// we can skip the AND operation. If the store is from a sign-extension 1.1516 +// (a left shift, then right shift) we can skip both. 
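The two transforms above are safe because a narrowing store keeps only the low-order bits of its value, so a mask that preserves those bits, or a matched left/right shift pair, cannot change the byte that reaches memory. A minimal standalone check of both identities for the StoreB case (plain C++, illustration only, not part of this changeset):

#include <cassert>
#include <cstdint>

// Model what a StoreB keeps of its value: only the low 8 bits.
static uint8_t low_byte(int32_t v) { return (uint8_t)(v & 0xFF); }

int main() {
  for (int32_t x = -1000; x <= 1000; ++x) {
    // Ideal_masked_input: (AndI x 0xFF) stores the same byte as x.
    assert(low_byte(x & 0xFF) == low_byte(x));
    // Ideal_sign_extended_input: (RShiftI (LShiftI x 24) 24) stores the same
    // byte as x; the shift pair only rebuilds the sign from bit 7, which the
    // byte store discards anyway.  (The intermediate left shift is done on an
    // unsigned value here to keep the sketch free of signed-overflow issues.)
    int32_t sx = (int32_t)((uint32_t)x << 24) >> 24;
    assert(low_byte(sx) == low_byte(x));
  }
  return 0;
}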
1.1517 +Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.1518 + Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF); 1.1519 + if( progress != NULL ) return progress; 1.1520 + 1.1521 + progress = StoreNode::Ideal_sign_extended_input(phase, 24); 1.1522 + if( progress != NULL ) return progress; 1.1523 + 1.1524 + // Finally check the default case 1.1525 + return StoreNode::Ideal(phase, can_reshape); 1.1526 +} 1.1527 + 1.1528 +//============================================================================= 1.1529 +//------------------------------Ideal------------------------------------------ 1.1530 +// If the store is from an AND mask that leaves the low bits untouched, then 1.1531 +// we can skip the AND operation 1.1532 +Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.1533 + Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF); 1.1534 + if( progress != NULL ) return progress; 1.1535 + 1.1536 + progress = StoreNode::Ideal_sign_extended_input(phase, 16); 1.1537 + if( progress != NULL ) return progress; 1.1538 + 1.1539 + // Finally check the default case 1.1540 + return StoreNode::Ideal(phase, can_reshape); 1.1541 +} 1.1542 + 1.1543 +//============================================================================= 1.1544 +//------------------------------Identity--------------------------------------- 1.1545 +Node *StoreCMNode::Identity( PhaseTransform *phase ) { 1.1546 + // No need to card mark when storing a null ptr 1.1547 + Node* my_store = in(MemNode::OopStore); 1.1548 + if (my_store->is_Store()) { 1.1549 + const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) ); 1.1550 + if( t1 == TypePtr::NULL_PTR ) { 1.1551 + return in(MemNode::Memory); 1.1552 + } 1.1553 + } 1.1554 + return this; 1.1555 +} 1.1556 + 1.1557 +//------------------------------Value----------------------------------------- 1.1558 +const Type *StoreCMNode::Value( PhaseTransform *phase ) const { 1.1559 + // If extra input is TOP ==> the result is TOP 1.1560 + const Type *t1 = phase->type( in(MemNode::OopStore) ); 1.1561 + if( t1 == Type::TOP ) return Type::TOP; 1.1562 + 1.1563 + return StoreNode::Value( phase ); 1.1564 +} 1.1565 + 1.1566 + 1.1567 +//============================================================================= 1.1568 +//----------------------------------SCMemProjNode------------------------------ 1.1569 +const Type * SCMemProjNode::Value( PhaseTransform *phase ) const 1.1570 +{ 1.1571 + return bottom_type(); 1.1572 +} 1.1573 + 1.1574 +//============================================================================= 1.1575 +LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) { 1.1576 + init_req(MemNode::Control, c ); 1.1577 + init_req(MemNode::Memory , mem); 1.1578 + init_req(MemNode::Address, adr); 1.1579 + init_req(MemNode::ValueIn, val); 1.1580 + init_req( ExpectedIn, ex ); 1.1581 + init_class_id(Class_LoadStore); 1.1582 + 1.1583 +} 1.1584 + 1.1585 +//============================================================================= 1.1586 +//-------------------------------adr_type-------------------------------------- 1.1587 +// Do we Match on this edge index or not? Do not match memory 1.1588 +const TypePtr* ClearArrayNode::adr_type() const { 1.1589 + Node *adr = in(3); 1.1590 + return MemNode::calculate_adr_type(adr->bottom_type()); 1.1591 +} 1.1592 + 1.1593 +//------------------------------match_edge------------------------------------- 1.1594 +// Do we Match on this edge index or not? 
Do not match memory 1.1595 +uint ClearArrayNode::match_edge(uint idx) const { 1.1596 + return idx > 1; 1.1597 +} 1.1598 + 1.1599 +//------------------------------Identity--------------------------------------- 1.1600 +// Clearing a zero length array does nothing 1.1601 +Node *ClearArrayNode::Identity( PhaseTransform *phase ) { 1.1602 + return phase->type(in(2))->higher_equal(TypeInt::ZERO) ? in(1) : this; 1.1603 +} 1.1604 + 1.1605 +//------------------------------Idealize--------------------------------------- 1.1606 +// Clearing a short array is faster with stores 1.1607 +Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.1608 + const int unit = BytesPerLong; 1.1609 + const TypeX* t = phase->type(in(2))->isa_intptr_t(); 1.1610 + if (!t) return NULL; 1.1611 + if (!t->is_con()) return NULL; 1.1612 + intptr_t raw_count = t->get_con(); 1.1613 + intptr_t size = raw_count; 1.1614 + if (!Matcher::init_array_count_is_in_bytes) size *= unit; 1.1615 + // Clearing nothing uses the Identity call. 1.1616 + // Negative clears are possible on dead ClearArrays 1.1617 + // (see jck test stmt114.stmt11402.val). 1.1618 + if (size <= 0 || size % unit != 0) return NULL; 1.1619 + intptr_t count = size / unit; 1.1620 + // Length too long; use fast hardware clear 1.1621 + if (size > Matcher::init_array_short_size) return NULL; 1.1622 + Node *mem = in(1); 1.1623 + if( phase->type(mem)==Type::TOP ) return NULL; 1.1624 + Node *adr = in(3); 1.1625 + const Type* at = phase->type(adr); 1.1626 + if( at==Type::TOP ) return NULL; 1.1627 + const TypePtr* atp = at->isa_ptr(); 1.1628 + // adjust atp to be the correct array element address type 1.1629 + if (atp == NULL) atp = TypePtr::BOTTOM; 1.1630 + else atp = atp->add_offset(Type::OffsetBot); 1.1631 + // Get base for derived pointer purposes 1.1632 + if( adr->Opcode() != Op_AddP ) Unimplemented(); 1.1633 + Node *base = adr->in(1); 1.1634 + 1.1635 + Node *zero = phase->makecon(TypeLong::ZERO); 1.1636 + Node *off = phase->MakeConX(BytesPerLong); 1.1637 + mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero); 1.1638 + count--; 1.1639 + while( count-- ) { 1.1640 + mem = phase->transform(mem); 1.1641 + adr = phase->transform(new (phase->C, 4) AddPNode(base,adr,off)); 1.1642 + mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero); 1.1643 + } 1.1644 + return mem; 1.1645 +} 1.1646 + 1.1647 +//----------------------------clear_memory------------------------------------- 1.1648 +// Generate code to initialize object storage to zero. 1.1649 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.1650 + intptr_t start_offset, 1.1651 + Node* end_offset, 1.1652 + PhaseGVN* phase) { 1.1653 + Compile* C = phase->C; 1.1654 + intptr_t offset = start_offset; 1.1655 + 1.1656 + int unit = BytesPerLong; 1.1657 + if ((offset % unit) != 0) { 1.1658 + Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(offset)); 1.1659 + adr = phase->transform(adr); 1.1660 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.1661 + mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT); 1.1662 + mem = phase->transform(mem); 1.1663 + offset += BytesPerInt; 1.1664 + } 1.1665 + assert((offset % unit) == 0, ""); 1.1666 + 1.1667 + // Initialize the remaining stuff, if any, with a ClearArray. 
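To make the offset arithmetic in ClearArrayNode::Ideal and clear_memory above concrete: zeroing a freshly allocated block decomposes into at most one leading 32-bit zero store (to reach 8-byte alignment) followed by whole double-words, which are either expanded into individual StoreL nodes (short arrays) or left to a bulk ClearArray. A standalone sketch of just that bookkeeping, with illustrative names rather than HotSpot code:

#include <cassert>
#include <cstdio>

// Plan the zeroing of [start,end) the way clear_memory splits it up:
// one optional StoreI to reach 8-byte alignment, then 8-byte units.
// 'end' is assumed 8-byte aligned to keep the sketch short.
static void plan_clear(long start, long end) {
  const long unit = 8;                    // BytesPerLong
  long off = start;
  int  leading_int_stores = 0;
  if (off % unit != 0) {                  // odd start offset
    leading_int_stores++;                 // one StoreI of zero
    off += 4;                             // BytesPerInt
  }
  assert(off % unit == 0 && (end - off) % unit == 0);
  long double_words = (end - off) / unit; // StoreL expansion or ClearArray
  std::printf("[%ld,%ld): %d StoreI + %ld double-words of zero\n",
              start, end, leading_int_stores, double_words);
}

int main() {
  plan_clear(12, 32);   // unaligned start, e.g. after a 12-byte header
  plan_clear(16, 32);   // aligned start: no leading StoreI needed
  return 0;
}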
1.1668 + return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase); 1.1669 +} 1.1670 + 1.1671 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.1672 + Node* start_offset, 1.1673 + Node* end_offset, 1.1674 + PhaseGVN* phase) { 1.1675 + Compile* C = phase->C; 1.1676 + int unit = BytesPerLong; 1.1677 + Node* zbase = start_offset; 1.1678 + Node* zend = end_offset; 1.1679 + 1.1680 + // Scale to the unit required by the CPU: 1.1681 + if (!Matcher::init_array_count_is_in_bytes) { 1.1682 + Node* shift = phase->intcon(exact_log2(unit)); 1.1683 + zbase = phase->transform( new(C,3) URShiftXNode(zbase, shift) ); 1.1684 + zend = phase->transform( new(C,3) URShiftXNode(zend, shift) ); 1.1685 + } 1.1686 + 1.1687 + Node* zsize = phase->transform( new(C,3) SubXNode(zend, zbase) ); 1.1688 + Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT); 1.1689 + 1.1690 + // Bulk clear double-words 1.1691 + Node* adr = phase->transform( new(C,4) AddPNode(dest, dest, start_offset) ); 1.1692 + mem = new (C, 4) ClearArrayNode(ctl, mem, zsize, adr); 1.1693 + return phase->transform(mem); 1.1694 +} 1.1695 + 1.1696 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.1697 + intptr_t start_offset, 1.1698 + intptr_t end_offset, 1.1699 + PhaseGVN* phase) { 1.1700 + Compile* C = phase->C; 1.1701 + assert((end_offset % BytesPerInt) == 0, "odd end offset"); 1.1702 + intptr_t done_offset = end_offset; 1.1703 + if ((done_offset % BytesPerLong) != 0) { 1.1704 + done_offset -= BytesPerInt; 1.1705 + } 1.1706 + if (done_offset > start_offset) { 1.1707 + mem = clear_memory(ctl, mem, dest, 1.1708 + start_offset, phase->MakeConX(done_offset), phase); 1.1709 + } 1.1710 + if (done_offset < end_offset) { // emit the final 32-bit store 1.1711 + Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(done_offset)); 1.1712 + adr = phase->transform(adr); 1.1713 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.1714 + mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT); 1.1715 + mem = phase->transform(mem); 1.1716 + done_offset += BytesPerInt; 1.1717 + } 1.1718 + assert(done_offset == end_offset, ""); 1.1719 + return mem; 1.1720 +} 1.1721 + 1.1722 +//============================================================================= 1.1723 +// Do we match on this edge? No memory edges 1.1724 +uint StrCompNode::match_edge(uint idx) const { 1.1725 + return idx == 5 || idx == 6; 1.1726 +} 1.1727 + 1.1728 +//------------------------------Ideal------------------------------------------ 1.1729 +// Return a node which is more "ideal" than the current node. Strip out 1.1730 +// control copies 1.1731 +Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.1732 + return remove_dead_region(phase, can_reshape) ? this : NULL; 1.1733 +} 1.1734 + 1.1735 + 1.1736 +//============================================================================= 1.1737 +MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) 1.1738 + : MultiNode(TypeFunc::Parms + (precedent == NULL? 
0: 1)), 1.1739 + _adr_type(C->get_adr_type(alias_idx)) 1.1740 +{ 1.1741 + init_class_id(Class_MemBar); 1.1742 + Node* top = C->top(); 1.1743 + init_req(TypeFunc::I_O,top); 1.1744 + init_req(TypeFunc::FramePtr,top); 1.1745 + init_req(TypeFunc::ReturnAdr,top); 1.1746 + if (precedent != NULL) 1.1747 + init_req(TypeFunc::Parms, precedent); 1.1748 +} 1.1749 + 1.1750 +//------------------------------cmp-------------------------------------------- 1.1751 +uint MemBarNode::hash() const { return NO_HASH; } 1.1752 +uint MemBarNode::cmp( const Node &n ) const { 1.1753 + return (&n == this); // Always fail except on self 1.1754 +} 1.1755 + 1.1756 +//------------------------------make------------------------------------------- 1.1757 +MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { 1.1758 + int len = Precedent + (pn == NULL? 0: 1); 1.1759 + switch (opcode) { 1.1760 + case Op_MemBarAcquire: return new(C, len) MemBarAcquireNode(C, atp, pn); 1.1761 + case Op_MemBarRelease: return new(C, len) MemBarReleaseNode(C, atp, pn); 1.1762 + case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn); 1.1763 + case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn); 1.1764 + case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn); 1.1765 + default: ShouldNotReachHere(); return NULL; 1.1766 + } 1.1767 +} 1.1768 + 1.1769 +//------------------------------Ideal------------------------------------------ 1.1770 +// Return a node which is more "ideal" than the current node. Strip out 1.1771 +// control copies 1.1772 +Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1773 + if (remove_dead_region(phase, can_reshape)) return this; 1.1774 + return NULL; 1.1775 +} 1.1776 + 1.1777 +//------------------------------Value------------------------------------------ 1.1778 +const Type *MemBarNode::Value( PhaseTransform *phase ) const { 1.1779 + if( !in(0) ) return Type::TOP; 1.1780 + if( phase->type(in(0)) == Type::TOP ) 1.1781 + return Type::TOP; 1.1782 + return TypeTuple::MEMBAR; 1.1783 +} 1.1784 + 1.1785 +//------------------------------match------------------------------------------ 1.1786 +// Construct projections for memory. 1.1787 +Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) { 1.1788 + switch (proj->_con) { 1.1789 + case TypeFunc::Control: 1.1790 + case TypeFunc::Memory: 1.1791 + return new (m->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj); 1.1792 + } 1.1793 + ShouldNotReachHere(); 1.1794 + return NULL; 1.1795 +} 1.1796 + 1.1797 +//===========================InitializeNode==================================== 1.1798 +// SUMMARY: 1.1799 +// This node acts as a memory barrier on raw memory, after some raw stores. 1.1800 +// The 'cooked' oop value feeds from the Initialize, not the Allocation. 1.1801 +// The Initialize can 'capture' suitably constrained stores as raw inits. 1.1802 +// It can coalesce related raw stores into larger units (called 'tiles'). 1.1803 +// It can avoid zeroing new storage for memory units which have raw inits. 1.1804 +// At macro-expansion, it is marked 'complete', and does not optimize further. 1.1805 +// 1.1806 +// EXAMPLE: 1.1807 +// The object 'new short[2]' occupies 16 bytes in a 32-bit machine. 1.1808 +// ctl = incoming control; mem* = incoming memory 1.1809 +// (Note: A star * on a memory edge denotes I/O and other standard edges.) 1.1810 +// First allocate uninitialized memory and fill in the header: 1.1811 +// alloc = (Allocate ctl mem* 16 #short[].klass ...) 
1.1812 +// ctl := alloc.Control; mem* := alloc.Memory* 1.1813 +// rawmem = alloc.Memory; rawoop = alloc.RawAddress 1.1814 +// Then initialize to zero the non-header parts of the raw memory block: 1.1815 +// init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress) 1.1816 +// ctl := init.Control; mem.SLICE(#short[*]) := init.Memory 1.1817 +// After the initialize node executes, the object is ready for service: 1.1818 +// oop := (CheckCastPP init.Control alloc.RawAddress #short[]) 1.1819 +// Suppose its body is immediately initialized as {1,2}: 1.1820 +// store1 = (StoreC init.Control init.Memory (+ oop 12) 1) 1.1821 +// store2 = (StoreC init.Control store1 (+ oop 14) 2) 1.1822 +// mem.SLICE(#short[*]) := store2 1.1823 +// 1.1824 +// DETAILS: 1.1825 +// An InitializeNode collects and isolates object initialization after 1.1826 +// an AllocateNode and before the next possible safepoint. As a 1.1827 +// memory barrier (MemBarNode), it keeps critical stores from drifting 1.1828 +// down past any safepoint or any publication of the allocation. 1.1829 +// Before this barrier, a newly-allocated object may have uninitialized bits. 1.1830 +// After this barrier, it may be treated as a real oop, and GC is allowed. 1.1831 +// 1.1832 +// The semantics of the InitializeNode include an implicit zeroing of 1.1833 +// the new object from object header to the end of the object. 1.1834 +// (The object header and end are determined by the AllocateNode.) 1.1835 +// 1.1836 +// Certain stores may be added as direct inputs to the InitializeNode. 1.1837 +// These stores must update raw memory, and they must be to addresses 1.1838 +// derived from the raw address produced by AllocateNode, and with 1.1839 +// a constant offset. They must be ordered by increasing offset. 1.1840 +// The first one is at in(RawStores), the last at in(req()-1). 1.1841 +// Unlike most memory operations, they are not linked in a chain, 1.1842 +// but are displayed in parallel as users of the rawmem output of 1.1843 +// the allocation. 1.1844 +// 1.1845 +// (See comments in InitializeNode::capture_store, which continue 1.1846 +// the example given above.) 1.1847 +// 1.1848 +// When the associated Allocate is macro-expanded, the InitializeNode 1.1849 +// may be rewritten to optimize collected stores. A ClearArrayNode 1.1850 +// may also be created at that point to represent any required zeroing. 1.1851 +// The InitializeNode is then marked 'complete', prohibiting further 1.1852 +// capturing of nearby memory operations. 1.1853 +// 1.1854 +// During macro-expansion, all captured initializations which store 1.1855 +// constant values of 32 bits or smaller are coalesced (if advantagous) 1.1856 +// into larger 'tiles' 32 or 64 bits. This allows an object to be 1.1857 +// initialized in fewer memory operations. Memory words which are 1.1858 +// covered by neither tiles nor non-constant stores are pre-zeroed 1.1859 +// by explicit stores of zero. (The code shape happens to do all 1.1860 +// zeroing first, then all other stores, with both sequences occurring 1.1861 +// in order of ascending offsets.) 1.1862 +// 1.1863 +// Alternatively, code may be inserted between an AllocateNode and its 1.1864 +// InitializeNode, to perform arbitrary initialization of the new object. 1.1865 +// E.g., the object copying intrinsics insert complex data transfers here. 1.1866 +// The initialization must then be marked as 'complete' disable the 1.1867 +// built-in zeroing semantics and the collection of initializing stores. 
1.1868 +// 1.1869 +// While an InitializeNode is incomplete, reads from the memory state 1.1870 +// produced by it are optimizable if they match the control edge and 1.1871 +// new oop address associated with the allocation/initialization. 1.1872 +// They return a stored value (if the offset matches) or else zero. 1.1873 +// A write to the memory state, if it matches control and address, 1.1874 +// and if it is to a constant offset, may be 'captured' by the 1.1875 +// InitializeNode. It is cloned as a raw memory operation and rewired 1.1876 +// inside the initialization, to the raw oop produced by the allocation. 1.1877 +// Operations on addresses which are provably distinct (e.g., to 1.1878 +// other AllocateNodes) are allowed to bypass the initialization. 1.1879 +// 1.1880 +// The effect of all this is to consolidate object initialization 1.1881 +// (both arrays and non-arrays, both piecewise and bulk) into a 1.1882 +// single location, where it can be optimized as a unit. 1.1883 +// 1.1884 +// Only stores with an offset less than TrackedInitializationLimit words 1.1885 +// will be considered for capture by an InitializeNode. This puts a 1.1886 +// reasonable limit on the complexity of optimized initializations. 1.1887 + 1.1888 +//---------------------------InitializeNode------------------------------------ 1.1889 +InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) 1.1890 + : _is_complete(false), 1.1891 + MemBarNode(C, adr_type, rawoop) 1.1892 +{ 1.1893 + init_class_id(Class_Initialize); 1.1894 + 1.1895 + assert(adr_type == Compile::AliasIdxRaw, "only valid atp"); 1.1896 + assert(in(RawAddress) == rawoop, "proper init"); 1.1897 + // Note: allocation() can be NULL, for secondary initialization barriers 1.1898 +} 1.1899 + 1.1900 +// Since this node is not matched, it will be processed by the 1.1901 +// register allocator. Declare that there are no constraints 1.1902 +// on the allocation of the RawAddress edge. 1.1903 +const RegMask &InitializeNode::in_RegMask(uint idx) const { 1.1904 + // This edge should be set to top, by the set_complete. But be conservative. 1.1905 + if (idx == InitializeNode::RawAddress) 1.1906 + return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]); 1.1907 + return RegMask::Empty; 1.1908 +} 1.1909 + 1.1910 +Node* InitializeNode::memory(uint alias_idx) { 1.1911 + Node* mem = in(Memory); 1.1912 + if (mem->is_MergeMem()) { 1.1913 + return mem->as_MergeMem()->memory_at(alias_idx); 1.1914 + } else { 1.1915 + // incoming raw memory is not split 1.1916 + return mem; 1.1917 + } 1.1918 +} 1.1919 + 1.1920 +bool InitializeNode::is_non_zero() { 1.1921 + if (is_complete()) return false; 1.1922 + remove_extra_zeroes(); 1.1923 + return (req() > RawStores); 1.1924 +} 1.1925 + 1.1926 +void InitializeNode::set_complete(PhaseGVN* phase) { 1.1927 + assert(!is_complete(), "caller responsibility"); 1.1928 + _is_complete = true; 1.1929 + 1.1930 + // After this node is complete, it contains a bunch of 1.1931 + // raw-memory initializations. There is no need for 1.1932 + // it to have anything to do with non-raw memory effects. 1.1933 + // Therefore, tell all non-raw users to re-optimize themselves, 1.1934 + // after skipping the memory effects of this initialization. 
1.1935 + PhaseIterGVN* igvn = phase->is_IterGVN(); 1.1936 + if (igvn) igvn->add_users_to_worklist(this); 1.1937 +} 1.1938 + 1.1939 +// convenience function 1.1940 +// return false if the init contains any stores already 1.1941 +bool AllocateNode::maybe_set_complete(PhaseGVN* phase) { 1.1942 + InitializeNode* init = initialization(); 1.1943 + if (init == NULL || init->is_complete()) return false; 1.1944 + init->remove_extra_zeroes(); 1.1945 + // for now, if this allocation has already collected any inits, bail: 1.1946 + if (init->is_non_zero()) return false; 1.1947 + init->set_complete(phase); 1.1948 + return true; 1.1949 +} 1.1950 + 1.1951 +void InitializeNode::remove_extra_zeroes() { 1.1952 + if (req() == RawStores) return; 1.1953 + Node* zmem = zero_memory(); 1.1954 + uint fill = RawStores; 1.1955 + for (uint i = fill; i < req(); i++) { 1.1956 + Node* n = in(i); 1.1957 + if (n->is_top() || n == zmem) continue; // skip 1.1958 + if (fill < i) set_req(fill, n); // compact 1.1959 + ++fill; 1.1960 + } 1.1961 + // delete any empty spaces created: 1.1962 + while (fill < req()) { 1.1963 + del_req(fill); 1.1964 + } 1.1965 +} 1.1966 + 1.1967 +// Helper for remembering which stores go with which offsets. 1.1968 +intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) { 1.1969 + if (!st->is_Store()) return -1; // can happen to dead code via subsume_node 1.1970 + intptr_t offset = -1; 1.1971 + Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address), 1.1972 + phase, offset); 1.1973 + if (base == NULL) return -1; // something is dead, 1.1974 + if (offset < 0) return -1; // dead, dead 1.1975 + return offset; 1.1976 +} 1.1977 + 1.1978 +// Helper for proving that an initialization expression is 1.1979 +// "simple enough" to be folded into an object initialization. 1.1980 +// Attempts to prove that a store's initial value 'n' can be captured 1.1981 +// within the initialization without creating a vicious cycle, such as: 1.1982 +// { Foo p = new Foo(); p.next = p; } 1.1983 +// True for constants and parameters and small combinations thereof. 1.1984 +bool InitializeNode::detect_init_independence(Node* n, 1.1985 + bool st_is_pinned, 1.1986 + int& count) { 1.1987 + if (n == NULL) return true; // (can this really happen?) 1.1988 + if (n->is_Proj()) n = n->in(0); 1.1989 + if (n == this) return false; // found a cycle 1.1990 + if (n->is_Con()) return true; 1.1991 + if (n->is_Start()) return true; // params, etc., are OK 1.1992 + if (n->is_Root()) return true; // even better 1.1993 + 1.1994 + Node* ctl = n->in(0); 1.1995 + if (ctl != NULL && !ctl->is_top()) { 1.1996 + if (ctl->is_Proj()) ctl = ctl->in(0); 1.1997 + if (ctl == this) return false; 1.1998 + 1.1999 + // If we already know that the enclosing memory op is pinned right after 1.2000 + // the init, then any control flow that the store has picked up 1.2001 + // must have preceded the init, or else be equal to the init. 1.2002 + // Even after loop optimizations (which might change control edges) 1.2003 + // a store is never pinned *before* the availability of its inputs. 1.2004 + if (!MemNode::detect_dominating_control(ctl, this->in(0))) 1.2005 + return false; // failed to prove a good control 1.2006 + 1.2007 + } 1.2008 + 1.2009 + // Check data edges for possible dependencies on 'this'. 
1.2010 + if ((count += 1) > 20) return false; // complexity limit 1.2011 + for (uint i = 1; i < n->req(); i++) { 1.2012 + Node* m = n->in(i); 1.2013 + if (m == NULL || m == n || m->is_top()) continue; 1.2014 + uint first_i = n->find_edge(m); 1.2015 + if (i != first_i) continue; // process duplicate edge just once 1.2016 + if (!detect_init_independence(m, st_is_pinned, count)) { 1.2017 + return false; 1.2018 + } 1.2019 + } 1.2020 + 1.2021 + return true; 1.2022 +} 1.2023 + 1.2024 +// Here are all the checks a Store must pass before it can be moved into 1.2025 +// an initialization. Returns zero if a check fails. 1.2026 +// On success, returns the (constant) offset to which the store applies, 1.2027 +// within the initialized memory. 1.2028 +intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase) { 1.2029 + const int FAIL = 0; 1.2030 + if (st->req() != MemNode::ValueIn + 1) 1.2031 + return FAIL; // an inscrutable StoreNode (card mark?) 1.2032 + Node* ctl = st->in(MemNode::Control); 1.2033 + if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this)) 1.2034 + return FAIL; // must be unconditional after the initialization 1.2035 + Node* mem = st->in(MemNode::Memory); 1.2036 + if (!(mem->is_Proj() && mem->in(0) == this)) 1.2037 + return FAIL; // must not be preceded by other stores 1.2038 + Node* adr = st->in(MemNode::Address); 1.2039 + intptr_t offset; 1.2040 + AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset); 1.2041 + if (alloc == NULL) 1.2042 + return FAIL; // inscrutable address 1.2043 + if (alloc != allocation()) 1.2044 + return FAIL; // wrong allocation! (store needs to float up) 1.2045 + Node* val = st->in(MemNode::ValueIn); 1.2046 + int complexity_count = 0; 1.2047 + if (!detect_init_independence(val, true, complexity_count)) 1.2048 + return FAIL; // stored value must be 'simple enough' 1.2049 + 1.2050 + return offset; // success 1.2051 +} 1.2052 + 1.2053 +// Find the captured store in(i) which corresponds to the range 1.2054 +// [start..start+size) in the initialized object. 1.2055 +// If there is one, return its index i. If there isn't, return the 1.2056 +// negative of the index where it should be inserted. 1.2057 +// Return 0 if the queried range overlaps an initialization boundary 1.2058 +// or if dead code is encountered. 1.2059 +// If size_in_bytes is zero, do not bother with overlap checks. 
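The index convention described above (positive index on an exact hit, negative insertion point when absent, zero on overlap or junk) can be modelled on a plain sorted list. A simplified standalone version, leaving out the header-size, TrackedInitializationLimit and dead-code checks that the real routine performs (illustration only, not HotSpot code):

#include <cassert>
#include <cstddef>
#include <vector>

struct Captured { long off; int size; };   // one already-captured raw store

// Returns i+1 for an exact match at position i, -(i+1) if the range should
// be inserted before position i, and 0 if it overlaps an existing store.
// (1-based values keep 0 free to mean failure, mirroring the convention
// above; the real code returns edge indices of the InitializeNode instead.)
static long insertion_point(const std::vector<Captured>& stores,
                            long start, int size) {
  for (size_t i = 0; i < stores.size(); ++i) {
    const Captured& st = stores[i];
    if (st.off > start) {
      if (size != 0 && st.off < start + size) return 0;  // next store overlaps
      return -(long)(i + 1);                             // insert before it
    }
    if (st.off == start) {
      if (size != 0 && st.size != size) return 0;        // mismatched size
      return (long)(i + 1);                              // exact match
    }
    if (size != 0 && start < st.off + st.size) return 0; // previous overlaps
  }
  return -(long)(stores.size() + 1);                     // append at the end
}

int main() {
  std::vector<Captured> stores = { {12, 4}, {16, 8} };
  assert(insertion_point(stores, 12, 4) ==  1);   // found at position 0
  assert(insertion_point(stores, 24, 4) == -3);   // belongs after both
  assert(insertion_point(stores, 14, 4) ==  0);   // overlaps its neighbours
  return 0;
}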
1.2060 +int InitializeNode::captured_store_insertion_point(intptr_t start, 1.2061 + int size_in_bytes, 1.2062 + PhaseTransform* phase) { 1.2063 + const int FAIL = 0, MAX_STORE = BytesPerLong; 1.2064 + 1.2065 + if (is_complete()) 1.2066 + return FAIL; // arraycopy got here first; punt 1.2067 + 1.2068 + assert(allocation() != NULL, "must be present"); 1.2069 + 1.2070 + // no negatives, no header fields: 1.2071 + if (start < (intptr_t) sizeof(oopDesc)) return FAIL; 1.2072 + if (start < (intptr_t) sizeof(arrayOopDesc) && 1.2073 + start < (intptr_t) allocation()->minimum_header_size()) return FAIL; 1.2074 + 1.2075 + // after a certain size, we bail out on tracking all the stores: 1.2076 + intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); 1.2077 + if (start >= ti_limit) return FAIL; 1.2078 + 1.2079 + for (uint i = InitializeNode::RawStores, limit = req(); ; ) { 1.2080 + if (i >= limit) return -(int)i; // not found; here is where to put it 1.2081 + 1.2082 + Node* st = in(i); 1.2083 + intptr_t st_off = get_store_offset(st, phase); 1.2084 + if (st_off < 0) { 1.2085 + if (st != zero_memory()) { 1.2086 + return FAIL; // bail out if there is dead garbage 1.2087 + } 1.2088 + } else if (st_off > start) { 1.2089 + // ...we are done, since stores are ordered 1.2090 + if (st_off < start + size_in_bytes) { 1.2091 + return FAIL; // the next store overlaps 1.2092 + } 1.2093 + return -(int)i; // not found; here is where to put it 1.2094 + } else if (st_off < start) { 1.2095 + if (size_in_bytes != 0 && 1.2096 + start < st_off + MAX_STORE && 1.2097 + start < st_off + st->as_Store()->memory_size()) { 1.2098 + return FAIL; // the previous store overlaps 1.2099 + } 1.2100 + } else { 1.2101 + if (size_in_bytes != 0 && 1.2102 + st->as_Store()->memory_size() != size_in_bytes) { 1.2103 + return FAIL; // mismatched store size 1.2104 + } 1.2105 + return i; 1.2106 + } 1.2107 + 1.2108 + ++i; 1.2109 + } 1.2110 +} 1.2111 + 1.2112 +// Look for a captured store which initializes at the offset 'start' 1.2113 +// with the given size. If there is no such store, and no other 1.2114 +// initialization interferes, then return zero_memory (the memory 1.2115 +// projection of the AllocateNode). 1.2116 +Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes, 1.2117 + PhaseTransform* phase) { 1.2118 + assert(stores_are_sane(phase), ""); 1.2119 + int i = captured_store_insertion_point(start, size_in_bytes, phase); 1.2120 + if (i == 0) { 1.2121 + return NULL; // something is dead 1.2122 + } else if (i < 0) { 1.2123 + return zero_memory(); // just primordial zero bits here 1.2124 + } else { 1.2125 + Node* st = in(i); // here is the store at this position 1.2126 + assert(get_store_offset(st->as_Store(), phase) == start, "sanity"); 1.2127 + return st; 1.2128 + } 1.2129 +} 1.2130 + 1.2131 +// Create, as a raw pointer, an address within my new object at 'offset'. 1.2132 +Node* InitializeNode::make_raw_address(intptr_t offset, 1.2133 + PhaseTransform* phase) { 1.2134 + Node* addr = in(RawAddress); 1.2135 + if (offset != 0) { 1.2136 + Compile* C = phase->C; 1.2137 + addr = phase->transform( new (C, 4) AddPNode(C->top(), addr, 1.2138 + phase->MakeConX(offset)) ); 1.2139 + } 1.2140 + return addr; 1.2141 +} 1.2142 + 1.2143 +// Clone the given store, converting it into a raw store 1.2144 +// initializing a field or element of my new object. 1.2145 +// Caller is responsible for retiring the original store, 1.2146 +// with subsume_node or the like. 
1.2147 +// 1.2148 +// From the example above InitializeNode::InitializeNode, 1.2149 +// here are the old stores to be captured: 1.2150 +// store1 = (StoreC init.Control init.Memory (+ oop 12) 1) 1.2151 +// store2 = (StoreC init.Control store1 (+ oop 14) 2) 1.2152 +// 1.2153 +// Here is the changed code; note the extra edges on init: 1.2154 +// alloc = (Allocate ...) 1.2155 +// rawoop = alloc.RawAddress 1.2156 +// rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1) 1.2157 +// rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2) 1.2158 +// init = (Initialize alloc.Control alloc.Memory rawoop 1.2159 +// rawstore1 rawstore2) 1.2160 +// 1.2161 +Node* InitializeNode::capture_store(StoreNode* st, intptr_t start, 1.2162 + PhaseTransform* phase) { 1.2163 + assert(stores_are_sane(phase), ""); 1.2164 + 1.2165 + if (start < 0) return NULL; 1.2166 + assert(can_capture_store(st, phase) == start, "sanity"); 1.2167 + 1.2168 + Compile* C = phase->C; 1.2169 + int size_in_bytes = st->memory_size(); 1.2170 + int i = captured_store_insertion_point(start, size_in_bytes, phase); 1.2171 + if (i == 0) return NULL; // bail out 1.2172 + Node* prev_mem = NULL; // raw memory for the captured store 1.2173 + if (i > 0) { 1.2174 + prev_mem = in(i); // there is a pre-existing store under this one 1.2175 + set_req(i, C->top()); // temporarily disconnect it 1.2176 + // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect. 1.2177 + } else { 1.2178 + i = -i; // no pre-existing store 1.2179 + prev_mem = zero_memory(); // a slice of the newly allocated object 1.2180 + if (i > InitializeNode::RawStores && in(i-1) == prev_mem) 1.2181 + set_req(--i, C->top()); // reuse this edge; it has been folded away 1.2182 + else 1.2183 + ins_req(i, C->top()); // build a new edge 1.2184 + } 1.2185 + Node* new_st = st->clone(); 1.2186 + new_st->set_req(MemNode::Control, in(Control)); 1.2187 + new_st->set_req(MemNode::Memory, prev_mem); 1.2188 + new_st->set_req(MemNode::Address, make_raw_address(start, phase)); 1.2189 + new_st = phase->transform(new_st); 1.2190 + 1.2191 + // At this point, new_st might have swallowed a pre-existing store 1.2192 + // at the same offset, or perhaps new_st might have disappeared, 1.2193 + // if it redundantly stored the same value (or zero to fresh memory). 1.2194 + 1.2195 + // In any case, wire it in: 1.2196 + set_req(i, new_st); 1.2197 + 1.2198 + // The caller may now kill the old guy. 
1.2199 + DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase)); 1.2200 + assert(check_st == new_st || check_st == NULL, "must be findable"); 1.2201 + assert(!is_complete(), ""); 1.2202 + return new_st; 1.2203 +} 1.2204 + 1.2205 +static bool store_constant(jlong* tiles, int num_tiles, 1.2206 + intptr_t st_off, int st_size, 1.2207 + jlong con) { 1.2208 + if ((st_off & (st_size-1)) != 0) 1.2209 + return false; // strange store offset (assume size==2**N) 1.2210 + address addr = (address)tiles + st_off; 1.2211 + assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob"); 1.2212 + switch (st_size) { 1.2213 + case sizeof(jbyte): *(jbyte*) addr = (jbyte) con; break; 1.2214 + case sizeof(jchar): *(jchar*) addr = (jchar) con; break; 1.2215 + case sizeof(jint): *(jint*) addr = (jint) con; break; 1.2216 + case sizeof(jlong): *(jlong*) addr = (jlong) con; break; 1.2217 + default: return false; // strange store size (detect size!=2**N here) 1.2218 + } 1.2219 + return true; // return success to caller 1.2220 +} 1.2221 + 1.2222 +// Coalesce subword constants into int constants and possibly 1.2223 +// into long constants. The goal, if the CPU permits, 1.2224 +// is to initialize the object with a small number of 64-bit tiles. 1.2225 +// Also, convert floating-point constants to bit patterns. 1.2226 +// Non-constants are not relevant to this pass. 1.2227 +// 1.2228 +// In terms of the running example on InitializeNode::InitializeNode 1.2229 +// and InitializeNode::capture_store, here is the transformation 1.2230 +// of rawstore1 and rawstore2 into rawstore12: 1.2231 +// alloc = (Allocate ...) 1.2232 +// rawoop = alloc.RawAddress 1.2233 +// tile12 = 0x00010002 1.2234 +// rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12) 1.2235 +// init = (Initialize alloc.Control alloc.Memory rawoop rawstore12) 1.2236 +// 1.2237 +void 1.2238 +InitializeNode::coalesce_subword_stores(intptr_t header_size, 1.2239 + Node* size_in_bytes, 1.2240 + PhaseGVN* phase) { 1.2241 + Compile* C = phase->C; 1.2242 + 1.2243 + assert(stores_are_sane(phase), ""); 1.2244 + // Note: After this pass, they are not completely sane, 1.2245 + // since there may be some overlaps. 1.2246 + 1.2247 + int old_subword = 0, old_long = 0, new_int = 0, new_long = 0; 1.2248 + 1.2249 + intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); 1.2250 + intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit); 1.2251 + size_limit = MIN2(size_limit, ti_limit); 1.2252 + size_limit = align_size_up(size_limit, BytesPerLong); 1.2253 + int num_tiles = size_limit / BytesPerLong; 1.2254 + 1.2255 + // allocate space for the tile map: 1.2256 + const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small 1.2257 + jlong tiles_buf[small_len]; 1.2258 + Node* nodes_buf[small_len]; 1.2259 + jlong inits_buf[small_len]; 1.2260 + jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0] 1.2261 + : NEW_RESOURCE_ARRAY(jlong, num_tiles)); 1.2262 + Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0] 1.2263 + : NEW_RESOURCE_ARRAY(Node*, num_tiles)); 1.2264 + jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0] 1.2265 + : NEW_RESOURCE_ARRAY(jlong, num_tiles)); 1.2266 + // tiles: exact bitwise model of all primitive constants 1.2267 + // nodes: last constant-storing node subsumed into the tiles model 1.2268 + // inits: which bytes (in each tile) are touched by any initializations 1.2269 + 1.2270 + //// Pass A: Fill in the tile model with any relevant stores. 
1.2271 + 1.2272 + Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles); 1.2273 + Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles); 1.2274 + Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles); 1.2275 + Node* zmem = zero_memory(); // initially zero memory state 1.2276 + for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { 1.2277 + Node* st = in(i); 1.2278 + intptr_t st_off = get_store_offset(st, phase); 1.2279 + 1.2280 + // Figure out the store's offset and constant value: 1.2281 + if (st_off < header_size) continue; //skip (ignore header) 1.2282 + if (st->in(MemNode::Memory) != zmem) continue; //skip (odd store chain) 1.2283 + int st_size = st->as_Store()->memory_size(); 1.2284 + if (st_off + st_size > size_limit) break; 1.2285 + 1.2286 + // Record which bytes are touched, whether by constant or not. 1.2287 + if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1)) 1.2288 + continue; // skip (strange store size) 1.2289 + 1.2290 + const Type* val = phase->type(st->in(MemNode::ValueIn)); 1.2291 + if (!val->singleton()) continue; //skip (non-con store) 1.2292 + BasicType type = val->basic_type(); 1.2293 + 1.2294 + jlong con = 0; 1.2295 + switch (type) { 1.2296 + case T_INT: con = val->is_int()->get_con(); break; 1.2297 + case T_LONG: con = val->is_long()->get_con(); break; 1.2298 + case T_FLOAT: con = jint_cast(val->getf()); break; 1.2299 + case T_DOUBLE: con = jlong_cast(val->getd()); break; 1.2300 + default: continue; //skip (odd store type) 1.2301 + } 1.2302 + 1.2303 + if (type == T_LONG && Matcher::isSimpleConstant64(con) && 1.2304 + st->Opcode() == Op_StoreL) { 1.2305 + continue; // This StoreL is already optimal. 1.2306 + } 1.2307 + 1.2308 + // Store down the constant. 1.2309 + store_constant(tiles, num_tiles, st_off, st_size, con); 1.2310 + 1.2311 + intptr_t j = st_off >> LogBytesPerLong; 1.2312 + 1.2313 + if (type == T_INT && st_size == BytesPerInt 1.2314 + && (st_off & BytesPerInt) == BytesPerInt) { 1.2315 + jlong lcon = tiles[j]; 1.2316 + if (!Matcher::isSimpleConstant64(lcon) && 1.2317 + st->Opcode() == Op_StoreI) { 1.2318 + // This StoreI is already optimal by itself. 1.2319 + jint* intcon = (jint*) &tiles[j]; 1.2320 + intcon[1] = 0; // undo the store_constant() 1.2321 + 1.2322 + // If the previous store is also optimal by itself, back up and 1.2323 + // undo the action of the previous loop iteration... if we can. 1.2324 + // But if we can't, just let the previous half take care of itself. 1.2325 + st = nodes[j]; 1.2326 + st_off -= BytesPerInt; 1.2327 + con = intcon[0]; 1.2328 + if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) { 1.2329 + assert(st_off >= header_size, "still ignoring header"); 1.2330 + assert(get_store_offset(st, phase) == st_off, "must be"); 1.2331 + assert(in(i-1) == zmem, "must be"); 1.2332 + DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn))); 1.2333 + assert(con == tcon->is_int()->get_con(), "must be"); 1.2334 + // Undo the effects of the previous loop trip, which swallowed st: 1.2335 + intcon[0] = 0; // undo store_constant() 1.2336 + set_req(i-1, st); // undo set_req(i, zmem) 1.2337 + nodes[j] = NULL; // undo nodes[j] = st 1.2338 + --old_subword; // undo ++old_subword 1.2339 + } 1.2340 + continue; // This StoreI is already optimal. 1.2341 + } 1.2342 + } 1.2343 + 1.2344 + // This store is not needed. 
1.2345 + set_req(i, zmem); 1.2346 + nodes[j] = st; // record for the moment 1.2347 + if (st_size < BytesPerLong) // something has changed 1.2348 + ++old_subword; // includes int/float, but who's counting... 1.2349 + else ++old_long; 1.2350 + } 1.2351 + 1.2352 + if ((old_subword + old_long) == 0) 1.2353 + return; // nothing more to do 1.2354 + 1.2355 + //// Pass B: Convert any non-zero tiles into optimal constant stores. 1.2356 + // Be sure to insert them before overlapping non-constant stores. 1.2357 + // (E.g., byte[] x = { 1,2,y,4 } => x[int 0] = 0x01020004, x[2]=y.) 1.2358 + for (int j = 0; j < num_tiles; j++) { 1.2359 + jlong con = tiles[j]; 1.2360 + jlong init = inits[j]; 1.2361 + if (con == 0) continue; 1.2362 + jint con0, con1; // split the constant, address-wise 1.2363 + jint init0, init1; // split the init map, address-wise 1.2364 + { union { jlong con; jint intcon[2]; } u; 1.2365 + u.con = con; 1.2366 + con0 = u.intcon[0]; 1.2367 + con1 = u.intcon[1]; 1.2368 + u.con = init; 1.2369 + init0 = u.intcon[0]; 1.2370 + init1 = u.intcon[1]; 1.2371 + } 1.2372 + 1.2373 + Node* old = nodes[j]; 1.2374 + assert(old != NULL, "need the prior store"); 1.2375 + intptr_t offset = (j * BytesPerLong); 1.2376 + 1.2377 + bool split = !Matcher::isSimpleConstant64(con); 1.2378 + 1.2379 + if (offset < header_size) { 1.2380 + assert(offset + BytesPerInt >= header_size, "second int counts"); 1.2381 + assert(*(jint*)&tiles[j] == 0, "junk in header"); 1.2382 + split = true; // only the second word counts 1.2383 + // Example: int a[] = { 42 ... } 1.2384 + } else if (con0 == 0 && init0 == -1) { 1.2385 + split = true; // first word is covered by full inits 1.2386 + // Example: int a[] = { ... foo(), 42 ... } 1.2387 + } else if (con1 == 0 && init1 == -1) { 1.2388 + split = true; // second word is covered by full inits 1.2389 + // Example: int a[] = { ... 42, foo() ... } 1.2390 + } 1.2391 + 1.2392 + // Here's a case where init0 is neither 0 nor -1: 1.2393 + // byte a[] = { ... 0,0,foo(),0, 0,0,0,42 ... } 1.2394 + // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF. 1.2395 + // In this case the tile is not split; it is (jlong)42. 1.2396 + // The big tile is stored down, and then the foo() value is inserted. 1.2397 + // (If there were foo(),foo() instead of foo(),0, init0 would be -1.) 1.2398 + 1.2399 + Node* ctl = old->in(MemNode::Control); 1.2400 + Node* adr = make_raw_address(offset, phase); 1.2401 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.2402 + 1.2403 + // One or two coalesced stores to plop down. 1.2404 + Node* st[2]; 1.2405 + intptr_t off[2]; 1.2406 + int nst = 0; 1.2407 + if (!split) { 1.2408 + ++new_long; 1.2409 + off[nst] = offset; 1.2410 + st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp, 1.2411 + phase->longcon(con), T_LONG); 1.2412 + } else { 1.2413 + // Omit either if it is a zero. 1.2414 + if (con0 != 0) { 1.2415 + ++new_int; 1.2416 + off[nst] = offset; 1.2417 + st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp, 1.2418 + phase->intcon(con0), T_INT); 1.2419 + } 1.2420 + if (con1 != 0) { 1.2421 + ++new_int; 1.2422 + offset += BytesPerInt; 1.2423 + adr = make_raw_address(offset, phase); 1.2424 + off[nst] = offset; 1.2425 + st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp, 1.2426 + phase->intcon(con1), T_INT); 1.2427 + } 1.2428 + } 1.2429 + 1.2430 + // Insert second store first, then the first before the second. 1.2431 + // Insert each one just before any overlapping non-constant stores. 
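For the running example quoted above (two StoreC constants at offsets 12 and 14 becoming one StoreI), the byte-level packing that store_constant performs can be reproduced standalone; the 32-bit constant that falls out depends only on host byte order, while the bytes written to the object are identical either way (illustration only, not HotSpot code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // One 64-bit tile of the model, covering object offsets [8,16).
  int64_t tile = 0;
  unsigned char* bytes = (unsigned char*)&tile;

  // store_constant-style writes: jchar 1 at object offset 12 and jchar 2 at
  // offset 14 land at byte offsets 4 and 6 within this tile.
  uint16_t c1 = 1, c2 = 2;
  std::memcpy(bytes + 4, &c1, sizeof(c1));
  std::memcpy(bytes + 6, &c2, sizeof(c2));

  // The coalesced StoreI constant is whatever 32-bit value now sits at
  // object offset 12 (tile byte offset 4).
  int32_t tile12;
  std::memcpy(&tile12, bytes + 4, sizeof(tile12));
  std::printf("coalesced StoreI constant = 0x%08x\n", (unsigned)tile12);
  // Prints 0x00010002 on a big-endian host and 0x00020001 on a little-endian
  // host; the memory image of the two original subword stores is the same.
  return 0;
}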
1.2432 + while (nst > 0) { 1.2433 + Node* st1 = st[--nst]; 1.2434 + C->copy_node_notes_to(st1, old); 1.2435 + st1 = phase->transform(st1); 1.2436 + offset = off[nst]; 1.2437 + assert(offset >= header_size, "do not smash header"); 1.2438 + int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase); 1.2439 + guarantee(ins_idx != 0, "must re-insert constant store"); 1.2440 + if (ins_idx < 0) ins_idx = -ins_idx; // never overlap 1.2441 + if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem) 1.2442 + set_req(--ins_idx, st1); 1.2443 + else 1.2444 + ins_req(ins_idx, st1); 1.2445 + } 1.2446 + } 1.2447 + 1.2448 + if (PrintCompilation && WizardMode) 1.2449 + tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long", 1.2450 + old_subword, old_long, new_int, new_long); 1.2451 + if (C->log() != NULL) 1.2452 + C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'", 1.2453 + old_subword, old_long, new_int, new_long); 1.2454 + 1.2455 + // Clean up any remaining occurrences of zmem: 1.2456 + remove_extra_zeroes(); 1.2457 +} 1.2458 + 1.2459 +// Explore forward from in(start) to find the first fully initialized 1.2460 +// word, and return its offset. Skip groups of subword stores which 1.2461 +// together initialize full words. If in(start) is itself part of a 1.2462 +// fully initialized word, return the offset of in(start). If there 1.2463 +// are no following full-word stores, or if something is fishy, return 1.2464 +// a negative value. 1.2465 +intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) { 1.2466 + int int_map = 0; 1.2467 + intptr_t int_map_off = 0; 1.2468 + const int FULL_MAP = right_n_bits(BytesPerInt); // the int_map we hope for 1.2469 + 1.2470 + for (uint i = start, limit = req(); i < limit; i++) { 1.2471 + Node* st = in(i); 1.2472 + 1.2473 + intptr_t st_off = get_store_offset(st, phase); 1.2474 + if (st_off < 0) break; // return conservative answer 1.2475 + 1.2476 + int st_size = st->as_Store()->memory_size(); 1.2477 + if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) { 1.2478 + return st_off; // we found a complete word init 1.2479 + } 1.2480 + 1.2481 + // update the map: 1.2482 + 1.2483 + intptr_t this_int_off = align_size_down(st_off, BytesPerInt); 1.2484 + if (this_int_off != int_map_off) { 1.2485 + // reset the map: 1.2486 + int_map = 0; 1.2487 + int_map_off = this_int_off; 1.2488 + } 1.2489 + 1.2490 + int subword_off = st_off - this_int_off; 1.2491 + int_map |= right_n_bits(st_size) << subword_off; 1.2492 + if ((int_map & FULL_MAP) == FULL_MAP) { 1.2493 + return this_int_off; // we found a complete word init 1.2494 + } 1.2495 + 1.2496 + // Did this store hit or cross the word boundary? 1.2497 + intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt); 1.2498 + if (next_int_off == this_int_off + BytesPerInt) { 1.2499 + // We passed the current int, without fully initializing it. 1.2500 + int_map_off = next_int_off; 1.2501 + int_map >>= BytesPerInt; 1.2502 + } else if (next_int_off > this_int_off + BytesPerInt) { 1.2503 + // We passed the current and next int. 1.2504 + return this_int_off + BytesPerInt; 1.2505 + } 1.2506 + } 1.2507 + 1.2508 + return -1; 1.2509 +} 1.2510 + 1.2511 + 1.2512 +// Called when the associated AllocateNode is expanded into CFG. 1.2513 +// At this point, we may perform additional optimizations. 1.2514 +// Linearize the stores by ascending offset, to make memory 1.2515 +// activity as coherent as possible. 
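The comment above describes the linearization that complete_stores performs when the Allocate is expanded: captured stores are visited in ascending offset, the gap in front of each store is zeroed, and whatever remains after the last store is zeroed at the end. A rough standalone model of that bookkeeping, leaving out the subword refinement (find_next_fullword_store), the ZeroTLAB case and the sparse-init bail-out (illustrative names only):

#include <cstdio>
#include <vector>

struct Init { long off; int size; };       // captured store: offset, byte size

// Visit captured stores in ascending offset, zero the gap in front of each,
// then zero out to the end of the object.
static void linearize(long header_size, long obj_size,
                      const std::vector<Init>& inits) {
  long zeroes_done = header_size;
  for (const Init& st : inits) {
    if (zeroes_done < st.off)
      std::printf("  zero  [%ld,%ld)\n", zeroes_done, st.off);
    std::printf("  store [%ld,%ld)\n", st.off, st.off + st.size);
    zeroes_done = st.off + st.size;
  }
  if (zeroes_done < obj_size)
    std::printf("  zero  [%ld,%ld)\n", zeroes_done, obj_size);
}

int main() {
  // A 32-byte object with a 12-byte header and fields written at 16 and 24.
  linearize(12, 32, { {16, 4}, {24, 8} });
  return 0;
}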
1.2516 +Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr, 1.2517 + intptr_t header_size, 1.2518 + Node* size_in_bytes, 1.2519 + PhaseGVN* phase) { 1.2520 + assert(!is_complete(), "not already complete"); 1.2521 + assert(stores_are_sane(phase), ""); 1.2522 + assert(allocation() != NULL, "must be present"); 1.2523 + 1.2524 + remove_extra_zeroes(); 1.2525 + 1.2526 + if (ReduceFieldZeroing || ReduceBulkZeroing) 1.2527 + // reduce instruction count for common initialization patterns 1.2528 + coalesce_subword_stores(header_size, size_in_bytes, phase); 1.2529 + 1.2530 + Node* zmem = zero_memory(); // initially zero memory state 1.2531 + Node* inits = zmem; // accumulating a linearized chain of inits 1.2532 + #ifdef ASSERT 1.2533 + intptr_t last_init_off = sizeof(oopDesc); // previous init offset 1.2534 + intptr_t last_init_end = sizeof(oopDesc); // previous init offset+size 1.2535 + intptr_t last_tile_end = sizeof(oopDesc); // previous tile offset+size 1.2536 + #endif 1.2537 + intptr_t zeroes_done = header_size; 1.2538 + 1.2539 + bool do_zeroing = true; // we might give up if inits are very sparse 1.2540 + int big_init_gaps = 0; // how many large gaps have we seen? 1.2541 + 1.2542 + if (ZeroTLAB) do_zeroing = false; 1.2543 + if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false; 1.2544 + 1.2545 + for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { 1.2546 + Node* st = in(i); 1.2547 + intptr_t st_off = get_store_offset(st, phase); 1.2548 + if (st_off < 0) 1.2549 + break; // unknown junk in the inits 1.2550 + if (st->in(MemNode::Memory) != zmem) 1.2551 + break; // complicated store chains somehow in list 1.2552 + 1.2553 + int st_size = st->as_Store()->memory_size(); 1.2554 + intptr_t next_init_off = st_off + st_size; 1.2555 + 1.2556 + if (do_zeroing && zeroes_done < next_init_off) { 1.2557 + // See if this store needs a zero before it or under it. 1.2558 + intptr_t zeroes_needed = st_off; 1.2559 + 1.2560 + if (st_size < BytesPerInt) { 1.2561 + // Look for subword stores which only partially initialize words. 1.2562 + // If we find some, we must lay down some word-level zeroes first, 1.2563 + // underneath the subword stores. 1.2564 + // 1.2565 + // Examples: 1.2566 + // byte[] a = { p,q,r,s } => a[0]=p,a[1]=q,a[2]=r,a[3]=s 1.2567 + // byte[] a = { x,y,0,0 } => a[0..3] = 0, a[0]=x,a[1]=y 1.2568 + // byte[] a = { 0,0,z,0 } => a[0..3] = 0, a[2]=z 1.2569 + // 1.2570 + // Note: coalesce_subword_stores may have already done this, 1.2571 + // if it was prompted by constant non-zero subword initializers. 1.2572 + // But this case can still arise with non-constant stores. 1.2573 + 1.2574 + intptr_t next_full_store = find_next_fullword_store(i, phase); 1.2575 + 1.2576 + // In the examples above: 1.2577 + // in(i) p q r s x y z 1.2578 + // st_off 12 13 14 15 12 13 14 1.2579 + // st_size 1 1 1 1 1 1 1 1.2580 + // next_full_s. 12 16 16 16 16 16 16 1.2581 + // z's_done 12 16 16 16 12 16 12 1.2582 + // z's_needed 12 16 16 16 16 16 16 1.2583 + // zsize 0 0 0 0 4 0 4 1.2584 + if (next_full_store < 0) { 1.2585 + // Conservative tack: Zero to end of current word. 1.2586 + zeroes_needed = align_size_up(zeroes_needed, BytesPerInt); 1.2587 + } else { 1.2588 + // Zero to beginning of next fully initialized word. 1.2589 + // Or, don't zero at all, if we are already in that word. 
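          // (In the x,y example from the table above, next_full_store is 16,
          // so zeroes_needed becomes 16: the word a[0..3] is cleared first and
          // the subword stores of x and y then land on top of the zeroes.)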
1.2590 + assert(next_full_store >= zeroes_needed, "must go forward"); 1.2591 + assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary"); 1.2592 + zeroes_needed = next_full_store; 1.2593 + } 1.2594 + } 1.2595 + 1.2596 + if (zeroes_needed > zeroes_done) { 1.2597 + intptr_t zsize = zeroes_needed - zeroes_done; 1.2598 + // Do some incremental zeroing on rawmem, in parallel with inits. 1.2599 + zeroes_done = align_size_down(zeroes_done, BytesPerInt); 1.2600 + rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr, 1.2601 + zeroes_done, zeroes_needed, 1.2602 + phase); 1.2603 + zeroes_done = zeroes_needed; 1.2604 + if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2) 1.2605 + do_zeroing = false; // leave the hole, next time 1.2606 + } 1.2607 + } 1.2608 + 1.2609 + // Collect the store and move on: 1.2610 + st->set_req(MemNode::Memory, inits); 1.2611 + inits = st; // put it on the linearized chain 1.2612 + set_req(i, zmem); // unhook from previous position 1.2613 + 1.2614 + if (zeroes_done == st_off) 1.2615 + zeroes_done = next_init_off; 1.2616 + 1.2617 + assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any"); 1.2618 + 1.2619 + #ifdef ASSERT 1.2620 + // Various order invariants. Weaker than stores_are_sane because 1.2621 + // a large constant tile can be filled in by smaller non-constant stores. 1.2622 + assert(st_off >= last_init_off, "inits do not reverse"); 1.2623 + last_init_off = st_off; 1.2624 + const Type* val = NULL; 1.2625 + if (st_size >= BytesPerInt && 1.2626 + (val = phase->type(st->in(MemNode::ValueIn)))->singleton() && 1.2627 + (int)val->basic_type() < (int)T_OBJECT) { 1.2628 + assert(st_off >= last_tile_end, "tiles do not overlap"); 1.2629 + assert(st_off >= last_init_end, "tiles do not overwrite inits"); 1.2630 + last_tile_end = MAX2(last_tile_end, next_init_off); 1.2631 + } else { 1.2632 + intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong); 1.2633 + assert(st_tile_end >= last_tile_end, "inits stay with tiles"); 1.2634 + assert(st_off >= last_init_end, "inits do not overlap"); 1.2635 + last_init_end = next_init_off; // it's a non-tile 1.2636 + } 1.2637 + #endif //ASSERT 1.2638 + } 1.2639 + 1.2640 + remove_extra_zeroes(); // clear out all the zmems left over 1.2641 + add_req(inits); 1.2642 + 1.2643 + if (!ZeroTLAB) { 1.2644 + // If anything remains to be zeroed, zero it all now. 
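    // Below: round zeroes_done down to a word boundary, let the last unused
    // pad word of an instance go unzeroed (per the check against the klass
    // layout), and emit one final clear from zeroes_done up to the allocation
    // size if anything is still uncovered.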
1.2645 + zeroes_done = align_size_down(zeroes_done, BytesPerInt); 1.2646 + // if it is the last unused 4 bytes of an instance, forget about it 1.2647 + intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint); 1.2648 + if (zeroes_done + BytesPerLong >= size_limit) { 1.2649 + assert(allocation() != NULL, ""); 1.2650 + Node* klass_node = allocation()->in(AllocateNode::KlassNode); 1.2651 + ciKlass* k = phase->type(klass_node)->is_klassptr()->klass(); 1.2652 + if (zeroes_done == k->layout_helper()) 1.2653 + zeroes_done = size_limit; 1.2654 + } 1.2655 + if (zeroes_done < size_limit) { 1.2656 + rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr, 1.2657 + zeroes_done, size_in_bytes, phase); 1.2658 + } 1.2659 + } 1.2660 + 1.2661 + set_complete(phase); 1.2662 + return rawmem; 1.2663 +} 1.2664 + 1.2665 + 1.2666 +#ifdef ASSERT 1.2667 +bool InitializeNode::stores_are_sane(PhaseTransform* phase) { 1.2668 + if (is_complete()) 1.2669 + return true; // stores could be anything at this point 1.2670 + intptr_t last_off = sizeof(oopDesc); 1.2671 + for (uint i = InitializeNode::RawStores; i < req(); i++) { 1.2672 + Node* st = in(i); 1.2673 + intptr_t st_off = get_store_offset(st, phase); 1.2674 + if (st_off < 0) continue; // ignore dead garbage 1.2675 + if (last_off > st_off) { 1.2676 + tty->print_cr("*** bad store offset at %d: %d > %d", i, last_off, st_off); 1.2677 + this->dump(2); 1.2678 + assert(false, "ascending store offsets"); 1.2679 + return false; 1.2680 + } 1.2681 + last_off = st_off + st->as_Store()->memory_size(); 1.2682 + } 1.2683 + return true; 1.2684 +} 1.2685 +#endif //ASSERT 1.2686 + 1.2687 + 1.2688 + 1.2689 + 1.2690 +//============================MergeMemNode===================================== 1.2691 +// 1.2692 +// SEMANTICS OF MEMORY MERGES: A MergeMem is a memory state assembled from several 1.2693 +// contributing store or call operations. Each contributor provides the memory 1.2694 +// state for a particular "alias type" (see Compile::alias_type). For example, 1.2695 +// if a MergeMem has an input X for alias category #6, then any memory reference 1.2696 +// to alias category #6 may use X as its memory state input, as an exact equivalent 1.2697 +// to using the MergeMem as a whole. 1.2698 +// Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p) 1.2699 +// 1.2700 +// (Here, the <N> notation gives the index of the relevant adr_type.) 1.2701 +// 1.2702 +// In one special case (and more cases in the future), alias categories overlap. 1.2703 +// The special alias category "Bot" (Compile::AliasIdxBot) includes all memory 1.2704 +// states. Therefore, if a MergeMem has only one contributing input W for Bot, 1.2705 +// it is exactly equivalent to that state W: 1.2706 +// MergeMem(<Bot>: W) <==> W 1.2707 +// 1.2708 +// Usually, the merge has more than one input. In that case, where inputs 1.2709 +// overlap (i.e., one is Bot), the narrower alias type determines the memory 1.2710 +// state for that type, and the wider alias type (Bot) fills in everywhere else: 1.2711 +// Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p) 1.2712 +// Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p) 1.2713 +// 1.2714 +// A merge can take a "wide" memory state as one of its narrow inputs. 1.2715 +// This simply means that the merge observes out only the relevant parts of 1.2716 +// the wide input. That is, wide memory states arriving at narrow merge inputs 1.2717 +// are implicitly "filtered" or "sliced" as necessary. (This is rare.) 
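// As an illustration only (a toy model, not VM code): the lookup rule above
// amounts to a distinguished wide "base" state plus a sparse map of narrow
// slices, where a load for alias index N sees the slice for N if present and
// the base otherwise.  With std::map<int, std::string>:
//
//   struct ToyMerge {
//     std::string base;                    // the <Bot> input: wide memory
//     std::map<int, std::string> narrow;   // per-alias-index narrow inputs
//     const std::string& slice_for(int idx) const {
//       auto it = narrow.find(idx);
//       return (it != narrow.end()) ? it->second : base;
//     }
//   };
//
//   ToyMerge m{ "W", { {6, "X"} } };       // MergeMem(<Bot>: W, <6>: X)
//   m.slice_for(6);                        // "X"
//   m.slice_for(5);                        // "W" -- falls back to <Bot>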
1.2718 +//
1.2719 +// These rules imply that MergeMem nodes may cascade (via their <Bot> links),
1.2720 +// and that memory slices "leak through":
1.2721 +// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y)
1.2722 +//
1.2723 +// But, in such a cascade, repeated memory slices can "block the leak":
1.2724 +// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y')
1.2725 +//
1.2726 +// In the last example, Y is not part of the combined memory state of the
1.2727 +// outermost MergeMem.  The system must, of course, prevent unschedulable
1.2728 +// memory states from arising, so you can be sure that the state Y is somehow
1.2729 +// a precursor to state Y'.
1.2730 +//
1.2731 +//
1.2732 +// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
1.2733 +// of each MergeMemNode are exactly the numerical alias indexes, including
1.2734 +// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw.  The functions
1.2735 +// Compile::alias_type (and kin) produce and manage these indexes.
1.2736 +//
1.2737 +// By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node.
1.2738 +// (Note that this provides quick access to the top node inside MergeMem methods,
1.2739 +// without the need to reach out via TLS to Compile::current.)
1.2740 +//
1.2741 +// As a consequence of what was just described, a MergeMem that represents a full
1.2742 +// memory state has an edge in(AliasIdxBot) which is a "wide" memory state,
1.2743 +// containing all alias categories.
1.2744 +//
1.2745 +// MergeMem nodes never (?) have control inputs, so in(0) is NULL.
1.2746 +//
1.2747 +// All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either
1.2748 +// a memory state for the alias type <N>, or else the top node, meaning that
1.2749 +// there is no particular input for that alias type.  Note that the length of
1.2750 +// a MergeMem is variable, and may be extended at any time to accommodate new
1.2751 +// memory states at larger alias indexes.  When merges grow, they are of course
1.2752 +// filled with "top" in the unused in() positions.
1.2753 +//
1.2754 +// This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable.
1.2755 +// (Top was chosen because it works smoothly with passes like GCM.)
1.2756 +//
1.2757 +// For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM.  (It is
1.2758 +// the type of random VM bits like TLS references.)  Since it is always the
1.2759 +// first non-Bot memory slice, some low-level loops use it to initialize an
1.2760 +// index variable:  for (i = AliasIdxRaw; i < req(); i++).
1.2761 +//
1.2762 +//
1.2763 +// ACCESSORS: There is a special accessor MergeMemNode::base_memory which returns
1.2764 +// the distinguished "wide" state.  The accessor MergeMemNode::memory_at(N) returns
1.2765 +// the memory state for alias type <N>, or (if there is no particular slice at <N>)
1.2766 +// the base memory.  To prevent bugs, memory_at does not accept <Top>
1.2767 +// or <Bot> indexes.  The iterator MergeMemStream provides robust iteration over
1.2768 +// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
1.2769 +//
1.2770 +// %%%% We may get rid of base_memory as a separate accessor at some point; it isn't
1.2771 +// really that different from the other memory inputs.  An abbreviation called
1.2772 +// "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy.
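// An illustrative usage fragment (not meant to compile in isolation) for the
// accessors described above, using only calls defined in this file or declared
// in memnode.hpp; 'C', 'mem', 'idx' and 'st' are assumed to come from the
// caller's context:
//
//   MergeMemNode* mm = MergeMemNode::make(C, mem); // copies slices if mem merges
//   Node* wide  = mm->base_memory();               // the <Bot> input
//   Node* slice = mm->memory_at(idx);              // narrow slice, or the base
//   mm->set_memory_at(idx, st);                    // install a new narrow slice
//   mm->set_memory_at(idx, wide);                  // collapses back to the default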
1.2773 +//
1.2774 +//
1.2775 +// PARTIAL MEMORY STATES: During optimization, MergeMem nodes may arise that represent
1.2776 +// partial memory states.  When a Phi splits through a MergeMem, the copy of the Phi
1.2777 +// that "emerges through" the base memory will be marked as excluding the alias types
1.2778 +// of the other (narrow-memory) copies which "emerged through" the narrow edges:
1.2779 +//
1.2780 +// Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y))
1.2781 +// ==Ideal=>  MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y))
1.2782 +//
1.2783 +// This strange "subtraction" effect is necessary to ensure IGVN convergence.
1.2784 +// (It is currently unimplemented.)  As you can see, the resulting merge is
1.2785 +// actually a disjoint union of memory states, rather than an overlay.
1.2786 +//
1.2787 +
1.2788 +//------------------------------MergeMemNode-----------------------------------
1.2789 +Node* MergeMemNode::make_empty_memory() {
1.2790 +  Node* empty_memory = (Node*) Compile::current()->top();
1.2791 +  assert(empty_memory->is_top(), "correct sentinel identity");
1.2792 +  return empty_memory;
1.2793 +}
1.2794 +
1.2795 +MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {
1.2796 +  init_class_id(Class_MergeMem);
1.2797 +  // all inputs are nullified in Node::Node(int)
1.2798 +  // set_input(0, NULL);  // no control input
1.2799 +
1.2800 +  // Initialize the edges uniformly to top, for starters.
1.2801 +  Node* empty_mem = make_empty_memory();
1.2802 +  for (uint i = Compile::AliasIdxTop; i < req(); i++) {
1.2803 +    init_req(i,empty_mem);
1.2804 +  }
1.2805 +  assert(empty_memory() == empty_mem, "");
1.2806 +
1.2807 +  if( new_base != NULL && new_base->is_MergeMem() ) {
1.2808 +    MergeMemNode* mdef = new_base->as_MergeMem();
1.2809 +    assert(mdef->empty_memory() == empty_mem, "consistent sentinels");
1.2810 +    for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) {
1.2811 +      mms.set_memory(mms.memory2());
1.2812 +    }
1.2813 +    assert(base_memory() == mdef->base_memory(), "");
1.2814 +  } else {
1.2815 +    set_base_memory(new_base);
1.2816 +  }
1.2817 +}
1.2818 +
1.2819 +// Make a new, untransformed MergeMem with the same base as 'mem'.
1.2820 +// If mem is itself a MergeMem, populate the result with the same edges.
1.2821 +MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
1.2822 +  return new(C, 1+Compile::AliasIdxRaw) MergeMemNode(mem);
1.2823 +}
1.2824 +
1.2825 +//------------------------------cmp--------------------------------------------
1.2826 +uint MergeMemNode::hash() const { return NO_HASH; }
1.2827 +uint MergeMemNode::cmp( const Node &n ) const {
1.2828 +  return (&n == this);          // Always fail except on self
1.2829 +}
1.2830 +
1.2831 +//------------------------------Identity---------------------------------------
1.2832 +Node* MergeMemNode::Identity(PhaseTransform *phase) {
1.2833 +  // Identity if this merge point does not record any interesting memory
1.2834 +  // disambiguations.
1.2835 +  Node* base_mem = base_memory();
1.2836 +  Node* empty_mem = empty_memory();
1.2837 +  if (base_mem != empty_mem) {  // Memory path is not dead?
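    // Scan the narrow slices: if every one is either empty (top) or identical
    // to the base, this merge adds no disambiguation and can collapse to base_mem.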
1.2838 +    for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
1.2839 +      Node* mem = in(i);
1.2840 +      if (mem != empty_mem && mem != base_mem) {
1.2841 +        return this;            // Many memory splits; no change
1.2842 +      }
1.2843 +    }
1.2844 +  }
1.2845 +  return base_mem;              // No memory splits; ID on the one true input
1.2846 +}
1.2847 +
1.2848 +//------------------------------Ideal------------------------------------------
1.2849 +// This method is invoked recursively on chains of MergeMem nodes
1.2850 +Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1.2851 +  // Remove chain'd MergeMems
1.2852 +  //
1.2853 +  // This is delicate, because each "in(i)" (i >= Raw) is interpreted
1.2854 +  // relative to the "in(Bot)".  Since we are patching both at the same time,
1.2855 +  // we have to be careful to read each "in(i)" relative to the old "in(Bot)",
1.2856 +  // but rewrite each "in(i)" relative to the new "in(Bot)".
1.2857 +  Node *progress = NULL;
1.2858 +
1.2859 +
1.2860 +  Node* old_base = base_memory();
1.2861 +  Node* empty_mem = empty_memory();
1.2862 +  if (old_base == empty_mem)
1.2863 +    return NULL; // Dead memory path.
1.2864 +
1.2865 +  MergeMemNode* old_mbase;
1.2866 +  if (old_base != NULL && old_base->is_MergeMem())
1.2867 +    old_mbase = old_base->as_MergeMem();
1.2868 +  else
1.2869 +    old_mbase = NULL;
1.2870 +  Node* new_base = old_base;
1.2871 +
1.2872 +  // simplify stacked MergeMems in base memory
1.2873 +  if (old_mbase)  new_base = old_mbase->base_memory();
1.2874 +
1.2875 +  // the base memory might contribute new slices beyond my req()
1.2876 +  if (old_mbase)  grow_to_match(old_mbase);
1.2877 +
1.2878 +  // Look carefully at the base node if it is a phi.
1.2879 +  PhiNode* phi_base;
1.2880 +  if (new_base != NULL && new_base->is_Phi())
1.2881 +    phi_base = new_base->as_Phi();
1.2882 +  else
1.2883 +    phi_base = NULL;
1.2884 +
1.2885 +  Node*    phi_reg = NULL;
1.2886 +  uint     phi_len = (uint)-1;
1.2887 +  if (phi_base != NULL && !phi_base->is_copy()) {
1.2888 +    // do not examine phi if degraded to a copy
1.2889 +    phi_reg = phi_base->region();
1.2890 +    phi_len = phi_base->req();
1.2891 +    // see if the phi is unfinished
1.2892 +    for (uint i = 1; i < phi_len; i++) {
1.2893 +      if (phi_base->in(i) == NULL) {
1.2894 +        // incomplete phi; do not look at it yet!
1.2895 +        phi_reg = NULL;
1.2896 +        phi_len = (uint)-1;
1.2897 +        break;
1.2898 +      }
1.2899 +    }
1.2900 +  }
1.2901 +
1.2902 +  // Note:  We do not call verify_sparse on entry, because inputs
1.2903 +  // can normalize to the base_memory via subsume_node or similar
1.2904 +  // mechanisms.  This method repairs that damage.
1.2905 +
1.2906 +  assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels");
1.2907 +
1.2908 +  // Look at each slice.
1.2909 +  for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
1.2910 +    Node* old_in = in(i);
1.2911 +    // calculate the old memory value
1.2912 +    Node* old_mem = old_in;
1.2913 +    if (old_mem == empty_mem)  old_mem = old_base;
1.2914 +    assert(old_mem == memory_at(i), "");
1.2915 +
1.2916 +    // maybe update (reslice) the old memory value
1.2917 +
1.2918 +    // simplify stacked MergeMems
1.2919 +    Node* new_mem = old_mem;
1.2920 +    MergeMemNode* old_mmem;
1.2921 +    if (old_mem != NULL && old_mem->is_MergeMem())
1.2922 +      old_mmem = old_mem->as_MergeMem();
1.2923 +    else
1.2924 +      old_mmem = NULL;
1.2925 +    if (old_mmem == this) {
1.2926 +      // This can happen if loops break up and safepoints disappear.
1.2927 +      // A merge of BotPtr (default) with a RawPtr memory derived from a
1.2928 +      // safepoint can be rewritten to a merge of the same BotPtr with
1.2929 +      // the BotPtr phi coming into the loop.  If that phi disappears
1.2930 +      // also, we can end up with a self-loop of the mergemem.
1.2931 +      // In general, if loops degenerate and memory effects disappear,
1.2932 +      // a mergemem can be left looking at itself.  This simply means
1.2933 +      // that the mergemem's default should be used, since there is
1.2934 +      // no longer any apparent effect on this slice.
1.2935 +      // Note: If a memory slice is a MergeMem cycle, it is unreachable
1.2936 +      //       from start.  Update the input to TOP.
1.2937 +      new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base;
1.2938 +    }
1.2939 +    else if (old_mmem != NULL) {
1.2940 +      new_mem = old_mmem->memory_at(i);
1.2941 +    }
1.2942 +    // else preceding memory was not a MergeMem
1.2943 +
1.2944 +    // replace equivalent phis (unfortunately, they do not GVN together)
1.2945 +    if (new_mem != NULL && new_mem != new_base &&
1.2946 +        new_mem->req() == phi_len && new_mem->in(0) == phi_reg) {
1.2947 +      if (new_mem->is_Phi()) {
1.2948 +        PhiNode* phi_mem = new_mem->as_Phi();
1.2949 +        for (uint i = 1; i < phi_len; i++) {
1.2950 +          if (phi_base->in(i) != phi_mem->in(i)) {
1.2951 +            phi_mem = NULL;
1.2952 +            break;
1.2953 +          }
1.2954 +        }
1.2955 +        if (phi_mem != NULL) {
1.2956 +          // equivalent phi nodes; revert to the def
1.2957 +          new_mem = new_base;
1.2958 +        }
1.2959 +      }
1.2960 +    }
1.2961 +
1.2962 +    // maybe store down a new value
1.2963 +    Node* new_in = new_mem;
1.2964 +    if (new_in == new_base)  new_in = empty_mem;
1.2965 +
1.2966 +    if (new_in != old_in) {
1.2967 +      // Warning:  Do not combine this "if" with the previous "if".
1.2968 +      // A memory slice might have to be rewritten even if it is semantically
1.2969 +      // unchanged, if the base_memory value has changed.
1.2970 +      set_req(i, new_in);
1.2971 +      progress = this;          // Report progress
1.2972 +    }
1.2973 +  }
1.2974 +
1.2975 +  if (new_base != old_base) {
1.2976 +    set_req(Compile::AliasIdxBot, new_base);
1.2977 +    // Don't use set_base_memory(new_base), because we need to update du.
1.2978 +    assert(base_memory() == new_base, "");
1.2979 +    progress = this;
1.2980 +  }
1.2981 +
1.2982 +  if( base_memory() == this ) {
1.2983 +    // a self cycle indicates this memory path is dead
1.2984 +    set_req(Compile::AliasIdxBot, empty_mem);
1.2985 +  }
1.2986 +
1.2987 +  // Resolve external cycles by calling Ideal on a MergeMem base_memory
1.2988 +  // Recursion must occur after the self cycle check above
1.2989 +  if( base_memory()->is_MergeMem() ) {
1.2990 +    MergeMemNode *new_mbase = base_memory()->as_MergeMem();
1.2991 +    Node *m = phase->transform(new_mbase);  // Rollup any cycles
1.2992 +    if( m != NULL && (m->is_top() ||
1.2993 +        m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem) ) {
1.2994 +      // propagate rollup of dead cycle to self
1.2995 +      set_req(Compile::AliasIdxBot, empty_mem);
1.2996 +    }
1.2997 +  }
1.2998 +
1.2999 +  if( base_memory() == empty_mem ) {
1.3000 +    progress = this;
1.3001 +    // Cut inputs during Parse phase only.
1.3002 +    // During Optimize phase a dead MergeMem node will be subsumed by Top.
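    // (can_reshape is false when Ideal is called during parsing/GVN and true
    // during IterGVN, so the explicit cut below happens only before IGVN is
    // available to subsume the dead node.)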
1.3003 + if( !can_reshape ) { 1.3004 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.3005 + if( in(i) != empty_mem ) { set_req(i, empty_mem); } 1.3006 + } 1.3007 + } 1.3008 + } 1.3009 + 1.3010 + if( !progress && base_memory()->is_Phi() && can_reshape ) { 1.3011 + // Check if PhiNode::Ideal's "Split phis through memory merges" 1.3012 + // transform should be attempted. Look for this->phi->this cycle. 1.3013 + uint merge_width = req(); 1.3014 + if (merge_width > Compile::AliasIdxRaw) { 1.3015 + PhiNode* phi = base_memory()->as_Phi(); 1.3016 + for( uint i = 1; i < phi->req(); ++i ) {// For all paths in 1.3017 + if (phi->in(i) == this) { 1.3018 + phase->is_IterGVN()->_worklist.push(phi); 1.3019 + break; 1.3020 + } 1.3021 + } 1.3022 + } 1.3023 + } 1.3024 + 1.3025 + assert(verify_sparse(), "please, no dups of base"); 1.3026 + return progress; 1.3027 +} 1.3028 + 1.3029 +//-------------------------set_base_memory------------------------------------- 1.3030 +void MergeMemNode::set_base_memory(Node *new_base) { 1.3031 + Node* empty_mem = empty_memory(); 1.3032 + set_req(Compile::AliasIdxBot, new_base); 1.3033 + assert(memory_at(req()) == new_base, "must set default memory"); 1.3034 + // Clear out other occurrences of new_base: 1.3035 + if (new_base != empty_mem) { 1.3036 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.3037 + if (in(i) == new_base) set_req(i, empty_mem); 1.3038 + } 1.3039 + } 1.3040 +} 1.3041 + 1.3042 +//------------------------------out_RegMask------------------------------------ 1.3043 +const RegMask &MergeMemNode::out_RegMask() const { 1.3044 + return RegMask::Empty; 1.3045 +} 1.3046 + 1.3047 +//------------------------------dump_spec-------------------------------------- 1.3048 +#ifndef PRODUCT 1.3049 +void MergeMemNode::dump_spec(outputStream *st) const { 1.3050 + st->print(" {"); 1.3051 + Node* base_mem = base_memory(); 1.3052 + for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) { 1.3053 + Node* mem = memory_at(i); 1.3054 + if (mem == base_mem) { st->print(" -"); continue; } 1.3055 + st->print( " N%d:", mem->_idx ); 1.3056 + Compile::current()->get_adr_type(i)->dump_on(st); 1.3057 + } 1.3058 + st->print(" }"); 1.3059 +} 1.3060 +#endif // !PRODUCT 1.3061 + 1.3062 + 1.3063 +#ifdef ASSERT 1.3064 +static bool might_be_same(Node* a, Node* b) { 1.3065 + if (a == b) return true; 1.3066 + if (!(a->is_Phi() || b->is_Phi())) return false; 1.3067 + // phis shift around during optimization 1.3068 + return true; // pretty stupid... 
1.3069 +} 1.3070 + 1.3071 +// verify a narrow slice (either incoming or outgoing) 1.3072 +static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) { 1.3073 + if (!VerifyAliases) return; // don't bother to verify unless requested 1.3074 + if (is_error_reported()) return; // muzzle asserts when debugging an error 1.3075 + if (Node::in_dump()) return; // muzzle asserts when printing 1.3076 + assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel"); 1.3077 + assert(n != NULL, ""); 1.3078 + // Elide intervening MergeMem's 1.3079 + while (n->is_MergeMem()) { 1.3080 + n = n->as_MergeMem()->memory_at(alias_idx); 1.3081 + } 1.3082 + Compile* C = Compile::current(); 1.3083 + const TypePtr* n_adr_type = n->adr_type(); 1.3084 + if (n == m->empty_memory()) { 1.3085 + // Implicit copy of base_memory() 1.3086 + } else if (n_adr_type != TypePtr::BOTTOM) { 1.3087 + assert(n_adr_type != NULL, "new memory must have a well-defined adr_type"); 1.3088 + assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice"); 1.3089 + } else { 1.3090 + // A few places like make_runtime_call "know" that VM calls are narrow, 1.3091 + // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM. 1.3092 + bool expected_wide_mem = false; 1.3093 + if (n == m->base_memory()) { 1.3094 + expected_wide_mem = true; 1.3095 + } else if (alias_idx == Compile::AliasIdxRaw || 1.3096 + n == m->memory_at(Compile::AliasIdxRaw)) { 1.3097 + expected_wide_mem = true; 1.3098 + } else if (!C->alias_type(alias_idx)->is_rewritable()) { 1.3099 + // memory can "leak through" calls on channels that 1.3100 + // are write-once. Allow this also. 1.3101 + expected_wide_mem = true; 1.3102 + } 1.3103 + assert(expected_wide_mem, "expected narrow slice replacement"); 1.3104 + } 1.3105 +} 1.3106 +#else // !ASSERT 1.3107 +#define verify_memory_slice(m,i,n) (0) // PRODUCT version is no-op 1.3108 +#endif 1.3109 + 1.3110 + 1.3111 +//-----------------------------memory_at--------------------------------------- 1.3112 +Node* MergeMemNode::memory_at(uint alias_idx) const { 1.3113 + assert(alias_idx >= Compile::AliasIdxRaw || 1.3114 + alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0, 1.3115 + "must avoid base_memory and AliasIdxTop"); 1.3116 + 1.3117 + // Otherwise, it is a narrow slice. 1.3118 + Node* n = alias_idx < req() ? in(alias_idx) : empty_memory(); 1.3119 + Compile *C = Compile::current(); 1.3120 + if (is_empty_memory(n)) { 1.3121 + // the array is sparse; empty slots are the "top" node 1.3122 + n = base_memory(); 1.3123 + assert(Node::in_dump() 1.3124 + || n == NULL || n->bottom_type() == Type::TOP 1.3125 + || n->adr_type() == TypePtr::BOTTOM 1.3126 + || n->adr_type() == TypeRawPtr::BOTTOM 1.3127 + || Compile::current()->AliasLevel() == 0, 1.3128 + "must be a wide memory"); 1.3129 + // AliasLevel == 0 if we are organizing the memory states manually. 1.3130 + // See verify_memory_slice for comments on TypeRawPtr::BOTTOM. 1.3131 + } else { 1.3132 + // make sure the stored slice is sane 1.3133 + #ifdef ASSERT 1.3134 + if (is_error_reported() || Node::in_dump()) { 1.3135 + } else if (might_be_same(n, base_memory())) { 1.3136 + // Give it a pass: It is a mostly harmless repetition of the base. 1.3137 + // This can arise normally from node subsumption during optimization. 
1.3138 + } else { 1.3139 + verify_memory_slice(this, alias_idx, n); 1.3140 + } 1.3141 + #endif 1.3142 + } 1.3143 + return n; 1.3144 +} 1.3145 + 1.3146 +//---------------------------set_memory_at------------------------------------- 1.3147 +void MergeMemNode::set_memory_at(uint alias_idx, Node *n) { 1.3148 + verify_memory_slice(this, alias_idx, n); 1.3149 + Node* empty_mem = empty_memory(); 1.3150 + if (n == base_memory()) n = empty_mem; // collapse default 1.3151 + uint need_req = alias_idx+1; 1.3152 + if (req() < need_req) { 1.3153 + if (n == empty_mem) return; // already the default, so do not grow me 1.3154 + // grow the sparse array 1.3155 + do { 1.3156 + add_req(empty_mem); 1.3157 + } while (req() < need_req); 1.3158 + } 1.3159 + set_req( alias_idx, n ); 1.3160 +} 1.3161 + 1.3162 + 1.3163 + 1.3164 +//--------------------------iteration_setup------------------------------------ 1.3165 +void MergeMemNode::iteration_setup(const MergeMemNode* other) { 1.3166 + if (other != NULL) { 1.3167 + grow_to_match(other); 1.3168 + // invariant: the finite support of mm2 is within mm->req() 1.3169 + #ifdef ASSERT 1.3170 + for (uint i = req(); i < other->req(); i++) { 1.3171 + assert(other->is_empty_memory(other->in(i)), "slice left uncovered"); 1.3172 + } 1.3173 + #endif 1.3174 + } 1.3175 + // Replace spurious copies of base_memory by top. 1.3176 + Node* base_mem = base_memory(); 1.3177 + if (base_mem != NULL && !base_mem->is_top()) { 1.3178 + for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) { 1.3179 + if (in(i) == base_mem) 1.3180 + set_req(i, empty_memory()); 1.3181 + } 1.3182 + } 1.3183 +} 1.3184 + 1.3185 +//---------------------------grow_to_match------------------------------------- 1.3186 +void MergeMemNode::grow_to_match(const MergeMemNode* other) { 1.3187 + Node* empty_mem = empty_memory(); 1.3188 + assert(other->is_empty_memory(empty_mem), "consistent sentinels"); 1.3189 + // look for the finite support of the other memory 1.3190 + for (uint i = other->req(); --i >= req(); ) { 1.3191 + if (other->in(i) != empty_mem) { 1.3192 + uint new_len = i+1; 1.3193 + while (req() < new_len) add_req(empty_mem); 1.3194 + break; 1.3195 + } 1.3196 + } 1.3197 +} 1.3198 + 1.3199 +//---------------------------verify_sparse------------------------------------- 1.3200 +#ifndef PRODUCT 1.3201 +bool MergeMemNode::verify_sparse() const { 1.3202 + assert(is_empty_memory(make_empty_memory()), "sane sentinel"); 1.3203 + Node* base_mem = base_memory(); 1.3204 + // The following can happen in degenerate cases, since empty==top. 1.3205 + if (is_empty_memory(base_mem)) return true; 1.3206 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.3207 + assert(in(i) != NULL, "sane slice"); 1.3208 + if (in(i) == base_mem) return false; // should have been the sentinel value! 1.3209 + } 1.3210 + return true; 1.3211 +} 1.3212 + 1.3213 +bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) { 1.3214 + Node* n; 1.3215 + n = mm->in(idx); 1.3216 + if (mem == n) return true; // might be empty_memory() 1.3217 + n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx); 1.3218 + if (mem == n) return true; 1.3219 + while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) { 1.3220 + if (mem == n) return true; 1.3221 + if (n == NULL) break; 1.3222 + } 1.3223 + return false; 1.3224 +} 1.3225 +#endif // !PRODUCT
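// An illustrative fragment (not meant to compile in isolation) of the lockstep
// iteration that iteration_setup and grow_to_match support above; it mirrors
// the MergeMemStream use in the MergeMemNode constructor.  The stream walks
// the two merges together over the alias indexes where either one records a
// non-top slice, and here copies the second merge's view into the first:
//
//   MergeMemNode* dst = ...;  MergeMemNode* src = ...;  // assumed from context
//   for (MergeMemStream mms(dst, src); mms.next_non_empty2(); ) {
//     mms.set_memory(mms.memory2());  // dst's slice := src's view at this index
//   }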