--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/opto/memnode.cpp	Wed Apr 27 01:25:04 2016 +0800
@@ -0,0 +1,4576 @@
/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "compiler/compileLog.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/objArrayKlass.hpp"
#include "opto/addnode.hpp"
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/connode.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/matcher.hpp"
#include "opto/memnode.hpp"
#include "opto/mulnode.hpp"
#include "opto/phaseX.hpp"
#include "opto/regmask.hpp"

// Portions of code courtesy of Clifford Click

// Optimization - Graph Style

static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st);

//=============================================================================
uint MemNode::size_of() const { return sizeof(*this); }

const TypePtr *MemNode::adr_type() const {
  Node* adr = in(Address);
  const TypePtr* cross_check = NULL;
  DEBUG_ONLY(cross_check = _adr_type);
  return calculate_adr_type(adr->bottom_type(), cross_check);
}

#ifndef PRODUCT
void MemNode::dump_spec(outputStream *st) const {
  if (in(Address) == NULL)  return; // node is dead
#ifndef ASSERT
  // fake the missing field
  const TypePtr* _adr_type = NULL;
  if (in(Address) != NULL)
    _adr_type = in(Address)->bottom_type()->isa_ptr();
#endif
  dump_adr_type(this, _adr_type, st);

  Compile* C = Compile::current();
  if( C->alias_type(_adr_type)->is_volatile() )
    st->print(" Volatile!");
}

void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
  st->print(" @");
  if (adr_type == NULL) {
    st->print("NULL");
  } else {
    adr_type->dump_on(st);
    Compile* C = Compile::current();
    Compile::AliasType* atp = NULL;
    if (C->have_alias_type(adr_type))  atp = C->alias_type(adr_type);
    if (atp == NULL)
      st->print(", idx=?\?;");
    else if (atp->index() == Compile::AliasIdxBot)
      st->print(", idx=Bot;");
    else if (atp->index() == Compile::AliasIdxTop)
      st->print(", idx=Top;");
    else if (atp->index() == Compile::AliasIdxRaw)
      st->print(", idx=Raw;");
    else {
      ciField* field = atp->field();
      if (field) {
        st->print(", name=");
        field->print_name_on(st);
      }
      st->print(", idx=%d;", atp->index());
    }
  }
}

extern void print_alias_types();

#endif

Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypeOopPtr *t_oop, Node *load, PhaseGVN *phase) {
  assert((t_oop != NULL), "sanity");
  bool is_instance = t_oop->is_known_instance_field();
  bool is_boxed_value_load = t_oop->is_ptr_to_boxed_value() &&
                             (load != NULL) && load->is_Load() &&
                             (phase->is_IterGVN() != NULL);
  if (!(is_instance || is_boxed_value_load))
    return mchain;  // don't try to optimize non-instance types
  uint instance_id = t_oop->instance_id();
  Node *start_mem = phase->C->start()->proj_out(TypeFunc::Memory);
  Node *prev = NULL;
  Node *result = mchain;
  while (prev != result) {
    prev = result;
    if (result == start_mem)
      break;  // hit one of our sentinels
    // skip over a call which does not affect this memory slice
    if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
      Node *proj_in = result->in(0);
      if (proj_in->is_Allocate() && proj_in->_idx == instance_id) {
        break;  // hit one of our sentinels
      } else if (proj_in->is_Call()) {
        CallNode *call = proj_in->as_Call();
        if (!call->may_modify(t_oop, phase)) { // returns false for instances
          result = call->in(TypeFunc::Memory);
        }
      } else if (proj_in->is_Initialize()) {
        AllocateNode* alloc = proj_in->as_Initialize()->allocation();
        // Stop if this is the initialization for the object instance which
        // contains this memory slice, otherwise skip over it.
        if ((alloc == NULL) || (alloc->_idx == instance_id)) {
          break;
        }
        if (is_instance) {
          result = proj_in->in(TypeFunc::Memory);
        } else if (is_boxed_value_load) {
          Node* klass = alloc->in(AllocateNode::KlassNode);
          const TypeKlassPtr* tklass = phase->type(klass)->is_klassptr();
          if (tklass->klass_is_exact() && !tklass->klass()->equals(t_oop->klass())) {
            result = proj_in->in(TypeFunc::Memory); // not related allocation
          }
        }
      } else if (proj_in->is_MemBar()) {
        result = proj_in->in(TypeFunc::Memory);
      } else {
        assert(false, "unexpected projection");
      }
    } else if (result->is_ClearArray()) {
      if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) {
        // Cannot bypass initialization of the instance
        // we are looking for.
        break;
      }
      // Otherwise skip it (the call updated 'result' value).
    } else if (result->is_MergeMem()) {
      result = step_through_mergemem(phase, result->as_MergeMem(), t_oop, NULL, tty);
    }
  }
  return result;
}

Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, Node *load, PhaseGVN *phase) {
  const TypeOopPtr* t_oop = t_adr->isa_oopptr();
  if (t_oop == NULL)
    return mchain;  // don't try to optimize non-oop types
  Node* result = optimize_simple_memory_chain(mchain, t_oop, load, phase);
  bool is_instance = t_oop->is_known_instance_field();
  PhaseIterGVN *igvn = phase->is_IterGVN();
  if (is_instance && igvn != NULL && result->is_Phi()) {
    PhiNode *mphi = result->as_Phi();
    assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
    const TypePtr *t = mphi->adr_type();
    if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM ||
        t->isa_oopptr() && !t->is_oopptr()->is_known_instance() &&
        t->is_oopptr()->cast_to_exactness(true)
         ->is_oopptr()->cast_to_ptr_type(t_oop->ptr())
         ->is_oopptr()->cast_to_instance_id(t_oop->instance_id()) == t_oop) {
      // clone the Phi with our address type
      result = mphi->split_out_instance(t_adr, igvn);
    } else {
      assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain");
    }
  }
  return result;
}
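
// Illustrative walk (editorial sketch, not from the original source): for a
// known-instance slice, a chain such as
//   mem --> Proj --> MemBarCPUOrder --> ... --> Proj --> Initialize(other alloc) --> ... --> start_mem
// is shortened by optimize_simple_memory_chain() past the membar and the
// unrelated initialization, so a load from this instance's field ends up
// keyed to start_mem (or to its own allocation's Initialize), which in turn
// lets GVN commute or fold it.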
static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) {
  uint alias_idx = phase->C->get_alias_index(tp);
  Node *mem = mmem;
#ifdef ASSERT
  {
    // Check that current type is consistent with the alias index used during graph construction
    assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx");
    bool consistent = adr_check == NULL || adr_check->empty() ||
                      phase->C->must_alias(adr_check, alias_idx );
    // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
    if( !consistent && adr_check != NULL && !adr_check->empty() &&
        tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
        adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot &&
        ( adr_check->offset() == arrayOopDesc::length_offset_in_bytes() ||
          adr_check->offset() == oopDesc::klass_offset_in_bytes() ||
          adr_check->offset() == oopDesc::mark_offset_in_bytes() ) ) {
      // don't assert if it is dead code.
      consistent = true;
    }
    if( !consistent ) {
      st->print("alias_idx==%d, adr_check==", alias_idx);
      if( adr_check == NULL ) {
        st->print("NULL");
      } else {
        adr_check->dump();
      }
      st->cr();
      print_alias_types();
      assert(consistent, "adr_check must match alias idx");
    }
  }
#endif
  // TypeOopPtr::NOTNULL+any is an OOP with unknown offset - generally
  // means an array I have not precisely typed yet.  Do not do any
  // alias stuff with it any time soon.
  const TypeOopPtr *toop = tp->isa_oopptr();
  if( tp->base() != Type::AnyPtr &&
      !(toop &&
        toop->klass() != NULL &&
        toop->klass()->is_java_lang_Object() &&
        toop->offset() == Type::OffsetBot) ) {
    // compress paths and change unreachable cycles to TOP
    // If not, we can update the input infinitely along a MergeMem cycle
    // Equivalent code in PhiNode::Ideal
    Node* m = phase->transform(mmem);
    // If transformed to a MergeMem, get the desired slice
    // Otherwise the returned node represents memory for every slice
    mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
    // Update input if it is progress over what we have now
  }
  return mem;
}

//--------------------------Ideal_common---------------------------------------
// Look for degenerate control and memory inputs.  Bypass MergeMem inputs.
// Unhook non-raw memories from complete (macro-expanded) initializations.
Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
  // If our control input is a dead region, kill all below the region
  Node *ctl = in(MemNode::Control);
  if (ctl && remove_dead_region(phase, can_reshape))
    return this;
  ctl = in(MemNode::Control);
  // Don't bother trying to transform a dead node
  if (ctl && ctl->is_top())  return NodeSentinel;

  PhaseIterGVN *igvn = phase->is_IterGVN();
  // Wait if control is on the worklist.
  if (ctl && can_reshape && igvn != NULL) {
    Node* bol = NULL;
    Node* cmp = NULL;
    if (ctl->in(0)->is_If()) {
      assert(ctl->is_IfTrue() || ctl->is_IfFalse(), "sanity");
      bol = ctl->in(0)->in(1);
      if (bol->is_Bool())
        cmp = ctl->in(0)->in(1)->in(1);
    }
    if (igvn->_worklist.member(ctl) ||
        (bol != NULL && igvn->_worklist.member(bol)) ||
        (cmp != NULL && igvn->_worklist.member(cmp)) ) {
      // This control path may be dead.
      // Delay this memory node transformation until the control is processed.
      phase->is_IterGVN()->_worklist.push(this);
      return NodeSentinel; // caller will return NULL
    }
  }
  // Ignore if memory is dead, or self-loop
  Node *mem = in(MemNode::Memory);
  if (phase->type( mem ) == Type::TOP)  return NodeSentinel; // caller will return NULL
  assert(mem != this, "dead loop in MemNode::Ideal");

  if (can_reshape && igvn != NULL && igvn->_worklist.member(mem)) {
    // This memory slice may be dead.
    // Delay this mem node transformation until the memory is processed.
    phase->is_IterGVN()->_worklist.push(this);
    return NodeSentinel; // caller will return NULL
  }

  Node *address = in(MemNode::Address);
  const Type *t_adr = phase->type(address);
  if (t_adr == Type::TOP)  return NodeSentinel; // caller will return NULL

  if (can_reshape && igvn != NULL &&
      (igvn->_worklist.member(address) ||
       igvn->_worklist.size() > 0 && (t_adr != adr_type())) ) {
    // The address's base and type may change when the address is processed.
    // Delay this mem node transformation until the address is processed.
    phase->is_IterGVN()->_worklist.push(this);
    return NodeSentinel; // caller will return NULL
  }

  // Do NOT remove or optimize the next lines: ensure a new alias index
  // is allocated for an oop pointer type before Escape Analysis.
  // Note: C++ will not remove it since the call has side effect.
  if (t_adr->isa_oopptr()) {
    int alias_idx = phase->C->get_alias_index(t_adr->is_ptr());
  }

  Node* base = NULL;
  if (address->is_AddP()) {
    base = address->in(AddPNode::Base);
  }
  if (base != NULL && phase->type(base)->higher_equal(TypePtr::NULL_PTR) &&
      !t_adr->isa_rawptr()) {
    // Note: raw address has TOP base and top->higher_equal(TypePtr::NULL_PTR) is true.
    // Skip this node optimization if its address has TOP base.
    return NodeSentinel; // caller will return NULL
  }

  // Avoid independent memory operations
  Node* old_mem = mem;

  // The code which unhooks non-raw memories from complete (macro-expanded)
  // initializations was removed. After macro-expansion all stores caught
  // by Initialize node became raw stores and there is no information
  // about which memory slices they modify. So it is unsafe to move any memory
  // operation above these stores. Also in most cases hooked non-raw memories
  // were already unhooked by using information from detect_ptr_independence()
  // and find_previous_store().

  if (mem->is_MergeMem()) {
    MergeMemNode* mmem = mem->as_MergeMem();
    const TypePtr *tp = t_adr->is_ptr();

    mem = step_through_mergemem(phase, mmem, tp, adr_type(), tty);
  }

  if (mem != old_mem) {
    set_req(MemNode::Memory, mem);
    if (can_reshape && old_mem->outcnt() == 0) {
      igvn->_worklist.push(old_mem);
    }
    if (phase->type( mem ) == Type::TOP) return NodeSentinel;
    return this;
  }

  // let the subclass continue analyzing...
  return NULL;
}

// Helper function for proving some simple control dominations.
// Attempt to prove that all control inputs of 'dom' dominate 'sub'.
// Already assumes that 'dom' is available at 'sub', and that 'sub'
// is not a constant (dominated by the method's StartNode).
// Used by MemNode::find_previous_store to prove that the
// control input of a memory operation predates (dominates)
// an allocation it wants to look past.
bool MemNode::all_controls_dominate(Node* dom, Node* sub) {
  if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  // Check 'dom'. Skip Proj and CatchProj nodes.
  dom = dom->find_exact_control(dom);
  if (dom == NULL || dom->is_top())
    return false; // Conservative answer for dead code

  if (dom == sub) {
    // For the case when, for example, 'sub' is Initialize and the original
    // 'dom' is Proj node of the 'sub'.
    return false;
  }

  if (dom->is_Con() || dom->is_Start() || dom->is_Root() || dom == sub)
    return true;

  // 'dom' dominates 'sub' if its control edge and control edges
  // of all its inputs dominate or equal to sub's control edge.

  // Currently 'sub' is either Allocate, Initialize or Start nodes.
  // Or Region for the check in LoadNode::Ideal();
  // 'sub' should have sub->in(0) != NULL.
  assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start() ||
         sub->is_Region() || sub->is_Call(), "expecting only these nodes");

  // Get control edge of 'sub'.
  Node* orig_sub = sub;
  sub = sub->find_exact_control(sub->in(0));
  if (sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

  assert(sub->is_CFG(), "expecting control");

  if (sub == dom)
    return true;

  if (sub->is_Start() || sub->is_Root())
    return false;

  {
    // Check all control edges of 'dom'.

    ResourceMark rm;
    Arena* arena = Thread::current()->resource_area();
    Node_List nlist(arena);
    Unique_Node_List dom_list(arena);

    dom_list.push(dom);
    bool only_dominating_controls = false;

    for (uint next = 0; next < dom_list.size(); next++) {
      Node* n = dom_list.at(next);
      if (n == orig_sub)
        return false; // One of dom's inputs dominated by sub.
      if (!n->is_CFG() && n->pinned()) {
        // Check only own control edge for pinned non-control nodes.
        n = n->find_exact_control(n->in(0));
        if (n == NULL || n->is_top())
          return false; // Conservative answer for dead code
        assert(n->is_CFG(), "expecting control");
        dom_list.push(n);
      } else if (n->is_Con() || n->is_Start() || n->is_Root()) {
        only_dominating_controls = true;
      } else if (n->is_CFG()) {
        if (n->dominates(sub, nlist))
          only_dominating_controls = true;
        else
          return false;
      } else {
        // First, own control edge.
        Node* m = n->find_exact_control(n->in(0));
        if (m != NULL) {
          if (m->is_top())
            return false; // Conservative answer for dead code
          dom_list.push(m);
        }
        // Now, the rest of edges.
        uint cnt = n->req();
        for (uint i = 1; i < cnt; i++) {
          m = n->find_exact_control(n->in(i));
          if (m == NULL || m->is_top())
            continue;
          dom_list.push(m);
        }
      }
    }
    return only_dominating_controls;
  }
}

//---------------------detect_ptr_independence---------------------------------
// Used by MemNode::find_previous_store to prove that two base
// pointers are never equal.
// The pointers are accompanied by their associated allocations,
// if any, which have been previously discovered by the caller.
bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
                                      Node* p2, AllocateNode* a2,
                                      PhaseTransform* phase) {
  // Attempt to prove that these two pointers cannot be aliased.
  // They may both manifestly be allocations, and they should differ.
  // Or, if they are not both allocations, they can be distinct constants.
  // Otherwise, one is an allocation and the other a pre-existing value.
  if (a1 == NULL && a2 == NULL) {           // neither an allocation
    return (p1 != p2) && p1->is_Con() && p2->is_Con();
  } else if (a1 != NULL && a2 != NULL) {    // both allocations
    return (a1 != a2);
  } else if (a1 != NULL) {                  // one allocation a1
    // (Note:  p2->is_Con implies p2->in(0)->is_Root, which dominates.)
    return all_controls_dominate(p2, a1);
  } else { //(a2 != NULL)                   // one allocation a2
    return all_controls_dominate(p1, a2);
  }
  return false;
}


// The logic for reordering loads and stores uses four steps:
// (a) Walk carefully past stores and initializations which we
//     can prove are independent of this load.
// (b) Observe that the next memory state makes an exact match
//     with self (load or store), and locate the relevant store.
// (c) Ensure that, if we were to wire self directly to the store,
//     the optimizer would fold it up somehow.
// (d) Do the rewiring, and return, depending on some other part of
//     the optimizer to fold up the load.
// This routine handles steps (a) and (b).  Steps (c) and (d) are
// specific to loads and stores, so they are handled by the callers.
// (Currently, only LoadNode::Ideal has steps (c), (d).  More later.)
//
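// Illustrative walk of steps (a)-(b) above (editorial sketch; 'p', 'v0',
// 'v1' and the offsets are hypothetical).  Starting from
//   mem  = StoreB(ctl, mem1, AddP(p, #17), v1)   // p.byteField
//   mem1 = StoreI(ctl, mem0, AddP(p, #12), v0)   // p.intField
// a LoadI of AddP(p, #12) first proves the byte store independent by its
// offset (17 >= 12 + 4, step a) and advances to mem1, then finds an exact
// base-and-offset match (step b) and returns that StoreI, whose value v0
// the caller can substitute for the load.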
Node* MemNode::find_previous_store(PhaseTransform* phase) {
  Node* ctrl = in(MemNode::Control);
  Node* adr  = in(MemNode::Address);
  intptr_t offset = 0;
  Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
  AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);

  if (offset == Type::OffsetBot)
    return NULL;            // cannot unalias unless there are precise offsets

  const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr();

  intptr_t size_in_bytes = memory_size();

  Node* mem = in(MemNode::Memory);   // start searching here...

  int cnt = 50;             // Cycle limiter
  for (;;) {                // While we can dance past unrelated stores...
    if (--cnt < 0)  break;  // Caught in cycle or a complicated dance?

    if (mem->is_Store()) {
      Node* st_adr = mem->in(MemNode::Address);
      intptr_t st_offset = 0;
      Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
      if (st_base == NULL)
        break;              // inscrutable pointer
      if (st_offset != offset && st_offset != Type::OffsetBot) {
        const int MAX_STORE = BytesPerLong;
        if (st_offset >= offset + size_in_bytes ||
            st_offset <= offset - MAX_STORE ||
            st_offset <= offset - mem->as_Store()->memory_size()) {
          // Success:  The offsets are provably independent.
          // (You may ask, why not just test st_offset != offset and be done?
          // The answer is that stores of different sizes can co-exist
          // in the same sequence of RawMem effects.  We sometimes initialize
          // a whole 'tile' of array elements with a single jint or jlong.)
          mem = mem->in(MemNode::Memory);
          continue;         // (a) advance through independent store memory
        }
      }
      if (st_base != base &&
          detect_ptr_independence(base, alloc,
                                  st_base,
                                  AllocateNode::Ideal_allocation(st_base, phase),
                                  phase)) {
        // Success:  The bases are provably independent.
        mem = mem->in(MemNode::Memory);
        continue;           // (a) advance through independent store memory
      }

      // (b) At this point, if the bases or offsets do not agree, we lose,
      // since we have not managed to prove 'this' and 'mem' independent.
      if (st_base == base && st_offset == offset) {
        return mem;         // let caller handle steps (c), (d)
      }

    } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
      InitializeNode* st_init = mem->in(0)->as_Initialize();
      AllocateNode* st_alloc = st_init->allocation();
      if (st_alloc == NULL)
        break;              // something degenerated
      bool known_identical = false;
      bool known_independent = false;
      if (alloc == st_alloc)
        known_identical = true;
      else if (alloc != NULL)
        known_independent = true;
      else if (all_controls_dominate(this, st_alloc))
        known_independent = true;

      if (known_independent) {
        // The bases are provably independent: Either they are
        // manifestly distinct allocations, or else the control
        // of this load dominates the store's allocation.
        int alias_idx = phase->C->get_alias_index(adr_type());
        if (alias_idx == Compile::AliasIdxRaw) {
          mem = st_alloc->in(TypeFunc::Memory);
        } else {
          mem = st_init->memory(alias_idx);
        }
        continue;           // (a) advance through independent store memory
      }

      // (b) at this point, if we are not looking at a store initializing
      // the same allocation we are loading from, we lose.
      if (known_identical) {
        // From caller, can_see_stored_value will consult find_captured_store.
        return mem;         // let caller handle steps (c), (d)
      }

    } else if (addr_t != NULL && addr_t->is_known_instance_field()) {
      // Can't use optimize_simple_memory_chain() since it needs PhaseGVN.
      if (mem->is_Proj() && mem->in(0)->is_Call()) {
        CallNode *call = mem->in(0)->as_Call();
        if (!call->may_modify(addr_t, phase)) {
          mem = call->in(TypeFunc::Memory);
          continue;         // (a) advance through independent call memory
        }
      } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
        mem = mem->in(0)->in(TypeFunc::Memory);
        continue;           // (a) advance through independent MemBar memory
      } else if (mem->is_ClearArray()) {
        if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) {
          // (the call updated 'mem' value)
          continue;         // (a) advance through independent allocation memory
        } else {
          // Cannot bypass initialization of the instance
          // we are looking for.
          return mem;
        }
      } else if (mem->is_MergeMem()) {
        int alias_idx = phase->C->get_alias_index(adr_type());
        mem = mem->as_MergeMem()->memory_at(alias_idx);
        continue;           // (a) advance through independent MergeMem memory
      }
    }

    // Unless there is an explicit 'continue', we must bail out here,
    // because 'mem' is an inscrutable memory state (e.g., a call).
    break;
  }

  return NULL;              // bail out
}

//----------------------calculate_adr_type-------------------------------------
// Helper function.  Notices when the given type of address hits top or bottom.
// Also, asserts a cross-check of the type against the expected address type.
const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
  if (t == Type::TOP)  return NULL; // does not touch memory any more?
  #ifdef PRODUCT
  cross_check = NULL;
  #else
  if (!VerifyAliases || is_error_reported() || Node::in_dump())  cross_check = NULL;
  #endif
  const TypePtr* tp = t->isa_ptr();
  if (tp == NULL) {
    assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide");
    return TypePtr::BOTTOM;           // touches lots of memory
  } else {
    #ifdef ASSERT
    // %%%% [phh] We don't check the alias index if cross_check is
    //            TypeRawPtr::BOTTOM.  Needs to be investigated.
    if (cross_check != NULL &&
        cross_check != TypePtr::BOTTOM &&
        cross_check != TypeRawPtr::BOTTOM) {
      // Recheck the alias index, to see if it has changed (due to a bug).
      Compile* C = Compile::current();
      assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
             "must stay in the original alias category");
      // The type of the address must be contained in the adr_type,
      // disregarding "null"-ness.
      // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
      const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
      assert(cross_check->meet(tp_notnull) == cross_check->remove_speculative(),
             "real address must not escape from expected memory type");
    }
    #endif
    return tp;
  }
}

//------------------------adr_phi_is_loop_invariant----------------------------
// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
// loop is loop invariant. Make a quick traversal of Phi and associated
// CastPP nodes, looking to see if they are a closed group within the loop.
bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
  // The idea is that the phi-nest must boil down to only CastPP nodes
  // with the same data. This implies that any path into the loop already
  // includes such a CastPP, and so the original cast, whatever its input,
  // must be covered by an equivalent cast, with an earlier control input.
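  // Illustrative shape (editorial sketch; names are hypothetical): in a
  // counted loop,
  //   adr_phi = Phi(loop, CastPP(ctl_entry, p), CastPP(ctl_back, p))
  // boils down to casts of the same pointer p, so the phi is treated as
  // loop invariant and a memory op may float above it; any node in the
  // nest other than a Phi or ConstraintCast disqualifies it below.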
  ResourceMark rm;

  // The loop entry input of the phi should be the unique dominating
  // node for every Phi/CastPP in the loop.
  Unique_Node_List closure;
  closure.push(adr_phi->in(LoopNode::EntryControl));

  // Add the phi node and the cast to the worklist.
  Unique_Node_List worklist;
  worklist.push(adr_phi);
  if( cast != NULL ){
    if( !cast->is_ConstraintCast() ) return false;
    worklist.push(cast);
  }

  // Begin recursive walk of phi nodes.
  while( worklist.size() ){
    // Take a node off the worklist
    Node *n = worklist.pop();
    if( !closure.member(n) ){
      // Add it to the closure.
      closure.push(n);
      // Make a sanity check to ensure we don't waste too much time here.
      if( closure.size() > 20) return false;
      // This node is OK if:
      //  - it is a cast of an identical value
      //  - or it is a phi node (then we add its inputs to the worklist)
      // Otherwise, the node is not OK, and we presume the cast is not invariant
      if( n->is_ConstraintCast() ){
        worklist.push(n->in(1));
      } else if( n->is_Phi() ) {
        for( uint i = 1; i < n->req(); i++ ) {
          worklist.push(n->in(i));
        }
      } else {
        return false;
      }
    }
  }

  // Quit when the worklist is empty, and we've found no offending nodes.
  return true;
}

//------------------------------Ideal_DU_postCCP-------------------------------
// Find any cast-away of null-ness and keep its control.  Null cast-aways are
// going away in this pass and we need to make this memory op depend on the
// gating null check.
Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
  return Ideal_common_DU_postCCP(ccp, this, in(MemNode::Address));
}

// I tried to leave the CastPP's in.  This makes the graph more accurate in
// some sense; we get to keep around the knowledge that an oop is not-null
// after some test.  Alas, the CastPP's interfere with GVN (some values are
// the regular oop, some are the CastPP of the oop, all merge at Phi's which
// cannot collapse, etc).  This cost us 10% on SpecJVM, even when I removed
// some of the more trivial cases in the optimizer.  Removing more useless
// Phi's started allowing Loads to illegally float above null checks.  I gave
// up on this approach.  CNC 10/20/2000
// This static method may be called not from MemNode (EncodePNode calls it).
// Only the control edge of the node 'n' might be updated.
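// Illustrative case (editorial sketch; the Java source is hypothetical):
// for
//   if (p != null) { x = p.f; }
// the load's address is based on CastPP(iftrue, p).  When CCP strips the
// CastPP, the load must pick up the cast's control input (the iftrue
// projection of the null check) so it cannot float above the test; the
// Op_CastPP and Op_CheckCastPP cases below perform that rewiring via
// set_req(MemNode::Control, adr->in(0)).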
Node *MemNode::Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ) {
  Node *skipped_cast = NULL;
  // Need a null check?  Regular static accesses do not because they are
  // from constant addresses.  Array ops are gated by the range check (which
  // always includes a NULL check).  Just check field ops.
  if( n->in(MemNode::Control) == NULL ) {
    // Scan upwards for the highest location we can place this memory op.
    while( true ) {
      switch( adr->Opcode() ) {

      case Op_AddP:             // No change to NULL-ness, so peek thru AddP's
        adr = adr->in(AddPNode::Base);
        continue;

      case Op_DecodeN:          // No change to NULL-ness, so peek thru
      case Op_DecodeNKlass:
        adr = adr->in(1);
        continue;

      case Op_EncodeP:
      case Op_EncodePKlass:
        // EncodeP node's control edge could be set by this method
        // when EncodeP node depends on CastPP node.
        //
        // Use its control edge for memory op because EncodeP may go away
        // later when it is folded with following or preceding DecodeN node.
        if (adr->in(0) == NULL) {
          // Keep looking for cast nodes.
          adr = adr->in(1);
          continue;
        }
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

      case Op_CastPP:
        // If the CastPP is useless, just peek on through it.
        if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
          // Remember the cast that we've peeked through.  If we peek
          // through more than one, then we end up remembering the highest
          // one, that is, if in a loop, the one closest to the top.
          skipped_cast = adr;
          adr = adr->in(1);
          continue;
        }
        // CastPP is going away in this pass!  We need this memory op to be
        // control-dependent on the test that is guarding the CastPP.
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

      case Op_Phi:
        // Attempt to float above a Phi to some dominating point.
        if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
          // If we've already peeked through a Cast (which could have set the
          // control), we can't float above a Phi, because the skipped Cast
          // may not be loop invariant.
          if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
            adr = adr->in(1);
            continue;
          }
        }

        // Intentional fallthrough!

        // No obvious dominating point.  The mem op is pinned below the Phi
        // by the Phi itself.  If the Phi goes away (no true value is merged)
        // then the mem op can float, but not indefinitely.  It must be pinned
        // behind the controls leading to the Phi.
      case Op_CheckCastPP:
        // These usually stick around to change address type, however a
        // useless one can be elided and we still need to pick up a control edge
        if (adr->in(0) == NULL) {
          // This CheckCastPP node has NO control and is likely useless. But we
          // need check further up the ancestor chain for a control input to keep
          // the node in place. 4959717.
          skipped_cast = adr;
          adr = adr->in(1);
          continue;
        }
        ccp->hash_delete(n);
        n->set_req(MemNode::Control, adr->in(0));
        ccp->hash_insert(n);
        return n;

        // List of "safe" opcodes; those that implicitly block the memory
        // op below any null check.
      case Op_CastX2P:          // no null checks on native pointers
      case Op_Parm:             // 'this' pointer is not null
      case Op_LoadP:            // Loading from within a klass
      case Op_LoadN:            // Loading from within a klass
      case Op_LoadKlass:        // Loading from within a klass
      case Op_LoadNKlass:       // Loading from within a klass
      case Op_ConP:             // Loading from a klass
      case Op_ConN:             // Loading from a klass
      case Op_ConNKlass:        // Loading from a klass
      case Op_CreateEx:         // Sucking up the guts of an exception oop
      case Op_Con:              // Reading from TLS
      case Op_CMoveP:           // CMoveP is pinned
      case Op_CMoveN:           // CMoveN is pinned
        break;                  // No progress

      case Op_Proj:             // Direct call to an allocation routine
      case Op_SCMemProj:        // Memory state from store conditional ops
#ifdef ASSERT
        {
          assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
          const Node* call = adr->in(0);
          if (call->is_CallJava()) {
            const CallJavaNode* call_java = call->as_CallJava();
            const TypeTuple *r = call_java->tf()->range();
            assert(r->cnt() > TypeFunc::Parms, "must return value");
            const Type* ret_type = r->field_at(TypeFunc::Parms);
            assert(ret_type && ret_type->isa_ptr(), "must return pointer");
            // We further presume that this is one of
            // new_instance_Java, new_array_Java, or
            // the like, but do not assert for this.
          } else if (call->is_Allocate()) {
            // similar case to new_instance_Java, etc.
          } else if (!call->is_CallLeaf()) {
            // Projections from fetch_oop (OSR) are allowed as well.
            ShouldNotReachHere();
          }
        }
#endif
        break;
      default:
        ShouldNotReachHere();
      }
      break;
    }
  }

  return NULL;              // No progress
}


//=============================================================================
uint LoadNode::size_of() const { return sizeof(*this); }
uint LoadNode::cmp( const Node &n ) const
{ return !Type::cmp( _type, ((LoadNode&)n)._type ); }
const Type *LoadNode::bottom_type() const { return _type; }
uint LoadNode::ideal_reg() const {
  return _type->ideal_reg();
}

#ifndef PRODUCT
void LoadNode::dump_spec(outputStream *st) const {
  MemNode::dump_spec(st);
  if( !Verbose && !WizardMode ) {
    // standard dump does this in Verbose and WizardMode
    st->print(" #"); _type->dump_on(st);
  }
}
#endif

#ifdef ASSERT
//----------------------------is_immutable_value-------------------------------
// Helper function to allow a raw load without control edge for some cases
bool LoadNode::is_immutable_value(Node* adr) {
  return (adr->is_AddP() && adr->in(AddPNode::Base)->is_top() &&
          adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
          (adr->in(AddPNode::Offset)->find_intptr_t_con(-1) ==
           in_bytes(JavaThread::osthread_offset())));
}
#endif

//----------------------------LoadNode::make-----------------------------------
// Polymorphic factory method:
Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo) {
  Compile* C = gvn.C;

  // sanity check the alias category against the created node type
  assert(!(adr_type->isa_oopptr() &&
           adr_type->offset() == oopDesc::klass_offset_in_bytes()),
         "use LoadKlassNode instead");
  assert(!(adr_type->isa_aryptr() &&
           adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
         "use LoadRangeNode instead");
  // Check control edge of raw loads
  assert( ctl != NULL || C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
          // oop will be recorded in oop map if load crosses safepoint
          rt->isa_oopptr() || is_immutable_value(adr),
          "raw memory operations should have control edge");
  switch (bt) {
  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  mo);
  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  mo);
  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), mo);
  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            mo);
  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            mo);
  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  mo);
  case T_OBJECT:
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      Node* load = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), mo));
      return new (C) DecodeNNode(load, load->bottom_type()->make_ptr());
    } else
#endif
    {
      assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo);
    }
  }
  ShouldNotReachHere();
  return (LoadNode*)NULL;
}

LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
  bool require_atomic = true;
  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, require_atomic);
}
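
// Usage sketch (illustrative; 'gvn', 'ctl', 'mem' and 'adr' are hypothetical
// locals of a caller such as a parser or intrinsic): an ordinary int-field
// read would be created roughly as
//   Node* ld = LoadNode::make(gvn, ctl, mem, adr, adr_type,
//                             TypeInt::INT, T_INT, MemNode::unordered);
//   ld = gvn.transform(ld);
// For T_OBJECT under _LP64 with compressed oops, make() instead returns a
// DecodeNNode wrapped around an already-transformed LoadNNode, as above.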

//------------------------------hash-------------------------------------------
uint LoadNode::hash() const {
  // unroll addition of interesting fields
  return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address);
}

static bool skip_through_membars(Compile::AliasType* atp, const TypeInstPtr* tp, bool eliminate_boxing) {
  if ((atp != NULL) && (atp->index() >= Compile::AliasIdxRaw)) {
    bool non_volatile = (atp->field() != NULL) && !atp->field()->is_volatile();
    bool is_stable_ary = FoldStableValues &&
                         (tp != NULL) && (tp->isa_aryptr() != NULL) &&
                         tp->isa_aryptr()->is_stable();

    return (eliminate_boxing && non_volatile) || is_stable_ary;
  }

  return false;
}

//---------------------------can_see_stored_value------------------------------
// This routine exists to make sure this set of tests is done the same
// everywhere.  We need to make a coordinated change: first LoadNode::Ideal
// will change the graph shape in a way which makes memory alive twice at the
// same time (uses the Oracle model of aliasing), then some
// LoadXNode::Identity will fold things back to the equivalence-class model
// of aliasing.
Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
  Node* ld_adr = in(MemNode::Address);
  intptr_t ld_off = 0;
  AllocateNode* ld_alloc = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
  const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr();
  Compile::AliasType* atp = (tp != NULL) ? phase->C->alias_type(tp) : NULL;
  // This is more general than load from boxing objects.
  if (skip_through_membars(atp, tp, phase->C->eliminate_boxing())) {
    uint alias_idx = atp->index();
    bool final = !atp->is_rewritable();
    Node* result = NULL;
    Node* current = st;
    // Skip through chains of MemBarNodes checking the MergeMems for
    // new states for the slice of this load.  Stop once any other
    // kind of node is encountered.  Loads from final memory can skip
    // through any kind of MemBar but normal loads shouldn't skip
    // through MemBarAcquire since that could allow them to move out of
    // a synchronized region.
    while (current->is_Proj()) {
      int opc = current->in(0)->Opcode();
      if ((final && (opc == Op_MemBarAcquire ||
                     opc == Op_MemBarAcquireLock ||
                     opc == Op_LoadFence)) ||
          opc == Op_MemBarRelease ||
          opc == Op_StoreFence ||
          opc == Op_MemBarReleaseLock ||
          opc == Op_MemBarCPUOrder) {
        Node* mem = current->in(0)->in(TypeFunc::Memory);
        if (mem->is_MergeMem()) {
          MergeMemNode* merge = mem->as_MergeMem();
          Node* new_st = merge->memory_at(alias_idx);
          if (new_st == merge->base_memory()) {
            // Keep searching
            current = new_st;
            continue;
          }
          // Save the new memory state for the slice and fall through
          // to exit.
          result = new_st;
        }
      }
      break;
    }
    if (result != NULL) {
      st = result;
    }
  }

  // Loop around twice in the case Load -> Initialize -> Store.
  // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
  for (int trip = 0; trip <= 1; trip++) {

    if (st->is_Store()) {
      Node* st_adr = st->in(MemNode::Address);
      if (!phase->eqv(st_adr, ld_adr)) {
        // Try harder before giving up...  Match raw and non-raw pointers.
        intptr_t st_off = 0;
        AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
        if (alloc == NULL)       return NULL;
        if (alloc != ld_alloc)   return NULL;
        if (ld_off != st_off)    return NULL;
        // At this point we have proven something like this setup:
        //   A = Allocate(...)
        //   L = LoadQ(,  AddP(CastPP(, A.Parm),, #Off))
        //   S = StoreQ(, AddP(,        A.Parm  , #Off), V)
        // (Actually, we haven't yet proven the Q's are the same.)
        // In other words, we are loading from a casted version of
        // the same pointer-and-offset that we stored to.
        // Thus, we are able to replace L by V.
      }
      // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
      if (store_Opcode() != st->Opcode())
        return NULL;
      return st->in(MemNode::ValueIn);
    }

    // A load from a freshly-created object always returns zero.
    // (This can happen after LoadNode::Ideal resets the load's memory input
    // to find_captured_store, which returned InitializeNode::zero_memory.)
    if (st->is_Proj() && st->in(0)->is_Allocate() &&
        (st->in(0) == ld_alloc) &&
        (ld_off >= st->in(0)->as_Allocate()->minimum_header_size())) {
      // return a zero value for the load's basic type
      // (This is one of the few places where a generic PhaseTransform
      // can create new nodes.  Think of it as lazily manifesting
      // virtually pre-existing constants.)
      return phase->zerocon(memory_type());
    }

    // A load from an initialization barrier can match a captured store.
    if (st->is_Proj() && st->in(0)->is_Initialize()) {
      InitializeNode* init = st->in(0)->as_Initialize();
      AllocateNode* alloc = init->allocation();
      if ((alloc != NULL) && (alloc == ld_alloc)) {
        // examine a captured store value
        st = init->find_captured_store(ld_off, memory_size(), phase);
        if (st != NULL)
          continue;             // take one more trip around
      }
    }

    // A load of a boxed value from the result of a valueOf() call is the
    // call's input parameter.
    if (this->is_Load() && ld_adr->is_AddP() &&
        (tp != NULL) && tp->is_ptr_to_boxed_value()) {
      intptr_t ignore = 0;
      Node* base = AddPNode::Ideal_base_and_offset(ld_adr, phase, ignore);
      if (base != NULL && base->is_Proj() &&
          base->as_Proj()->_con == TypeFunc::Parms &&
          base->in(0)->is_CallStaticJava() &&
          base->in(0)->as_CallStaticJava()->is_boxing_method()) {
        return base->in(0)->in(TypeFunc::Parms);
      }
    }

    break;
  }

  return NULL;
}

//----------------------is_instance_field_load_with_local_phi------------------
bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) {
  if( in(Memory)->is_Phi() && in(Memory)->in(0) == ctrl &&
      in(Address)->is_AddP() ) {
    const TypeOopPtr* t_oop = in(Address)->bottom_type()->isa_oopptr();
    // Only instances and boxed values.
    if( t_oop != NULL &&
        (t_oop->is_ptr_to_boxed_value() ||
         t_oop->is_known_instance_field()) &&
        t_oop->offset() != Type::OffsetBot &&
        t_oop->offset() != Type::OffsetTop) {
      return true;
    }
  }
  return false;
}

//------------------------------Identity---------------------------------------
// Loads are identity if previous store is to same address
Node *LoadNode::Identity( PhaseTransform *phase ) {
  // If the previous store-maker is the right kind of Store, and the store is
  // to the same address, then we are equal to the value stored.
  Node* mem = in(Memory);
  Node* value = can_see_stored_value(mem, phase);
  if( value ) {
    // byte, short & char stores truncate naturally.
    // A load has to load the truncated value which requires
    // some sort of masking operation and that requires an
    // Ideal call instead of an Identity call.
    if (memory_size() < BytesPerInt) {
      // If the input to the store does not fit with the load's result type,
      // it must be truncated via an Ideal call.
      if (!phase->type(value)->higher_equal(phase->type(this)))
        return this;
    }
    // (This works even when value is a Con, but LoadNode::Value
    // usually runs first, producing the singleton type of the Con.)
    return value;
  }

  // Search for an existing data phi which was generated before for the same
  // instance's field to avoid infinite generation of phis in a loop.
  Node *region = mem->in(0);
  if (is_instance_field_load_with_local_phi(region)) {
    const TypeOopPtr *addr_t = in(Address)->bottom_type()->isa_oopptr();
    int this_index  = phase->C->get_alias_index(addr_t);
    int this_offset = addr_t->offset();
    int this_iid    = addr_t->instance_id();
    if (!addr_t->is_known_instance() &&
         addr_t->is_ptr_to_boxed_value()) {
      // Use _idx of address base (could be Phi node) for boxed values.
      intptr_t ignore = 0;
      Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
      this_iid = base->_idx;
    }
    const Type* this_type = bottom_type();
    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
      Node* phi = region->fast_out(i);
      if (phi->is_Phi() && phi != mem &&
          phi->as_Phi()->is_same_inst_field(this_type, this_iid, this_index, this_offset)) {
        return phi;
      }
    }
  }

  return this;
}

// We're loading from an object which has autobox behaviour.
// If this object is the result of a valueOf call we'll have a phi
// merging a newly allocated object and a load from the cache.
// We want to replace this load with the original incoming
// argument to the valueOf call.
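// Illustrative shape at the Java level (editorial sketch): after inlining
// Integer.valueOf(i) the graph roughly contains
//   box = Phi(new-Integer(i), LoadP(IntegerCache.cache[i - low]))
// and this load reads box.value.  For the cache arm the code below does
// not perform the load at all: it recovers i by reversing the array-index
// address computation (base offset, element shift, constant bias).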
Node* LoadNode::eliminate_autobox(PhaseGVN* phase) {
  assert(phase->C->eliminate_boxing(), "sanity");
  intptr_t ignore = 0;
  Node* base = AddPNode::Ideal_base_and_offset(in(Address), phase, ignore);
  if ((base == NULL) || base->is_Phi()) {
    // Push the loads from the phi that comes from valueOf up
    // through it to allow elimination of the loads and the recovery
    // of the original value. It is done in split_through_phi().
    return NULL;
  } else if (base->is_Load() ||
             base->is_DecodeN() && base->in(1)->is_Load()) {
    // Eliminate the load of boxed value for integer types from the cache
    // array by deriving the value from the index into the array.
    // Capture the offset of the load and then reverse the computation.

    // Get LoadN node which loads a boxing object from 'cache' array.
    if (base->is_DecodeN()) {
      base = base->in(1);
    }
    if (!base->in(Address)->is_AddP()) {
      return NULL; // Complex address
    }
    AddPNode* address = base->in(Address)->as_AddP();
    Node* cache_base = address->in(AddPNode::Base);
    if ((cache_base != NULL) && cache_base->is_DecodeN()) {
      // Get ConP node which is static 'cache' field.
      cache_base = cache_base->in(1);
    }
    if ((cache_base != NULL) && cache_base->is_Con()) {
      const TypeAryPtr* base_type = cache_base->bottom_type()->isa_aryptr();
      if ((base_type != NULL) && base_type->is_autobox_cache()) {
        Node* elements[4];
        int shift = exact_log2(type2aelembytes(T_OBJECT));
        int count = address->unpack_offsets(elements, ARRAY_SIZE(elements));
        if ((count > 0) && elements[0]->is_Con() &&
            ((count == 1) ||
             (count == 2) && elements[1]->Opcode() == Op_LShiftX &&
                             elements[1]->in(2) == phase->intcon(shift))) {
          ciObjArray* array = base_type->const_oop()->as_obj_array();
          // Fetch the box object cache[0] at the base of the array and get its value
          ciInstance* box = array->obj_at(0)->as_instance();
          ciInstanceKlass* ik = box->klass()->as_instance_klass();
          assert(ik->is_box_klass(), "sanity");
          assert(ik->nof_nonstatic_fields() == 1, "change following code");
          if (ik->nof_nonstatic_fields() == 1) {
            // This should be true; nonstatic_field_at requires calling
            // nof_nonstatic_fields, so check it anyway.
            ciConstant c = box->field_value(ik->nonstatic_field_at(0));
            BasicType bt = c.basic_type();
            // Only integer types have boxing cache.
            assert(bt == T_BOOLEAN || bt == T_CHAR  ||
                   bt == T_BYTE    || bt == T_SHORT ||
                   bt == T_INT     || bt == T_LONG, err_msg_res("wrong type = %s", type2name(bt)));
            jlong cache_low = (bt == T_LONG) ? c.as_long() : c.as_int();
            if (cache_low != (int)cache_low) {
              return NULL; // should not happen since cache is array indexed by value
            }
            jlong offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT) - (cache_low << shift);
            if (offset != (int)offset) {
              return NULL; // should not happen since cache is array indexed by value
            }
            // Add up all the offsets making of the address of the load
            Node* result = elements[0];
            for (int i = 1; i < count; i++) {
              result = phase->transform(new (phase->C) AddXNode(result, elements[i]));
            }
            // Remove the constant offset from the address and then
            result = phase->transform(new (phase->C) AddXNode(result, phase->MakeConX(-(int)offset)));
            // remove the scaling of the offset to recover the original index.
            if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) {
              // Peel the shift off directly but wrap it in a dummy node
              // since Ideal can't return existing nodes
              result = new (phase->C) RShiftXNode(result->in(1), phase->intcon(0));
            } else if (result->is_Add() && result->in(2)->is_Con() &&
                       result->in(1)->Opcode() == Op_LShiftX &&
                       result->in(1)->in(2) == phase->intcon(shift)) {
              // We can't do general optimization: ((X<<Z) + Y) >> Z ==> X + (Y>>Z)
              // but for boxing cache access we know that X<<Z will not overflow
              // (there is range check) so we do this optimization by hand here.
              Node* add_con = new (phase->C) RShiftXNode(result->in(2), phase->intcon(shift));
              result = new (phase->C) AddXNode(result->in(1)->in(1), phase->transform(add_con));
            } else {
              result = new (phase->C) RShiftXNode(result, phase->intcon(shift));
            }
#ifdef _LP64
            if (bt != T_LONG) {
              result = new (phase->C) ConvL2INode(phase->transform(result));
            }
#else
            if (bt == T_LONG) {
              result = new (phase->C) ConvI2LNode(phase->transform(result));
            }
#endif
            return result;
          }
        }
      }
    }
  }
  return NULL;
}

static bool stable_phi(PhiNode* phi, PhaseGVN *phase) {
  Node* region = phi->in(0);
  if (region == NULL) {
    return false; // Wait stable graph
  }
  uint cnt = phi->req();
  for (uint i = 1; i < cnt; i++) {
    Node* rc = region->in(i);
    if (rc == NULL || phase->type(rc) == Type::TOP)
      return false; // Wait stable graph
    Node* in = phi->in(i);
    if (in == NULL || phase->type(in) == Type::TOP)
      return false; // Wait stable graph
  }
  return true;
}
//------------------------------split_through_phi------------------------------
// Split instance or boxed field load through Phi.
Node *LoadNode::split_through_phi(PhaseGVN *phase) {
  Node* mem     = in(Memory);
  Node* address = in(Address);
  const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr();

  assert((t_oop != NULL) &&
         (t_oop->is_known_instance_field() ||
          t_oop->is_ptr_to_boxed_value()), "invalid conditions");

  Compile* C = phase->C;
  intptr_t ignore = 0;
  Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
  bool base_is_phi = (base != NULL) && base->is_Phi();
  bool load_boxed_values = t_oop->is_ptr_to_boxed_value() && C->aggressive_unboxing() &&
                           (base != NULL) && (base == address->in(AddPNode::Base)) &&
                           phase->type(base)->higher_equal(TypePtr::NOTNULL);

  if (!((mem->is_Phi() || base_is_phi) &&
        (load_boxed_values || t_oop->is_known_instance_field()))) {
    return NULL; // memory is not Phi
  }

  if (mem->is_Phi()) {
    if (!stable_phi(mem->as_Phi(), phase)) {
      return NULL; // Wait stable graph
    }
    uint cnt = mem->req();
    // Check for loop invariant memory.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
        Node* in = mem->in(i);
        Node* m = optimize_memory_chain(in, t_oop, this, phase);
        if (m == mem) {
          set_req(Memory, mem->in(cnt - i));
          return this; // made change
        }
      }
    }
  }
  if (base_is_phi) {
    if (!stable_phi(base->as_Phi(), phase)) {
      return NULL; // Wait stable graph
    }
    uint cnt = base->req();
    // Check for loop invariant memory.
    if (cnt == 3) {
      for (uint i = 1; i < cnt; i++) {
        if (base->in(i) == base) {
          return NULL; // Wait stable graph
        }
      }
    }
  }

  bool load_boxed_phi = load_boxed_values && base_is_phi && (base->in(0) == mem->in(0));

  // Split through Phi (see original code in loopopts.cpp).
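  // Sketch of the resulting shape (illustrative only):
  //   LoadI(mem: Phi(m1, m2), adr)
  //     ==>  Phi(LoadI(m1, adr), LoadI(m2, adr))
  // Each per-path clone is then constant-folded or simplified below, and
  // the new data Phi is registered with igvn.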
1.1341 + assert(C->have_alias_type(t_oop), "instance should have alias type"); 1.1342 + 1.1343 + // Do nothing here if Identity will find a value 1.1344 + // (to avoid infinite chain of value phis generation). 1.1345 + if (!phase->eqv(this, this->Identity(phase))) 1.1346 + return NULL; 1.1347 + 1.1348 + // Select Region to split through. 1.1349 + Node* region; 1.1350 + if (!base_is_phi) { 1.1351 + assert(mem->is_Phi(), "sanity"); 1.1352 + region = mem->in(0); 1.1353 + // Skip if the region dominates some control edge of the address. 1.1354 + if (!MemNode::all_controls_dominate(address, region)) 1.1355 + return NULL; 1.1356 + } else if (!mem->is_Phi()) { 1.1357 + assert(base_is_phi, "sanity"); 1.1358 + region = base->in(0); 1.1359 + // Skip if the region dominates some control edge of the memory. 1.1360 + if (!MemNode::all_controls_dominate(mem, region)) 1.1361 + return NULL; 1.1362 + } else if (base->in(0) != mem->in(0)) { 1.1363 + assert(base_is_phi && mem->is_Phi(), "sanity"); 1.1364 + if (MemNode::all_controls_dominate(mem, base->in(0))) { 1.1365 + region = base->in(0); 1.1366 + } else if (MemNode::all_controls_dominate(address, mem->in(0))) { 1.1367 + region = mem->in(0); 1.1368 + } else { 1.1369 + return NULL; // complex graph 1.1370 + } 1.1371 + } else { 1.1372 + assert(base->in(0) == mem->in(0), "sanity"); 1.1373 + region = mem->in(0); 1.1374 + } 1.1375 + 1.1376 + const Type* this_type = this->bottom_type(); 1.1377 + int this_index = C->get_alias_index(t_oop); 1.1378 + int this_offset = t_oop->offset(); 1.1379 + int this_iid = t_oop->instance_id(); 1.1380 + if (!t_oop->is_known_instance() && load_boxed_values) { 1.1381 + // Use _idx of address base for boxed values. 1.1382 + this_iid = base->_idx; 1.1383 + } 1.1384 + PhaseIterGVN* igvn = phase->is_IterGVN(); 1.1385 + Node* phi = new (C) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset); 1.1386 + for (uint i = 1; i < region->req(); i++) { 1.1387 + Node* x; 1.1388 + Node* the_clone = NULL; 1.1389 + if (region->in(i) == C->top()) { 1.1390 + x = C->top(); // Dead path? Use a dead data op 1.1391 + } else { 1.1392 + x = this->clone(); // Else clone up the data op 1.1393 + the_clone = x; // Remember for possible deletion. 1.1394 + // Alter data node to use pre-phi inputs 1.1395 + if (this->in(0) == region) { 1.1396 + x->set_req(0, region->in(i)); 1.1397 + } else { 1.1398 + x->set_req(0, NULL); 1.1399 + } 1.1400 + if (mem->is_Phi() && (mem->in(0) == region)) { 1.1401 + x->set_req(Memory, mem->in(i)); // Use pre-Phi input for the clone. 1.1402 + } 1.1403 + if (address->is_Phi() && address->in(0) == region) { 1.1404 + x->set_req(Address, address->in(i)); // Use pre-Phi input for the clone 1.1405 + } 1.1406 + if (base_is_phi && (base->in(0) == region)) { 1.1407 + Node* base_x = base->in(i); // Clone address for loads from boxed objects. 1.1408 + Node* adr_x = phase->transform(new (C) AddPNode(base_x,base_x,address->in(AddPNode::Offset))); 1.1409 + x->set_req(Address, adr_x); 1.1410 + } 1.1411 + } 1.1412 + // Check for a 'win' on some paths 1.1413 + const Type *t = x->Value(igvn); 1.1414 + 1.1415 + bool singleton = t->singleton(); 1.1416 + 1.1417 + // See comments in PhaseIdealLoop::split_thru_phi(). 1.1418 + if (singleton && t == Type::TOP) { 1.1419 + singleton &= region->is_Loop() && (i != LoopNode::EntryControl); 1.1420 + } 1.1421 + 1.1422 + if (singleton) { 1.1423 + x = igvn->makecon(t); 1.1424 + } else { 1.1425 + // We now call Identity to try to simplify the cloned node. 
1.1426 + // Note that some Identity methods call phase->type(this).
1.1427 + // Make sure that the type array is big enough for
1.1428 + // our new node, even though we may throw the node away.
1.1429 + // (This tweaking with igvn only works because x is a new node.)
1.1430 + igvn->set_type(x, t);
1.1431 + // If x is a TypeNode, capture any more-precise type permanently into the node;
1.1432 + // otherwise it will not be updated during igvn->transform since
1.1433 + // igvn->type(x) is set to x->Value() already.
1.1434 + x->raise_bottom_type(t);
1.1435 + Node *y = x->Identity(igvn);
1.1436 + if (y != x) {
1.1437 + x = y;
1.1438 + } else {
1.1439 + y = igvn->hash_find_insert(x);
1.1440 + if (y) {
1.1441 + x = y;
1.1442 + } else {
1.1443 + // Else x is a new node we are keeping
1.1444 + // We do not need register_new_node_with_optimizer
1.1445 + // because set_type has already been called.
1.1446 + igvn->_worklist.push(x);
1.1447 + }
1.1448 + }
1.1449 + }
1.1450 + if (x != the_clone && the_clone != NULL) {
1.1451 + igvn->remove_dead_node(the_clone);
1.1452 + }
1.1453 + phi->set_req(i, x);
1.1454 + }
1.1455 + // Record Phi
1.1456 + igvn->register_new_node_with_optimizer(phi);
1.1457 + return phi;
1.1458 +}
1.1459 +
1.1460 +//------------------------------Ideal------------------------------------------
1.1461 +// If the load is from Field memory and the pointer is non-null, we can
1.1462 +// zero out the control input.
1.1463 +// If the offset is constant and the base is an object allocation,
1.1464 +// try to hook me up to the exact initializing store.
1.1465 +Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1.1466 + Node* p = MemNode::Ideal_common(phase, can_reshape);
1.1467 + if (p) return (p == NodeSentinel) ? NULL : p;
1.1468 +
1.1469 + Node* ctrl = in(MemNode::Control);
1.1470 + Node* address = in(MemNode::Address);
1.1471 +
1.1472 + // Skip up past a SafePoint control. Cannot do this for Stores because
1.1473 + // pointer stores & cardmarks must stay on the same side of a SafePoint.
1.1474 + if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint &&
1.1475 + phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) {
1.1476 + ctrl = ctrl->in(0);
1.1477 + set_req(MemNode::Control,ctrl);
1.1478 + }
1.1479 +
1.1480 + intptr_t ignore = 0;
1.1481 + Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
1.1482 + if (base != NULL
1.1483 + && phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw) {
1.1484 + // Check for useless control edge in some common special cases
1.1485 + if (in(MemNode::Control) != NULL
1.1486 + && phase->type(base)->higher_equal(TypePtr::NOTNULL)
1.1487 + && all_controls_dominate(base, phase->C->start())) {
1.1488 + // A method-invariant, non-null address (constant or 'this' argument).
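// Java example (illustrative): a load of this.f in a virtual method
// has base == 'this', which is never null and is available at Start,
// so the load need not stay pinned under a control edge.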
1.1489 + set_req(MemNode::Control, NULL); 1.1490 + } 1.1491 + } 1.1492 + 1.1493 + Node* mem = in(MemNode::Memory); 1.1494 + const TypePtr *addr_t = phase->type(address)->isa_ptr(); 1.1495 + 1.1496 + if (can_reshape && (addr_t != NULL)) { 1.1497 + // try to optimize our memory input 1.1498 + Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, this, phase); 1.1499 + if (opt_mem != mem) { 1.1500 + set_req(MemNode::Memory, opt_mem); 1.1501 + if (phase->type( opt_mem ) == Type::TOP) return NULL; 1.1502 + return this; 1.1503 + } 1.1504 + const TypeOopPtr *t_oop = addr_t->isa_oopptr(); 1.1505 + if ((t_oop != NULL) && 1.1506 + (t_oop->is_known_instance_field() || 1.1507 + t_oop->is_ptr_to_boxed_value())) { 1.1508 + PhaseIterGVN *igvn = phase->is_IterGVN(); 1.1509 + if (igvn != NULL && igvn->_worklist.member(opt_mem)) { 1.1510 + // Delay this transformation until memory Phi is processed. 1.1511 + phase->is_IterGVN()->_worklist.push(this); 1.1512 + return NULL; 1.1513 + } 1.1514 + // Split instance field load through Phi. 1.1515 + Node* result = split_through_phi(phase); 1.1516 + if (result != NULL) return result; 1.1517 + 1.1518 + if (t_oop->is_ptr_to_boxed_value()) { 1.1519 + Node* result = eliminate_autobox(phase); 1.1520 + if (result != NULL) return result; 1.1521 + } 1.1522 + } 1.1523 + } 1.1524 + 1.1525 + // Check for prior store with a different base or offset; make Load 1.1526 + // independent. Skip through any number of them. Bail out if the stores 1.1527 + // are in an endless dead cycle and report no progress. This is a key 1.1528 + // transform for Reflection. However, if after skipping through the Stores 1.1529 + // we can't then fold up against a prior store do NOT do the transform as 1.1530 + // this amounts to using the 'Oracle' model of aliasing. It leaves the same 1.1531 + // array memory alive twice: once for the hoisted Load and again after the 1.1532 + // bypassed Store. This situation only works if EVERYBODY who does 1.1533 + // anti-dependence work knows how to bypass. I.e. we need all 1.1534 + // anti-dependence checks to ask the same Oracle. Right now, that Oracle is 1.1535 + // the alias index stuff. So instead, peek through Stores and IFF we can 1.1536 + // fold up, do so. 1.1537 + Node* prev_mem = find_previous_store(phase); 1.1538 + // Steps (a), (b): Walk past independent stores to find an exact match. 1.1539 + if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) { 1.1540 + // (c) See if we can fold up on the spot, but don't fold up here. 1.1541 + // Fold-up might require truncation (for LoadB/LoadS/LoadUS) or 1.1542 + // just return a prior value, which is done by Identity calls. 1.1543 + if (can_see_stored_value(prev_mem, phase)) { 1.1544 + // Make ready for step (d): 1.1545 + set_req(MemNode::Memory, prev_mem); 1.1546 + return this; 1.1547 + } 1.1548 + } 1.1549 + 1.1550 + return NULL; // No further progress 1.1551 +} 1.1552 + 1.1553 +// Helper to recognize certain Klass fields which are invariant across 1.1554 +// some group of array types (e.g., int[] or all T[] where T < Object). 1.1555 +const Type* 1.1556 +LoadNode::load_array_final_field(const TypeKlassPtr *tkls, 1.1557 + ciKlass* klass) const { 1.1558 + if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) { 1.1559 + // The field is Klass::_modifier_flags. Return its (constant) value. 1.1560 + // (Folds up the 2nd indirection in aClassConstant.getModifiers().) 
1.1561 + assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags"); 1.1562 + return TypeInt::make(klass->modifier_flags()); 1.1563 + } 1.1564 + if (tkls->offset() == in_bytes(Klass::access_flags_offset())) { 1.1565 + // The field is Klass::_access_flags. Return its (constant) value. 1.1566 + // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).) 1.1567 + assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags"); 1.1568 + return TypeInt::make(klass->access_flags()); 1.1569 + } 1.1570 + if (tkls->offset() == in_bytes(Klass::layout_helper_offset())) { 1.1571 + // The field is Klass::_layout_helper. Return its constant value if known. 1.1572 + assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper"); 1.1573 + return TypeInt::make(klass->layout_helper()); 1.1574 + } 1.1575 + 1.1576 + // No match. 1.1577 + return NULL; 1.1578 +} 1.1579 + 1.1580 +// Try to constant-fold a stable array element. 1.1581 +static const Type* fold_stable_ary_elem(const TypeAryPtr* ary, int off, BasicType loadbt) { 1.1582 + assert(ary->const_oop(), "array should be constant"); 1.1583 + assert(ary->is_stable(), "array should be stable"); 1.1584 + 1.1585 + // Decode the results of GraphKit::array_element_address. 1.1586 + ciArray* aobj = ary->const_oop()->as_array(); 1.1587 + ciConstant con = aobj->element_value_by_offset(off); 1.1588 + 1.1589 + if (con.basic_type() != T_ILLEGAL && !con.is_null_or_zero()) { 1.1590 + const Type* con_type = Type::make_from_constant(con); 1.1591 + if (con_type != NULL) { 1.1592 + if (con_type->isa_aryptr()) { 1.1593 + // Join with the array element type, in case it is also stable. 1.1594 + int dim = ary->stable_dimension(); 1.1595 + con_type = con_type->is_aryptr()->cast_to_stable(true, dim-1); 1.1596 + } 1.1597 + if (loadbt == T_NARROWOOP && con_type->isa_oopptr()) { 1.1598 + con_type = con_type->make_narrowoop(); 1.1599 + } 1.1600 +#ifndef PRODUCT 1.1601 + if (TraceIterativeGVN) { 1.1602 + tty->print("FoldStableValues: array element [off=%d]: con_type=", off); 1.1603 + con_type->dump(); tty->cr(); 1.1604 + } 1.1605 +#endif //PRODUCT 1.1606 + return con_type; 1.1607 + } 1.1608 + } 1.1609 + return NULL; 1.1610 +} 1.1611 + 1.1612 +//------------------------------Value----------------------------------------- 1.1613 +const Type *LoadNode::Value( PhaseTransform *phase ) const { 1.1614 + // Either input is TOP ==> the result is TOP 1.1615 + Node* mem = in(MemNode::Memory); 1.1616 + const Type *t1 = phase->type(mem); 1.1617 + if (t1 == Type::TOP) return Type::TOP; 1.1618 + Node* adr = in(MemNode::Address); 1.1619 + const TypePtr* tp = phase->type(adr)->isa_ptr(); 1.1620 + if (tp == NULL || tp->empty()) return Type::TOP; 1.1621 + int off = tp->offset(); 1.1622 + assert(off != Type::OffsetTop, "case covered by TypePtr::empty"); 1.1623 + Compile* C = phase->C; 1.1624 + 1.1625 + // Try to guess loaded type from pointer type 1.1626 + if (tp->isa_aryptr()) { 1.1627 + const TypeAryPtr* ary = tp->is_aryptr(); 1.1628 + const Type* t = ary->elem(); 1.1629 + 1.1630 + // Determine whether the reference is beyond the header or not, by comparing 1.1631 + // the offset against the offset of the start of the array's data. 1.1632 + // Different array types begin at slightly different offsets (12 vs. 16). 1.1633 + // We choose T_BYTE as an example base type that is least restrictive 1.1634 + // as to alignment, which will therefore produce the smallest 1.1635 + // possible base offset. 
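// (Illustrative: element types with stricter alignment, such as T_LONG,
// may place their payload at a larger offset than a byte array does, so
// the T_BYTE base offset is the most conservative lower bound.)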
1.1636 + const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1.1637 + const bool off_beyond_header = ((uint)off >= (uint)min_base_off);
1.1638 +
1.1639 + // Try to constant-fold a stable array element.
1.1640 + if (FoldStableValues && ary->is_stable() && ary->const_oop() != NULL) {
1.1641 + // Make sure the reference is not into the header and the offset is constant
1.1642 + if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
1.1643 + const Type* con_type = fold_stable_ary_elem(ary, off, memory_type());
1.1644 + if (con_type != NULL) {
1.1645 + return con_type;
1.1646 + }
1.1647 + }
1.1648 + }
1.1649 +
1.1650 + // Don't do this for integer types. There is only potential profit if
1.1651 + // the element type t is lower than _type; that is, for int types, if _type is
1.1652 + // more restrictive than t. This only happens here if one is short and the other
1.1653 + // char (both 16 bits), and in those cases we've made an intentional decision
1.1654 + // to use one kind of load over the other. See AndINode::Ideal and 4965907.
1.1655 + // Also, do not try to narrow the type for a LoadKlass, regardless of offset.
1.1656 + //
1.1657 + // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8))
1.1658 + // where the _gvn.type of the AddP is wider than 8. This occurs when an earlier
1.1659 + // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been
1.1660 + // subsumed by p1. If p1 is on the worklist but has not yet been re-transformed,
1.1661 + // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any.
1.1662 + // In fact, that could have been the original type of p1, and p1 could have
1.1663 + // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
1.1664 + // expression (LShiftL quux 3) independently optimized to the constant 8.
1.1665 + if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
1.1666 + && (_type->isa_vect() == NULL)
1.1667 + && Opcode() != Op_LoadKlass && Opcode() != Op_LoadNKlass) {
1.1668 + // t might actually be lower than _type, if _type is a unique
1.1669 + // concrete subclass of abstract class t.
1.1670 + if (off_beyond_header) { // is the offset beyond the header?
1.1671 + const Type* jt = t->join_speculative(_type);
1.1672 + // In any case, do not allow the join, per se, to empty out the type.
1.1673 + if (jt->empty() && !t->empty()) {
1.1674 + // This can happen if an interface-typed array narrows to a class type.
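// (Illustrative: the array was statically typed I[] for some interface I,
// while _type is a concrete class that does not implement I; the join of
// the two is empty even though neither type is empty on its own.)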
1.1675 + jt = _type; 1.1676 + } 1.1677 +#ifdef ASSERT 1.1678 + if (phase->C->eliminate_boxing() && adr->is_AddP()) { 1.1679 + // The pointers in the autobox arrays are always non-null 1.1680 + Node* base = adr->in(AddPNode::Base); 1.1681 + if ((base != NULL) && base->is_DecodeN()) { 1.1682 + // Get LoadN node which loads IntegerCache.cache field 1.1683 + base = base->in(1); 1.1684 + } 1.1685 + if ((base != NULL) && base->is_Con()) { 1.1686 + const TypeAryPtr* base_type = base->bottom_type()->isa_aryptr(); 1.1687 + if ((base_type != NULL) && base_type->is_autobox_cache()) { 1.1688 + // It could be narrow oop 1.1689 + assert(jt->make_ptr()->ptr() == TypePtr::NotNull,"sanity"); 1.1690 + } 1.1691 + } 1.1692 + } 1.1693 +#endif 1.1694 + return jt; 1.1695 + } 1.1696 + } 1.1697 + } else if (tp->base() == Type::InstPtr) { 1.1698 + ciEnv* env = C->env(); 1.1699 + const TypeInstPtr* tinst = tp->is_instptr(); 1.1700 + ciKlass* klass = tinst->klass(); 1.1701 + assert( off != Type::OffsetBot || 1.1702 + // arrays can be cast to Objects 1.1703 + tp->is_oopptr()->klass()->is_java_lang_Object() || 1.1704 + // unsafe field access may not have a constant offset 1.1705 + C->has_unsafe_access(), 1.1706 + "Field accesses must be precise" ); 1.1707 + // For oop loads, we expect the _type to be precise 1.1708 + if (klass == env->String_klass() && 1.1709 + adr->is_AddP() && off != Type::OffsetBot) { 1.1710 + // For constant Strings treat the final fields as compile time constants. 1.1711 + Node* base = adr->in(AddPNode::Base); 1.1712 + const TypeOopPtr* t = phase->type(base)->isa_oopptr(); 1.1713 + if (t != NULL && t->singleton()) { 1.1714 + ciField* field = env->String_klass()->get_field_by_offset(off, false); 1.1715 + if (field != NULL && field->is_final()) { 1.1716 + ciObject* string = t->const_oop(); 1.1717 + ciConstant constant = string->as_instance()->field_value(field); 1.1718 + if (constant.basic_type() == T_INT) { 1.1719 + return TypeInt::make(constant.as_int()); 1.1720 + } else if (constant.basic_type() == T_ARRAY) { 1.1721 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { 1.1722 + return TypeNarrowOop::make_from_constant(constant.as_object(), true); 1.1723 + } else { 1.1724 + return TypeOopPtr::make_from_constant(constant.as_object(), true); 1.1725 + } 1.1726 + } 1.1727 + } 1.1728 + } 1.1729 + } 1.1730 + // Optimizations for constant objects 1.1731 + ciObject* const_oop = tinst->const_oop(); 1.1732 + if (const_oop != NULL) { 1.1733 + // For constant Boxed value treat the target field as a compile time constant. 1.1734 + if (tinst->is_ptr_to_boxed_value()) { 1.1735 + return tinst->get_const_boxed_value(); 1.1736 + } else 1.1737 + // For constant CallSites treat the target field as a compile time constant. 1.1738 + if (const_oop->is_call_site()) { 1.1739 + ciCallSite* call_site = const_oop->as_call_site(); 1.1740 + ciField* field = call_site->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/ false); 1.1741 + if (field != NULL && field->is_call_site_target()) { 1.1742 + ciMethodHandle* target = call_site->get_target(); 1.1743 + if (target != NULL) { // just in case 1.1744 + ciConstant constant(T_OBJECT, target); 1.1745 + const Type* t; 1.1746 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { 1.1747 + t = TypeNarrowOop::make_from_constant(constant.as_object(), true); 1.1748 + } else { 1.1749 + t = TypeOopPtr::make_from_constant(constant.as_object(), true); 1.1750 + } 1.1751 + // Add a dependence for invalidation of the optimization. 
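// (Illustrative: for a mutable call site this dependency causes code
// compiled against the old target to be invalidated if the target is
// changed later, so folding it to a constant here stays safe.)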
1.1752 + if (!call_site->is_constant_call_site()) { 1.1753 + C->dependencies()->assert_call_site_target_value(call_site, target); 1.1754 + } 1.1755 + return t; 1.1756 + } 1.1757 + } 1.1758 + } 1.1759 + } 1.1760 + } else if (tp->base() == Type::KlassPtr) { 1.1761 + assert( off != Type::OffsetBot || 1.1762 + // arrays can be cast to Objects 1.1763 + tp->is_klassptr()->klass()->is_java_lang_Object() || 1.1764 + // also allow array-loading from the primary supertype 1.1765 + // array during subtype checks 1.1766 + Opcode() == Op_LoadKlass, 1.1767 + "Field accesses must be precise" ); 1.1768 + // For klass/static loads, we expect the _type to be precise 1.1769 + } 1.1770 + 1.1771 + const TypeKlassPtr *tkls = tp->isa_klassptr(); 1.1772 + if (tkls != NULL && !StressReflectiveCode) { 1.1773 + ciKlass* klass = tkls->klass(); 1.1774 + if (klass->is_loaded() && tkls->klass_is_exact()) { 1.1775 + // We are loading a field from a Klass metaobject whose identity 1.1776 + // is known at compile time (the type is "exact" or "precise"). 1.1777 + // Check for fields we know are maintained as constants by the VM. 1.1778 + if (tkls->offset() == in_bytes(Klass::super_check_offset_offset())) { 1.1779 + // The field is Klass::_super_check_offset. Return its (constant) value. 1.1780 + // (Folds up type checking code.) 1.1781 + assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset"); 1.1782 + return TypeInt::make(klass->super_check_offset()); 1.1783 + } 1.1784 + // Compute index into primary_supers array 1.1785 + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*); 1.1786 + // Check for overflowing; use unsigned compare to handle the negative case. 1.1787 + if( depth < ciKlass::primary_super_limit() ) { 1.1788 + // The field is an element of Klass::_primary_supers. Return its (constant) value. 1.1789 + // (Folds up type checking code.) 1.1790 + assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers"); 1.1791 + ciKlass *ss = klass->super_of_depth(depth); 1.1792 + return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; 1.1793 + } 1.1794 + const Type* aift = load_array_final_field(tkls, klass); 1.1795 + if (aift != NULL) return aift; 1.1796 + if (tkls->offset() == in_bytes(ArrayKlass::component_mirror_offset()) 1.1797 + && klass->is_array_klass()) { 1.1798 + // The field is ArrayKlass::_component_mirror. Return its (constant) value. 1.1799 + // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.) 1.1800 + assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror"); 1.1801 + return TypeInstPtr::make(klass->as_array_klass()->component_mirror()); 1.1802 + } 1.1803 + if (tkls->offset() == in_bytes(Klass::java_mirror_offset())) { 1.1804 + // The field is Klass::_java_mirror. Return its (constant) value. 1.1805 + // (Folds up the 2nd indirection in anObjConstant.getClass().) 1.1806 + assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror"); 1.1807 + return TypeInstPtr::make(klass->java_mirror()); 1.1808 + } 1.1809 + } 1.1810 + 1.1811 + // We can still check if we are loading from the primary_supers array at a 1.1812 + // shallow enough depth. Even though the klass is not exact, entries less 1.1813 + // than or equal to its super depth are correct. 
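// Java example (illustrative): with class B extends A, every subclass of
// B shares B's prefix of the _primary_supers array, so the entry at A's
// depth folds to A even though the receiver's exact klass is unknown.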
1.1814 + if (klass->is_loaded() ) { 1.1815 + ciType *inner = klass; 1.1816 + while( inner->is_obj_array_klass() ) 1.1817 + inner = inner->as_obj_array_klass()->base_element_type(); 1.1818 + if( inner->is_instance_klass() && 1.1819 + !inner->as_instance_klass()->flags().is_interface() ) { 1.1820 + // Compute index into primary_supers array 1.1821 + juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*); 1.1822 + // Check for overflowing; use unsigned compare to handle the negative case. 1.1823 + if( depth < ciKlass::primary_super_limit() && 1.1824 + depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case 1.1825 + // The field is an element of Klass::_primary_supers. Return its (constant) value. 1.1826 + // (Folds up type checking code.) 1.1827 + assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers"); 1.1828 + ciKlass *ss = klass->super_of_depth(depth); 1.1829 + return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR; 1.1830 + } 1.1831 + } 1.1832 + } 1.1833 + 1.1834 + // If the type is enough to determine that the thing is not an array, 1.1835 + // we can give the layout_helper a positive interval type. 1.1836 + // This will help short-circuit some reflective code. 1.1837 + if (tkls->offset() == in_bytes(Klass::layout_helper_offset()) 1.1838 + && !klass->is_array_klass() // not directly typed as an array 1.1839 + && !klass->is_interface() // specifically not Serializable & Cloneable 1.1840 + && !klass->is_java_lang_Object() // not the supertype of all T[] 1.1841 + ) { 1.1842 + // Note: When interfaces are reliable, we can narrow the interface 1.1843 + // test to (klass != Serializable && klass != Cloneable). 1.1844 + assert(Opcode() == Op_LoadI, "must load an int from _layout_helper"); 1.1845 + jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false); 1.1846 + // The key property of this type is that it folds up tests 1.1847 + // for array-ness, since it proves that the layout_helper is positive. 1.1848 + // Thus, a generic value like the basic object layout helper works fine. 1.1849 + return TypeInt::make(min_size, max_jint, Type::WidenMin); 1.1850 + } 1.1851 + } 1.1852 + 1.1853 + // If we are loading from a freshly-allocated object, produce a zero, 1.1854 + // if the load is provably beyond the header of the object. 1.1855 + // (Also allow a variable load from a fresh array to produce zero.) 
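// Java example (illustrative):
//   int[] a = new int[n];
//   return a[i];   // no intervening store: the load folds to zero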
1.1856 + const TypeOopPtr *tinst = tp->isa_oopptr();
1.1857 + bool is_instance = (tinst != NULL) && tinst->is_known_instance_field();
1.1858 + bool is_boxed_value = (tinst != NULL) && tinst->is_ptr_to_boxed_value();
1.1859 + if (ReduceFieldZeroing || is_instance || is_boxed_value) {
1.1860 + Node* value = can_see_stored_value(mem,phase);
1.1861 + if (value != NULL && value->is_Con()) {
1.1862 + assert(value->bottom_type()->higher_equal(_type),"sanity");
1.1863 + return value->bottom_type();
1.1864 + }
1.1865 + }
1.1866 +
1.1867 + if (is_instance) {
1.1868 + // If we have an instance type and our memory input is the
1.1869 + // program's initial memory state, there is no matching store,
1.1870 + // so just return a zero of the appropriate type
1.1871 + Node *mem = in(MemNode::Memory);
1.1872 + if (mem->is_Parm() && mem->in(0)->is_Start()) {
1.1873 + assert(mem->as_Parm()->_con == TypeFunc::Memory, "must be memory Parm");
1.1874 + return Type::get_zero_type(_type->basic_type());
1.1875 + }
1.1876 + }
1.1877 + return _type;
1.1878 +}
1.1879 +
1.1880 +//------------------------------match_edge-------------------------------------
1.1881 +// Do we Match on this edge index or not? Match only the address.
1.1882 +uint LoadNode::match_edge(uint idx) const {
1.1883 + return idx == MemNode::Address;
1.1884 +}
1.1885 +
1.1886 +//--------------------------LoadBNode::Ideal--------------------------------------
1.1887 +//
1.1888 +// If the previous store is to the same address as this load,
1.1889 +// and the value stored was larger than a byte, replace this load
1.1890 +// with the value stored truncated to a byte. If no truncation is
1.1891 +// needed, the replacement is done in LoadNode::Identity().
1.1892 +//
1.1893 +Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1.1894 + Node* mem = in(MemNode::Memory);
1.1895 + Node* value = can_see_stored_value(mem,phase);
1.1896 + if( value && !phase->type(value)->higher_equal( _type ) ) {
1.1897 + Node *result = phase->transform( new (phase->C) LShiftINode(value, phase->intcon(24)) );
1.1898 + return new (phase->C) RShiftINode(result, phase->intcon(24));
1.1899 + }
1.1900 + // Identity call will handle the case where truncation is not needed.
1.1901 + return LoadNode::Ideal(phase, can_reshape);
1.1902 +}
1.1903 +
1.1904 +const Type* LoadBNode::Value(PhaseTransform *phase) const {
1.1905 + Node* mem = in(MemNode::Memory);
1.1906 + Node* value = can_see_stored_value(mem,phase);
1.1907 + if (value != NULL && value->is_Con() &&
1.1908 + !value->bottom_type()->higher_equal(_type)) {
1.1909 + // If the input to the store does not fit with the load's result type,
1.1910 + // it must be truncated. We can't delay until Ideal call since
1.1911 + // a singleton Value is needed for split_thru_phi optimization.
1.1912 + int con = value->get_int();
1.1913 + return TypeInt::make((con << 24) >> 24);
1.1914 + }
1.1915 + return LoadNode::Value(phase);
1.1916 +}
1.1917 +
1.1918 +//--------------------------LoadUBNode::Ideal-------------------------------------
1.1919 +//
1.1920 +// If the previous store is to the same address as this load,
1.1921 +// and the value stored was larger than a byte, replace this load
1.1922 +// with the value stored truncated to a byte. If no truncation is
1.1923 +// needed, the replacement is done in LoadNode::Identity().
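// For example (illustrative): after a StoreI of the constant 0x1FF to the
// same address, a LoadUB folds to 0x1FF & 0xFF == 0xFF below, whereas a
// LoadB would fold to (0x1FF << 24) >> 24 == -1.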
1.1924 +// 1.1925 +Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) { 1.1926 + Node* mem = in(MemNode::Memory); 1.1927 + Node* value = can_see_stored_value(mem, phase); 1.1928 + if (value && !phase->type(value)->higher_equal(_type)) 1.1929 + return new (phase->C) AndINode(value, phase->intcon(0xFF)); 1.1930 + // Identity call will handle the case where truncation is not needed. 1.1931 + return LoadNode::Ideal(phase, can_reshape); 1.1932 +} 1.1933 + 1.1934 +const Type* LoadUBNode::Value(PhaseTransform *phase) const { 1.1935 + Node* mem = in(MemNode::Memory); 1.1936 + Node* value = can_see_stored_value(mem,phase); 1.1937 + if (value != NULL && value->is_Con() && 1.1938 + !value->bottom_type()->higher_equal(_type)) { 1.1939 + // If the input to the store does not fit with the load's result type, 1.1940 + // it must be truncated. We can't delay until Ideal call since 1.1941 + // a singleton Value is needed for split_thru_phi optimization. 1.1942 + int con = value->get_int(); 1.1943 + return TypeInt::make(con & 0xFF); 1.1944 + } 1.1945 + return LoadNode::Value(phase); 1.1946 +} 1.1947 + 1.1948 +//--------------------------LoadUSNode::Ideal------------------------------------- 1.1949 +// 1.1950 +// If the previous store is to the same address as this load, 1.1951 +// and the value stored was larger than a char, replace this load 1.1952 +// with the value stored truncated to a char. If no truncation is 1.1953 +// needed, the replacement is done in LoadNode::Identity(). 1.1954 +// 1.1955 +Node *LoadUSNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1956 + Node* mem = in(MemNode::Memory); 1.1957 + Node* value = can_see_stored_value(mem,phase); 1.1958 + if( value && !phase->type(value)->higher_equal( _type ) ) 1.1959 + return new (phase->C) AndINode(value,phase->intcon(0xFFFF)); 1.1960 + // Identity call will handle the case where truncation is not needed. 1.1961 + return LoadNode::Ideal(phase, can_reshape); 1.1962 +} 1.1963 + 1.1964 +const Type* LoadUSNode::Value(PhaseTransform *phase) const { 1.1965 + Node* mem = in(MemNode::Memory); 1.1966 + Node* value = can_see_stored_value(mem,phase); 1.1967 + if (value != NULL && value->is_Con() && 1.1968 + !value->bottom_type()->higher_equal(_type)) { 1.1969 + // If the input to the store does not fit with the load's result type, 1.1970 + // it must be truncated. We can't delay until Ideal call since 1.1971 + // a singleton Value is needed for split_thru_phi optimization. 1.1972 + int con = value->get_int(); 1.1973 + return TypeInt::make(con & 0xFFFF); 1.1974 + } 1.1975 + return LoadNode::Value(phase); 1.1976 +} 1.1977 + 1.1978 +//--------------------------LoadSNode::Ideal-------------------------------------- 1.1979 +// 1.1980 +// If the previous store is to the same address as this load, 1.1981 +// and the value stored was larger than a short, replace this load 1.1982 +// with the value stored truncated to a short. If no truncation is 1.1983 +// needed, the replacement is done in LoadNode::Identity(). 1.1984 +// 1.1985 +Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.1986 + Node* mem = in(MemNode::Memory); 1.1987 + Node* value = can_see_stored_value(mem,phase); 1.1988 + if( value && !phase->type(value)->higher_equal( _type ) ) { 1.1989 + Node *result = phase->transform( new (phase->C) LShiftINode(value, phase->intcon(16)) ); 1.1990 + return new (phase->C) RShiftINode(result, phase->intcon(16)); 1.1991 + } 1.1992 + // Identity call will handle the case where truncation is not needed. 
1.1993 + return LoadNode::Ideal(phase, can_reshape); 1.1994 +} 1.1995 + 1.1996 +const Type* LoadSNode::Value(PhaseTransform *phase) const { 1.1997 + Node* mem = in(MemNode::Memory); 1.1998 + Node* value = can_see_stored_value(mem,phase); 1.1999 + if (value != NULL && value->is_Con() && 1.2000 + !value->bottom_type()->higher_equal(_type)) { 1.2001 + // If the input to the store does not fit with the load's result type, 1.2002 + // it must be truncated. We can't delay until Ideal call since 1.2003 + // a singleton Value is needed for split_thru_phi optimization. 1.2004 + int con = value->get_int(); 1.2005 + return TypeInt::make((con << 16) >> 16); 1.2006 + } 1.2007 + return LoadNode::Value(phase); 1.2008 +} 1.2009 + 1.2010 +//============================================================================= 1.2011 +//----------------------------LoadKlassNode::make------------------------------ 1.2012 +// Polymorphic factory method: 1.2013 +Node *LoadKlassNode::make( PhaseGVN& gvn, Node *mem, Node *adr, const TypePtr* at, const TypeKlassPtr *tk ) { 1.2014 + Compile* C = gvn.C; 1.2015 + Node *ctl = NULL; 1.2016 + // sanity check the alias category against the created node type 1.2017 + const TypePtr *adr_type = adr->bottom_type()->isa_ptr(); 1.2018 + assert(adr_type != NULL, "expecting TypeKlassPtr"); 1.2019 +#ifdef _LP64 1.2020 + if (adr_type->is_ptr_to_narrowklass()) { 1.2021 + assert(UseCompressedClassPointers, "no compressed klasses"); 1.2022 + Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), MemNode::unordered)); 1.2023 + return new (C) DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr()); 1.2024 + } 1.2025 +#endif 1.2026 + assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop"); 1.2027 + return new (C) LoadKlassNode(ctl, mem, adr, at, tk, MemNode::unordered); 1.2028 +} 1.2029 + 1.2030 +//------------------------------Value------------------------------------------ 1.2031 +const Type *LoadKlassNode::Value( PhaseTransform *phase ) const { 1.2032 + return klass_value_common(phase); 1.2033 +} 1.2034 + 1.2035 +const Type *LoadNode::klass_value_common( PhaseTransform *phase ) const { 1.2036 + // Either input is TOP ==> the result is TOP 1.2037 + const Type *t1 = phase->type( in(MemNode::Memory) ); 1.2038 + if (t1 == Type::TOP) return Type::TOP; 1.2039 + Node *adr = in(MemNode::Address); 1.2040 + const Type *t2 = phase->type( adr ); 1.2041 + if (t2 == Type::TOP) return Type::TOP; 1.2042 + const TypePtr *tp = t2->is_ptr(); 1.2043 + if (TypePtr::above_centerline(tp->ptr()) || 1.2044 + tp->ptr() == TypePtr::Null) return Type::TOP; 1.2045 + 1.2046 + // Return a more precise klass, if possible 1.2047 + const TypeInstPtr *tinst = tp->isa_instptr(); 1.2048 + if (tinst != NULL) { 1.2049 + ciInstanceKlass* ik = tinst->klass()->as_instance_klass(); 1.2050 + int offset = tinst->offset(); 1.2051 + if (ik == phase->C->env()->Class_klass() 1.2052 + && (offset == java_lang_Class::klass_offset_in_bytes() || 1.2053 + offset == java_lang_Class::array_klass_offset_in_bytes())) { 1.2054 + // We are loading a special hidden field from a Class mirror object, 1.2055 + // the field which points to the VM's Klass metaobject. 1.2056 + ciType* t = tinst->java_mirror_type(); 1.2057 + // java_mirror_type returns non-null for compile-time Class constants. 
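// Java example (illustrative): in String.class.getModifiers() the
// mirror is a compile-time constant, so the hidden klass-field load
// below folds to the constant Klass* of java.lang.String.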
1.2058 + if (t != NULL) { 1.2059 + // constant oop => constant klass 1.2060 + if (offset == java_lang_Class::array_klass_offset_in_bytes()) { 1.2061 + if (t->is_void()) { 1.2062 + // We cannot create a void array. Since void is a primitive type return null 1.2063 + // klass. Users of this result need to do a null check on the returned klass. 1.2064 + return TypePtr::NULL_PTR; 1.2065 + } 1.2066 + return TypeKlassPtr::make(ciArrayKlass::make(t)); 1.2067 + } 1.2068 + if (!t->is_klass()) { 1.2069 + // a primitive Class (e.g., int.class) has NULL for a klass field 1.2070 + return TypePtr::NULL_PTR; 1.2071 + } 1.2072 + // (Folds up the 1st indirection in aClassConstant.getModifiers().) 1.2073 + return TypeKlassPtr::make(t->as_klass()); 1.2074 + } 1.2075 + // non-constant mirror, so we can't tell what's going on 1.2076 + } 1.2077 + if( !ik->is_loaded() ) 1.2078 + return _type; // Bail out if not loaded 1.2079 + if (offset == oopDesc::klass_offset_in_bytes()) { 1.2080 + if (tinst->klass_is_exact()) { 1.2081 + return TypeKlassPtr::make(ik); 1.2082 + } 1.2083 + // See if we can become precise: no subklasses and no interface 1.2084 + // (Note: We need to support verified interfaces.) 1.2085 + if (!ik->is_interface() && !ik->has_subklass()) { 1.2086 + //assert(!UseExactTypes, "this code should be useless with exact types"); 1.2087 + // Add a dependence; if any subclass added we need to recompile 1.2088 + if (!ik->is_final()) { 1.2089 + // %%% should use stronger assert_unique_concrete_subtype instead 1.2090 + phase->C->dependencies()->assert_leaf_type(ik); 1.2091 + } 1.2092 + // Return precise klass 1.2093 + return TypeKlassPtr::make(ik); 1.2094 + } 1.2095 + 1.2096 + // Return root of possible klass 1.2097 + return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/); 1.2098 + } 1.2099 + } 1.2100 + 1.2101 + // Check for loading klass from an array 1.2102 + const TypeAryPtr *tary = tp->isa_aryptr(); 1.2103 + if( tary != NULL ) { 1.2104 + ciKlass *tary_klass = tary->klass(); 1.2105 + if (tary_klass != NULL // can be NULL when at BOTTOM or TOP 1.2106 + && tary->offset() == oopDesc::klass_offset_in_bytes()) { 1.2107 + if (tary->klass_is_exact()) { 1.2108 + return TypeKlassPtr::make(tary_klass); 1.2109 + } 1.2110 + ciArrayKlass *ak = tary->klass()->as_array_klass(); 1.2111 + // If the klass is an object array, we defer the question to the 1.2112 + // array component klass. 1.2113 + if( ak->is_obj_array_klass() ) { 1.2114 + assert( ak->is_loaded(), "" ); 1.2115 + ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass(); 1.2116 + if( base_k->is_loaded() && base_k->is_instance_klass() ) { 1.2117 + ciInstanceKlass* ik = base_k->as_instance_klass(); 1.2118 + // See if we can become precise: no subklasses and no interface 1.2119 + if (!ik->is_interface() && !ik->has_subklass()) { 1.2120 + //assert(!UseExactTypes, "this code should be useless with exact types"); 1.2121 + // Add a dependence; if any subclass added we need to recompile 1.2122 + if (!ik->is_final()) { 1.2123 + phase->C->dependencies()->assert_leaf_type(ik); 1.2124 + } 1.2125 + // Return precise array klass 1.2126 + return TypeKlassPtr::make(ak); 1.2127 + } 1.2128 + } 1.2129 + return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/); 1.2130 + } else { // Found a type-array? 
1.2131 + //assert(!UseExactTypes, "this code should be useless with exact types");
1.2132 + assert( ak->is_type_array_klass(), "" );
1.2133 + return TypeKlassPtr::make(ak); // These are always precise
1.2134 + }
1.2135 + }
1.2136 + }
1.2137 +
1.2138 + // Check for loading klass from an array klass
1.2139 + const TypeKlassPtr *tkls = tp->isa_klassptr();
1.2140 + if (tkls != NULL && !StressReflectiveCode) {
1.2141 + ciKlass* klass = tkls->klass();
1.2142 + if( !klass->is_loaded() )
1.2143 + return _type; // Bail out if not loaded
1.2144 + if( klass->is_obj_array_klass() &&
1.2145 + tkls->offset() == in_bytes(ObjArrayKlass::element_klass_offset())) {
1.2146 + ciKlass* elem = klass->as_obj_array_klass()->element_klass();
1.2147 + // // Always returning precise element type is incorrect,
1.2148 + // // e.g., element type could be object and array may contain strings
1.2149 + // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);
1.2150 +
1.2151 + // The array's TypeKlassPtr was declared 'precise' or 'not precise'
1.2152 + // according to the element type's subclassing.
1.2153 + return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
1.2154 + }
1.2155 + if( klass->is_instance_klass() && tkls->klass_is_exact() &&
1.2156 + tkls->offset() == in_bytes(Klass::super_offset())) {
1.2157 + ciKlass* sup = klass->as_instance_klass()->super();
1.2158 + // The field is Klass::_super. Return its (constant) value.
1.2159 + // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
1.2160 + return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
1.2161 + }
1.2162 + }
1.2163 +
1.2164 + // Bailout case
1.2165 + return LoadNode::Value(phase);
1.2166 +}
1.2167 +
1.2168 +//------------------------------Identity---------------------------------------
1.2169 +// To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
1.2170 +// Also feed through the klass in Allocate(...klass...)._klass.
1.2171 +Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
1.2172 + return klass_identity_common(phase);
1.2173 +}
1.2174 +
1.2175 +Node* LoadNode::klass_identity_common(PhaseTransform *phase ) {
1.2176 + Node* x = LoadNode::Identity(phase);
1.2177 + if (x != this) return x;
1.2178 +
1.2179 + // Take apart the address into an oop and an offset.
1.2180 + // Return 'this' if we cannot.
1.2181 + Node* adr = in(MemNode::Address);
1.2182 + intptr_t offset = 0;
1.2183 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
1.2184 + if (base == NULL) return this;
1.2185 + const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
1.2186 + if (toop == NULL) return this;
1.2187 +
1.2188 + // We can fetch the klass directly through an AllocateNode.
1.2189 + // This works even if the klass is not constant (clone or newArray).
1.2190 + if (offset == oopDesc::klass_offset_in_bytes()) {
1.2191 + Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
1.2192 + if (allocated_klass != NULL) {
1.2193 + return allocated_klass;
1.2194 + }
1.2195 + }
1.2196 +
1.2197 + // Simplify k.java_mirror.as_klass to plain k, where k is a Klass*.
1.2198 + // Simplify ak.component_mirror.array_klass to plain ak, ak an ArrayKlass.
1.2199 + // See inline_native_Class_query for occurrences of these patterns.
1.2200 + // Java Example: x.getClass().isAssignableFrom(y)
1.2201 + // Java Example: Array.newInstance(x.getClass().getComponentType(), n)
1.2202 + //
1.2203 + // This improves reflective code, often making the Class
1.2204 + // mirror go completely dead. (Current exception: Class
1.2205 + // mirrors may appear in debug info, but we could clean them out by
1.2206 + // introducing a new debug info operator for Klass*.java_mirror).
1.2207 + if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
1.2208 + && (offset == java_lang_Class::klass_offset_in_bytes() ||
1.2209 + offset == java_lang_Class::array_klass_offset_in_bytes())) {
1.2210 + // We are loading a special hidden field from a Class mirror,
1.2211 + // the field which points to its Klass or ArrayKlass metaobject.
1.2212 + if (base->is_Load()) {
1.2213 + Node* adr2 = base->in(MemNode::Address);
1.2214 + const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
1.2215 + if (tkls != NULL && !tkls->empty()
1.2216 + && (tkls->klass()->is_instance_klass() ||
1.2217 + tkls->klass()->is_array_klass())
1.2218 + && adr2->is_AddP()
1.2219 + ) {
1.2220 + int mirror_field = in_bytes(Klass::java_mirror_offset());
1.2221 + if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
1.2222 + mirror_field = in_bytes(ArrayKlass::component_mirror_offset());
1.2223 + }
1.2224 + if (tkls->offset() == mirror_field) {
1.2225 + return adr2->in(AddPNode::Base);
1.2226 + }
1.2227 + }
1.2228 + }
1.2229 + }
1.2230 +
1.2231 + return this;
1.2232 +}
1.2233 +
1.2234 +
1.2235 +//------------------------------Value------------------------------------------
1.2236 +const Type *LoadNKlassNode::Value( PhaseTransform *phase ) const {
1.2237 + const Type *t = klass_value_common(phase);
1.2238 + if (t == Type::TOP)
1.2239 + return t;
1.2240 +
1.2241 + return t->make_narrowklass();
1.2242 +}
1.2243 +
1.2244 +//------------------------------Identity---------------------------------------
1.2245 +// To clean up reflective code, simplify k.java_mirror.as_klass to narrow k.
1.2246 +// Also feed through the klass in Allocate(...klass...)._klass.
1.2247 +Node* LoadNKlassNode::Identity( PhaseTransform *phase ) {
1.2248 + Node *x = klass_identity_common(phase);
1.2249 +
1.2250 + const Type *t = phase->type( x );
1.2251 + if( t == Type::TOP ) return x;
1.2252 + if( t->isa_narrowklass()) return x;
1.2253 + assert (!t->isa_narrowoop(), "no narrow oop here");
1.2254 +
1.2255 + return phase->transform(new (phase->C) EncodePKlassNode(x, t->make_narrowklass()));
1.2256 +}
1.2257 +
1.2258 +//------------------------------Value-----------------------------------------
1.2259 +const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
1.2260 + // Either input is TOP ==> the result is TOP
1.2261 + const Type *t1 = phase->type( in(MemNode::Memory) );
1.2262 + if( t1 == Type::TOP ) return Type::TOP;
1.2263 + Node *adr = in(MemNode::Address);
1.2264 + const Type *t2 = phase->type( adr );
1.2265 + if( t2 == Type::TOP ) return Type::TOP;
1.2266 + const TypePtr *tp = t2->is_ptr();
1.2267 + if (TypePtr::above_centerline(tp->ptr())) return Type::TOP;
1.2268 + const TypeAryPtr *tap = tp->isa_aryptr();
1.2269 + if( !tap ) return _type;
1.2270 + return tap->size();
1.2271 +}
1.2272 +
1.2273 +//-------------------------------Ideal---------------------------------------
1.2274 +// Feed through the length in AllocateArray(...length...)._length.
1.2275 +Node *LoadRangeNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1.2276 + Node* p = MemNode::Ideal_common(phase, can_reshape);
1.2277 + if (p) return (p == NodeSentinel) ? NULL : p;
1.2278 +
1.2279 + // Take apart the address into an oop and an offset.
1.2280 + // Return NULL if we cannot.
1.2281 + Node* adr = in(MemNode::Address);
1.2282 + intptr_t offset = 0;
1.2283 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
1.2284 + if (base == NULL) return NULL;
1.2285 + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
1.2286 + if (tary == NULL) return NULL;
1.2287 +
1.2288 + // We can fetch the length directly through an AllocateArrayNode.
1.2289 + // This works even if the length is not constant (clone or newArray).
1.2290 + if (offset == arrayOopDesc::length_offset_in_bytes()) {
1.2291 + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
1.2292 + if (alloc != NULL) {
1.2293 + Node* allocated_length = alloc->Ideal_length();
1.2294 + Node* len = alloc->make_ideal_length(tary, phase);
1.2295 + if (allocated_length != len) {
1.2296 + // New CastII improves on this.
1.2297 + return len;
1.2298 + }
1.2299 + }
1.2300 + }
1.2301 +
1.2302 + return NULL;
1.2303 +}
1.2304 +
1.2305 +//------------------------------Identity---------------------------------------
1.2306 +// Feed through the length in AllocateArray(...length...)._length.
1.2307 +Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
1.2308 + Node* x = LoadINode::Identity(phase);
1.2309 + if (x != this) return x;
1.2310 +
1.2311 + // Take apart the address into an oop and an offset.
1.2312 + // Return 'this' if we cannot.
1.2313 + Node* adr = in(MemNode::Address);
1.2314 + intptr_t offset = 0;
1.2315 + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
1.2316 + if (base == NULL) return this;
1.2317 + const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
1.2318 + if (tary == NULL) return this;
1.2319 +
1.2320 + // We can fetch the length directly through an AllocateArrayNode.
1.2321 + // This works even if the length is not constant (clone or newArray).
1.2322 + if (offset == arrayOopDesc::length_offset_in_bytes()) {
1.2323 + AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(base, phase);
1.2324 + if (alloc != NULL) {
1.2325 + Node* allocated_length = alloc->Ideal_length();
1.2326 + // Do not allow make_ideal_length to allocate a CastII node.
1.2327 + Node* len = alloc->make_ideal_length(tary, phase, false);
1.2328 + if (allocated_length == len) {
1.2329 + // Return allocated_length only if it would not be improved by a CastII.
1.2330 + return allocated_length; 1.2331 + } 1.2332 + } 1.2333 + } 1.2334 + 1.2335 + return this; 1.2336 + 1.2337 +} 1.2338 + 1.2339 +//============================================================================= 1.2340 +//---------------------------StoreNode::make----------------------------------- 1.2341 +// Polymorphic factory method: 1.2342 +StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt, MemOrd mo) { 1.2343 + assert((mo == unordered || mo == release), "unexpected"); 1.2344 + Compile* C = gvn.C; 1.2345 + assert(C->get_alias_index(adr_type) != Compile::AliasIdxRaw || 1.2346 + ctl != NULL, "raw memory operations should have control edge"); 1.2347 + 1.2348 + switch (bt) { 1.2349 + case T_BOOLEAN: 1.2350 + case T_BYTE: return new (C) StoreBNode(ctl, mem, adr, adr_type, val, mo); 1.2351 + case T_INT: return new (C) StoreINode(ctl, mem, adr, adr_type, val, mo); 1.2352 + case T_CHAR: 1.2353 + case T_SHORT: return new (C) StoreCNode(ctl, mem, adr, adr_type, val, mo); 1.2354 + case T_LONG: return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo); 1.2355 + case T_FLOAT: return new (C) StoreFNode(ctl, mem, adr, adr_type, val, mo); 1.2356 + case T_DOUBLE: return new (C) StoreDNode(ctl, mem, adr, adr_type, val, mo); 1.2357 + case T_METADATA: 1.2358 + case T_ADDRESS: 1.2359 + case T_OBJECT: 1.2360 +#ifdef _LP64 1.2361 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { 1.2362 + val = gvn.transform(new (C) EncodePNode(val, val->bottom_type()->make_narrowoop())); 1.2363 + return new (C) StoreNNode(ctl, mem, adr, adr_type, val, mo); 1.2364 + } else if (adr->bottom_type()->is_ptr_to_narrowklass() || 1.2365 + (UseCompressedClassPointers && val->bottom_type()->isa_klassptr() && 1.2366 + adr->bottom_type()->isa_rawptr())) { 1.2367 + val = gvn.transform(new (C) EncodePKlassNode(val, val->bottom_type()->make_narrowklass())); 1.2368 + return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val, mo); 1.2369 + } 1.2370 +#endif 1.2371 + { 1.2372 + return new (C) StorePNode(ctl, mem, adr, adr_type, val, mo); 1.2373 + } 1.2374 + } 1.2375 + ShouldNotReachHere(); 1.2376 + return (StoreNode*)NULL; 1.2377 +} 1.2378 + 1.2379 +StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) { 1.2380 + bool require_atomic = true; 1.2381 + return new (C) StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic); 1.2382 +} 1.2383 + 1.2384 + 1.2385 +//--------------------------bottom_type---------------------------------------- 1.2386 +const Type *StoreNode::bottom_type() const { 1.2387 + return Type::MEMORY; 1.2388 +} 1.2389 + 1.2390 +//------------------------------hash------------------------------------------- 1.2391 +uint StoreNode::hash() const { 1.2392 + // unroll addition of interesting fields 1.2393 + //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn); 1.2394 + 1.2395 + // Since they are not commoned, do not hash them: 1.2396 + return NO_HASH; 1.2397 +} 1.2398 + 1.2399 +//------------------------------Ideal------------------------------------------ 1.2400 +// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x). 1.2401 +// When a store immediately follows a relevant allocation/initialization, 1.2402 +// try to capture it into the initialization, or hoist it above. 
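// For example (illustrative):
//   p.f = 1; p.f = 2;    // the first StoreI folds away below when the
//                        // second store is its only use
//   Foo f = new Foo();
//   f.x = 5;             // captured into the InitializeNode when the
//                        // store is simple, unconditional and unaliased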
1.2403 +Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.2404 + Node* p = MemNode::Ideal_common(phase, can_reshape); 1.2405 + if (p) return (p == NodeSentinel) ? NULL : p; 1.2406 + 1.2407 + Node* mem = in(MemNode::Memory); 1.2408 + Node* address = in(MemNode::Address); 1.2409 + 1.2410 + // Back-to-back stores to same address? Fold em up. Generally 1.2411 + // unsafe if I have intervening uses... Also disallowed for StoreCM 1.2412 + // since they must follow each StoreP operation. Redundant StoreCMs 1.2413 + // are eliminated just before matching in final_graph_reshape. 1.2414 + if (mem->is_Store() && mem->in(MemNode::Address)->eqv_uncast(address) && 1.2415 + mem->Opcode() != Op_StoreCM) { 1.2416 + // Looking at a dead closed cycle of memory? 1.2417 + assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal"); 1.2418 + 1.2419 + assert(Opcode() == mem->Opcode() || 1.2420 + phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw, 1.2421 + "no mismatched stores, except on raw memory"); 1.2422 + 1.2423 + if (mem->outcnt() == 1 && // check for intervening uses 1.2424 + mem->as_Store()->memory_size() <= this->memory_size()) { 1.2425 + // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away. 1.2426 + // For example, 'mem' might be the final state at a conditional return. 1.2427 + // Or, 'mem' might be used by some node which is live at the same time 1.2428 + // 'this' is live, which might be unschedulable. So, require exactly 1.2429 + // ONE user, the 'this' store, until such time as we clone 'mem' for 1.2430 + // each of 'mem's uses (thus making the exactly-1-user-rule hold true). 1.2431 + if (can_reshape) { // (%%% is this an anachronism?) 1.2432 + set_req_X(MemNode::Memory, mem->in(MemNode::Memory), 1.2433 + phase->is_IterGVN()); 1.2434 + } else { 1.2435 + // It's OK to do this in the parser, since DU info is always accurate, 1.2436 + // and the parser always refers to nodes via SafePointNode maps. 1.2437 + set_req(MemNode::Memory, mem->in(MemNode::Memory)); 1.2438 + } 1.2439 + return this; 1.2440 + } 1.2441 + } 1.2442 + 1.2443 + // Capture an unaliased, unconditional, simple store into an initializer. 1.2444 + // Or, if it is independent of the allocation, hoist it above the allocation. 1.2445 + if (ReduceFieldZeroing && /*can_reshape &&*/ 1.2446 + mem->is_Proj() && mem->in(0)->is_Initialize()) { 1.2447 + InitializeNode* init = mem->in(0)->as_Initialize(); 1.2448 + intptr_t offset = init->can_capture_store(this, phase, can_reshape); 1.2449 + if (offset > 0) { 1.2450 + Node* moved = init->capture_store(this, offset, phase, can_reshape); 1.2451 + // If the InitializeNode captured me, it made a raw copy of me, 1.2452 + // and I need to disappear. 
1.2453 + if (moved != NULL) { 1.2454 + // %%% hack to ensure that Ideal returns a new node: 1.2455 + mem = MergeMemNode::make(phase->C, mem); 1.2456 + return mem; // fold me away 1.2457 + } 1.2458 + } 1.2459 + } 1.2460 + 1.2461 + return NULL; // No further progress 1.2462 +} 1.2463 + 1.2464 +//------------------------------Value----------------------------------------- 1.2465 +const Type *StoreNode::Value( PhaseTransform *phase ) const { 1.2466 + // Either input is TOP ==> the result is TOP 1.2467 + const Type *t1 = phase->type( in(MemNode::Memory) ); 1.2468 + if( t1 == Type::TOP ) return Type::TOP; 1.2469 + const Type *t2 = phase->type( in(MemNode::Address) ); 1.2470 + if( t2 == Type::TOP ) return Type::TOP; 1.2471 + const Type *t3 = phase->type( in(MemNode::ValueIn) ); 1.2472 + if( t3 == Type::TOP ) return Type::TOP; 1.2473 + return Type::MEMORY; 1.2474 +} 1.2475 + 1.2476 +//------------------------------Identity--------------------------------------- 1.2477 +// Remove redundant stores: 1.2478 +// Store(m, p, Load(m, p)) changes to m. 1.2479 +// Store(, p, x) -> Store(m, p, x) changes to Store(m, p, x). 1.2480 +Node *StoreNode::Identity( PhaseTransform *phase ) { 1.2481 + Node* mem = in(MemNode::Memory); 1.2482 + Node* adr = in(MemNode::Address); 1.2483 + Node* val = in(MemNode::ValueIn); 1.2484 + 1.2485 + // Load then Store? Then the Store is useless 1.2486 + if (val->is_Load() && 1.2487 + val->in(MemNode::Address)->eqv_uncast(adr) && 1.2488 + val->in(MemNode::Memory )->eqv_uncast(mem) && 1.2489 + val->as_Load()->store_Opcode() == Opcode()) { 1.2490 + return mem; 1.2491 + } 1.2492 + 1.2493 + // Two stores in a row of the same value? 1.2494 + if (mem->is_Store() && 1.2495 + mem->in(MemNode::Address)->eqv_uncast(adr) && 1.2496 + mem->in(MemNode::ValueIn)->eqv_uncast(val) && 1.2497 + mem->Opcode() == Opcode()) { 1.2498 + return mem; 1.2499 + } 1.2500 + 1.2501 + // Store of zero anywhere into a freshly-allocated object? 1.2502 + // Then the store is useless. 1.2503 + // (It must already have been captured by the InitializeNode.) 1.2504 + if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) { 1.2505 + // a newly allocated object is already all-zeroes everywhere 1.2506 + if (mem->is_Proj() && mem->in(0)->is_Allocate()) { 1.2507 + return mem; 1.2508 + } 1.2509 + 1.2510 + // the store may also apply to zero-bits in an earlier object 1.2511 + Node* prev_mem = find_previous_store(phase); 1.2512 + // Steps (a), (b): Walk past independent stores to find an exact match. 1.2513 + if (prev_mem != NULL) { 1.2514 + Node* prev_val = can_see_stored_value(prev_mem, phase); 1.2515 + if (prev_val != NULL && phase->eqv(prev_val, val)) { 1.2516 + // prev_val and val might differ by a cast; it would be good 1.2517 + // to keep the more informative of the two. 1.2518 + return mem; 1.2519 + } 1.2520 + } 1.2521 + } 1.2522 + 1.2523 + return this; 1.2524 +} 1.2525 + 1.2526 +//------------------------------match_edge------------------------------------- 1.2527 +// Do we Match on this edge index or not? Match only memory & value 1.2528 +uint StoreNode::match_edge(uint idx) const { 1.2529 + return idx == MemNode::Address || idx == MemNode::ValueIn; 1.2530 +} 1.2531 + 1.2532 +//------------------------------cmp-------------------------------------------- 1.2533 +// Do not common stores up together. They generally have to be split 1.2534 +// back up anyways, so do not bother. 
1.2535 +uint StoreNode::cmp( const Node &n ) const { 1.2536 + return (&n == this); // Always fail except on self 1.2537 +} 1.2538 + 1.2539 +//------------------------------Ideal_masked_input----------------------------- 1.2540 +// Check for a useless mask before a partial-word store 1.2541 +// (StoreB ... (AndI valIn conIa) ) 1.2542 +// If (conIa & mask == mask) this simplifies to 1.2543 +// (StoreB ... (valIn) ) 1.2544 +Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) { 1.2545 + Node *val = in(MemNode::ValueIn); 1.2546 + if( val->Opcode() == Op_AndI ) { 1.2547 + const TypeInt *t = phase->type( val->in(2) )->isa_int(); 1.2548 + if( t && t->is_con() && (t->get_con() & mask) == mask ) { 1.2549 + set_req(MemNode::ValueIn, val->in(1)); 1.2550 + return this; 1.2551 + } 1.2552 + } 1.2553 + return NULL; 1.2554 +} 1.2555 + 1.2556 + 1.2557 +//------------------------------Ideal_sign_extended_input---------------------- 1.2558 +// Check for useless sign-extension before a partial-word store 1.2559 +// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) ) 1.2560 +// If (conIL == conIR && conIR <= num_bits) this simplifies to 1.2561 +// (StoreB ... (valIn) ) 1.2562 +Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) { 1.2563 + Node *val = in(MemNode::ValueIn); 1.2564 + if( val->Opcode() == Op_RShiftI ) { 1.2565 + const TypeInt *t = phase->type( val->in(2) )->isa_int(); 1.2566 + if( t && t->is_con() && (t->get_con() <= num_bits) ) { 1.2567 + Node *shl = val->in(1); 1.2568 + if( shl->Opcode() == Op_LShiftI ) { 1.2569 + const TypeInt *t2 = phase->type( shl->in(2) )->isa_int(); 1.2570 + if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) { 1.2571 + set_req(MemNode::ValueIn, shl->in(1)); 1.2572 + return this; 1.2573 + } 1.2574 + } 1.2575 + } 1.2576 + } 1.2577 + return NULL; 1.2578 +} 1.2579 + 1.2580 +//------------------------------value_never_loaded----------------------------------- 1.2581 +// Determine whether there are any possible loads of the value stored. 1.2582 +// For simplicity, we actually check if there are any loads from the 1.2583 +// address stored to, not just for loads of the value stored by this node. 1.2584 +// 1.2585 +bool StoreNode::value_never_loaded( PhaseTransform *phase) const { 1.2586 + Node *adr = in(Address); 1.2587 + const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr(); 1.2588 + if (adr_oop == NULL) 1.2589 + return false; 1.2590 + if (!adr_oop->is_known_instance_field()) 1.2591 + return false; // if not a distinct instance, there may be aliases of the address 1.2592 + for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) { 1.2593 + Node *use = adr->fast_out(i); 1.2594 + int opc = use->Opcode(); 1.2595 + if (use->is_Load() || use->is_LoadStore()) { 1.2596 + return false; 1.2597 + } 1.2598 + } 1.2599 + return true; 1.2600 +} 1.2601 + 1.2602 +//============================================================================= 1.2603 +//------------------------------Ideal------------------------------------------ 1.2604 +// If the store is from an AND mask that leaves the low bits untouched, then 1.2605 +// we can skip the AND operation. If the store is from a sign-extension 1.2606 +// (a left shift, then right shift) we can skip both. 
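+// (Illustrative examples, not from the original source: a Java store such as
+//  'b[i] = (byte)(x & 0xFF)' parses to (StoreB ... (AndI x 0xFF)), where the
+//  mask is useless because StoreB only writes the low 8 bits; likewise an
+//  explicit sign-extension '(x << 24) >> 24' feeding a byte store is dead,
+//  since the widened bits never reach memory.)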
1.2607 +Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.2608 + Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF); 1.2609 + if( progress != NULL ) return progress; 1.2610 + 1.2611 + progress = StoreNode::Ideal_sign_extended_input(phase, 24); 1.2612 + if( progress != NULL ) return progress; 1.2613 + 1.2614 + // Finally check the default case 1.2615 + return StoreNode::Ideal(phase, can_reshape); 1.2616 +} 1.2617 + 1.2618 +//============================================================================= 1.2619 +//------------------------------Ideal------------------------------------------ 1.2620 +// If the store is from an AND mask that leaves the low bits untouched, then 1.2621 +// we can skip the AND operation 1.2622 +Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.2623 + Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF); 1.2624 + if( progress != NULL ) return progress; 1.2625 + 1.2626 + progress = StoreNode::Ideal_sign_extended_input(phase, 16); 1.2627 + if( progress != NULL ) return progress; 1.2628 + 1.2629 + // Finally check the default case 1.2630 + return StoreNode::Ideal(phase, can_reshape); 1.2631 +} 1.2632 + 1.2633 +//============================================================================= 1.2634 +//------------------------------Identity--------------------------------------- 1.2635 +Node *StoreCMNode::Identity( PhaseTransform *phase ) { 1.2636 + // No need to card mark when storing a null ptr 1.2637 + Node* my_store = in(MemNode::OopStore); 1.2638 + if (my_store->is_Store()) { 1.2639 + const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) ); 1.2640 + if( t1 == TypePtr::NULL_PTR ) { 1.2641 + return in(MemNode::Memory); 1.2642 + } 1.2643 + } 1.2644 + return this; 1.2645 +} 1.2646 + 1.2647 +//============================================================================= 1.2648 +//------------------------------Ideal--------------------------------------- 1.2649 +Node *StoreCMNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.2650 + Node* progress = StoreNode::Ideal(phase, can_reshape); 1.2651 + if (progress != NULL) return progress; 1.2652 + 1.2653 + Node* my_store = in(MemNode::OopStore); 1.2654 + if (my_store->is_MergeMem()) { 1.2655 + Node* mem = my_store->as_MergeMem()->memory_at(oop_alias_idx()); 1.2656 + set_req(MemNode::OopStore, mem); 1.2657 + return this; 1.2658 + } 1.2659 + 1.2660 + return NULL; 1.2661 +} 1.2662 + 1.2663 +//------------------------------Value----------------------------------------- 1.2664 +const Type *StoreCMNode::Value( PhaseTransform *phase ) const { 1.2665 + // Either input is TOP ==> the result is TOP 1.2666 + const Type *t = phase->type( in(MemNode::Memory) ); 1.2667 + if( t == Type::TOP ) return Type::TOP; 1.2668 + t = phase->type( in(MemNode::Address) ); 1.2669 + if( t == Type::TOP ) return Type::TOP; 1.2670 + t = phase->type( in(MemNode::ValueIn) ); 1.2671 + if( t == Type::TOP ) return Type::TOP; 1.2672 + // If extra input is TOP ==> the result is TOP 1.2673 + t = phase->type( in(MemNode::OopStore) ); 1.2674 + if( t == Type::TOP ) return Type::TOP; 1.2675 + 1.2676 + return StoreNode::Value( phase ); 1.2677 +} 1.2678 + 1.2679 + 1.2680 +//============================================================================= 1.2681 +//----------------------------------SCMemProjNode------------------------------ 1.2682 +const Type * SCMemProjNode::Value( PhaseTransform *phase ) const 1.2683 +{ 1.2684 + return bottom_type(); 1.2685 +} 1.2686 + 1.2687 
+//============================================================================= 1.2688 +//----------------------------------LoadStoreNode------------------------------ 1.2689 +LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ) 1.2690 + : Node(required), 1.2691 + _type(rt), 1.2692 + _adr_type(at) 1.2693 +{ 1.2694 + init_req(MemNode::Control, c ); 1.2695 + init_req(MemNode::Memory , mem); 1.2696 + init_req(MemNode::Address, adr); 1.2697 + init_req(MemNode::ValueIn, val); 1.2698 + init_class_id(Class_LoadStore); 1.2699 +} 1.2700 + 1.2701 +uint LoadStoreNode::ideal_reg() const { 1.2702 + return _type->ideal_reg(); 1.2703 +} 1.2704 + 1.2705 +bool LoadStoreNode::result_not_used() const { 1.2706 + for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) { 1.2707 + Node *x = fast_out(i); 1.2708 + if (x->Opcode() == Op_SCMemProj) continue; 1.2709 + return false; 1.2710 + } 1.2711 + return true; 1.2712 +} 1.2713 + 1.2714 +uint LoadStoreNode::size_of() const { return sizeof(*this); } 1.2715 + 1.2716 +//============================================================================= 1.2717 +//----------------------------------LoadStoreConditionalNode-------------------- 1.2718 +LoadStoreConditionalNode::LoadStoreConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : LoadStoreNode(c, mem, adr, val, NULL, TypeInt::BOOL, 5) { 1.2719 + init_req(ExpectedIn, ex ); 1.2720 +} 1.2721 + 1.2722 +//============================================================================= 1.2723 +//-------------------------------adr_type-------------------------------------- 1.2724 +// Compute the memory address type from the array address input, in(3). 1.2725 +const TypePtr* ClearArrayNode::adr_type() const { 1.2726 + Node *adr = in(3); 1.2727 + return MemNode::calculate_adr_type(adr->bottom_type()); 1.2728 +} 1.2729 + 1.2730 +//------------------------------match_edge------------------------------------- 1.2731 +// Do we Match on this edge index or not? Do not match memory 1.2732 +uint ClearArrayNode::match_edge(uint idx) const { 1.2733 + return idx > 1; 1.2734 +} 1.2735 + 1.2736 +//------------------------------Identity--------------------------------------- 1.2737 +// Clearing a zero-length array does nothing 1.2738 +Node *ClearArrayNode::Identity( PhaseTransform *phase ) { 1.2739 + return phase->type(in(2))->higher_equal(TypeX::ZERO) ? in(1) : this; 1.2740 +} 1.2741 + 1.2742 +//------------------------------Idealize--------------------------------------- 1.2743 +// Clearing a short array is faster with stores 1.2744 +Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){ 1.2745 + const int unit = BytesPerLong; 1.2746 + const TypeX* t = phase->type(in(2))->isa_intptr_t(); 1.2747 + if (!t) return NULL; 1.2748 + if (!t->is_con()) return NULL; 1.2749 + intptr_t raw_count = t->get_con(); 1.2750 + intptr_t size = raw_count; 1.2751 + if (!Matcher::init_array_count_is_in_bytes) size *= unit; 1.2752 + // Clearing nothing uses the Identity call. 1.2753 + // Negative clears are possible on dead ClearArrays 1.2754 + // (see jck test stmt114.stmt11402.val).
1.2755 + if (size <= 0 || size % unit != 0) return NULL; 1.2756 + intptr_t count = size / unit; 1.2757 + // Length too long; use fast hardware clear 1.2758 + if (size > Matcher::init_array_short_size) return NULL; 1.2759 + Node *mem = in(1); 1.2760 + if( phase->type(mem)==Type::TOP ) return NULL; 1.2761 + Node *adr = in(3); 1.2762 + const Type* at = phase->type(adr); 1.2763 + if( at==Type::TOP ) return NULL; 1.2764 + const TypePtr* atp = at->isa_ptr(); 1.2765 + // adjust atp to be the correct array element address type 1.2766 + if (atp == NULL) atp = TypePtr::BOTTOM; 1.2767 + else atp = atp->add_offset(Type::OffsetBot); 1.2768 + // Get base for derived pointer purposes 1.2769 + if( adr->Opcode() != Op_AddP ) Unimplemented(); 1.2770 + Node *base = adr->in(1); 1.2771 + 1.2772 + Node *zero = phase->makecon(TypeLong::ZERO); 1.2773 + Node *off = phase->MakeConX(BytesPerLong); 1.2774 + mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false); 1.2775 + count--; 1.2776 + while( count-- ) { 1.2777 + mem = phase->transform(mem); 1.2778 + adr = phase->transform(new (phase->C) AddPNode(base,adr,off)); 1.2779 + mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,MemNode::unordered,false); 1.2780 + } 1.2781 + return mem; 1.2782 +} 1.2783 + 1.2784 +//----------------------------step_through---------------------------------- 1.2785 +// If this ClearArray initializes a different instance, advance *np past it (to 1.2786 +// the allocation's input memory edge) and return true; return false if it is the one we are looking for. 1.2787 +bool ClearArrayNode::step_through(Node** np, uint instance_id, PhaseTransform* phase) { 1.2788 + Node* n = *np; 1.2789 + assert(n->is_ClearArray(), "sanity"); 1.2790 + intptr_t offset; 1.2791 + AllocateNode* alloc = AllocateNode::Ideal_allocation(n->in(3), phase, offset); 1.2792 + // This method is called only before Allocate nodes are expanded during 1.2793 + // macro node expansion. Before that, ClearArray nodes are only generated 1.2794 + // in LibraryCallKit::generate_arraycopy() which follows allocations. 1.2795 + assert(alloc != NULL, "should have allocation"); 1.2796 + if (alloc->_idx == instance_id) { 1.2797 + // Cannot bypass initialization of the instance we are looking for. 1.2798 + return false; 1.2799 + } 1.2800 + // Otherwise skip it. 1.2801 + InitializeNode* init = alloc->initialization(); 1.2802 + if (init != NULL) 1.2803 + *np = init->in(TypeFunc::Memory); 1.2804 + else 1.2805 + *np = alloc->in(TypeFunc::Memory); 1.2806 + return true; 1.2807 +} 1.2808 + 1.2809 +//----------------------------clear_memory------------------------------------- 1.2810 +// Generate code to initialize object storage to zero. 1.2811 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.2812 + intptr_t start_offset, 1.2813 + Node* end_offset, 1.2814 + PhaseGVN* phase) { 1.2815 + Compile* C = phase->C; 1.2816 + intptr_t offset = start_offset; 1.2817 + 1.2818 + int unit = BytesPerLong; 1.2819 + if ((offset % unit) != 0) { 1.2820 + Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(offset)); 1.2821 + adr = phase->transform(adr); 1.2822 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.2823 + mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered); 1.2824 + mem = phase->transform(mem); 1.2825 + offset += BytesPerInt; 1.2826 + } 1.2827 + assert((offset % unit) == 0, ""); 1.2828 + 1.2829 + // Initialize the remaining stuff, if any, with a ClearArray.
1.2830 + return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase); 1.2831 +} 1.2832 + 1.2833 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.2834 + Node* start_offset, 1.2835 + Node* end_offset, 1.2836 + PhaseGVN* phase) { 1.2837 + if (start_offset == end_offset) { 1.2838 + // nothing to do 1.2839 + return mem; 1.2840 + } 1.2841 + 1.2842 + Compile* C = phase->C; 1.2843 + int unit = BytesPerLong; 1.2844 + Node* zbase = start_offset; 1.2845 + Node* zend = end_offset; 1.2846 + 1.2847 + // Scale to the unit required by the CPU: 1.2848 + if (!Matcher::init_array_count_is_in_bytes) { 1.2849 + Node* shift = phase->intcon(exact_log2(unit)); 1.2850 + zbase = phase->transform( new(C) URShiftXNode(zbase, shift) ); 1.2851 + zend = phase->transform( new(C) URShiftXNode(zend, shift) ); 1.2852 + } 1.2853 + 1.2854 + // Bulk clear double-words 1.2855 + Node* zsize = phase->transform( new(C) SubXNode(zend, zbase) ); 1.2856 + Node* adr = phase->transform( new(C) AddPNode(dest, dest, start_offset) ); 1.2857 + mem = new (C) ClearArrayNode(ctl, mem, zsize, adr); 1.2858 + return phase->transform(mem); 1.2859 +} 1.2860 + 1.2861 +Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, 1.2862 + intptr_t start_offset, 1.2863 + intptr_t end_offset, 1.2864 + PhaseGVN* phase) { 1.2865 + if (start_offset == end_offset) { 1.2866 + // nothing to do 1.2867 + return mem; 1.2868 + } 1.2869 + 1.2870 + Compile* C = phase->C; 1.2871 + assert((end_offset % BytesPerInt) == 0, "odd end offset"); 1.2872 + intptr_t done_offset = end_offset; 1.2873 + if ((done_offset % BytesPerLong) != 0) { 1.2874 + done_offset -= BytesPerInt; 1.2875 + } 1.2876 + if (done_offset > start_offset) { 1.2877 + mem = clear_memory(ctl, mem, dest, 1.2878 + start_offset, phase->MakeConX(done_offset), phase); 1.2879 + } 1.2880 + if (done_offset < end_offset) { // emit the final 32-bit store 1.2881 + Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(done_offset)); 1.2882 + adr = phase->transform(adr); 1.2883 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.2884 + mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, MemNode::unordered); 1.2885 + mem = phase->transform(mem); 1.2886 + done_offset += BytesPerInt; 1.2887 + } 1.2888 + assert(done_offset == end_offset, ""); 1.2889 + return mem; 1.2890 +} 1.2891 + 1.2892 +//============================================================================= 1.2893 +// Do not match memory edge. 1.2894 +uint StrIntrinsicNode::match_edge(uint idx) const { 1.2895 + return idx == 2 || idx == 3; 1.2896 +} 1.2897 + 1.2898 +//------------------------------Ideal------------------------------------------ 1.2899 +// Return a node which is more "ideal" than the current node. Strip out 1.2900 +// control copies 1.2901 +Node *StrIntrinsicNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.2902 + if (remove_dead_region(phase, can_reshape)) return this; 1.2903 + // Don't bother trying to transform a dead node 1.2904 + if (in(0) && in(0)->is_top()) return NULL; 1.2905 + 1.2906 + if (can_reshape) { 1.2907 + Node* mem = phase->transform(in(MemNode::Memory)); 1.2908 + // If transformed to a MergeMem, get the desired slice 1.2909 + uint alias_idx = phase->C->get_alias_index(adr_type()); 1.2910 + mem = mem->is_MergeMem() ? 
mem->as_MergeMem()->memory_at(alias_idx) : mem; 1.2911 + if (mem != in(MemNode::Memory)) { 1.2912 + set_req(MemNode::Memory, mem); 1.2913 + return this; 1.2914 + } 1.2915 + } 1.2916 + return NULL; 1.2917 +} 1.2918 + 1.2919 +//------------------------------Value------------------------------------------ 1.2920 +const Type *StrIntrinsicNode::Value( PhaseTransform *phase ) const { 1.2921 + if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP; 1.2922 + return bottom_type(); 1.2923 +} 1.2924 + 1.2925 +//============================================================================= 1.2926 +//------------------------------match_edge------------------------------------- 1.2927 +// Do not match memory edge 1.2928 +uint EncodeISOArrayNode::match_edge(uint idx) const { 1.2929 + return idx == 2 || idx == 3; // EncodeISOArray src (Binary dst len) 1.2930 +} 1.2931 + 1.2932 +//------------------------------Ideal------------------------------------------ 1.2933 +// Return a node which is more "ideal" than the current node. Strip out 1.2934 +// control copies 1.2935 +Node *EncodeISOArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.2936 + return remove_dead_region(phase, can_reshape) ? this : NULL; 1.2937 +} 1.2938 + 1.2939 +//------------------------------Value------------------------------------------ 1.2940 +const Type *EncodeISOArrayNode::Value(PhaseTransform *phase) const { 1.2941 + if (in(0) && phase->type(in(0)) == Type::TOP) return Type::TOP; 1.2942 + return bottom_type(); 1.2943 +} 1.2944 + 1.2945 +//============================================================================= 1.2946 +MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) 1.2947 + : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)), 1.2948 + _adr_type(C->get_adr_type(alias_idx)) 1.2949 +{ 1.2950 + init_class_id(Class_MemBar); 1.2951 + Node* top = C->top(); 1.2952 + init_req(TypeFunc::I_O,top); 1.2953 + init_req(TypeFunc::FramePtr,top); 1.2954 + init_req(TypeFunc::ReturnAdr,top); 1.2955 + if (precedent != NULL) 1.2956 + init_req(TypeFunc::Parms, precedent); 1.2957 +} 1.2958 + 1.2959 +//------------------------------cmp-------------------------------------------- 1.2960 +uint MemBarNode::hash() const { return NO_HASH; } 1.2961 +uint MemBarNode::cmp( const Node &n ) const { 1.2962 + return (&n == this); // Always fail except on self 1.2963 +} 1.2964 + 1.2965 +//------------------------------make------------------------------------------- 1.2966 +MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { 1.2967 + switch (opcode) { 1.2968 + case Op_MemBarAcquire: return new(C) MemBarAcquireNode(C, atp, pn); 1.2969 + case Op_LoadFence: return new(C) LoadFenceNode(C, atp, pn); 1.2970 + case Op_MemBarRelease: return new(C) MemBarReleaseNode(C, atp, pn); 1.2971 + case Op_StoreFence: return new(C) StoreFenceNode(C, atp, pn); 1.2972 + case Op_MemBarAcquireLock: return new(C) MemBarAcquireLockNode(C, atp, pn); 1.2973 + case Op_MemBarReleaseLock: return new(C) MemBarReleaseLockNode(C, atp, pn); 1.2974 + case Op_MemBarVolatile: return new(C) MemBarVolatileNode(C, atp, pn); 1.2975 + case Op_MemBarCPUOrder: return new(C) MemBarCPUOrderNode(C, atp, pn); 1.2976 + case Op_Initialize: return new(C) InitializeNode(C, atp, pn); 1.2977 + case Op_MemBarStoreStore: return new(C) MemBarStoreStoreNode(C, atp, pn); 1.2978 + default: ShouldNotReachHere(); return NULL; 1.2979 + } 1.2980 +} 1.2981 + 1.2982 +//------------------------------Ideal------------------------------------------ 1.2983 +// Return a node which 
is more "ideal" than the current node. Strip out 1.2984 +// control copies 1.2985 +Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) { 1.2986 + if (remove_dead_region(phase, can_reshape)) return this; 1.2987 + // Don't bother trying to transform a dead node 1.2988 + if (in(0) && in(0)->is_top()) { 1.2989 + return NULL; 1.2990 + } 1.2991 + 1.2992 + // Eliminate volatile MemBars for scalar replaced objects. 1.2993 + if (can_reshape && req() == (Precedent+1)) { 1.2994 + bool eliminate = false; 1.2995 + int opc = Opcode(); 1.2996 + if ((opc == Op_MemBarAcquire || opc == Op_MemBarVolatile)) { 1.2997 + // Volatile field loads and stores. 1.2998 + Node* my_mem = in(MemBarNode::Precedent); 1.2999 + // The MembarAquire may keep an unused LoadNode alive through the Precedent edge 1.3000 + if ((my_mem != NULL) && (opc == Op_MemBarAcquire) && (my_mem->outcnt() == 1)) { 1.3001 + // if the Precedent is a decodeN and its input (a Load) is used at more than one place, 1.3002 + // replace this Precedent (decodeN) with the Load instead. 1.3003 + if ((my_mem->Opcode() == Op_DecodeN) && (my_mem->in(1)->outcnt() > 1)) { 1.3004 + Node* load_node = my_mem->in(1); 1.3005 + set_req(MemBarNode::Precedent, load_node); 1.3006 + phase->is_IterGVN()->_worklist.push(my_mem); 1.3007 + my_mem = load_node; 1.3008 + } else { 1.3009 + assert(my_mem->unique_out() == this, "sanity"); 1.3010 + del_req(Precedent); 1.3011 + phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later 1.3012 + my_mem = NULL; 1.3013 + } 1.3014 + } 1.3015 + if (my_mem != NULL && my_mem->is_Mem()) { 1.3016 + const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr(); 1.3017 + // Check for scalar replaced object reference. 1.3018 + if( t_oop != NULL && t_oop->is_known_instance_field() && 1.3019 + t_oop->offset() != Type::OffsetBot && 1.3020 + t_oop->offset() != Type::OffsetTop) { 1.3021 + eliminate = true; 1.3022 + } 1.3023 + } 1.3024 + } else if (opc == Op_MemBarRelease) { 1.3025 + // Final field stores. 1.3026 + Node* alloc = AllocateNode::Ideal_allocation(in(MemBarNode::Precedent), phase); 1.3027 + if ((alloc != NULL) && alloc->is_Allocate() && 1.3028 + alloc->as_Allocate()->_is_non_escaping) { 1.3029 + // The allocated object does not escape. 1.3030 + eliminate = true; 1.3031 + } 1.3032 + } 1.3033 + if (eliminate) { 1.3034 + // Replace MemBar projections by its inputs. 1.3035 + PhaseIterGVN* igvn = phase->is_IterGVN(); 1.3036 + igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory)); 1.3037 + igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control)); 1.3038 + // Must return either the original node (now dead) or a new node 1.3039 + // (Do not return a top here, since that would break the uniqueness of top.) 1.3040 + return new (phase->C) ConINode(TypeInt::ZERO); 1.3041 + } 1.3042 + } 1.3043 + return NULL; 1.3044 +} 1.3045 + 1.3046 +//------------------------------Value------------------------------------------ 1.3047 +const Type *MemBarNode::Value( PhaseTransform *phase ) const { 1.3048 + if( !in(0) ) return Type::TOP; 1.3049 + if( phase->type(in(0)) == Type::TOP ) 1.3050 + return Type::TOP; 1.3051 + return TypeTuple::MEMBAR; 1.3052 +} 1.3053 + 1.3054 +//------------------------------match------------------------------------------ 1.3055 +// Construct projections for memory. 
1.3056 +Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) { 1.3057 + switch (proj->_con) { 1.3058 + case TypeFunc::Control: 1.3059 + case TypeFunc::Memory: 1.3060 + return new (m->C) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj); 1.3061 + } 1.3062 + ShouldNotReachHere(); 1.3063 + return NULL; 1.3064 +} 1.3065 + 1.3066 +//===========================InitializeNode==================================== 1.3067 +// SUMMARY: 1.3068 +// This node acts as a memory barrier on raw memory, after some raw stores. 1.3069 +// The 'cooked' oop value feeds from the Initialize, not the Allocation. 1.3070 +// The Initialize can 'capture' suitably constrained stores as raw inits. 1.3071 +// It can coalesce related raw stores into larger units (called 'tiles'). 1.3072 +// It can avoid zeroing new storage for memory units which have raw inits. 1.3073 +// At macro-expansion, it is marked 'complete', and does not optimize further. 1.3074 +// 1.3075 +// EXAMPLE: 1.3076 +// The object 'new short[2]' occupies 16 bytes in a 32-bit machine. 1.3077 +// ctl = incoming control; mem* = incoming memory 1.3078 +// (Note: A star * on a memory edge denotes I/O and other standard edges.) 1.3079 +// First allocate uninitialized memory and fill in the header: 1.3080 +// alloc = (Allocate ctl mem* 16 #short[].klass ...) 1.3081 +// ctl := alloc.Control; mem* := alloc.Memory* 1.3082 +// rawmem = alloc.Memory; rawoop = alloc.RawAddress 1.3083 +// Then initialize to zero the non-header parts of the raw memory block: 1.3084 +// init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress) 1.3085 +// ctl := init.Control; mem.SLICE(#short[*]) := init.Memory 1.3086 +// After the initialize node executes, the object is ready for service: 1.3087 +// oop := (CheckCastPP init.Control alloc.RawAddress #short[]) 1.3088 +// Suppose its body is immediately initialized as {1,2}: 1.3089 +// store1 = (StoreC init.Control init.Memory (+ oop 12) 1) 1.3090 +// store2 = (StoreC init.Control store1 (+ oop 14) 2) 1.3091 +// mem.SLICE(#short[*]) := store2 1.3092 +// 1.3093 +// DETAILS: 1.3094 +// An InitializeNode collects and isolates object initialization after 1.3095 +// an AllocateNode and before the next possible safepoint. As a 1.3096 +// memory barrier (MemBarNode), it keeps critical stores from drifting 1.3097 +// down past any safepoint or any publication of the allocation. 1.3098 +// Before this barrier, a newly-allocated object may have uninitialized bits. 1.3099 +// After this barrier, it may be treated as a real oop, and GC is allowed. 1.3100 +// 1.3101 +// The semantics of the InitializeNode include an implicit zeroing of 1.3102 +// the new object from object header to the end of the object. 1.3103 +// (The object header and end are determined by the AllocateNode.) 1.3104 +// 1.3105 +// Certain stores may be added as direct inputs to the InitializeNode. 1.3106 +// These stores must update raw memory, and they must be to addresses 1.3107 +// derived from the raw address produced by AllocateNode, and with 1.3108 +// a constant offset. They must be ordered by increasing offset. 1.3109 +// The first one is at in(RawStores), the last at in(req()-1). 1.3110 +// Unlike most memory operations, they are not linked in a chain, 1.3111 +// but are displayed in parallel as users of the rawmem output of 1.3112 +// the allocation. 1.3113 +// 1.3114 +// (See comments in InitializeNode::capture_store, which continue 1.3115 +// the example given above.) 
1.3116 +// 1.3117 +// When the associated Allocate is macro-expanded, the InitializeNode 1.3118 +// may be rewritten to optimize collected stores. A ClearArrayNode 1.3119 +// may also be created at that point to represent any required zeroing. 1.3120 +// The InitializeNode is then marked 'complete', prohibiting further 1.3121 +// capturing of nearby memory operations. 1.3122 +// 1.3123 +// During macro-expansion, all captured initializations which store 1.3124 +// constant values of 32 bits or smaller are coalesced (if advantageous) 1.3125 +// into larger 'tiles' of 32 or 64 bits. This allows an object to be 1.3126 +// initialized in fewer memory operations. Memory words which are 1.3127 +// covered by neither tiles nor non-constant stores are pre-zeroed 1.3128 +// by explicit stores of zero. (The code shape happens to do all 1.3129 +// zeroing first, then all other stores, with both sequences occurring 1.3130 +// in order of ascending offsets.) 1.3131 +// 1.3132 +// Alternatively, code may be inserted between an AllocateNode and its 1.3133 +// InitializeNode, to perform arbitrary initialization of the new object. 1.3134 +// E.g., the object copying intrinsics insert complex data transfers here. 1.3135 +// The initialization must then be marked as 'complete' to disable the 1.3136 +// built-in zeroing semantics and the collection of initializing stores. 1.3137 +// 1.3138 +// While an InitializeNode is incomplete, reads from the memory state 1.3139 +// produced by it are optimizable if they match the control edge and 1.3140 +// new oop address associated with the allocation/initialization. 1.3141 +// They return a stored value (if the offset matches) or else zero. 1.3142 +// A write to the memory state, if it matches control and address, 1.3143 +// and if it is to a constant offset, may be 'captured' by the 1.3144 +// InitializeNode. It is cloned as a raw memory operation and rewired 1.3145 +// inside the initialization, to the raw oop produced by the allocation. 1.3146 +// Operations on addresses which are provably distinct (e.g., to 1.3147 +// other AllocateNodes) are allowed to bypass the initialization. 1.3148 +// 1.3149 +// The effect of all this is to consolidate object initialization 1.3150 +// (both arrays and non-arrays, both piecewise and bulk) into a 1.3151 +// single location, where it can be optimized as a unit. 1.3152 +// 1.3153 +// Only stores with an offset less than TrackedInitializationLimit words 1.3154 +// will be considered for capture by an InitializeNode. This puts a 1.3155 +// reasonable limit on the complexity of optimized initializations. 1.3156 + 1.3157 +//---------------------------InitializeNode------------------------------------ 1.3158 +InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop) 1.3159 + : MemBarNode(C, adr_type, rawoop), 1.3160 + _is_complete(Incomplete), _does_not_escape(false) 1.3161 +{ 1.3162 + init_class_id(Class_Initialize); 1.3163 + 1.3164 + assert(adr_type == Compile::AliasIdxRaw, "only valid atp"); 1.3165 + assert(in(RawAddress) == rawoop, "proper init"); 1.3166 + // Note: allocation() can be NULL, for secondary initialization barriers 1.3167 +} 1.3168 + 1.3169 +// Since this node is not matched, it will be processed by the 1.3170 +// register allocator. Declare that there are no constraints 1.3171 +// on the allocation of the RawAddress edge. 1.3172 +const RegMask &InitializeNode::in_RegMask(uint idx) const { 1.3173 + // This edge should be set to top, by the set_complete. But be conservative.
1.3174 + if (idx == InitializeNode::RawAddress) 1.3175 + return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]); 1.3176 + return RegMask::Empty; 1.3177 +} 1.3178 + 1.3179 +Node* InitializeNode::memory(uint alias_idx) { 1.3180 + Node* mem = in(Memory); 1.3181 + if (mem->is_MergeMem()) { 1.3182 + return mem->as_MergeMem()->memory_at(alias_idx); 1.3183 + } else { 1.3184 + // incoming raw memory is not split 1.3185 + return mem; 1.3186 + } 1.3187 +} 1.3188 + 1.3189 +bool InitializeNode::is_non_zero() { 1.3190 + if (is_complete()) return false; 1.3191 + remove_extra_zeroes(); 1.3192 + return (req() > RawStores); 1.3193 +} 1.3194 + 1.3195 +void InitializeNode::set_complete(PhaseGVN* phase) { 1.3196 + assert(!is_complete(), "caller responsibility"); 1.3197 + _is_complete = Complete; 1.3198 + 1.3199 + // After this node is complete, it contains a bunch of 1.3200 + // raw-memory initializations. There is no need for 1.3201 + // it to have anything to do with non-raw memory effects. 1.3202 + // Therefore, tell all non-raw users to re-optimize themselves, 1.3203 + // after skipping the memory effects of this initialization. 1.3204 + PhaseIterGVN* igvn = phase->is_IterGVN(); 1.3205 + if (igvn) igvn->add_users_to_worklist(this); 1.3206 +} 1.3207 + 1.3208 +// convenience function 1.3209 +// return false if the init contains any stores already 1.3210 +bool AllocateNode::maybe_set_complete(PhaseGVN* phase) { 1.3211 + InitializeNode* init = initialization(); 1.3212 + if (init == NULL || init->is_complete()) return false; 1.3213 + init->remove_extra_zeroes(); 1.3214 + // for now, if this allocation has already collected any inits, bail: 1.3215 + if (init->is_non_zero()) return false; 1.3216 + init->set_complete(phase); 1.3217 + return true; 1.3218 +} 1.3219 + 1.3220 +void InitializeNode::remove_extra_zeroes() { 1.3221 + if (req() == RawStores) return; 1.3222 + Node* zmem = zero_memory(); 1.3223 + uint fill = RawStores; 1.3224 + for (uint i = fill; i < req(); i++) { 1.3225 + Node* n = in(i); 1.3226 + if (n->is_top() || n == zmem) continue; // skip 1.3227 + if (fill < i) set_req(fill, n); // compact 1.3228 + ++fill; 1.3229 + } 1.3230 + // delete any empty spaces created: 1.3231 + while (fill < req()) { 1.3232 + del_req(fill); 1.3233 + } 1.3234 +} 1.3235 + 1.3236 +// Helper for remembering which stores go with which offsets. 1.3237 +intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) { 1.3238 + if (!st->is_Store()) return -1; // can happen to dead code via subsume_node 1.3239 + intptr_t offset = -1; 1.3240 + Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address), 1.3241 + phase, offset); 1.3242 + if (base == NULL) return -1; // something is dead, 1.3243 + if (offset < 0) return -1; // dead, dead 1.3244 + return offset; 1.3245 +} 1.3246 + 1.3247 +// Helper for proving that an initialization expression is 1.3248 +// "simple enough" to be folded into an object initialization. 1.3249 +// Attempts to prove that a store's initial value 'n' can be captured 1.3250 +// within the initialization without creating a vicious cycle, such as: 1.3251 +// { Foo p = new Foo(); p.next = p; } 1.3252 +// True for constants and parameters and small combinations thereof. 1.3253 +bool InitializeNode::detect_init_independence(Node* n, int& count) { 1.3254 + if (n == NULL) return true; // (can this really happen?) 
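+  // A value that is (a projection of) this InitializeNode itself, or whose
+  // control is pinned under it, depends on the object being initialized
+  // (the 'p.next = p' cycle above) and is rejected by the checks below.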
1.3255 + if (n->is_Proj()) n = n->in(0); 1.3256 + if (n == this) return false; // found a cycle 1.3257 + if (n->is_Con()) return true; 1.3258 + if (n->is_Start()) return true; // params, etc., are OK 1.3259 + if (n->is_Root()) return true; // even better 1.3260 + 1.3261 + Node* ctl = n->in(0); 1.3262 + if (ctl != NULL && !ctl->is_top()) { 1.3263 + if (ctl->is_Proj()) ctl = ctl->in(0); 1.3264 + if (ctl == this) return false; 1.3265 + 1.3266 + // If we already know that the enclosing memory op is pinned right after 1.3267 + // the init, then any control flow that the store has picked up 1.3268 + // must have preceded the init, or else be equal to the init. 1.3269 + // Even after loop optimizations (which might change control edges) 1.3270 + // a store is never pinned *before* the availability of its inputs. 1.3271 + if (!MemNode::all_controls_dominate(n, this)) 1.3272 + return false; // failed to prove a good control 1.3273 + } 1.3274 + 1.3275 + // Check data edges for possible dependencies on 'this'. 1.3276 + if ((count += 1) > 20) return false; // complexity limit 1.3277 + for (uint i = 1; i < n->req(); i++) { 1.3278 + Node* m = n->in(i); 1.3279 + if (m == NULL || m == n || m->is_top()) continue; 1.3280 + uint first_i = n->find_edge(m); 1.3281 + if (i != first_i) continue; // process duplicate edge just once 1.3282 + if (!detect_init_independence(m, count)) { 1.3283 + return false; 1.3284 + } 1.3285 + } 1.3286 + 1.3287 + return true; 1.3288 +} 1.3289 + 1.3290 +// Here are all the checks a Store must pass before it can be moved into 1.3291 +// an initialization. Returns zero if a check fails. 1.3292 +// On success, returns the (constant) offset to which the store applies, 1.3293 +// within the initialized memory. 1.3294 +intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase, bool can_reshape) { 1.3295 + const int FAIL = 0; 1.3296 + if (st->req() != MemNode::ValueIn + 1) 1.3297 + return FAIL; // an inscrutable StoreNode (card mark?) 1.3298 + Node* ctl = st->in(MemNode::Control); 1.3299 + if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this)) 1.3300 + return FAIL; // must be unconditional after the initialization 1.3301 + Node* mem = st->in(MemNode::Memory); 1.3302 + if (!(mem->is_Proj() && mem->in(0) == this)) 1.3303 + return FAIL; // must not be preceded by other stores 1.3304 + Node* adr = st->in(MemNode::Address); 1.3305 + intptr_t offset; 1.3306 + AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset); 1.3307 + if (alloc == NULL) 1.3308 + return FAIL; // inscrutable address 1.3309 + if (alloc != allocation()) 1.3310 + return FAIL; // wrong allocation! (store needs to float up) 1.3311 + Node* val = st->in(MemNode::ValueIn); 1.3312 + int complexity_count = 0; 1.3313 + if (!detect_init_independence(val, complexity_count)) 1.3314 + return FAIL; // stored value must be 'simple enough' 1.3315 + 1.3316 + // The Store can be captured only if nothing after the allocation 1.3317 + // and before the Store is using the memory location that the store 1.3318 + // overwrites. 1.3319 + bool failed = false; 1.3320 + // If is_complete_with_arraycopy() is true the shape of the graph is 1.3321 + // well defined and is safe so no need for extra checks. 1.3322 + if (!is_complete_with_arraycopy()) { 1.3323 + // We are going to look at each use of the memory state following 1.3324 + // the allocation to make sure nothing reads the memory that the 1.3325 + // Store writes. 
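+  // (The scan below is a worklist walk: 'mems' is seeded with this node's
+  //  raw memory projection and grows whenever a MergeMem on the same slice
+  //  is encountered, so every memory user sitting between the
+  //  InitializeNode and the candidate Store gets inspected.)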
1.3326 + const TypePtr* t_adr = phase->type(adr)->isa_ptr(); 1.3327 + int alias_idx = phase->C->get_alias_index(t_adr); 1.3328 + ResourceMark rm; 1.3329 + Unique_Node_List mems; 1.3330 + mems.push(mem); 1.3331 + Node* unique_merge = NULL; 1.3332 + for (uint next = 0; next < mems.size(); ++next) { 1.3333 + Node *m = mems.at(next); 1.3334 + for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { 1.3335 + Node *n = m->fast_out(j); 1.3336 + if (n->outcnt() == 0) { 1.3337 + continue; 1.3338 + } 1.3339 + if (n == st) { 1.3340 + continue; 1.3341 + } else if (n->in(0) != NULL && n->in(0) != ctl) { 1.3342 + // If the control of this use is different from the control 1.3343 + // of the Store which is right after the InitializeNode then 1.3344 + // this node cannot be between the InitializeNode and the 1.3345 + // Store. 1.3346 + continue; 1.3347 + } else if (n->is_MergeMem()) { 1.3348 + if (n->as_MergeMem()->memory_at(alias_idx) == m) { 1.3349 + // We can hit a MergeMemNode (that will likely go away 1.3350 + // later) that is a direct use of the memory state 1.3351 + // following the InitializeNode on the same slice as the 1.3352 + // store node that we'd like to capture. We need to check 1.3353 + // the uses of the MergeMemNode. 1.3354 + mems.push(n); 1.3355 + } 1.3356 + } else if (n->is_Mem()) { 1.3357 + Node* other_adr = n->in(MemNode::Address); 1.3358 + if (other_adr == adr) { 1.3359 + failed = true; 1.3360 + break; 1.3361 + } else { 1.3362 + const TypePtr* other_t_adr = phase->type(other_adr)->isa_ptr(); 1.3363 + if (other_t_adr != NULL) { 1.3364 + int other_alias_idx = phase->C->get_alias_index(other_t_adr); 1.3365 + if (other_alias_idx == alias_idx) { 1.3366 + // A load from the same memory slice as the store right 1.3367 + // after the InitializeNode. We check the control of the 1.3368 + // object/array that is loaded from. If it's the same as 1.3369 + // the store control then we cannot capture the store. 1.3370 + assert(!n->is_Store(), "2 stores to same slice on same control?"); 1.3371 + Node* base = other_adr; 1.3372 + assert(base->is_AddP(), err_msg_res("should be addp but is %s", base->Name())); 1.3373 + base = base->in(AddPNode::Base); 1.3374 + if (base != NULL) { 1.3375 + base = base->uncast(); 1.3376 + if (base->is_Proj() && base->in(0) == alloc) { 1.3377 + failed = true; 1.3378 + break; 1.3379 + } 1.3380 + } 1.3381 + } 1.3382 + } 1.3383 + } 1.3384 + } else { 1.3385 + failed = true; 1.3386 + break; 1.3387 + } 1.3388 + } 1.3389 + } 1.3390 + } 1.3391 + if (failed) { 1.3392 + if (!can_reshape) { 1.3393 + // We decided we couldn't capture the store during parsing. We 1.3394 + // should try again during the next IGVN once the graph is 1.3395 + // cleaner. 1.3396 + phase->C->record_for_igvn(st); 1.3397 + } 1.3398 + return FAIL; 1.3399 + } 1.3400 + 1.3401 + return offset; // success 1.3402 +} 1.3403 + 1.3404 +// Find the captured store in(i) which corresponds to the range 1.3405 +// [start..start+size) in the initialized object. 1.3406 +// If there is one, return its index i. If there isn't, return the 1.3407 +// negative of the index where it should be inserted. 1.3408 +// Return 0 if the queried range overlaps an initialization boundary 1.3409 +// or if dead code is encountered. 1.3410 +// If size_in_bytes is zero, do not bother with overlap checks. 
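+// (A caller passes size_in_bytes == 0 when it only needs an insertion index;
+//  coalesce_subword_stores does exactly that when re-inserting tiled stores.)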
1.3411 +int InitializeNode::captured_store_insertion_point(intptr_t start, 1.3412 + int size_in_bytes, 1.3413 + PhaseTransform* phase) { 1.3414 + const int FAIL = 0, MAX_STORE = BytesPerLong; 1.3415 + 1.3416 + if (is_complete()) 1.3417 + return FAIL; // arraycopy got here first; punt 1.3418 + 1.3419 + assert(allocation() != NULL, "must be present"); 1.3420 + 1.3421 + // no negatives, no header fields: 1.3422 + if (start < (intptr_t) allocation()->minimum_header_size()) return FAIL; 1.3423 + 1.3424 + // after a certain size, we bail out on tracking all the stores: 1.3425 + intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); 1.3426 + if (start >= ti_limit) return FAIL; 1.3427 + 1.3428 + for (uint i = InitializeNode::RawStores, limit = req(); ; ) { 1.3429 + if (i >= limit) return -(int)i; // not found; here is where to put it 1.3430 + 1.3431 + Node* st = in(i); 1.3432 + intptr_t st_off = get_store_offset(st, phase); 1.3433 + if (st_off < 0) { 1.3434 + if (st != zero_memory()) { 1.3435 + return FAIL; // bail out if there is dead garbage 1.3436 + } 1.3437 + } else if (st_off > start) { 1.3438 + // ...we are done, since stores are ordered 1.3439 + if (st_off < start + size_in_bytes) { 1.3440 + return FAIL; // the next store overlaps 1.3441 + } 1.3442 + return -(int)i; // not found; here is where to put it 1.3443 + } else if (st_off < start) { 1.3444 + if (size_in_bytes != 0 && 1.3445 + start < st_off + MAX_STORE && 1.3446 + start < st_off + st->as_Store()->memory_size()) { 1.3447 + return FAIL; // the previous store overlaps 1.3448 + } 1.3449 + } else { 1.3450 + if (size_in_bytes != 0 && 1.3451 + st->as_Store()->memory_size() != size_in_bytes) { 1.3452 + return FAIL; // mismatched store size 1.3453 + } 1.3454 + return i; 1.3455 + } 1.3456 + 1.3457 + ++i; 1.3458 + } 1.3459 +} 1.3460 + 1.3461 +// Look for a captured store which initializes at the offset 'start' 1.3462 +// with the given size. If there is no such store, and no other 1.3463 +// initialization interferes, then return zero_memory (the memory 1.3464 +// projection of the AllocateNode). 1.3465 +Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes, 1.3466 + PhaseTransform* phase) { 1.3467 + assert(stores_are_sane(phase), ""); 1.3468 + int i = captured_store_insertion_point(start, size_in_bytes, phase); 1.3469 + if (i == 0) { 1.3470 + return NULL; // something is dead 1.3471 + } else if (i < 0) { 1.3472 + return zero_memory(); // just primordial zero bits here 1.3473 + } else { 1.3474 + Node* st = in(i); // here is the store at this position 1.3475 + assert(get_store_offset(st->as_Store(), phase) == start, "sanity"); 1.3476 + return st; 1.3477 + } 1.3478 +} 1.3479 + 1.3480 +// Create, as a raw pointer, an address within my new object at 'offset'. 1.3481 +Node* InitializeNode::make_raw_address(intptr_t offset, 1.3482 + PhaseTransform* phase) { 1.3483 + Node* addr = in(RawAddress); 1.3484 + if (offset != 0) { 1.3485 + Compile* C = phase->C; 1.3486 + addr = phase->transform( new (C) AddPNode(C->top(), addr, 1.3487 + phase->MakeConX(offset)) ); 1.3488 + } 1.3489 + return addr; 1.3490 +} 1.3491 + 1.3492 +// Clone the given store, converting it into a raw store 1.3493 +// initializing a field or element of my new object. 1.3494 +// Caller is responsible for retiring the original store, 1.3495 +// with subsume_node or the like. 
1.3496 +// 1.3497 +// From the example above InitializeNode::InitializeNode, 1.3498 +// here are the old stores to be captured: 1.3499 +// store1 = (StoreC init.Control init.Memory (+ oop 12) 1) 1.3500 +// store2 = (StoreC init.Control store1 (+ oop 14) 2) 1.3501 +// 1.3502 +// Here is the changed code; note the extra edges on init: 1.3503 +// alloc = (Allocate ...) 1.3504 +// rawoop = alloc.RawAddress 1.3505 +// rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1) 1.3506 +// rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2) 1.3507 +// init = (Initialize alloc.Control alloc.Memory rawoop 1.3508 +// rawstore1 rawstore2) 1.3509 +// 1.3510 +Node* InitializeNode::capture_store(StoreNode* st, intptr_t start, 1.3511 + PhaseTransform* phase, bool can_reshape) { 1.3512 + assert(stores_are_sane(phase), ""); 1.3513 + 1.3514 + if (start < 0) return NULL; 1.3515 + assert(can_capture_store(st, phase, can_reshape) == start, "sanity"); 1.3516 + 1.3517 + Compile* C = phase->C; 1.3518 + int size_in_bytes = st->memory_size(); 1.3519 + int i = captured_store_insertion_point(start, size_in_bytes, phase); 1.3520 + if (i == 0) return NULL; // bail out 1.3521 + Node* prev_mem = NULL; // raw memory for the captured store 1.3522 + if (i > 0) { 1.3523 + prev_mem = in(i); // there is a pre-existing store under this one 1.3524 + set_req(i, C->top()); // temporarily disconnect it 1.3525 + // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect. 1.3526 + } else { 1.3527 + i = -i; // no pre-existing store 1.3528 + prev_mem = zero_memory(); // a slice of the newly allocated object 1.3529 + if (i > InitializeNode::RawStores && in(i-1) == prev_mem) 1.3530 + set_req(--i, C->top()); // reuse this edge; it has been folded away 1.3531 + else 1.3532 + ins_req(i, C->top()); // build a new edge 1.3533 + } 1.3534 + Node* new_st = st->clone(); 1.3535 + new_st->set_req(MemNode::Control, in(Control)); 1.3536 + new_st->set_req(MemNode::Memory, prev_mem); 1.3537 + new_st->set_req(MemNode::Address, make_raw_address(start, phase)); 1.3538 + new_st = phase->transform(new_st); 1.3539 + 1.3540 + // At this point, new_st might have swallowed a pre-existing store 1.3541 + // at the same offset, or perhaps new_st might have disappeared, 1.3542 + // if it redundantly stored the same value (or zero to fresh memory). 1.3543 + 1.3544 + // In any case, wire it in: 1.3545 + set_req(i, new_st); 1.3546 + 1.3547 + // The caller may now kill the old guy. 
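+  // Debug-only sanity check: the store just captured must be findable again
+  // at its offset, unless it vanished entirely as redundant.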
1.3548 + DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase)); 1.3549 + assert(check_st == new_st || check_st == NULL, "must be findable"); 1.3550 + assert(!is_complete(), ""); 1.3551 + return new_st; 1.3552 +} 1.3553 + 1.3554 +static bool store_constant(jlong* tiles, int num_tiles, 1.3555 + intptr_t st_off, int st_size, 1.3556 + jlong con) { 1.3557 + if ((st_off & (st_size-1)) != 0) 1.3558 + return false; // strange store offset (assume size==2**N) 1.3559 + address addr = (address)tiles + st_off; 1.3560 + assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob"); 1.3561 + switch (st_size) { 1.3562 + case sizeof(jbyte): *(jbyte*) addr = (jbyte) con; break; 1.3563 + case sizeof(jchar): *(jchar*) addr = (jchar) con; break; 1.3564 + case sizeof(jint): *(jint*) addr = (jint) con; break; 1.3565 + case sizeof(jlong): *(jlong*) addr = (jlong) con; break; 1.3566 + default: return false; // strange store size (detect size!=2**N here) 1.3567 + } 1.3568 + return true; // return success to caller 1.3569 +} 1.3570 + 1.3571 +// Coalesce subword constants into int constants and possibly 1.3572 +// into long constants. The goal, if the CPU permits, 1.3573 +// is to initialize the object with a small number of 64-bit tiles. 1.3574 +// Also, convert floating-point constants to bit patterns. 1.3575 +// Non-constants are not relevant to this pass. 1.3576 +// 1.3577 +// In terms of the running example on InitializeNode::InitializeNode 1.3578 +// and InitializeNode::capture_store, here is the transformation 1.3579 +// of rawstore1 and rawstore2 into rawstore12: 1.3580 +// alloc = (Allocate ...) 1.3581 +// rawoop = alloc.RawAddress 1.3582 +// tile12 = 0x00010002 1.3583 +// rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12) 1.3584 +// init = (Initialize alloc.Control alloc.Memory rawoop rawstore12) 1.3585 +// 1.3586 +void 1.3587 +InitializeNode::coalesce_subword_stores(intptr_t header_size, 1.3588 + Node* size_in_bytes, 1.3589 + PhaseGVN* phase) { 1.3590 + Compile* C = phase->C; 1.3591 + 1.3592 + assert(stores_are_sane(phase), ""); 1.3593 + // Note: After this pass, they are not completely sane, 1.3594 + // since there may be some overlaps. 1.3595 + 1.3596 + int old_subword = 0, old_long = 0, new_int = 0, new_long = 0; 1.3597 + 1.3598 + intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize); 1.3599 + intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit); 1.3600 + size_limit = MIN2(size_limit, ti_limit); 1.3601 + size_limit = align_size_up(size_limit, BytesPerLong); 1.3602 + int num_tiles = size_limit / BytesPerLong; 1.3603 + 1.3604 + // allocate space for the tile map: 1.3605 + const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small 1.3606 + jlong tiles_buf[small_len]; 1.3607 + Node* nodes_buf[small_len]; 1.3608 + jlong inits_buf[small_len]; 1.3609 + jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0] 1.3610 + : NEW_RESOURCE_ARRAY(jlong, num_tiles)); 1.3611 + Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0] 1.3612 + : NEW_RESOURCE_ARRAY(Node*, num_tiles)); 1.3613 + jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0] 1.3614 + : NEW_RESOURCE_ARRAY(jlong, num_tiles)); 1.3615 + // tiles: exact bitwise model of all primitive constants 1.3616 + // nodes: last constant-storing node subsumed into the tiles model 1.3617 + // inits: which bytes (in each tile) are touched by any initializations 1.3618 + 1.3619 + //// Pass A: Fill in the tile model with any relevant stores. 
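+  // (Worked illustration, echoing the header comment above: for the stores
+  //  of 1 and 2 into 'new short[2]', Pass A packs both subword constants
+  //  into one tile and Pass B re-emits them as the single int store of
+  //  tile12 = 0x00010002.)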
1.3620 + 1.3621 + Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles); 1.3622 + Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles); 1.3623 + Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles); 1.3624 + Node* zmem = zero_memory(); // initially zero memory state 1.3625 + for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { 1.3626 + Node* st = in(i); 1.3627 + intptr_t st_off = get_store_offset(st, phase); 1.3628 + 1.3629 + // Figure out the store's offset and constant value: 1.3630 + if (st_off < header_size) continue; //skip (ignore header) 1.3631 + if (st->in(MemNode::Memory) != zmem) continue; //skip (odd store chain) 1.3632 + int st_size = st->as_Store()->memory_size(); 1.3633 + if (st_off + st_size > size_limit) break; 1.3634 + 1.3635 + // Record which bytes are touched, whether by constant or not. 1.3636 + if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1)) 1.3637 + continue; // skip (strange store size) 1.3638 + 1.3639 + const Type* val = phase->type(st->in(MemNode::ValueIn)); 1.3640 + if (!val->singleton()) continue; //skip (non-con store) 1.3641 + BasicType type = val->basic_type(); 1.3642 + 1.3643 + jlong con = 0; 1.3644 + switch (type) { 1.3645 + case T_INT: con = val->is_int()->get_con(); break; 1.3646 + case T_LONG: con = val->is_long()->get_con(); break; 1.3647 + case T_FLOAT: con = jint_cast(val->getf()); break; 1.3648 + case T_DOUBLE: con = jlong_cast(val->getd()); break; 1.3649 + default: continue; //skip (odd store type) 1.3650 + } 1.3651 + 1.3652 + if (type == T_LONG && Matcher::isSimpleConstant64(con) && 1.3653 + st->Opcode() == Op_StoreL) { 1.3654 + continue; // This StoreL is already optimal. 1.3655 + } 1.3656 + 1.3657 + // Store down the constant. 1.3658 + store_constant(tiles, num_tiles, st_off, st_size, con); 1.3659 + 1.3660 + intptr_t j = st_off >> LogBytesPerLong; 1.3661 + 1.3662 + if (type == T_INT && st_size == BytesPerInt 1.3663 + && (st_off & BytesPerInt) == BytesPerInt) { 1.3664 + jlong lcon = tiles[j]; 1.3665 + if (!Matcher::isSimpleConstant64(lcon) && 1.3666 + st->Opcode() == Op_StoreI) { 1.3667 + // This StoreI is already optimal by itself. 1.3668 + jint* intcon = (jint*) &tiles[j]; 1.3669 + intcon[1] = 0; // undo the store_constant() 1.3670 + 1.3671 + // If the previous store is also optimal by itself, back up and 1.3672 + // undo the action of the previous loop iteration... if we can. 1.3673 + // But if we can't, just let the previous half take care of itself. 1.3674 + st = nodes[j]; 1.3675 + st_off -= BytesPerInt; 1.3676 + con = intcon[0]; 1.3677 + if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) { 1.3678 + assert(st_off >= header_size, "still ignoring header"); 1.3679 + assert(get_store_offset(st, phase) == st_off, "must be"); 1.3680 + assert(in(i-1) == zmem, "must be"); 1.3681 + DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn))); 1.3682 + assert(con == tcon->is_int()->get_con(), "must be"); 1.3683 + // Undo the effects of the previous loop trip, which swallowed st: 1.3684 + intcon[0] = 0; // undo store_constant() 1.3685 + set_req(i-1, st); // undo set_req(i, zmem) 1.3686 + nodes[j] = NULL; // undo nodes[j] = st 1.3687 + --old_subword; // undo ++old_subword 1.3688 + } 1.3689 + continue; // This StoreI is already optimal. 1.3690 + } 1.3691 + } 1.3692 + 1.3693 + // This store is not needed. 
1.3694 + set_req(i, zmem); 1.3695 + nodes[j] = st; // record for the moment 1.3696 + if (st_size < BytesPerLong) // something has changed 1.3697 + ++old_subword; // includes int/float, but who's counting... 1.3698 + else ++old_long; 1.3699 + } 1.3700 + 1.3701 + if ((old_subword + old_long) == 0) 1.3702 + return; // nothing more to do 1.3703 + 1.3704 + //// Pass B: Convert any non-zero tiles into optimal constant stores. 1.3705 + // Be sure to insert them before overlapping non-constant stores. 1.3706 + // (E.g., byte[] x = { 1,2,y,4 } => x[int 0] = 0x01020004, x[2]=y.) 1.3707 + for (int j = 0; j < num_tiles; j++) { 1.3708 + jlong con = tiles[j]; 1.3709 + jlong init = inits[j]; 1.3710 + if (con == 0) continue; 1.3711 + jint con0, con1; // split the constant, address-wise 1.3712 + jint init0, init1; // split the init map, address-wise 1.3713 + { union { jlong con; jint intcon[2]; } u; 1.3714 + u.con = con; 1.3715 + con0 = u.intcon[0]; 1.3716 + con1 = u.intcon[1]; 1.3717 + u.con = init; 1.3718 + init0 = u.intcon[0]; 1.3719 + init1 = u.intcon[1]; 1.3720 + } 1.3721 + 1.3722 + Node* old = nodes[j]; 1.3723 + assert(old != NULL, "need the prior store"); 1.3724 + intptr_t offset = (j * BytesPerLong); 1.3725 + 1.3726 + bool split = !Matcher::isSimpleConstant64(con); 1.3727 + 1.3728 + if (offset < header_size) { 1.3729 + assert(offset + BytesPerInt >= header_size, "second int counts"); 1.3730 + assert(*(jint*)&tiles[j] == 0, "junk in header"); 1.3731 + split = true; // only the second word counts 1.3732 + // Example: int a[] = { 42 ... } 1.3733 + } else if (con0 == 0 && init0 == -1) { 1.3734 + split = true; // first word is covered by full inits 1.3735 + // Example: int a[] = { ... foo(), 42 ... } 1.3736 + } else if (con1 == 0 && init1 == -1) { 1.3737 + split = true; // second word is covered by full inits 1.3738 + // Example: int a[] = { ... 42, foo() ... } 1.3739 + } 1.3740 + 1.3741 + // Here's a case where init0 is neither 0 nor -1: 1.3742 + // byte a[] = { ... 0,0,foo(),0, 0,0,0,42 ... } 1.3743 + // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF. 1.3744 + // In this case the tile is not split; it is (jlong)42. 1.3745 + // The big tile is stored down, and then the foo() value is inserted. 1.3746 + // (If there were foo(),foo() instead of foo(),0, init0 would be -1.) 1.3747 + 1.3748 + Node* ctl = old->in(MemNode::Control); 1.3749 + Node* adr = make_raw_address(offset, phase); 1.3750 + const TypePtr* atp = TypeRawPtr::BOTTOM; 1.3751 + 1.3752 + // One or two coalesced stores to plop down. 1.3753 + Node* st[2]; 1.3754 + intptr_t off[2]; 1.3755 + int nst = 0; 1.3756 + if (!split) { 1.3757 + ++new_long; 1.3758 + off[nst] = offset; 1.3759 + st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, 1.3760 + phase->longcon(con), T_LONG, MemNode::unordered); 1.3761 + } else { 1.3762 + // Omit either if it is a zero. 1.3763 + if (con0 != 0) { 1.3764 + ++new_int; 1.3765 + off[nst] = offset; 1.3766 + st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, 1.3767 + phase->intcon(con0), T_INT, MemNode::unordered); 1.3768 + } 1.3769 + if (con1 != 0) { 1.3770 + ++new_int; 1.3771 + offset += BytesPerInt; 1.3772 + adr = make_raw_address(offset, phase); 1.3773 + off[nst] = offset; 1.3774 + st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp, 1.3775 + phase->intcon(con1), T_INT, MemNode::unordered); 1.3776 + } 1.3777 + } 1.3778 + 1.3779 + // Insert second store first, then the first before the second. 1.3780 + // Insert each one just before any overlapping non-constant stores. 
1.3781 + while (nst > 0) { 1.3782 + Node* st1 = st[--nst]; 1.3783 + C->copy_node_notes_to(st1, old); 1.3784 + st1 = phase->transform(st1); 1.3785 + offset = off[nst]; 1.3786 + assert(offset >= header_size, "do not smash header"); 1.3787 + int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase); 1.3788 + guarantee(ins_idx != 0, "must re-insert constant store"); 1.3789 + if (ins_idx < 0) ins_idx = -ins_idx; // never overlap 1.3790 + if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem) 1.3791 + set_req(--ins_idx, st1); 1.3792 + else 1.3793 + ins_req(ins_idx, st1); 1.3794 + } 1.3795 + } 1.3796 + 1.3797 + if (PrintCompilation && WizardMode) 1.3798 + tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long", 1.3799 + old_subword, old_long, new_int, new_long); 1.3800 + if (C->log() != NULL) 1.3801 + C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'", 1.3802 + old_subword, old_long, new_int, new_long); 1.3803 + 1.3804 + // Clean up any remaining occurrences of zmem: 1.3805 + remove_extra_zeroes(); 1.3806 +} 1.3807 + 1.3808 +// Explore forward from in(start) to find the first fully initialized 1.3809 +// word, and return its offset. Skip groups of subword stores which 1.3810 +// together initialize full words. If in(start) is itself part of a 1.3811 +// fully initialized word, return the offset of in(start). If there 1.3812 +// are no following full-word stores, or if something is fishy, return 1.3813 +// a negative value. 1.3814 +intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) { 1.3815 + int int_map = 0; 1.3816 + intptr_t int_map_off = 0; 1.3817 + const int FULL_MAP = right_n_bits(BytesPerInt); // the int_map we hope for 1.3818 + 1.3819 + for (uint i = start, limit = req(); i < limit; i++) { 1.3820 + Node* st = in(i); 1.3821 + 1.3822 + intptr_t st_off = get_store_offset(st, phase); 1.3823 + if (st_off < 0) break; // return conservative answer 1.3824 + 1.3825 + int st_size = st->as_Store()->memory_size(); 1.3826 + if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) { 1.3827 + return st_off; // we found a complete word init 1.3828 + } 1.3829 + 1.3830 + // update the map: 1.3831 + 1.3832 + intptr_t this_int_off = align_size_down(st_off, BytesPerInt); 1.3833 + if (this_int_off != int_map_off) { 1.3834 + // reset the map: 1.3835 + int_map = 0; 1.3836 + int_map_off = this_int_off; 1.3837 + } 1.3838 + 1.3839 + int subword_off = st_off - this_int_off; 1.3840 + int_map |= right_n_bits(st_size) << subword_off; 1.3841 + if ((int_map & FULL_MAP) == FULL_MAP) { 1.3842 + return this_int_off; // we found a complete word init 1.3843 + } 1.3844 + 1.3845 + // Did this store hit or cross the word boundary? 1.3846 + intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt); 1.3847 + if (next_int_off == this_int_off + BytesPerInt) { 1.3848 + // We passed the current int, without fully initializing it. 1.3849 + int_map_off = next_int_off; 1.3850 + int_map >>= BytesPerInt; 1.3851 + } else if (next_int_off > this_int_off + BytesPerInt) { 1.3852 + // We passed the current and next int. 1.3853 + return this_int_off + BytesPerInt; 1.3854 + } 1.3855 + } 1.3856 + 1.3857 + return -1; 1.3858 +} 1.3859 + 1.3860 + 1.3861 +// Called when the associated AllocateNode is expanded into CFG. 1.3862 +// At this point, we may perform additional optimizations. 1.3863 +// Linearize the stores by ascending offset, to make memory 1.3864 +// activity as coherent as possible. 
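+// (A sketch of the result: each captured store Sk keeps its offset but
+// has its memory input rethreaded, so that the inits form one chain
+//   zero_memory() -> S1 -> S2 -> ... -> Sn
+// while any zeroing gaps are filled, in parallel, on the raw memory
+// via ClearArrayNode::clear_memory.)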
1.3865 +Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr, 1.3866 + intptr_t header_size, 1.3867 + Node* size_in_bytes, 1.3868 + PhaseGVN* phase) { 1.3869 + assert(!is_complete(), "not already complete"); 1.3870 + assert(stores_are_sane(phase), ""); 1.3871 + assert(allocation() != NULL, "must be present"); 1.3872 + 1.3873 + remove_extra_zeroes(); 1.3874 + 1.3875 + if (ReduceFieldZeroing || ReduceBulkZeroing) 1.3876 + // reduce instruction count for common initialization patterns 1.3877 + coalesce_subword_stores(header_size, size_in_bytes, phase); 1.3878 + 1.3879 + Node* zmem = zero_memory(); // initially zero memory state 1.3880 + Node* inits = zmem; // accumulating a linearized chain of inits 1.3881 + #ifdef ASSERT 1.3882 + intptr_t first_offset = allocation()->minimum_header_size(); 1.3883 + intptr_t last_init_off = first_offset; // previous init offset 1.3884 + intptr_t last_init_end = first_offset; // previous init offset+size 1.3885 + intptr_t last_tile_end = first_offset; // previous tile offset+size 1.3886 + #endif 1.3887 + intptr_t zeroes_done = header_size; 1.3888 + 1.3889 + bool do_zeroing = true; // we might give up if inits are very sparse 1.3890 + int big_init_gaps = 0; // how many large gaps have we seen? 1.3891 + 1.3892 + if (ZeroTLAB) do_zeroing = false; 1.3893 + if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false; 1.3894 + 1.3895 + for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { 1.3896 + Node* st = in(i); 1.3897 + intptr_t st_off = get_store_offset(st, phase); 1.3898 + if (st_off < 0) 1.3899 + break; // unknown junk in the inits 1.3900 + if (st->in(MemNode::Memory) != zmem) 1.3901 + break; // complicated store chains somehow in list 1.3902 + 1.3903 + int st_size = st->as_Store()->memory_size(); 1.3904 + intptr_t next_init_off = st_off + st_size; 1.3905 + 1.3906 + if (do_zeroing && zeroes_done < next_init_off) { 1.3907 + // See if this store needs a zero before it or under it. 1.3908 + intptr_t zeroes_needed = st_off; 1.3909 + 1.3910 + if (st_size < BytesPerInt) { 1.3911 + // Look for subword stores which only partially initialize words. 1.3912 + // If we find some, we must lay down some word-level zeroes first, 1.3913 + // underneath the subword stores. 1.3914 + // 1.3915 + // Examples: 1.3916 + // byte[] a = { p,q,r,s } => a[0]=p,a[1]=q,a[2]=r,a[3]=s 1.3917 + // byte[] a = { x,y,0,0 } => a[0..3] = 0, a[0]=x,a[1]=y 1.3918 + // byte[] a = { 0,0,z,0 } => a[0..3] = 0, a[2]=z 1.3919 + // 1.3920 + // Note: coalesce_subword_stores may have already done this, 1.3921 + // if it was prompted by constant non-zero subword initializers. 1.3922 + // But this case can still arise with non-constant stores. 1.3923 + 1.3924 + intptr_t next_full_store = find_next_fullword_store(i, phase); 1.3925 + 1.3926 + // In the examples above: 1.3927 + // in(i) p q r s x y z 1.3928 + // st_off 12 13 14 15 12 13 14 1.3929 + // st_size 1 1 1 1 1 1 1 1.3930 + // next_full_s. 12 16 16 16 16 16 16 1.3931 + // z's_done 12 16 16 16 12 16 12 1.3932 + // z's_needed 12 16 16 16 16 16 16 1.3933 + // zsize 0 0 0 0 4 0 4 1.3934 + if (next_full_store < 0) { 1.3935 + // Conservative tack: Zero to end of current word. 1.3936 + zeroes_needed = align_size_up(zeroes_needed, BytesPerInt); 1.3937 + } else { 1.3938 + // Zero to beginning of next fully initialized word. 1.3939 + // Or, don't zero at all, if we are already in that word. 
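+ // (Either way zeroes_needed remains int-aligned: the conservative
+ // branch aligns it up explicitly, and the assertion below checks
+ // the alignment of next_full_store.)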
1.3940 + assert(next_full_store >= zeroes_needed, "must go forward"); 1.3941 + assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary"); 1.3942 + zeroes_needed = next_full_store; 1.3943 + } 1.3944 + } 1.3945 + 1.3946 + if (zeroes_needed > zeroes_done) { 1.3947 + intptr_t zsize = zeroes_needed - zeroes_done; 1.3948 + // Do some incremental zeroing on rawmem, in parallel with inits. 1.3949 + zeroes_done = align_size_down(zeroes_done, BytesPerInt); 1.3950 + rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr, 1.3951 + zeroes_done, zeroes_needed, 1.3952 + phase); 1.3953 + zeroes_done = zeroes_needed; 1.3954 + if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2) 1.3955 + do_zeroing = false; // leave the hole, next time 1.3956 + } 1.3957 + } 1.3958 + 1.3959 + // Collect the store and move on: 1.3960 + st->set_req(MemNode::Memory, inits); 1.3961 + inits = st; // put it on the linearized chain 1.3962 + set_req(i, zmem); // unhook from previous position 1.3963 + 1.3964 + if (zeroes_done == st_off) 1.3965 + zeroes_done = next_init_off; 1.3966 + 1.3967 + assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any"); 1.3968 + 1.3969 + #ifdef ASSERT 1.3970 + // Various order invariants. Weaker than stores_are_sane because 1.3971 + // a large constant tile can be filled in by smaller non-constant stores. 1.3972 + assert(st_off >= last_init_off, "inits do not reverse"); 1.3973 + last_init_off = st_off; 1.3974 + const Type* val = NULL; 1.3975 + if (st_size >= BytesPerInt && 1.3976 + (val = phase->type(st->in(MemNode::ValueIn)))->singleton() && 1.3977 + (int)val->basic_type() < (int)T_OBJECT) { 1.3978 + assert(st_off >= last_tile_end, "tiles do not overlap"); 1.3979 + assert(st_off >= last_init_end, "tiles do not overwrite inits"); 1.3980 + last_tile_end = MAX2(last_tile_end, next_init_off); 1.3981 + } else { 1.3982 + intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong); 1.3983 + assert(st_tile_end >= last_tile_end, "inits stay with tiles"); 1.3984 + assert(st_off >= last_init_end, "inits do not overlap"); 1.3985 + last_init_end = next_init_off; // it's a non-tile 1.3986 + } 1.3987 + #endif //ASSERT 1.3988 + } 1.3989 + 1.3990 + remove_extra_zeroes(); // clear out all the zmems left over 1.3991 + add_req(inits); 1.3992 + 1.3993 + if (!ZeroTLAB) { 1.3994 + // If anything remains to be zeroed, zero it all now. 
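+ // (Sketch of the plan: explicit inits covered [header_size, zeroes_done),
+ // so a final ClearArray covers [zeroes_done, size_in_bytes); the one
+ // exception is the last unused 4 bytes of an instance, which may be
+ // skipped.)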
1.3995 + zeroes_done = align_size_down(zeroes_done, BytesPerInt);
1.3996 + // if it is the last unused 4 bytes of an instance, forget about it
1.3997 + intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
1.3998 + if (zeroes_done + BytesPerLong >= size_limit) {
1.3999 + assert(allocation() != NULL, "");
1.4000 + if (allocation()->Opcode() == Op_Allocate) {
1.4001 + Node* klass_node = allocation()->in(AllocateNode::KlassNode);
1.4002 + ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
1.4003 + if (zeroes_done == k->layout_helper())
1.4004 + zeroes_done = size_limit;
1.4005 + }
1.4006 + }
1.4007 + if (zeroes_done < size_limit) {
1.4008 + rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
1.4009 + zeroes_done, size_in_bytes, phase);
1.4010 + }
1.4011 + }
1.4012 +
1.4013 + set_complete(phase);
1.4014 + return rawmem;
1.4015 +}
1.4016 +
1.4017 +
1.4018 +#ifdef ASSERT
1.4019 +bool InitializeNode::stores_are_sane(PhaseTransform* phase) {
1.4020 + if (is_complete())
1.4021 + return true; // stores could be anything at this point
1.4022 + assert(allocation() != NULL, "must be present");
1.4023 + intptr_t last_off = allocation()->minimum_header_size();
1.4024 + for (uint i = InitializeNode::RawStores; i < req(); i++) {
1.4025 + Node* st = in(i);
1.4026 + intptr_t st_off = get_store_offset(st, phase);
1.4027 + if (st_off < 0) continue; // ignore dead garbage
1.4028 + if (last_off > st_off) {
1.4029 + tty->print_cr("*** bad store offset at %d: " INTX_FORMAT " > " INTX_FORMAT, i, last_off, st_off);
1.4030 + this->dump(2);
1.4031 + assert(false, "ascending store offsets");
1.4032 + return false;
1.4033 + }
1.4034 + last_off = st_off + st->as_Store()->memory_size();
1.4035 + }
1.4036 + return true;
1.4037 +}
1.4038 +#endif //ASSERT
1.4039 +
1.4040 +
1.4041 +
1.4042 +
1.4043 +//============================MergeMemNode=====================================
1.4044 +//
1.4045 +// SEMANTICS OF MEMORY MERGES: A MergeMem is a memory state assembled from several
1.4046 +// contributing store or call operations. Each contributor provides the memory
1.4047 +// state for a particular "alias type" (see Compile::alias_type). For example,
1.4048 +// if a MergeMem has an input X for alias category #6, then any memory reference
1.4049 +// to alias category #6 may use X as its memory state input, as an exact equivalent
1.4050 +// to using the MergeMem as a whole.
1.4051 +// Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p)
1.4052 +//
1.4053 +// (Here, the <N> notation gives the index of the relevant adr_type.)
1.4054 +//
1.4055 +// In one special case (and more cases in the future), alias categories overlap.
1.4056 +// The special alias category "Bot" (Compile::AliasIdxBot) includes all memory
1.4057 +// states. Therefore, if a MergeMem has only one contributing input W for Bot,
1.4058 +// it is exactly equivalent to that state W:
1.4059 +// MergeMem(<Bot>: W) <==> W
1.4060 +//
1.4061 +// Usually, the merge has more than one input. In that case, where inputs
1.4062 +// overlap (i.e., one is Bot), the narrower alias type determines the memory
1.4063 +// state for that type, and the wider alias type (Bot) fills in everywhere else:
1.4064 +// Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p)
1.4065 +// Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p)
1.4066 +//
1.4067 +// A merge can take a "wide" memory state as one of its narrow inputs.
1.4068 +// This simply means that the merge observes only the relevant parts of
1.4069 +// the wide input. That is, wide memory states arriving at narrow merge inputs
1.4070 +// are implicitly "filtered" or "sliced" as necessary. (This is rare.)
1.4071 +//
1.4072 +// These rules imply that MergeMem nodes may cascade (via their <Bot> links),
1.4073 +// and that memory slices "leak through":
1.4074 +// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y)
1.4075 +//
1.4076 +// But, in such a cascade, repeated memory slices can "block the leak":
1.4077 +// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y')
1.4078 +//
1.4079 +// In the last example, Y is not part of the combined memory state of the
1.4080 +// outermost MergeMem. The system must, of course, prevent unschedulable
1.4081 +// memory states from arising, so you can be sure that the state Y is somehow
1.4082 +// a precursor to state Y'.
1.4083 +//
1.4084 +//
1.4085 +// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
1.4086 +// of each MergeMemNode are exactly the numerical alias indexes, including
1.4087 +// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw. The functions
1.4088 +// Compile::alias_type (and kin) produce and manage these indexes.
1.4089 +//
1.4090 +// By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node.
1.4091 +// (Note that this provides quick access to the top node inside MergeMem methods,
1.4092 +// without the need to reach out via TLS to Compile::current.)
1.4093 +//
1.4094 +// As a consequence of what was just described, a MergeMem that represents a full
1.4095 +// memory state has an edge in(AliasIdxBot) which is a "wide" memory state,
1.4096 +// containing all alias categories.
1.4097 +//
1.4098 +// MergeMem nodes never (?) have control inputs, so in(0) is NULL.
1.4099 +//
1.4100 +// All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either
1.4101 +// a memory state for the alias type <N>, or else the top node, meaning that
1.4102 +// there is no particular input for that alias type. Note that the length of
1.4103 +// a MergeMem is variable, and may be extended at any time to accommodate new
1.4104 +// memory states at larger alias indexes. When merges grow, they are of course
1.4105 +// filled with "top" in the unused in() positions.
1.4106 +//
1.4107 +// This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable.
1.4108 +// (Top was chosen because it works smoothly with passes like GCM.)
1.4109 +//
1.4110 +// For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM. (It is
1.4111 +// the type of random VM bits like TLS references.) Since it is always the
1.4112 +// first non-Bot memory slice, some low-level loops use it to initialize an
1.4113 +// index variable: for (i = AliasIdxRaw; i < req(); i++).
1.4114 +//
1.4115 +//
1.4116 +// ACCESSORS: There is a special accessor MergeMemNode::base_memory which returns
1.4117 +// the distinguished "wide" state. The accessor MergeMemNode::memory_at(N) returns
1.4118 +// the memory state for alias type <N>, or, if there is no particular slice at <N>,
1.4119 +// the base memory. To prevent bugs, memory_at does not accept <Top>
1.4120 +// or <Bot> indexes. The iterator MergeMemStream provides robust iteration over
1.4121 +// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
1.4122 +//
1.4123 +// %%%% We may get rid of base_memory as a separate accessor at some point; it isn't
1.4124 +// really that different from the other memory inputs. An abbreviation called
1.4125 +// "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy.
1.4126 +//
1.4127 +//
1.4128 +// PARTIAL MEMORY STATES: During optimization, MergeMem nodes may arise that represent
1.4129 +// partial memory states. When a Phi splits through a MergeMem, the copy of the Phi
1.4130 +// that "emerges through" the base memory will be marked as excluding the alias types
1.4131 +// of the other (narrow-memory) copies which "emerged through" the narrow edges:
1.4132 +//
1.4133 +// Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y))
1.4134 +// ==Ideal=> MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y))
1.4135 +//
1.4136 +// This strange "subtraction" effect is necessary to ensure IGVN convergence.
1.4137 +// (It is currently unimplemented.) As you can see, the resulting merge is
1.4138 +// actually a disjoint union of memory states, rather than an overlay.
1.4139 +//
1.4140 +
1.4141 +//------------------------------MergeMemNode-----------------------------------
1.4142 +Node* MergeMemNode::make_empty_memory() {
1.4143 + Node* empty_memory = (Node*) Compile::current()->top();
1.4144 + assert(empty_memory->is_top(), "correct sentinel identity");
1.4145 + return empty_memory;
1.4146 +}
1.4147 +
1.4148 +MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {
1.4149 + init_class_id(Class_MergeMem);
1.4150 + // all inputs are nullified in Node::Node(int)
1.4151 + // set_input(0, NULL); // no control input
1.4152 +
1.4153 + // Initialize the edges uniformly to top, for starters.
1.4154 + Node* empty_mem = make_empty_memory();
1.4155 + for (uint i = Compile::AliasIdxTop; i < req(); i++) {
1.4156 + init_req(i,empty_mem);
1.4157 + }
1.4158 + assert(empty_memory() == empty_mem, "");
1.4159 +
1.4160 + if( new_base != NULL && new_base->is_MergeMem() ) {
1.4161 + MergeMemNode* mdef = new_base->as_MergeMem();
1.4162 + assert(mdef->empty_memory() == empty_mem, "consistent sentinels");
1.4163 + for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) {
1.4164 + mms.set_memory(mms.memory2());
1.4165 + }
1.4166 + assert(base_memory() == mdef->base_memory(), "");
1.4167 + } else {
1.4168 + set_base_memory(new_base);
1.4169 + }
1.4170 +}
1.4171 +
1.4172 +// Make a new, untransformed MergeMem with the same base as 'mem'.
1.4173 +// If mem is itself a MergeMem, populate the result with the same edges.
1.4174 +MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
1.4175 + return new(C) MergeMemNode(mem);
1.4176 +}
1.4177 +
1.4178 +//------------------------------cmp--------------------------------------------
1.4179 +uint MergeMemNode::hash() const { return NO_HASH; }
1.4180 +uint MergeMemNode::cmp( const Node &n ) const {
1.4181 + return (&n == this); // Always fail except on self
1.4182 +}
1.4183 +
1.4184 +//------------------------------Identity---------------------------------------
1.4185 +Node* MergeMemNode::Identity(PhaseTransform *phase) {
1.4186 + // Identity if this merge point does not record any interesting memory
1.4187 + // disambiguations.
1.4188 + Node* base_mem = base_memory();
1.4189 + Node* empty_mem = empty_memory();
1.4190 + if (base_mem != empty_mem) { // Memory path is not dead?
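+ // (Scan the narrow slices: if every in(i) is either the empty
+ // sentinel or a duplicate of base_mem, this node adds nothing and
+ // reduces to MergeMem(<Bot>: W) <==> W, as described above.)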
1.4191 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
1.4192 + Node* mem = in(i);
1.4193 + if (mem != empty_mem && mem != base_mem) {
1.4194 + return this; // Many memory splits; no change
1.4195 + }
1.4196 + }
1.4197 + }
1.4198 + return base_mem; // No memory splits; ID on the one true input
1.4199 +}
1.4200 +
1.4201 +//------------------------------Ideal------------------------------------------
1.4202 +// This method is invoked recursively on chains of MergeMem nodes
1.4203 +Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1.4204 + // Remove chain'd MergeMems
1.4205 + //
1.4206 + // This is delicate, because each "in(i)" (i >= Raw) is interpreted
1.4207 + // relative to the "in(Bot)". Since we are patching both at the same time,
1.4208 + // we have to be careful to read each "in(i)" relative to the old "in(Bot)",
1.4209 + // but rewrite each "in(i)" relative to the new "in(Bot)".
1.4210 + Node *progress = NULL;
1.4211 +
1.4212 +
1.4213 + Node* old_base = base_memory();
1.4214 + Node* empty_mem = empty_memory();
1.4215 + if (old_base == empty_mem)
1.4216 + return NULL; // Dead memory path.
1.4217 +
1.4218 + MergeMemNode* old_mbase;
1.4219 + if (old_base != NULL && old_base->is_MergeMem())
1.4220 + old_mbase = old_base->as_MergeMem();
1.4221 + else
1.4222 + old_mbase = NULL;
1.4223 + Node* new_base = old_base;
1.4224 +
1.4225 + // simplify stacked MergeMems in base memory
1.4226 + if (old_mbase) new_base = old_mbase->base_memory();
1.4227 +
1.4228 + // the base memory might contribute new slices beyond my req()
1.4229 + if (old_mbase) grow_to_match(old_mbase);
1.4230 +
1.4231 + // Look carefully at the base node if it is a phi.
1.4232 + PhiNode* phi_base;
1.4233 + if (new_base != NULL && new_base->is_Phi())
1.4234 + phi_base = new_base->as_Phi();
1.4235 + else
1.4236 + phi_base = NULL;
1.4237 +
1.4238 + Node* phi_reg = NULL;
1.4239 + uint phi_len = (uint)-1;
1.4240 + if (phi_base != NULL && !phi_base->is_copy()) {
1.4241 + // do not examine phi if degraded to a copy
1.4242 + phi_reg = phi_base->region();
1.4243 + phi_len = phi_base->req();
1.4244 + // see if the phi is unfinished
1.4245 + for (uint i = 1; i < phi_len; i++) {
1.4246 + if (phi_base->in(i) == NULL) {
1.4247 + // incomplete phi; do not look at it yet!
1.4248 + phi_reg = NULL;
1.4249 + phi_len = (uint)-1;
1.4250 + break;
1.4251 + }
1.4252 + }
1.4253 + }
1.4254 +
1.4255 + // Note: We do not call verify_sparse on entry, because inputs
1.4256 + // can normalize to the base_memory via subsume_node or similar
1.4257 + // mechanisms. This method repairs that damage.
1.4258 +
1.4259 + assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels");
1.4260 +
1.4261 + // Look at each slice.
1.4262 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
1.4263 + Node* old_in = in(i);
1.4264 + // calculate the old memory value
1.4265 + Node* old_mem = old_in;
1.4266 + if (old_mem == empty_mem) old_mem = old_base;
1.4267 + assert(old_mem == memory_at(i), "");
1.4268 +
1.4269 + // maybe update (reslice) the old memory value
1.4270 +
1.4271 + // simplify stacked MergeMems
1.4272 + Node* new_mem = old_mem;
1.4273 + MergeMemNode* old_mmem;
1.4274 + if (old_mem != NULL && old_mem->is_MergeMem())
1.4275 + old_mmem = old_mem->as_MergeMem();
1.4276 + else
1.4277 + old_mmem = NULL;
1.4278 + if (old_mmem == this) {
1.4279 + // This can happen if loops break up and safepoints disappear.
1.4280 + // A merge of BotPtr (default) with a RawPtr memory derived from a
1.4281 + // safepoint can be rewritten to a merge of the same BotPtr with
1.4282 + // the BotPtr phi coming into the loop. If that phi disappears
1.4283 + // also, we can end up with a self-loop of the mergemem.
1.4284 + // In general, if loops degenerate and memory effects disappear,
1.4285 + // a mergemem can be left looking at itself. This simply means
1.4286 + // that the mergemem's default should be used, since there is
1.4287 + // no longer any apparent effect on this slice.
1.4288 + // Note: If a memory slice is a MergeMem cycle, it is unreachable
1.4289 + // from start. Update the input to TOP.
1.4290 + new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base;
1.4291 + }
1.4292 + else if (old_mmem != NULL) {
1.4293 + new_mem = old_mmem->memory_at(i);
1.4294 + }
1.4295 + // else preceding memory was not a MergeMem
1.4296 +
1.4297 + // replace equivalent phis (unfortunately, they do not GVN together)
1.4298 + if (new_mem != NULL && new_mem != new_base &&
1.4299 + new_mem->req() == phi_len && new_mem->in(0) == phi_reg) {
1.4300 + if (new_mem->is_Phi()) {
1.4301 + PhiNode* phi_mem = new_mem->as_Phi();
1.4302 + for (uint i = 1; i < phi_len; i++) {
1.4303 + if (phi_base->in(i) != phi_mem->in(i)) {
1.4304 + phi_mem = NULL;
1.4305 + break;
1.4306 + }
1.4307 + }
1.4308 + if (phi_mem != NULL) {
1.4309 + // equivalent phi nodes; revert to the def
1.4310 + new_mem = new_base;
1.4311 + }
1.4312 + }
1.4313 + }
1.4314 +
1.4315 + // maybe store down a new value
1.4316 + Node* new_in = new_mem;
1.4317 + if (new_in == new_base) new_in = empty_mem;
1.4318 +
1.4319 + if (new_in != old_in) {
1.4320 + // Warning: Do not combine this "if" with the previous "if".
1.4321 + // A memory slice might have to be rewritten even if it is semantically
1.4322 + // unchanged, if the base_memory value has changed.
1.4323 + set_req(i, new_in);
1.4324 + progress = this; // Report progress
1.4325 + }
1.4326 + }
1.4327 +
1.4328 + if (new_base != old_base) {
1.4329 + set_req(Compile::AliasIdxBot, new_base);
1.4330 + // Don't use set_base_memory(new_base), because we need to update du.
1.4331 + assert(base_memory() == new_base, "");
1.4332 + progress = this;
1.4333 + }
1.4334 +
1.4335 + if( base_memory() == this ) {
1.4336 + // a self cycle indicates this memory path is dead
1.4337 + set_req(Compile::AliasIdxBot, empty_mem);
1.4338 + }
1.4339 +
1.4340 + // Resolve external cycles by calling Ideal on a MergeMem base_memory
1.4341 + // Recursion must occur after the self cycle check above
1.4342 + if( base_memory()->is_MergeMem() ) {
1.4343 + MergeMemNode *new_mbase = base_memory()->as_MergeMem();
1.4344 + Node *m = phase->transform(new_mbase); // Rollup any cycles
1.4345 + if( m != NULL && (m->is_top() ||
1.4346 + (m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem)) ) {
1.4347 + // propagate rollup of dead cycle to self
1.4348 + set_req(Compile::AliasIdxBot, empty_mem);
1.4349 + }
1.4350 + }
1.4351 +
1.4352 + if( base_memory() == empty_mem ) {
1.4353 + progress = this;
1.4354 + // Cut inputs during Parse phase only.
1.4355 + // During Optimize phase a dead MergeMem node will be subsumed by Top.
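+ // (can_reshape is false during the parser's GVN and true during
+ // IterGVN; hence the edge-cutting below runs only while parsing,
+ // and IterGVN instead lets the dead node be subsumed as a whole.)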
1.4356 + if( !can_reshape ) { 1.4357 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.4358 + if( in(i) != empty_mem ) { set_req(i, empty_mem); } 1.4359 + } 1.4360 + } 1.4361 + } 1.4362 + 1.4363 + if( !progress && base_memory()->is_Phi() && can_reshape ) { 1.4364 + // Check if PhiNode::Ideal's "Split phis through memory merges" 1.4365 + // transform should be attempted. Look for this->phi->this cycle. 1.4366 + uint merge_width = req(); 1.4367 + if (merge_width > Compile::AliasIdxRaw) { 1.4368 + PhiNode* phi = base_memory()->as_Phi(); 1.4369 + for( uint i = 1; i < phi->req(); ++i ) {// For all paths in 1.4370 + if (phi->in(i) == this) { 1.4371 + phase->is_IterGVN()->_worklist.push(phi); 1.4372 + break; 1.4373 + } 1.4374 + } 1.4375 + } 1.4376 + } 1.4377 + 1.4378 + assert(progress || verify_sparse(), "please, no dups of base"); 1.4379 + return progress; 1.4380 +} 1.4381 + 1.4382 +//-------------------------set_base_memory------------------------------------- 1.4383 +void MergeMemNode::set_base_memory(Node *new_base) { 1.4384 + Node* empty_mem = empty_memory(); 1.4385 + set_req(Compile::AliasIdxBot, new_base); 1.4386 + assert(memory_at(req()) == new_base, "must set default memory"); 1.4387 + // Clear out other occurrences of new_base: 1.4388 + if (new_base != empty_mem) { 1.4389 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.4390 + if (in(i) == new_base) set_req(i, empty_mem); 1.4391 + } 1.4392 + } 1.4393 +} 1.4394 + 1.4395 +//------------------------------out_RegMask------------------------------------ 1.4396 +const RegMask &MergeMemNode::out_RegMask() const { 1.4397 + return RegMask::Empty; 1.4398 +} 1.4399 + 1.4400 +//------------------------------dump_spec-------------------------------------- 1.4401 +#ifndef PRODUCT 1.4402 +void MergeMemNode::dump_spec(outputStream *st) const { 1.4403 + st->print(" {"); 1.4404 + Node* base_mem = base_memory(); 1.4405 + for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) { 1.4406 + Node* mem = memory_at(i); 1.4407 + if (mem == base_mem) { st->print(" -"); continue; } 1.4408 + st->print( " N%d:", mem->_idx ); 1.4409 + Compile::current()->get_adr_type(i)->dump_on(st); 1.4410 + } 1.4411 + st->print(" }"); 1.4412 +} 1.4413 +#endif // !PRODUCT 1.4414 + 1.4415 + 1.4416 +#ifdef ASSERT 1.4417 +static bool might_be_same(Node* a, Node* b) { 1.4418 + if (a == b) return true; 1.4419 + if (!(a->is_Phi() || b->is_Phi())) return false; 1.4420 + // phis shift around during optimization 1.4421 + return true; // pretty stupid... 
1.4422 +}
1.4423 +
1.4424 +// verify a narrow slice (either incoming or outgoing)
1.4425 +static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) {
1.4426 + if (!VerifyAliases) return; // don't bother to verify unless requested
1.4427 + if (is_error_reported()) return; // muzzle asserts when debugging an error
1.4428 + if (Node::in_dump()) return; // muzzle asserts when printing
1.4429 + assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel");
1.4430 + assert(n != NULL, "");
1.4431 + // Elide intervening MergeMem's
1.4432 + while (n->is_MergeMem()) {
1.4433 + n = n->as_MergeMem()->memory_at(alias_idx);
1.4434 + }
1.4435 + Compile* C = Compile::current();
1.4436 + const TypePtr* n_adr_type = n->adr_type();
1.4437 + if (n == m->empty_memory()) {
1.4438 + // Implicit copy of base_memory()
1.4439 + } else if (n_adr_type != TypePtr::BOTTOM) {
1.4440 + assert(n_adr_type != NULL, "new memory must have a well-defined adr_type");
1.4441 + assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice");
1.4442 + } else {
1.4443 + // A few places like make_runtime_call "know" that VM calls are narrow,
1.4444 + // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM.
1.4445 + bool expected_wide_mem = false;
1.4446 + if (n == m->base_memory()) {
1.4447 + expected_wide_mem = true;
1.4448 + } else if (alias_idx == Compile::AliasIdxRaw ||
1.4449 + n == m->memory_at(Compile::AliasIdxRaw)) {
1.4450 + expected_wide_mem = true;
1.4451 + } else if (!C->alias_type(alias_idx)->is_rewritable()) {
1.4452 + // memory can "leak through" calls on channels that
1.4453 + // are write-once. Allow this also.
1.4454 + expected_wide_mem = true;
1.4455 + }
1.4456 + assert(expected_wide_mem, "expected narrow slice replacement");
1.4457 + }
1.4458 +}
1.4459 +#else // !ASSERT
1.4460 +#define verify_memory_slice(m,i,n) (void)(0) // PRODUCT version is no-op
1.4461 +#endif
1.4462 +
1.4463 +
1.4464 +//-----------------------------memory_at---------------------------------------
1.4465 +Node* MergeMemNode::memory_at(uint alias_idx) const {
1.4466 + assert(alias_idx >= Compile::AliasIdxRaw ||
1.4467 + (alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0),
1.4468 + "must avoid base_memory and AliasIdxTop");
1.4469 +
1.4470 + // Otherwise, it is a narrow slice.
1.4471 + Node* n = alias_idx < req() ? in(alias_idx) : empty_memory();
1.4472 + Compile *C = Compile::current();
1.4473 + if (is_empty_memory(n)) {
1.4474 + // the array is sparse; empty slots are the "top" node
1.4475 + n = base_memory();
1.4476 + assert(Node::in_dump()
1.4477 + || n == NULL || n->bottom_type() == Type::TOP
1.4478 + || n->adr_type() == NULL // address is TOP
1.4479 + || n->adr_type() == TypePtr::BOTTOM
1.4480 + || n->adr_type() == TypeRawPtr::BOTTOM
1.4481 + || Compile::current()->AliasLevel() == 0,
1.4482 + "must be a wide memory");
1.4483 + // AliasLevel == 0 if we are organizing the memory states manually.
1.4484 + // See verify_memory_slice for comments on TypeRawPtr::BOTTOM.
1.4485 + } else {
1.4486 + // make sure the stored slice is sane
1.4487 + #ifdef ASSERT
1.4488 + if (is_error_reported() || Node::in_dump()) {
1.4489 + } else if (might_be_same(n, base_memory())) {
1.4490 + // Give it a pass: It is a mostly harmless repetition of the base.
1.4491 + // This can arise normally from node subsumption during optimization.
1.4492 + } else { 1.4493 + verify_memory_slice(this, alias_idx, n); 1.4494 + } 1.4495 + #endif 1.4496 + } 1.4497 + return n; 1.4498 +} 1.4499 + 1.4500 +//---------------------------set_memory_at------------------------------------- 1.4501 +void MergeMemNode::set_memory_at(uint alias_idx, Node *n) { 1.4502 + verify_memory_slice(this, alias_idx, n); 1.4503 + Node* empty_mem = empty_memory(); 1.4504 + if (n == base_memory()) n = empty_mem; // collapse default 1.4505 + uint need_req = alias_idx+1; 1.4506 + if (req() < need_req) { 1.4507 + if (n == empty_mem) return; // already the default, so do not grow me 1.4508 + // grow the sparse array 1.4509 + do { 1.4510 + add_req(empty_mem); 1.4511 + } while (req() < need_req); 1.4512 + } 1.4513 + set_req( alias_idx, n ); 1.4514 +} 1.4515 + 1.4516 + 1.4517 + 1.4518 +//--------------------------iteration_setup------------------------------------ 1.4519 +void MergeMemNode::iteration_setup(const MergeMemNode* other) { 1.4520 + if (other != NULL) { 1.4521 + grow_to_match(other); 1.4522 + // invariant: the finite support of mm2 is within mm->req() 1.4523 + #ifdef ASSERT 1.4524 + for (uint i = req(); i < other->req(); i++) { 1.4525 + assert(other->is_empty_memory(other->in(i)), "slice left uncovered"); 1.4526 + } 1.4527 + #endif 1.4528 + } 1.4529 + // Replace spurious copies of base_memory by top. 1.4530 + Node* base_mem = base_memory(); 1.4531 + if (base_mem != NULL && !base_mem->is_top()) { 1.4532 + for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) { 1.4533 + if (in(i) == base_mem) 1.4534 + set_req(i, empty_memory()); 1.4535 + } 1.4536 + } 1.4537 +} 1.4538 + 1.4539 +//---------------------------grow_to_match------------------------------------- 1.4540 +void MergeMemNode::grow_to_match(const MergeMemNode* other) { 1.4541 + Node* empty_mem = empty_memory(); 1.4542 + assert(other->is_empty_memory(empty_mem), "consistent sentinels"); 1.4543 + // look for the finite support of the other memory 1.4544 + for (uint i = other->req(); --i >= req(); ) { 1.4545 + if (other->in(i) != empty_mem) { 1.4546 + uint new_len = i+1; 1.4547 + while (req() < new_len) add_req(empty_mem); 1.4548 + break; 1.4549 + } 1.4550 + } 1.4551 +} 1.4552 + 1.4553 +//---------------------------verify_sparse------------------------------------- 1.4554 +#ifndef PRODUCT 1.4555 +bool MergeMemNode::verify_sparse() const { 1.4556 + assert(is_empty_memory(make_empty_memory()), "sane sentinel"); 1.4557 + Node* base_mem = base_memory(); 1.4558 + // The following can happen in degenerate cases, since empty==top. 1.4559 + if (is_empty_memory(base_mem)) return true; 1.4560 + for (uint i = Compile::AliasIdxRaw; i < req(); i++) { 1.4561 + assert(in(i) != NULL, "sane slice"); 1.4562 + if (in(i) == base_mem) return false; // should have been the sentinel value! 1.4563 + } 1.4564 + return true; 1.4565 +} 1.4566 + 1.4567 +bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) { 1.4568 + Node* n; 1.4569 + n = mm->in(idx); 1.4570 + if (mem == n) return true; // might be empty_memory() 1.4571 + n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx); 1.4572 + if (mem == n) return true; 1.4573 + while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) { 1.4574 + if (mem == n) return true; 1.4575 + if (n == NULL) break; 1.4576 + } 1.4577 + return false; 1.4578 +} 1.4579 +#endif // !PRODUCT