Wed, 09 Dec 2009 16:40:45 -0800
6895383: JCK test throws NPE for method compiled with Escape Analysis
Summary: Add missing checks for MemBar nodes in EA.
Reviewed-by: never
duke@435 | 1 | /* |
xdono@1014 | 2 | * Copyright 2007-2009 Sun Microsystems, Inc. All Rights Reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
duke@435 | 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
duke@435 | 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
duke@435 | 21 | * have any questions. |
duke@435 | 22 | */ |
duke@435 | 23 | |
duke@435 | 24 | #include "incls/_precompiled.incl" |
duke@435 | 25 | #include "incls/_superword.cpp.incl" |
duke@435 | 26 | |
duke@435 | 27 | // |
duke@435 | 28 | // S U P E R W O R D T R A N S F O R M |
duke@435 | 29 | //============================================================================= |
duke@435 | 30 | |
duke@435 | 31 | //------------------------------SuperWord--------------------------- |
duke@435 | 32 | SuperWord::SuperWord(PhaseIdealLoop* phase) : |
duke@435 | 33 | _phase(phase), |
duke@435 | 34 | _igvn(phase->_igvn), |
duke@435 | 35 | _arena(phase->C->comp_arena()), |
duke@435 | 36 | _packset(arena(), 8, 0, NULL), // packs for the current block |
duke@435 | 37 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb |
duke@435 | 38 | _block(arena(), 8, 0, NULL), // nodes in current block |
duke@435 | 39 | _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside |
duke@435 | 40 | _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads |
duke@435 | 41 | _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails |
duke@435 | 42 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node |
duke@435 | 43 | _align_to_ref(NULL), // memory reference to align vectors to |
duke@435 | 44 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs |
duke@435 | 45 | _dg(_arena), // dependence graph |
duke@435 | 46 | _visited(arena()), // visited node set |
duke@435 | 47 | _post_visited(arena()), // post visited node set |
duke@435 | 48 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs |
duke@435 | 49 | _stk(arena(), 8, 0, NULL), // scratch stack of nodes |
duke@435 | 50 | _nlist(arena(), 8, 0, NULL), // scratch list of nodes |
duke@435 | 51 | _lpt(NULL), // loop tree node |
duke@435 | 52 | _lp(NULL), // LoopNode |
duke@435 | 53 | _bb(NULL), // basic block |
duke@435 | 54 | _iv(NULL) // induction var |
duke@435 | 55 | {} |
duke@435 | 56 | |
duke@435 | 57 | //------------------------------transform_loop--------------------------- |
duke@435 | 58 | void SuperWord::transform_loop(IdealLoopTree* lpt) { |
duke@435 | 59 | assert(lpt->_head->is_CountedLoop(), "must be"); |
duke@435 | 60 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
duke@435 | 61 | |
duke@435 | 62 | if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops |
duke@435 | 63 | |
duke@435 | 64 | // Check for no control flow in body (other than exit) |
duke@435 | 65 | Node *cl_exit = cl->loopexit(); |
duke@435 | 66 | if (cl_exit->in(0) != lpt->_head) return; |
duke@435 | 67 | |
never@540 | 68 | // Make sure the are no extra control users of the loop backedge |
never@540 | 69 | if (cl->back_control()->outcnt() != 1) { |
never@540 | 70 | return; |
never@540 | 71 | } |
never@540 | 72 | |
duke@435 | 73 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) |
duke@435 | 74 | CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
duke@435 | 75 | if (pre_end == NULL) return; |
duke@435 | 76 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 77 | if (pre_opaq1->Opcode() != Op_Opaque1) return; |
duke@435 | 78 | |
duke@435 | 79 | // Do vectors exist on this architecture? |
duke@435 | 80 | if (vector_width_in_bytes() == 0) return; |
duke@435 | 81 | |
duke@435 | 82 | init(); // initialize data structures |
duke@435 | 83 | |
duke@435 | 84 | set_lpt(lpt); |
duke@435 | 85 | set_lp(cl); |
duke@435 | 86 | |
duke@435 | 87 | // For now, define one block which is the entire loop body |
duke@435 | 88 | set_bb(cl); |
duke@435 | 89 | |
duke@435 | 90 | assert(_packset.length() == 0, "packset must be empty"); |
duke@435 | 91 | SLP_extract(); |
duke@435 | 92 | } |
duke@435 | 93 | |
duke@435 | 94 | //------------------------------SLP_extract--------------------------- |
duke@435 | 95 | // Extract the superword level parallelism |
duke@435 | 96 | // |
duke@435 | 97 | // 1) A reverse post-order of nodes in the block is constructed. By scanning |
duke@435 | 98 | // this list from first to last, all definitions are visited before their uses. |
duke@435 | 99 | // |
duke@435 | 100 | // 2) A point-to-point dependence graph is constructed between memory references. |
duke@435 | 101 | // This simplies the upcoming "independence" checker. |
duke@435 | 102 | // |
duke@435 | 103 | // 3) The maximum depth in the node graph from the beginning of the block |
duke@435 | 104 | // to each node is computed. This is used to prune the graph search |
duke@435 | 105 | // in the independence checker. |
duke@435 | 106 | // |
duke@435 | 107 | // 4) For integer types, the necessary bit width is propagated backwards |
duke@435 | 108 | // from stores to allow packed operations on byte, char, and short |
duke@435 | 109 | // integers. This reverses the promotion to type "int" that javac |
duke@435 | 110 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. |
duke@435 | 111 | // |
duke@435 | 112 | // 5) One of the memory references is picked to be an aligned vector reference. |
duke@435 | 113 | // The pre-loop trip count is adjusted to align this reference in the |
duke@435 | 114 | // unrolled body. |
duke@435 | 115 | // |
duke@435 | 116 | // 6) The initial set of pack pairs is seeded with memory references. |
duke@435 | 117 | // |
duke@435 | 118 | // 7) The set of pack pairs is extended by following use->def and def->use links. |
duke@435 | 119 | // |
duke@435 | 120 | // 8) The pairs are combined into vector sized packs. |
duke@435 | 121 | // |
duke@435 | 122 | // 9) Reorder the memory slices to co-locate members of the memory packs. |
duke@435 | 123 | // |
duke@435 | 124 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, |
duke@435 | 125 | // inserting scalar promotion, vector creation from multiple scalars, and |
duke@435 | 126 | // extraction of scalar values from vectors. |
duke@435 | 127 | // |
duke@435 | 128 | void SuperWord::SLP_extract() { |
duke@435 | 129 | |
duke@435 | 130 | // Ready the block |
duke@435 | 131 | |
duke@435 | 132 | construct_bb(); |
duke@435 | 133 | |
duke@435 | 134 | dependence_graph(); |
duke@435 | 135 | |
duke@435 | 136 | compute_max_depth(); |
duke@435 | 137 | |
duke@435 | 138 | compute_vector_element_type(); |
duke@435 | 139 | |
duke@435 | 140 | // Attempt vectorization |
duke@435 | 141 | |
duke@435 | 142 | find_adjacent_refs(); |
duke@435 | 143 | |
duke@435 | 144 | extend_packlist(); |
duke@435 | 145 | |
duke@435 | 146 | combine_packs(); |
duke@435 | 147 | |
duke@435 | 148 | construct_my_pack_map(); |
duke@435 | 149 | |
duke@435 | 150 | filter_packs(); |
duke@435 | 151 | |
duke@435 | 152 | schedule(); |
duke@435 | 153 | |
duke@435 | 154 | output(); |
duke@435 | 155 | } |
duke@435 | 156 | |
duke@435 | 157 | //------------------------------find_adjacent_refs--------------------------- |
duke@435 | 158 | // Find the adjacent memory references and create pack pairs for them. |
duke@435 | 159 | // This is the initial set of packs that will then be extended by |
duke@435 | 160 | // following use->def and def->use links. The align positions are |
duke@435 | 161 | // assigned relative to the reference "align_to_ref" |
duke@435 | 162 | void SuperWord::find_adjacent_refs() { |
duke@435 | 163 | // Get list of memory operations |
duke@435 | 164 | Node_List memops; |
duke@435 | 165 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 166 | Node* n = _block.at(i); |
kvn@464 | 167 | if (n->is_Mem() && in_bb(n) && |
kvn@464 | 168 | is_java_primitive(n->as_Mem()->memory_type())) { |
duke@435 | 169 | int align = memory_alignment(n->as_Mem(), 0); |
duke@435 | 170 | if (align != bottom_align) { |
duke@435 | 171 | memops.push(n); |
duke@435 | 172 | } |
duke@435 | 173 | } |
duke@435 | 174 | } |
duke@435 | 175 | if (memops.size() == 0) return; |
duke@435 | 176 | |
duke@435 | 177 | // Find a memory reference to align to. The pre-loop trip count |
duke@435 | 178 | // is modified to align this reference to a vector-aligned address |
duke@435 | 179 | find_align_to_ref(memops); |
duke@435 | 180 | if (align_to_ref() == NULL) return; |
duke@435 | 181 | |
duke@435 | 182 | SWPointer align_to_ref_p(align_to_ref(), this); |
duke@435 | 183 | int offset = align_to_ref_p.offset_in_bytes(); |
duke@435 | 184 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 185 | int vw = vector_width_in_bytes(); |
duke@435 | 186 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
duke@435 | 187 | int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
duke@435 | 188 | |
duke@435 | 189 | #ifndef PRODUCT |
duke@435 | 190 | if (TraceSuperWord) |
never@507 | 191 | tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d", |
never@507 | 192 | offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride()); |
duke@435 | 193 | #endif |
duke@435 | 194 | |
duke@435 | 195 | // Set alignment relative to "align_to_ref" |
duke@435 | 196 | for (int i = memops.size() - 1; i >= 0; i--) { |
duke@435 | 197 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 198 | SWPointer p2(s, this); |
duke@435 | 199 | if (p2.comparable(align_to_ref_p)) { |
duke@435 | 200 | int align = memory_alignment(s, iv_adjustment); |
duke@435 | 201 | set_alignment(s, align); |
duke@435 | 202 | } else { |
duke@435 | 203 | memops.remove(i); |
duke@435 | 204 | } |
duke@435 | 205 | } |
duke@435 | 206 | |
duke@435 | 207 | // Create initial pack pairs of memory operations |
duke@435 | 208 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 209 | Node* s1 = memops.at(i); |
duke@435 | 210 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 211 | Node* s2 = memops.at(j); |
duke@435 | 212 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
duke@435 | 213 | int align = alignment(s1); |
duke@435 | 214 | if (stmts_can_pack(s1, s2, align)) { |
duke@435 | 215 | Node_List* pair = new Node_List(); |
duke@435 | 216 | pair->push(s1); |
duke@435 | 217 | pair->push(s2); |
duke@435 | 218 | _packset.append(pair); |
duke@435 | 219 | } |
duke@435 | 220 | } |
duke@435 | 221 | } |
duke@435 | 222 | } |
duke@435 | 223 | |
duke@435 | 224 | #ifndef PRODUCT |
duke@435 | 225 | if (TraceSuperWord) { |
duke@435 | 226 | tty->print_cr("\nAfter find_adjacent_refs"); |
duke@435 | 227 | print_packset(); |
duke@435 | 228 | } |
duke@435 | 229 | #endif |
duke@435 | 230 | } |
duke@435 | 231 | |
duke@435 | 232 | //------------------------------find_align_to_ref--------------------------- |
duke@435 | 233 | // Find a memory reference to align the loop induction variable to. |
duke@435 | 234 | // Looks first at stores then at loads, looking for a memory reference |
duke@435 | 235 | // with the largest number of references similar to it. |
duke@435 | 236 | void SuperWord::find_align_to_ref(Node_List &memops) { |
duke@435 | 237 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
duke@435 | 238 | |
duke@435 | 239 | // Count number of comparable memory ops |
duke@435 | 240 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 241 | MemNode* s1 = memops.at(i)->as_Mem(); |
duke@435 | 242 | SWPointer p1(s1, this); |
duke@435 | 243 | // Discard if pre loop can't align this reference |
duke@435 | 244 | if (!ref_is_alignable(p1)) { |
duke@435 | 245 | *cmp_ct.adr_at(i) = 0; |
duke@435 | 246 | continue; |
duke@435 | 247 | } |
duke@435 | 248 | for (uint j = i+1; j < memops.size(); j++) { |
duke@435 | 249 | MemNode* s2 = memops.at(j)->as_Mem(); |
duke@435 | 250 | if (isomorphic(s1, s2)) { |
duke@435 | 251 | SWPointer p2(s2, this); |
duke@435 | 252 | if (p1.comparable(p2)) { |
duke@435 | 253 | (*cmp_ct.adr_at(i))++; |
duke@435 | 254 | (*cmp_ct.adr_at(j))++; |
duke@435 | 255 | } |
duke@435 | 256 | } |
duke@435 | 257 | } |
duke@435 | 258 | } |
duke@435 | 259 | |
duke@435 | 260 | // Find Store (or Load) with the greatest number of "comparable" references |
duke@435 | 261 | int max_ct = 0; |
duke@435 | 262 | int max_idx = -1; |
duke@435 | 263 | int min_size = max_jint; |
duke@435 | 264 | int min_iv_offset = max_jint; |
duke@435 | 265 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 266 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 267 | if (s->is_Store()) { |
duke@435 | 268 | SWPointer p(s, this); |
duke@435 | 269 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 270 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 271 | data_size(s) == min_size && |
duke@435 | 272 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 273 | max_ct = cmp_ct.at(j); |
duke@435 | 274 | max_idx = j; |
duke@435 | 275 | min_size = data_size(s); |
duke@435 | 276 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 277 | } |
duke@435 | 278 | } |
duke@435 | 279 | } |
duke@435 | 280 | // If no stores, look at loads |
duke@435 | 281 | if (max_ct == 0) { |
duke@435 | 282 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 283 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 284 | if (s->is_Load()) { |
duke@435 | 285 | SWPointer p(s, this); |
duke@435 | 286 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 287 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 288 | data_size(s) == min_size && |
duke@435 | 289 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 290 | max_ct = cmp_ct.at(j); |
duke@435 | 291 | max_idx = j; |
duke@435 | 292 | min_size = data_size(s); |
duke@435 | 293 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 294 | } |
duke@435 | 295 | } |
duke@435 | 296 | } |
duke@435 | 297 | } |
duke@435 | 298 | |
duke@435 | 299 | if (max_ct > 0) |
duke@435 | 300 | set_align_to_ref(memops.at(max_idx)->as_Mem()); |
duke@435 | 301 | |
duke@435 | 302 | #ifndef PRODUCT |
duke@435 | 303 | if (TraceSuperWord && Verbose) { |
duke@435 | 304 | tty->print_cr("\nVector memops after find_align_to_refs"); |
duke@435 | 305 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 306 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 307 | s->dump(); |
duke@435 | 308 | } |
duke@435 | 309 | } |
duke@435 | 310 | #endif |
duke@435 | 311 | } |
duke@435 | 312 | |
duke@435 | 313 | //------------------------------ref_is_alignable--------------------------- |
duke@435 | 314 | // Can the preloop align the reference to position zero in the vector? |
duke@435 | 315 | bool SuperWord::ref_is_alignable(SWPointer& p) { |
duke@435 | 316 | if (!p.has_iv()) { |
duke@435 | 317 | return true; // no induction variable |
duke@435 | 318 | } |
duke@435 | 319 | CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop()); |
duke@435 | 320 | assert(pre_end->stride_is_con(), "pre loop stride is constant"); |
duke@435 | 321 | int preloop_stride = pre_end->stride_con(); |
duke@435 | 322 | |
duke@435 | 323 | int span = preloop_stride * p.scale_in_bytes(); |
duke@435 | 324 | |
duke@435 | 325 | // Stride one accesses are alignable. |
duke@435 | 326 | if (ABS(span) == p.memory_size()) |
duke@435 | 327 | return true; |
duke@435 | 328 | |
duke@435 | 329 | // If initial offset from start of object is computable, |
duke@435 | 330 | // compute alignment within the vector. |
duke@435 | 331 | int vw = vector_width_in_bytes(); |
duke@435 | 332 | if (vw % span == 0) { |
duke@435 | 333 | Node* init_nd = pre_end->init_trip(); |
duke@435 | 334 | if (init_nd->is_Con() && p.invar() == NULL) { |
duke@435 | 335 | int init = init_nd->bottom_type()->is_int()->get_con(); |
duke@435 | 336 | |
duke@435 | 337 | int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes(); |
duke@435 | 338 | assert(init_offset >= 0, "positive offset from object start"); |
duke@435 | 339 | |
duke@435 | 340 | if (span > 0) { |
duke@435 | 341 | return (vw - (init_offset % vw)) % span == 0; |
duke@435 | 342 | } else { |
duke@435 | 343 | assert(span < 0, "nonzero stride * scale"); |
duke@435 | 344 | return (init_offset % vw) % -span == 0; |
duke@435 | 345 | } |
duke@435 | 346 | } |
duke@435 | 347 | } |
duke@435 | 348 | return false; |
duke@435 | 349 | } |
duke@435 | 350 | |
duke@435 | 351 | //---------------------------dependence_graph--------------------------- |
duke@435 | 352 | // Construct dependency graph. |
duke@435 | 353 | // Add dependence edges to load/store nodes for memory dependence |
duke@435 | 354 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) |
duke@435 | 355 | void SuperWord::dependence_graph() { |
duke@435 | 356 | // First, assign a dependence node to each memory node |
duke@435 | 357 | for (int i = 0; i < _block.length(); i++ ) { |
duke@435 | 358 | Node *n = _block.at(i); |
duke@435 | 359 | if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) { |
duke@435 | 360 | _dg.make_node(n); |
duke@435 | 361 | } |
duke@435 | 362 | } |
duke@435 | 363 | |
duke@435 | 364 | // For each memory slice, create the dependences |
duke@435 | 365 | for (int i = 0; i < _mem_slice_head.length(); i++) { |
duke@435 | 366 | Node* n = _mem_slice_head.at(i); |
duke@435 | 367 | Node* n_tail = _mem_slice_tail.at(i); |
duke@435 | 368 | |
duke@435 | 369 | // Get slice in predecessor order (last is first) |
duke@435 | 370 | mem_slice_preds(n_tail, n, _nlist); |
duke@435 | 371 | |
duke@435 | 372 | // Make the slice dependent on the root |
duke@435 | 373 | DepMem* slice = _dg.dep(n); |
duke@435 | 374 | _dg.make_edge(_dg.root(), slice); |
duke@435 | 375 | |
duke@435 | 376 | // Create a sink for the slice |
duke@435 | 377 | DepMem* slice_sink = _dg.make_node(NULL); |
duke@435 | 378 | _dg.make_edge(slice_sink, _dg.tail()); |
duke@435 | 379 | |
duke@435 | 380 | // Now visit each pair of memory ops, creating the edges |
duke@435 | 381 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { |
duke@435 | 382 | Node* s1 = _nlist.at(j); |
duke@435 | 383 | |
duke@435 | 384 | // If no dependency yet, use slice |
duke@435 | 385 | if (_dg.dep(s1)->in_cnt() == 0) { |
duke@435 | 386 | _dg.make_edge(slice, s1); |
duke@435 | 387 | } |
duke@435 | 388 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 389 | bool sink_dependent = true; |
duke@435 | 390 | for (int k = j - 1; k >= 0; k--) { |
duke@435 | 391 | Node* s2 = _nlist.at(k); |
duke@435 | 392 | if (s1->is_Load() && s2->is_Load()) |
duke@435 | 393 | continue; |
duke@435 | 394 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 395 | |
duke@435 | 396 | int cmp = p1.cmp(p2); |
duke@435 | 397 | if (SuperWordRTDepCheck && |
duke@435 | 398 | p1.base() != p2.base() && p1.valid() && p2.valid()) { |
duke@435 | 399 | // Create a runtime check to disambiguate |
duke@435 | 400 | OrderedPair pp(p1.base(), p2.base()); |
duke@435 | 401 | _disjoint_ptrs.append_if_missing(pp); |
duke@435 | 402 | } else if (!SWPointer::not_equal(cmp)) { |
duke@435 | 403 | // Possibly same address |
duke@435 | 404 | _dg.make_edge(s1, s2); |
duke@435 | 405 | sink_dependent = false; |
duke@435 | 406 | } |
duke@435 | 407 | } |
duke@435 | 408 | if (sink_dependent) { |
duke@435 | 409 | _dg.make_edge(s1, slice_sink); |
duke@435 | 410 | } |
duke@435 | 411 | } |
duke@435 | 412 | #ifndef PRODUCT |
duke@435 | 413 | if (TraceSuperWord) { |
duke@435 | 414 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
duke@435 | 415 | for (int q = 0; q < _nlist.length(); q++) { |
duke@435 | 416 | _dg.print(_nlist.at(q)); |
duke@435 | 417 | } |
duke@435 | 418 | tty->cr(); |
duke@435 | 419 | } |
duke@435 | 420 | #endif |
duke@435 | 421 | _nlist.clear(); |
duke@435 | 422 | } |
duke@435 | 423 | |
duke@435 | 424 | #ifndef PRODUCT |
duke@435 | 425 | if (TraceSuperWord) { |
duke@435 | 426 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
duke@435 | 427 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
duke@435 | 428 | _disjoint_ptrs.at(r).print(); |
duke@435 | 429 | tty->cr(); |
duke@435 | 430 | } |
duke@435 | 431 | tty->cr(); |
duke@435 | 432 | } |
duke@435 | 433 | #endif |
duke@435 | 434 | } |
duke@435 | 435 | |
duke@435 | 436 | //---------------------------mem_slice_preds--------------------------- |
duke@435 | 437 | // Return a memory slice (node list) in predecessor order starting at "start" |
duke@435 | 438 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
duke@435 | 439 | assert(preds.length() == 0, "start empty"); |
duke@435 | 440 | Node* n = start; |
duke@435 | 441 | Node* prev = NULL; |
duke@435 | 442 | while (true) { |
duke@435 | 443 | assert(in_bb(n), "must be in block"); |
duke@435 | 444 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 445 | Node* out = n->fast_out(i); |
duke@435 | 446 | if (out->is_Load()) { |
duke@435 | 447 | if (in_bb(out)) { |
duke@435 | 448 | preds.push(out); |
duke@435 | 449 | } |
duke@435 | 450 | } else { |
duke@435 | 451 | // FIXME |
duke@435 | 452 | if (out->is_MergeMem() && !in_bb(out)) { |
duke@435 | 453 | // Either unrolling is causing a memory edge not to disappear, |
duke@435 | 454 | // or need to run igvn.optimize() again before SLP |
duke@435 | 455 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { |
duke@435 | 456 | // Ditto. Not sure what else to check further. |
cfang@1102 | 457 | } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { |
duke@435 | 458 | // StoreCM has an input edge used as a precedence edge. |
duke@435 | 459 | // Maybe an issue when oop stores are vectorized. |
duke@435 | 460 | } else { |
duke@435 | 461 | assert(out == prev || prev == NULL, "no branches off of store slice"); |
duke@435 | 462 | } |
duke@435 | 463 | } |
duke@435 | 464 | } |
duke@435 | 465 | if (n == stop) break; |
duke@435 | 466 | preds.push(n); |
duke@435 | 467 | prev = n; |
duke@435 | 468 | n = n->in(MemNode::Memory); |
duke@435 | 469 | } |
duke@435 | 470 | } |
duke@435 | 471 | |
duke@435 | 472 | //------------------------------stmts_can_pack--------------------------- |
twisti@1040 | 473 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and |
duke@435 | 474 | // s1 aligned at "align" |
duke@435 | 475 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
cfang@1422 | 476 | |
cfang@1422 | 477 | // Do not use superword for non-primitives |
cfang@1422 | 478 | if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) || |
cfang@1422 | 479 | (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type()))) |
cfang@1422 | 480 | return false; |
cfang@1422 | 481 | |
duke@435 | 482 | if (isomorphic(s1, s2)) { |
duke@435 | 483 | if (independent(s1, s2)) { |
duke@435 | 484 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
duke@435 | 485 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
duke@435 | 486 | int s1_align = alignment(s1); |
duke@435 | 487 | int s2_align = alignment(s2); |
duke@435 | 488 | if (s1_align == top_align || s1_align == align) { |
duke@435 | 489 | if (s2_align == top_align || s2_align == align + data_size(s1)) { |
duke@435 | 490 | return true; |
duke@435 | 491 | } |
duke@435 | 492 | } |
duke@435 | 493 | } |
duke@435 | 494 | } |
duke@435 | 495 | } |
duke@435 | 496 | } |
duke@435 | 497 | return false; |
duke@435 | 498 | } |
duke@435 | 499 | |
duke@435 | 500 | //------------------------------exists_at--------------------------- |
duke@435 | 501 | // Does s exist in a pack at position pos? |
duke@435 | 502 | bool SuperWord::exists_at(Node* s, uint pos) { |
duke@435 | 503 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 504 | Node_List* p = _packset.at(i); |
duke@435 | 505 | if (p->at(pos) == s) { |
duke@435 | 506 | return true; |
duke@435 | 507 | } |
duke@435 | 508 | } |
duke@435 | 509 | return false; |
duke@435 | 510 | } |
duke@435 | 511 | |
duke@435 | 512 | //------------------------------are_adjacent_refs--------------------------- |
duke@435 | 513 | // Is s1 immediately before s2 in memory? |
duke@435 | 514 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { |
duke@435 | 515 | if (!s1->is_Mem() || !s2->is_Mem()) return false; |
duke@435 | 516 | if (!in_bb(s1) || !in_bb(s2)) return false; |
duke@435 | 517 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so |
duke@435 | 518 | // only pack memops that are in the same alias set until that's fixed. |
duke@435 | 519 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != |
duke@435 | 520 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) |
duke@435 | 521 | return false; |
duke@435 | 522 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 523 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 524 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; |
duke@435 | 525 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); |
duke@435 | 526 | return diff == data_size(s1); |
duke@435 | 527 | } |
duke@435 | 528 | |
duke@435 | 529 | //------------------------------isomorphic--------------------------- |
duke@435 | 530 | // Are s1 and s2 similar? |
duke@435 | 531 | bool SuperWord::isomorphic(Node* s1, Node* s2) { |
duke@435 | 532 | if (s1->Opcode() != s2->Opcode()) return false; |
duke@435 | 533 | if (s1->req() != s2->req()) return false; |
duke@435 | 534 | if (s1->in(0) != s2->in(0)) return false; |
duke@435 | 535 | if (velt_type(s1) != velt_type(s2)) return false; |
duke@435 | 536 | return true; |
duke@435 | 537 | } |
duke@435 | 538 | |
duke@435 | 539 | //------------------------------independent--------------------------- |
duke@435 | 540 | // Is there no data path from s1 to s2 or s2 to s1? |
duke@435 | 541 | bool SuperWord::independent(Node* s1, Node* s2) { |
duke@435 | 542 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); |
duke@435 | 543 | int d1 = depth(s1); |
duke@435 | 544 | int d2 = depth(s2); |
duke@435 | 545 | if (d1 == d2) return s1 != s2; |
duke@435 | 546 | Node* deep = d1 > d2 ? s1 : s2; |
duke@435 | 547 | Node* shallow = d1 > d2 ? s2 : s1; |
duke@435 | 548 | |
duke@435 | 549 | visited_clear(); |
duke@435 | 550 | |
duke@435 | 551 | return independent_path(shallow, deep); |
duke@435 | 552 | } |
duke@435 | 553 | |
duke@435 | 554 | //------------------------------independent_path------------------------------ |
duke@435 | 555 | // Helper for independent |
duke@435 | 556 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { |
duke@435 | 557 | if (dp >= 1000) return false; // stop deep recursion |
duke@435 | 558 | visited_set(deep); |
duke@435 | 559 | int shal_depth = depth(shallow); |
duke@435 | 560 | assert(shal_depth <= depth(deep), "must be"); |
duke@435 | 561 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { |
duke@435 | 562 | Node* pred = preds.current(); |
duke@435 | 563 | if (in_bb(pred) && !visited_test(pred)) { |
duke@435 | 564 | if (shallow == pred) { |
duke@435 | 565 | return false; |
duke@435 | 566 | } |
duke@435 | 567 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { |
duke@435 | 568 | return false; |
duke@435 | 569 | } |
duke@435 | 570 | } |
duke@435 | 571 | } |
duke@435 | 572 | return true; |
duke@435 | 573 | } |
duke@435 | 574 | |
duke@435 | 575 | //------------------------------set_alignment--------------------------- |
duke@435 | 576 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
duke@435 | 577 | set_alignment(s1, align); |
duke@435 | 578 | set_alignment(s2, align + data_size(s1)); |
duke@435 | 579 | } |
duke@435 | 580 | |
duke@435 | 581 | //------------------------------data_size--------------------------- |
duke@435 | 582 | int SuperWord::data_size(Node* s) { |
duke@435 | 583 | const Type* t = velt_type(s); |
duke@435 | 584 | BasicType bt = t->array_element_basic_type(); |
kvn@464 | 585 | int bsize = type2aelembytes(bt); |
duke@435 | 586 | assert(bsize != 0, "valid size"); |
duke@435 | 587 | return bsize; |
duke@435 | 588 | } |
duke@435 | 589 | |
duke@435 | 590 | //------------------------------extend_packlist--------------------------- |
duke@435 | 591 | // Extend packset by following use->def and def->use links from pack members. |
duke@435 | 592 | void SuperWord::extend_packlist() { |
duke@435 | 593 | bool changed; |
duke@435 | 594 | do { |
duke@435 | 595 | changed = false; |
duke@435 | 596 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 597 | Node_List* p = _packset.at(i); |
duke@435 | 598 | changed |= follow_use_defs(p); |
duke@435 | 599 | changed |= follow_def_uses(p); |
duke@435 | 600 | } |
duke@435 | 601 | } while (changed); |
duke@435 | 602 | |
duke@435 | 603 | #ifndef PRODUCT |
duke@435 | 604 | if (TraceSuperWord) { |
duke@435 | 605 | tty->print_cr("\nAfter extend_packlist"); |
duke@435 | 606 | print_packset(); |
duke@435 | 607 | } |
duke@435 | 608 | #endif |
duke@435 | 609 | } |
duke@435 | 610 | |
duke@435 | 611 | //------------------------------follow_use_defs--------------------------- |
duke@435 | 612 | // Extend the packset by visiting operand definitions of nodes in pack p |
duke@435 | 613 | bool SuperWord::follow_use_defs(Node_List* p) { |
duke@435 | 614 | Node* s1 = p->at(0); |
duke@435 | 615 | Node* s2 = p->at(1); |
duke@435 | 616 | assert(p->size() == 2, "just checking"); |
duke@435 | 617 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 618 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 619 | |
duke@435 | 620 | if (s1->is_Load()) return false; |
duke@435 | 621 | |
duke@435 | 622 | int align = alignment(s1); |
duke@435 | 623 | bool changed = false; |
duke@435 | 624 | int start = s1->is_Store() ? MemNode::ValueIn : 1; |
duke@435 | 625 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); |
duke@435 | 626 | for (int j = start; j < end; j++) { |
duke@435 | 627 | Node* t1 = s1->in(j); |
duke@435 | 628 | Node* t2 = s2->in(j); |
duke@435 | 629 | if (!in_bb(t1) || !in_bb(t2)) |
duke@435 | 630 | continue; |
duke@435 | 631 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 632 | if (est_savings(t1, t2) >= 0) { |
duke@435 | 633 | Node_List* pair = new Node_List(); |
duke@435 | 634 | pair->push(t1); |
duke@435 | 635 | pair->push(t2); |
duke@435 | 636 | _packset.append(pair); |
duke@435 | 637 | set_alignment(t1, t2, align); |
duke@435 | 638 | changed = true; |
duke@435 | 639 | } |
duke@435 | 640 | } |
duke@435 | 641 | } |
duke@435 | 642 | return changed; |
duke@435 | 643 | } |
duke@435 | 644 | |
duke@435 | 645 | //------------------------------follow_def_uses--------------------------- |
duke@435 | 646 | // Extend the packset by visiting uses of nodes in pack p |
duke@435 | 647 | bool SuperWord::follow_def_uses(Node_List* p) { |
duke@435 | 648 | bool changed = false; |
duke@435 | 649 | Node* s1 = p->at(0); |
duke@435 | 650 | Node* s2 = p->at(1); |
duke@435 | 651 | assert(p->size() == 2, "just checking"); |
duke@435 | 652 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 653 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 654 | |
duke@435 | 655 | if (s1->is_Store()) return false; |
duke@435 | 656 | |
duke@435 | 657 | int align = alignment(s1); |
duke@435 | 658 | int savings = -1; |
duke@435 | 659 | Node* u1 = NULL; |
duke@435 | 660 | Node* u2 = NULL; |
duke@435 | 661 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 662 | Node* t1 = s1->fast_out(i); |
duke@435 | 663 | if (!in_bb(t1)) continue; |
duke@435 | 664 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 665 | Node* t2 = s2->fast_out(j); |
duke@435 | 666 | if (!in_bb(t2)) continue; |
duke@435 | 667 | if (!opnd_positions_match(s1, t1, s2, t2)) |
duke@435 | 668 | continue; |
duke@435 | 669 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 670 | int my_savings = est_savings(t1, t2); |
duke@435 | 671 | if (my_savings > savings) { |
duke@435 | 672 | savings = my_savings; |
duke@435 | 673 | u1 = t1; |
duke@435 | 674 | u2 = t2; |
duke@435 | 675 | } |
duke@435 | 676 | } |
duke@435 | 677 | } |
duke@435 | 678 | } |
duke@435 | 679 | if (savings >= 0) { |
duke@435 | 680 | Node_List* pair = new Node_List(); |
duke@435 | 681 | pair->push(u1); |
duke@435 | 682 | pair->push(u2); |
duke@435 | 683 | _packset.append(pair); |
duke@435 | 684 | set_alignment(u1, u2, align); |
duke@435 | 685 | changed = true; |
duke@435 | 686 | } |
duke@435 | 687 | return changed; |
duke@435 | 688 | } |
duke@435 | 689 | |
duke@435 | 690 | //---------------------------opnd_positions_match------------------------- |
duke@435 | 691 | // Is the use of d1 in u1 at the same operand position as d2 in u2? |
duke@435 | 692 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { |
duke@435 | 693 | uint ct = u1->req(); |
duke@435 | 694 | if (ct != u2->req()) return false; |
duke@435 | 695 | uint i1 = 0; |
duke@435 | 696 | uint i2 = 0; |
duke@435 | 697 | do { |
duke@435 | 698 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
duke@435 | 699 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
duke@435 | 700 | if (i1 != i2) { |
duke@435 | 701 | return false; |
duke@435 | 702 | } |
duke@435 | 703 | } while (i1 < ct); |
duke@435 | 704 | return true; |
duke@435 | 705 | } |
duke@435 | 706 | |
duke@435 | 707 | //------------------------------est_savings--------------------------- |
duke@435 | 708 | // Estimate the savings from executing s1 and s2 as a pack |
duke@435 | 709 | int SuperWord::est_savings(Node* s1, Node* s2) { |
duke@435 | 710 | int save = 2 - 1; // 2 operations per instruction in packed form |
duke@435 | 711 | |
duke@435 | 712 | // inputs |
duke@435 | 713 | for (uint i = 1; i < s1->req(); i++) { |
duke@435 | 714 | Node* x1 = s1->in(i); |
duke@435 | 715 | Node* x2 = s2->in(i); |
duke@435 | 716 | if (x1 != x2) { |
duke@435 | 717 | if (are_adjacent_refs(x1, x2)) { |
duke@435 | 718 | save += adjacent_profit(x1, x2); |
duke@435 | 719 | } else if (!in_packset(x1, x2)) { |
duke@435 | 720 | save -= pack_cost(2); |
duke@435 | 721 | } else { |
duke@435 | 722 | save += unpack_cost(2); |
duke@435 | 723 | } |
duke@435 | 724 | } |
duke@435 | 725 | } |
duke@435 | 726 | |
duke@435 | 727 | // uses of result |
duke@435 | 728 | uint ct = 0; |
duke@435 | 729 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 730 | Node* s1_use = s1->fast_out(i); |
duke@435 | 731 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 732 | Node_List* p = _packset.at(j); |
duke@435 | 733 | if (p->at(0) == s1_use) { |
duke@435 | 734 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 735 | Node* s2_use = s2->fast_out(k); |
duke@435 | 736 | if (p->at(p->size()-1) == s2_use) { |
duke@435 | 737 | ct++; |
duke@435 | 738 | if (are_adjacent_refs(s1_use, s2_use)) { |
duke@435 | 739 | save += adjacent_profit(s1_use, s2_use); |
duke@435 | 740 | } |
duke@435 | 741 | } |
duke@435 | 742 | } |
duke@435 | 743 | } |
duke@435 | 744 | } |
duke@435 | 745 | } |
duke@435 | 746 | |
duke@435 | 747 | if (ct < s1->outcnt()) save += unpack_cost(1); |
duke@435 | 748 | if (ct < s2->outcnt()) save += unpack_cost(1); |
duke@435 | 749 | |
duke@435 | 750 | return save; |
duke@435 | 751 | } |
duke@435 | 752 | |
duke@435 | 753 | //------------------------------costs--------------------------- |
duke@435 | 754 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
duke@435 | 755 | int SuperWord::pack_cost(int ct) { return ct; } |
duke@435 | 756 | int SuperWord::unpack_cost(int ct) { return ct; } |
duke@435 | 757 | |
duke@435 | 758 | //------------------------------combine_packs--------------------------- |
duke@435 | 759 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
duke@435 | 760 | void SuperWord::combine_packs() { |
duke@435 | 761 | bool changed; |
duke@435 | 762 | do { |
duke@435 | 763 | changed = false; |
duke@435 | 764 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 765 | Node_List* p1 = _packset.at(i); |
duke@435 | 766 | if (p1 == NULL) continue; |
duke@435 | 767 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 768 | Node_List* p2 = _packset.at(j); |
duke@435 | 769 | if (p2 == NULL) continue; |
duke@435 | 770 | if (p1->at(p1->size()-1) == p2->at(0)) { |
duke@435 | 771 | for (uint k = 1; k < p2->size(); k++) { |
duke@435 | 772 | p1->push(p2->at(k)); |
duke@435 | 773 | } |
duke@435 | 774 | _packset.at_put(j, NULL); |
duke@435 | 775 | changed = true; |
duke@435 | 776 | } |
duke@435 | 777 | } |
duke@435 | 778 | } |
duke@435 | 779 | } while (changed); |
duke@435 | 780 | |
duke@435 | 781 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 782 | Node_List* p1 = _packset.at(i); |
duke@435 | 783 | if (p1 == NULL) { |
duke@435 | 784 | _packset.remove_at(i); |
duke@435 | 785 | } |
duke@435 | 786 | } |
duke@435 | 787 | |
duke@435 | 788 | #ifndef PRODUCT |
duke@435 | 789 | if (TraceSuperWord) { |
duke@435 | 790 | tty->print_cr("\nAfter combine_packs"); |
duke@435 | 791 | print_packset(); |
duke@435 | 792 | } |
duke@435 | 793 | #endif |
duke@435 | 794 | } |
duke@435 | 795 | |
duke@435 | 796 | //-----------------------------construct_my_pack_map-------------------------- |
duke@435 | 797 | // Construct the map from nodes to packs. Only valid after the |
duke@435 | 798 | // point where a node is only in one pack (after combine_packs). |
duke@435 | 799 | void SuperWord::construct_my_pack_map() { |
duke@435 | 800 | Node_List* rslt = NULL; |
duke@435 | 801 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 802 | Node_List* p = _packset.at(i); |
duke@435 | 803 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 804 | Node* s = p->at(j); |
duke@435 | 805 | assert(my_pack(s) == NULL, "only in one pack"); |
duke@435 | 806 | set_my_pack(s, p); |
duke@435 | 807 | } |
duke@435 | 808 | } |
duke@435 | 809 | } |
duke@435 | 810 | |
duke@435 | 811 | //------------------------------filter_packs--------------------------- |
duke@435 | 812 | // Remove packs that are not implemented or not profitable. |
duke@435 | 813 | void SuperWord::filter_packs() { |
duke@435 | 814 | |
duke@435 | 815 | // Remove packs that are not implemented |
duke@435 | 816 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 817 | Node_List* pk = _packset.at(i); |
duke@435 | 818 | bool impl = implemented(pk); |
duke@435 | 819 | if (!impl) { |
duke@435 | 820 | #ifndef PRODUCT |
duke@435 | 821 | if (TraceSuperWord && Verbose) { |
duke@435 | 822 | tty->print_cr("Unimplemented"); |
duke@435 | 823 | pk->at(0)->dump(); |
duke@435 | 824 | } |
duke@435 | 825 | #endif |
duke@435 | 826 | remove_pack_at(i); |
duke@435 | 827 | } |
duke@435 | 828 | } |
duke@435 | 829 | |
duke@435 | 830 | // Remove packs that are not profitable |
duke@435 | 831 | bool changed; |
duke@435 | 832 | do { |
duke@435 | 833 | changed = false; |
duke@435 | 834 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 835 | Node_List* pk = _packset.at(i); |
duke@435 | 836 | bool prof = profitable(pk); |
duke@435 | 837 | if (!prof) { |
duke@435 | 838 | #ifndef PRODUCT |
duke@435 | 839 | if (TraceSuperWord && Verbose) { |
duke@435 | 840 | tty->print_cr("Unprofitable"); |
duke@435 | 841 | pk->at(0)->dump(); |
duke@435 | 842 | } |
duke@435 | 843 | #endif |
duke@435 | 844 | remove_pack_at(i); |
duke@435 | 845 | changed = true; |
duke@435 | 846 | } |
duke@435 | 847 | } |
duke@435 | 848 | } while (changed); |
duke@435 | 849 | |
duke@435 | 850 | #ifndef PRODUCT |
duke@435 | 851 | if (TraceSuperWord) { |
duke@435 | 852 | tty->print_cr("\nAfter filter_packs"); |
duke@435 | 853 | print_packset(); |
duke@435 | 854 | tty->cr(); |
duke@435 | 855 | } |
duke@435 | 856 | #endif |
duke@435 | 857 | } |
duke@435 | 858 | |
duke@435 | 859 | //------------------------------implemented--------------------------- |
duke@435 | 860 | // Can code be generated for pack p? |
duke@435 | 861 | bool SuperWord::implemented(Node_List* p) { |
duke@435 | 862 | Node* p0 = p->at(0); |
duke@435 | 863 | int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); |
duke@435 | 864 | return vopc > 0 && Matcher::has_match_rule(vopc); |
duke@435 | 865 | } |
duke@435 | 866 | |
duke@435 | 867 | //------------------------------profitable--------------------------- |
duke@435 | 868 | // For pack p, are all operands and all uses (with in the block) vector? |
duke@435 | 869 | bool SuperWord::profitable(Node_List* p) { |
duke@435 | 870 | Node* p0 = p->at(0); |
duke@435 | 871 | uint start, end; |
duke@435 | 872 | vector_opd_range(p0, &start, &end); |
duke@435 | 873 | |
duke@435 | 874 | // Return false if some input is not vector and inside block |
duke@435 | 875 | for (uint i = start; i < end; i++) { |
duke@435 | 876 | if (!is_vector_use(p0, i)) { |
duke@435 | 877 | // For now, return false if not scalar promotion case (inputs are the same.) |
twisti@1040 | 878 | // Later, implement PackNode and allow differing, non-vector inputs |
duke@435 | 879 | // (maybe just the ones from outside the block.) |
duke@435 | 880 | Node* p0_def = p0->in(i); |
duke@435 | 881 | for (uint j = 1; j < p->size(); j++) { |
duke@435 | 882 | Node* use = p->at(j); |
duke@435 | 883 | Node* def = use->in(i); |
duke@435 | 884 | if (p0_def != def) |
duke@435 | 885 | return false; |
duke@435 | 886 | } |
duke@435 | 887 | } |
duke@435 | 888 | } |
duke@435 | 889 | if (!p0->is_Store()) { |
duke@435 | 890 | // For now, return false if not all uses are vector. |
duke@435 | 891 | // Later, implement ExtractNode and allow non-vector uses (maybe |
duke@435 | 892 | // just the ones outside the block.) |
duke@435 | 893 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 894 | Node* def = p->at(i); |
duke@435 | 895 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 896 | Node* use = def->fast_out(j); |
duke@435 | 897 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 898 | Node* n = use->in(k); |
duke@435 | 899 | if (def == n) { |
duke@435 | 900 | if (!is_vector_use(use, k)) { |
duke@435 | 901 | return false; |
duke@435 | 902 | } |
duke@435 | 903 | } |
duke@435 | 904 | } |
duke@435 | 905 | } |
duke@435 | 906 | } |
duke@435 | 907 | } |
duke@435 | 908 | return true; |
duke@435 | 909 | } |
duke@435 | 910 | |
duke@435 | 911 | //------------------------------schedule--------------------------- |
duke@435 | 912 | // Adjust the memory graph for the packed operations |
duke@435 | 913 | void SuperWord::schedule() { |
duke@435 | 914 | |
duke@435 | 915 | // Co-locate in the memory graph the members of each memory pack |
duke@435 | 916 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 917 | co_locate_pack(_packset.at(i)); |
duke@435 | 918 | } |
duke@435 | 919 | } |
duke@435 | 920 | |
cfang@1102 | 921 | //-------------------------------remove_and_insert------------------- |
cfang@1102 | 922 | //remove "current" from its current position in the memory graph and insert |
cfang@1102 | 923 | //it after the appropriate insertion point (lip or uip) |
cfang@1102 | 924 | void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, |
cfang@1102 | 925 | Node *uip, Unique_Node_List &sched_before) { |
cfang@1102 | 926 | Node* my_mem = current->in(MemNode::Memory); |
cfang@1102 | 927 | _igvn.hash_delete(current); |
cfang@1102 | 928 | _igvn.hash_delete(my_mem); |
cfang@1102 | 929 | |
cfang@1102 | 930 | //remove current_store from its current position in the memmory graph |
cfang@1102 | 931 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 932 | Node* use = current->out(i); |
cfang@1102 | 933 | if (use->is_Mem()) { |
cfang@1102 | 934 | assert(use->in(MemNode::Memory) == current, "must be"); |
cfang@1102 | 935 | _igvn.hash_delete(use); |
cfang@1102 | 936 | if (use == prev) { // connect prev to my_mem |
cfang@1102 | 937 | use->set_req(MemNode::Memory, my_mem); |
cfang@1102 | 938 | } else if (sched_before.member(use)) { |
cfang@1102 | 939 | _igvn.hash_delete(uip); |
cfang@1102 | 940 | use->set_req(MemNode::Memory, uip); |
cfang@1102 | 941 | } else { |
cfang@1102 | 942 | _igvn.hash_delete(lip); |
cfang@1102 | 943 | use->set_req(MemNode::Memory, lip); |
cfang@1102 | 944 | } |
cfang@1102 | 945 | _igvn._worklist.push(use); |
cfang@1102 | 946 | --i; //deleted this edge; rescan position |
cfang@1102 | 947 | } |
cfang@1102 | 948 | } |
cfang@1102 | 949 | |
cfang@1102 | 950 | bool sched_up = sched_before.member(current); |
cfang@1102 | 951 | Node *insert_pt = sched_up ? uip : lip; |
cfang@1102 | 952 | _igvn.hash_delete(insert_pt); |
cfang@1102 | 953 | |
cfang@1102 | 954 | // all uses of insert_pt's memory state should use current's instead |
cfang@1102 | 955 | for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { |
cfang@1102 | 956 | Node* use = insert_pt->out(i); |
cfang@1102 | 957 | if (use->is_Mem()) { |
cfang@1102 | 958 | assert(use->in(MemNode::Memory) == insert_pt, "must be"); |
cfang@1102 | 959 | _igvn.hash_delete(use); |
cfang@1102 | 960 | use->set_req(MemNode::Memory, current); |
cfang@1102 | 961 | _igvn._worklist.push(use); |
cfang@1102 | 962 | --i; //deleted this edge; rescan position |
cfang@1102 | 963 | } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { |
cfang@1102 | 964 | uint pos; //lip (lower insert point) must be the last one in the memory slice |
cfang@1102 | 965 | _igvn.hash_delete(use); |
cfang@1102 | 966 | for (pos=1; pos < use->req(); pos++) { |
cfang@1102 | 967 | if (use->in(pos) == insert_pt) break; |
cfang@1102 | 968 | } |
cfang@1102 | 969 | use->set_req(pos, current); |
cfang@1102 | 970 | _igvn._worklist.push(use); |
cfang@1102 | 971 | --i; |
cfang@1102 | 972 | } |
cfang@1102 | 973 | } |
cfang@1102 | 974 | |
cfang@1102 | 975 | //connect current to insert_pt |
cfang@1102 | 976 | current->set_req(MemNode::Memory, insert_pt); |
cfang@1102 | 977 | _igvn._worklist.push(current); |
cfang@1102 | 978 | } |
cfang@1102 | 979 | |
cfang@1102 | 980 | //------------------------------co_locate_pack---------------------------------- |
cfang@1102 | 981 | // To schedule a store pack, we need to move any sandwiched memory ops either before |
cfang@1102 | 982 | // or after the pack, based upon dependence information: |
cfang@1102 | 983 | // (1) If any store in the pack depends on the sandwiched memory op, the |
cfang@1102 | 984 | // sandwiched memory op must be scheduled BEFORE the pack; |
cfang@1102 | 985 | // (2) If a sandwiched memory op depends on any store in the pack, the |
cfang@1102 | 986 | // sandwiched memory op must be scheduled AFTER the pack; |
cfang@1102 | 987 | // (3) If a sandwiched memory op (say, memA) depends on another sandwiched |
cfang@1102 | 988 | // memory op (say memB), memB must be scheduled before memA. So, if memA is |
cfang@1102 | 989 | // scheduled before the pack, memB must also be scheduled before the pack; |
cfang@1102 | 990 | // (4) If there is no dependence restriction for a sandwiched memory op, we simply |
cfang@1102 | 991 | // schedule this store AFTER the pack |
cfang@1102 | 992 | // (5) We know there is no dependence cycle, so there in no other case; |
cfang@1102 | 993 | // (6) Finally, all memory ops in another single pack should be moved in the same direction. |
cfang@1102 | 994 | // |
cfang@1387 | 995 | // To schedule a load pack, we use the memory state of either the first or the last load in |
cfang@1387 | 996 | // the pack, based on the dependence constraint. |
duke@435 | 997 | void SuperWord::co_locate_pack(Node_List* pk) { |
duke@435 | 998 | if (pk->at(0)->is_Store()) { |
duke@435 | 999 | MemNode* first = executed_first(pk)->as_Mem(); |
duke@435 | 1000 | MemNode* last = executed_last(pk)->as_Mem(); |
cfang@1102 | 1001 | Unique_Node_List schedule_before_pack; |
cfang@1102 | 1002 | Unique_Node_List memops; |
cfang@1102 | 1003 | |
duke@435 | 1004 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1005 | MemNode* previous = last; |
duke@435 | 1006 | while (true) { |
duke@435 | 1007 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1008 | memops.push(previous); |
cfang@1102 | 1009 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 1010 | Node* use = current->out(i); |
cfang@1102 | 1011 | if (use->is_Mem() && use != previous) |
cfang@1102 | 1012 | memops.push(use); |
cfang@1102 | 1013 | } |
cfang@1102 | 1014 | if(current == first) break; |
cfang@1102 | 1015 | previous = current; |
cfang@1102 | 1016 | current = current->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1017 | } |
cfang@1102 | 1018 | |
cfang@1102 | 1019 | // determine which memory operations should be scheduled before the pack |
cfang@1102 | 1020 | for (uint i = 1; i < memops.size(); i++) { |
cfang@1102 | 1021 | Node *s1 = memops.at(i); |
cfang@1102 | 1022 | if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { |
cfang@1102 | 1023 | for (uint j = 0; j< i; j++) { |
cfang@1102 | 1024 | Node *s2 = memops.at(j); |
cfang@1102 | 1025 | if (!independent(s1, s2)) { |
cfang@1102 | 1026 | if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { |
cfang@1102 | 1027 | schedule_before_pack.push(s1); //s1 must be scheduled before |
cfang@1102 | 1028 | Node_List* mem_pk = my_pack(s1); |
cfang@1102 | 1029 | if (mem_pk != NULL) { |
cfang@1102 | 1030 | for (uint ii = 0; ii < mem_pk->size(); ii++) { |
cfang@1102 | 1031 | Node* s = mem_pk->at(ii); // follow partner |
cfang@1102 | 1032 | if (memops.member(s) && !schedule_before_pack.member(s)) |
cfang@1102 | 1033 | schedule_before_pack.push(s); |
cfang@1102 | 1034 | } |
cfang@1102 | 1035 | } |
cfang@1102 | 1036 | } |
cfang@1102 | 1037 | } |
cfang@1102 | 1038 | } |
cfang@1102 | 1039 | } |
cfang@1102 | 1040 | } |
cfang@1102 | 1041 | |
cfang@1102 | 1042 | MemNode* lower_insert_pt = last; |
cfang@1102 | 1043 | Node* upper_insert_pt = first->in(MemNode::Memory); |
cfang@1102 | 1044 | previous = last; //previous store in pk |
cfang@1102 | 1045 | current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1046 | |
cfang@1102 | 1047 | //start scheduling from "last" to "first" |
cfang@1102 | 1048 | while (true) { |
cfang@1102 | 1049 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1050 | assert(in_pack(previous, pk), "previous stays in pack"); |
duke@435 | 1051 | Node* my_mem = current->in(MemNode::Memory); |
cfang@1102 | 1052 | |
duke@435 | 1053 | if (in_pack(current, pk)) { |
cfang@1102 | 1054 | // Forward users of my memory state (except "previous) to my input memory state |
duke@435 | 1055 | _igvn.hash_delete(current); |
duke@435 | 1056 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
duke@435 | 1057 | Node* use = current->out(i); |
cfang@1102 | 1058 | if (use->is_Mem() && use != previous) { |
duke@435 | 1059 | assert(use->in(MemNode::Memory) == current, "must be"); |
duke@435 | 1060 | _igvn.hash_delete(use); |
cfang@1102 | 1061 | if (schedule_before_pack.member(use)) { |
cfang@1102 | 1062 | _igvn.hash_delete(upper_insert_pt); |
cfang@1102 | 1063 | use->set_req(MemNode::Memory, upper_insert_pt); |
cfang@1102 | 1064 | } else { |
cfang@1102 | 1065 | _igvn.hash_delete(lower_insert_pt); |
cfang@1102 | 1066 | use->set_req(MemNode::Memory, lower_insert_pt); |
cfang@1102 | 1067 | } |
duke@435 | 1068 | _igvn._worklist.push(use); |
duke@435 | 1069 | --i; // deleted this edge; rescan position |
duke@435 | 1070 | } |
duke@435 | 1071 | } |
cfang@1102 | 1072 | previous = current; |
cfang@1102 | 1073 | } else { // !in_pack(current, pk) ==> a sandwiched store |
cfang@1102 | 1074 | remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); |
duke@435 | 1075 | } |
cfang@1102 | 1076 | |
duke@435 | 1077 | if (current == first) break; |
duke@435 | 1078 | current = my_mem->as_Mem(); |
cfang@1102 | 1079 | } // end while |
cfang@1102 | 1080 | } else if (pk->at(0)->is_Load()) { //load |
cfang@1387 | 1081 | // all loads in the pack should have the same memory state. By default, |
cfang@1387 | 1082 | // we use the memory state of the last load. However, if any load could |
cfang@1387 | 1083 | // not be moved down due to the dependence constraint, we use the memory |
cfang@1387 | 1084 | // state of the first load. |
cfang@1387 | 1085 | Node* last_mem = executed_last(pk)->in(MemNode::Memory); |
cfang@1387 | 1086 | Node* first_mem = executed_first(pk)->in(MemNode::Memory); |
cfang@1387 | 1087 | bool schedule_last = true; |
cfang@1387 | 1088 | for (uint i = 0; i < pk->size(); i++) { |
cfang@1387 | 1089 | Node* ld = pk->at(i); |
cfang@1387 | 1090 | for (Node* current = last_mem; current != ld->in(MemNode::Memory); |
cfang@1387 | 1091 | current=current->in(MemNode::Memory)) { |
cfang@1387 | 1092 | assert(current != first_mem, "corrupted memory graph"); |
cfang@1387 | 1093 | if(current->is_Mem() && !independent(current, ld)){ |
cfang@1387 | 1094 | schedule_last = false; // a later store depends on this load |
cfang@1387 | 1095 | break; |
cfang@1387 | 1096 | } |
cfang@1387 | 1097 | } |
cfang@1387 | 1098 | } |
cfang@1387 | 1099 | |
cfang@1387 | 1100 | Node* mem_input = schedule_last ? last_mem : first_mem; |
cfang@1387 | 1101 | _igvn.hash_delete(mem_input); |
cfang@1387 | 1102 | // Give each load the same memory state |
duke@435 | 1103 | for (uint i = 0; i < pk->size(); i++) { |
duke@435 | 1104 | LoadNode* ld = pk->at(i)->as_Load(); |
duke@435 | 1105 | _igvn.hash_delete(ld); |
cfang@1387 | 1106 | ld->set_req(MemNode::Memory, mem_input); |
duke@435 | 1107 | _igvn._worklist.push(ld); |
duke@435 | 1108 | } |
duke@435 | 1109 | } |
duke@435 | 1110 | } |
duke@435 | 1111 | |
duke@435 | 1112 | //------------------------------output--------------------------- |
duke@435 | 1113 | // Convert packs into vector node operations |
duke@435 | 1114 | void SuperWord::output() { |
duke@435 | 1115 | if (_packset.length() == 0) return; |
duke@435 | 1116 | |
duke@435 | 1117 | // MUST ENSURE main loop's initial value is properly aligned: |
duke@435 | 1118 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 |
duke@435 | 1119 | |
duke@435 | 1120 | align_initial_loop_index(align_to_ref()); |
duke@435 | 1121 | |
duke@435 | 1122 | // Insert extract (unpack) operations for scalar uses |
duke@435 | 1123 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1124 | insert_extracts(_packset.at(i)); |
duke@435 | 1125 | } |
duke@435 | 1126 | |
duke@435 | 1127 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1128 | Node* n = _block.at(i); |
duke@435 | 1129 | Node_List* p = my_pack(n); |
duke@435 | 1130 | if (p && n == executed_last(p)) { |
duke@435 | 1131 | uint vlen = p->size(); |
duke@435 | 1132 | Node* vn = NULL; |
duke@435 | 1133 | Node* low_adr = p->at(0); |
duke@435 | 1134 | Node* first = executed_first(p); |
duke@435 | 1135 | if (n->is_Load()) { |
duke@435 | 1136 | int opc = n->Opcode(); |
duke@435 | 1137 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1138 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1139 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1140 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1141 | vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); |
duke@435 | 1142 | |
duke@435 | 1143 | } else if (n->is_Store()) { |
duke@435 | 1144 | // Promote value to be stored to vector |
duke@435 | 1145 | VectorNode* val = vector_opd(p, MemNode::ValueIn); |
duke@435 | 1146 | |
duke@435 | 1147 | int opc = n->Opcode(); |
duke@435 | 1148 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1149 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1150 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1151 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1152 | vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
duke@435 | 1153 | |
duke@435 | 1154 | } else if (n->req() == 3) { |
duke@435 | 1155 | // Promote operands to vector |
duke@435 | 1156 | Node* in1 = vector_opd(p, 1); |
duke@435 | 1157 | Node* in2 = vector_opd(p, 2); |
duke@435 | 1158 | vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); |
duke@435 | 1159 | |
duke@435 | 1160 | } else { |
duke@435 | 1161 | ShouldNotReachHere(); |
duke@435 | 1162 | } |
duke@435 | 1163 | |
duke@435 | 1164 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1165 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
duke@435 | 1166 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 1167 | Node* pm = p->at(j); |
duke@435 | 1168 | _igvn.hash_delete(pm); |
duke@435 | 1169 | _igvn.subsume_node(pm, vn); |
duke@435 | 1170 | } |
duke@435 | 1171 | _igvn._worklist.push(vn); |
duke@435 | 1172 | } |
duke@435 | 1173 | } |
duke@435 | 1174 | } |
duke@435 | 1175 | |
duke@435 | 1176 | //------------------------------vector_opd--------------------------- |
duke@435 | 1177 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) |
duke@435 | 1178 | VectorNode* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
duke@435 | 1179 | Node* p0 = p->at(0); |
duke@435 | 1180 | uint vlen = p->size(); |
duke@435 | 1181 | Node* opd = p0->in(opd_idx); |
duke@435 | 1182 | |
duke@435 | 1183 | bool same_opd = true; |
duke@435 | 1184 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1185 | Node* pi = p->at(i); |
duke@435 | 1186 | Node* in = pi->in(opd_idx); |
duke@435 | 1187 | if (opd != in) { |
duke@435 | 1188 | same_opd = false; |
duke@435 | 1189 | break; |
duke@435 | 1190 | } |
duke@435 | 1191 | } |
duke@435 | 1192 | |
duke@435 | 1193 | if (same_opd) { |
duke@435 | 1194 | if (opd->is_Vector()) { |
duke@435 | 1195 | return (VectorNode*)opd; // input is matching vector |
duke@435 | 1196 | } |
duke@435 | 1197 | // Convert scalar input to vector. Use p0's type because it's container |
duke@435 | 1198 | // maybe smaller than the operand's container. |
duke@435 | 1199 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1200 | const Type* p0_t = velt_type(p0); |
duke@435 | 1201 | if (p0_t->higher_equal(opd_t)) opd_t = p0_t; |
duke@435 | 1202 | VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t); |
duke@435 | 1203 | |
duke@435 | 1204 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1205 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
duke@435 | 1206 | return vn; |
duke@435 | 1207 | } |
duke@435 | 1208 | |
duke@435 | 1209 | // Insert pack operation |
duke@435 | 1210 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1211 | PackNode* pk = PackNode::make(_phase->C, opd, opd_t); |
duke@435 | 1212 | |
duke@435 | 1213 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1214 | Node* pi = p->at(i); |
duke@435 | 1215 | Node* in = pi->in(opd_idx); |
duke@435 | 1216 | assert(my_pack(in) == NULL, "Should already have been unpacked"); |
duke@435 | 1217 | assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type"); |
duke@435 | 1218 | pk->add_opd(in); |
duke@435 | 1219 | } |
duke@435 | 1220 | _phase->_igvn.register_new_node_with_optimizer(pk); |
duke@435 | 1221 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
duke@435 | 1222 | return pk; |
duke@435 | 1223 | } |
duke@435 | 1224 | |
duke@435 | 1225 | //------------------------------insert_extracts--------------------------- |
duke@435 | 1226 | // If a use of pack p is not a vector use, then replace the |
duke@435 | 1227 | // use with an extract operation. |
duke@435 | 1228 | void SuperWord::insert_extracts(Node_List* p) { |
duke@435 | 1229 | if (p->at(0)->is_Store()) return; |
duke@435 | 1230 | assert(_n_idx_list.is_empty(), "empty (node,index) list"); |
duke@435 | 1231 | |
duke@435 | 1232 | // Inspect each use of each pack member. For each use that is |
duke@435 | 1233 | // not a vector use, replace the use with an extract operation. |
duke@435 | 1234 | |
duke@435 | 1235 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1236 | Node* def = p->at(i); |
duke@435 | 1237 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1238 | Node* use = def->fast_out(j); |
duke@435 | 1239 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1240 | Node* n = use->in(k); |
duke@435 | 1241 | if (def == n) { |
duke@435 | 1242 | if (!is_vector_use(use, k)) { |
duke@435 | 1243 | _n_idx_list.push(use, k); |
duke@435 | 1244 | } |
duke@435 | 1245 | } |
duke@435 | 1246 | } |
duke@435 | 1247 | } |
duke@435 | 1248 | } |
duke@435 | 1249 | |
duke@435 | 1250 | while (_n_idx_list.is_nonempty()) { |
duke@435 | 1251 | Node* use = _n_idx_list.node(); |
duke@435 | 1252 | int idx = _n_idx_list.index(); |
duke@435 | 1253 | _n_idx_list.pop(); |
duke@435 | 1254 | Node* def = use->in(idx); |
duke@435 | 1255 | |
duke@435 | 1256 | // Insert extract operation |
duke@435 | 1257 | _igvn.hash_delete(def); |
duke@435 | 1258 | _igvn.hash_delete(use); |
duke@435 | 1259 | int def_pos = alignment(def) / data_size(def); |
duke@435 | 1260 | const Type* def_t = velt_type(def); |
duke@435 | 1261 | |
duke@435 | 1262 | Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); |
duke@435 | 1263 | _phase->_igvn.register_new_node_with_optimizer(ex); |
duke@435 | 1264 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
duke@435 | 1265 | use->set_req(idx, ex); |
duke@435 | 1266 | _igvn._worklist.push(def); |
duke@435 | 1267 | _igvn._worklist.push(use); |
duke@435 | 1268 | |
duke@435 | 1269 | bb_insert_after(ex, bb_idx(def)); |
duke@435 | 1270 | set_velt_type(ex, def_t); |
duke@435 | 1271 | } |
duke@435 | 1272 | } |
duke@435 | 1273 | |
duke@435 | 1274 | //------------------------------is_vector_use--------------------------- |
duke@435 | 1275 | // Is use->in(u_idx) a vector use? |
duke@435 | 1276 | bool SuperWord::is_vector_use(Node* use, int u_idx) { |
duke@435 | 1277 | Node_List* u_pk = my_pack(use); |
duke@435 | 1278 | if (u_pk == NULL) return false; |
duke@435 | 1279 | Node* def = use->in(u_idx); |
duke@435 | 1280 | Node_List* d_pk = my_pack(def); |
duke@435 | 1281 | if (d_pk == NULL) { |
duke@435 | 1282 | // check for scalar promotion |
duke@435 | 1283 | Node* n = u_pk->at(0)->in(u_idx); |
duke@435 | 1284 | for (uint i = 1; i < u_pk->size(); i++) { |
duke@435 | 1285 | if (u_pk->at(i)->in(u_idx) != n) return false; |
duke@435 | 1286 | } |
duke@435 | 1287 | return true; |
duke@435 | 1288 | } |
duke@435 | 1289 | if (u_pk->size() != d_pk->size()) |
duke@435 | 1290 | return false; |
duke@435 | 1291 | for (uint i = 0; i < u_pk->size(); i++) { |
duke@435 | 1292 | Node* ui = u_pk->at(i); |
duke@435 | 1293 | Node* di = d_pk->at(i); |
duke@435 | 1294 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) |
duke@435 | 1295 | return false; |
duke@435 | 1296 | } |
duke@435 | 1297 | return true; |
duke@435 | 1298 | } |
duke@435 | 1299 | |
duke@435 | 1300 | //------------------------------construct_bb--------------------------- |
duke@435 | 1301 | // Construct reverse postorder list of block members |
duke@435 | 1302 | void SuperWord::construct_bb() { |
duke@435 | 1303 | Node* entry = bb(); |
duke@435 | 1304 | |
duke@435 | 1305 | assert(_stk.length() == 0, "stk is empty"); |
duke@435 | 1306 | assert(_block.length() == 0, "block is empty"); |
duke@435 | 1307 | assert(_data_entry.length() == 0, "data_entry is empty"); |
duke@435 | 1308 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty"); |
duke@435 | 1309 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty"); |
duke@435 | 1310 | |
duke@435 | 1311 | // Find non-control nodes with no inputs from within block, |
duke@435 | 1312 | // create a temporary map from node _idx to bb_idx for use |
duke@435 | 1313 | // by the visited and post_visited sets, |
duke@435 | 1314 | // and count number of nodes in block. |
duke@435 | 1315 | int bb_ct = 0; |
duke@435 | 1316 | for (uint i = 0; i < lpt()->_body.size(); i++ ) { |
duke@435 | 1317 | Node *n = lpt()->_body.at(i); |
duke@435 | 1318 | set_bb_idx(n, i); // Create a temporary map |
duke@435 | 1319 | if (in_bb(n)) { |
duke@435 | 1320 | bb_ct++; |
duke@435 | 1321 | if (!n->is_CFG()) { |
duke@435 | 1322 | bool found = false; |
duke@435 | 1323 | for (uint j = 0; j < n->req(); j++) { |
duke@435 | 1324 | Node* def = n->in(j); |
duke@435 | 1325 | if (def && in_bb(def)) { |
duke@435 | 1326 | found = true; |
duke@435 | 1327 | break; |
duke@435 | 1328 | } |
duke@435 | 1329 | } |
duke@435 | 1330 | if (!found) { |
duke@435 | 1331 | assert(n != entry, "can't be entry"); |
duke@435 | 1332 | _data_entry.push(n); |
duke@435 | 1333 | } |
duke@435 | 1334 | } |
duke@435 | 1335 | } |
duke@435 | 1336 | } |
duke@435 | 1337 | |
duke@435 | 1338 | // Find memory slices (head and tail) |
duke@435 | 1339 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { |
duke@435 | 1340 | Node *n = lp()->fast_out(i); |
duke@435 | 1341 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { |
duke@435 | 1342 | Node* n_tail = n->in(LoopNode::LoopBackControl); |
kvn@688 | 1343 | if (n_tail != n->in(LoopNode::EntryControl)) { |
kvn@688 | 1344 | _mem_slice_head.push(n); |
kvn@688 | 1345 | _mem_slice_tail.push(n_tail); |
kvn@688 | 1346 | } |
duke@435 | 1347 | } |
duke@435 | 1348 | } |
duke@435 | 1349 | |
duke@435 | 1350 | // Create an RPO list of nodes in block |
duke@435 | 1351 | |
duke@435 | 1352 | visited_clear(); |
duke@435 | 1353 | post_visited_clear(); |
duke@435 | 1354 | |
duke@435 | 1355 | // Push all non-control nodes with no inputs from within block, then control entry |
duke@435 | 1356 | for (int j = 0; j < _data_entry.length(); j++) { |
duke@435 | 1357 | Node* n = _data_entry.at(j); |
duke@435 | 1358 | visited_set(n); |
duke@435 | 1359 | _stk.push(n); |
duke@435 | 1360 | } |
duke@435 | 1361 | visited_set(entry); |
duke@435 | 1362 | _stk.push(entry); |
duke@435 | 1363 | |
duke@435 | 1364 | // Do a depth first walk over out edges |
duke@435 | 1365 | int rpo_idx = bb_ct - 1; |
duke@435 | 1366 | int size; |
duke@435 | 1367 | while ((size = _stk.length()) > 0) { |
duke@435 | 1368 | Node* n = _stk.top(); // Leave node on stack |
duke@435 | 1369 | if (!visited_test_set(n)) { |
duke@435 | 1370 | // forward arc in graph |
duke@435 | 1371 | } else if (!post_visited_test(n)) { |
duke@435 | 1372 | // cross or back arc |
duke@435 | 1373 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 1374 | Node *use = n->fast_out(i); |
duke@435 | 1375 | if (in_bb(use) && !visited_test(use) && |
duke@435 | 1376 | // Don't go around backedge |
duke@435 | 1377 | (!use->is_Phi() || n == entry)) { |
duke@435 | 1378 | _stk.push(use); |
duke@435 | 1379 | } |
duke@435 | 1380 | } |
duke@435 | 1381 | if (_stk.length() == size) { |
duke@435 | 1382 | // There were no additional uses, post visit node now |
duke@435 | 1383 | _stk.pop(); // Remove node from stack |
duke@435 | 1384 | assert(rpo_idx >= 0, ""); |
duke@435 | 1385 | _block.at_put_grow(rpo_idx, n); |
duke@435 | 1386 | rpo_idx--; |
duke@435 | 1387 | post_visited_set(n); |
duke@435 | 1388 | assert(rpo_idx >= 0 || _stk.is_empty(), ""); |
duke@435 | 1389 | } |
duke@435 | 1390 | } else { |
duke@435 | 1391 | _stk.pop(); // Remove post-visited node from stack |
duke@435 | 1392 | } |
duke@435 | 1393 | } |
duke@435 | 1394 | |
duke@435 | 1395 | // Create real map of block indices for nodes |
duke@435 | 1396 | for (int j = 0; j < _block.length(); j++) { |
duke@435 | 1397 | Node* n = _block.at(j); |
duke@435 | 1398 | set_bb_idx(n, j); |
duke@435 | 1399 | } |
duke@435 | 1400 | |
duke@435 | 1401 | initialize_bb(); // Ensure extra info is allocated. |
duke@435 | 1402 | |
duke@435 | 1403 | #ifndef PRODUCT |
duke@435 | 1404 | if (TraceSuperWord) { |
duke@435 | 1405 | print_bb(); |
duke@435 | 1406 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); |
duke@435 | 1407 | for (int m = 0; m < _data_entry.length(); m++) { |
duke@435 | 1408 | tty->print("%3d ", m); |
duke@435 | 1409 | _data_entry.at(m)->dump(); |
duke@435 | 1410 | } |
duke@435 | 1411 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); |
duke@435 | 1412 | for (int m = 0; m < _mem_slice_head.length(); m++) { |
duke@435 | 1413 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); |
duke@435 | 1414 | tty->print(" "); _mem_slice_tail.at(m)->dump(); |
duke@435 | 1415 | } |
duke@435 | 1416 | } |
duke@435 | 1417 | #endif |
duke@435 | 1418 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); |
duke@435 | 1419 | } |
duke@435 | 1420 | |
duke@435 | 1421 | //------------------------------initialize_bb--------------------------- |
duke@435 | 1422 | // Initialize per node info |
duke@435 | 1423 | void SuperWord::initialize_bb() { |
duke@435 | 1424 | Node* last = _block.at(_block.length() - 1); |
duke@435 | 1425 | grow_node_info(bb_idx(last)); |
duke@435 | 1426 | } |
duke@435 | 1427 | |
duke@435 | 1428 | //------------------------------bb_insert_after--------------------------- |
duke@435 | 1429 | // Insert n into block after pos |
duke@435 | 1430 | void SuperWord::bb_insert_after(Node* n, int pos) { |
duke@435 | 1431 | int n_pos = pos + 1; |
duke@435 | 1432 | // Make room |
duke@435 | 1433 | for (int i = _block.length() - 1; i >= n_pos; i--) { |
duke@435 | 1434 | _block.at_put_grow(i+1, _block.at(i)); |
duke@435 | 1435 | } |
duke@435 | 1436 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { |
duke@435 | 1437 | _node_info.at_put_grow(j+1, _node_info.at(j)); |
duke@435 | 1438 | } |
duke@435 | 1439 | // Set value |
duke@435 | 1440 | _block.at_put_grow(n_pos, n); |
duke@435 | 1441 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); |
duke@435 | 1442 | // Adjust map from node->_idx to _block index |
duke@435 | 1443 | for (int i = n_pos; i < _block.length(); i++) { |
duke@435 | 1444 | set_bb_idx(_block.at(i), i); |
duke@435 | 1445 | } |
duke@435 | 1446 | } |
duke@435 | 1447 | |
duke@435 | 1448 | //------------------------------compute_max_depth--------------------------- |
duke@435 | 1449 | // Compute max depth for expressions from beginning of block |
duke@435 | 1450 | // Use to prune search paths during test for independence. |
duke@435 | 1451 | void SuperWord::compute_max_depth() { |
duke@435 | 1452 | int ct = 0; |
duke@435 | 1453 | bool again; |
duke@435 | 1454 | do { |
duke@435 | 1455 | again = false; |
duke@435 | 1456 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1457 | Node* n = _block.at(i); |
duke@435 | 1458 | if (!n->is_Phi()) { |
duke@435 | 1459 | int d_orig = depth(n); |
duke@435 | 1460 | int d_in = 0; |
duke@435 | 1461 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { |
duke@435 | 1462 | Node* pred = preds.current(); |
duke@435 | 1463 | if (in_bb(pred)) { |
duke@435 | 1464 | d_in = MAX2(d_in, depth(pred)); |
duke@435 | 1465 | } |
duke@435 | 1466 | } |
duke@435 | 1467 | if (d_in + 1 != d_orig) { |
duke@435 | 1468 | set_depth(n, d_in + 1); |
duke@435 | 1469 | again = true; |
duke@435 | 1470 | } |
duke@435 | 1471 | } |
duke@435 | 1472 | } |
duke@435 | 1473 | ct++; |
duke@435 | 1474 | } while (again); |
duke@435 | 1475 | #ifndef PRODUCT |
duke@435 | 1476 | if (TraceSuperWord && Verbose) |
duke@435 | 1477 | tty->print_cr("compute_max_depth iterated: %d times", ct); |
duke@435 | 1478 | #endif |
duke@435 | 1479 | } |
duke@435 | 1480 | |
duke@435 | 1481 | //-------------------------compute_vector_element_type----------------------- |
duke@435 | 1482 | // Compute necessary vector element type for expressions |
duke@435 | 1483 | // This propagates backwards a narrower integer type when the |
duke@435 | 1484 | // upper bits of the value are not needed. |
duke@435 | 1485 | // Example: char a,b,c; a = b + c; |
duke@435 | 1486 | // Normally the type of the add is integer, but for packed character |
duke@435 | 1487 | // operations the type of the add needs to be char. |
duke@435 | 1488 | void SuperWord::compute_vector_element_type() { |
duke@435 | 1489 | #ifndef PRODUCT |
duke@435 | 1490 | if (TraceSuperWord && Verbose) |
duke@435 | 1491 | tty->print_cr("\ncompute_velt_type:"); |
duke@435 | 1492 | #endif |
duke@435 | 1493 | |
duke@435 | 1494 | // Initial type |
duke@435 | 1495 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1496 | Node* n = _block.at(i); |
duke@435 | 1497 | const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type()) |
duke@435 | 1498 | : _igvn.type(n); |
duke@435 | 1499 | const Type* vt = container_type(t); |
duke@435 | 1500 | set_velt_type(n, vt); |
duke@435 | 1501 | } |
duke@435 | 1502 | |
duke@435 | 1503 | // Propagate narrowed type backwards through operations |
duke@435 | 1504 | // that don't depend on higher order bits |
duke@435 | 1505 | for (int i = _block.length() - 1; i >= 0; i--) { |
duke@435 | 1506 | Node* n = _block.at(i); |
duke@435 | 1507 | // Only integer types need be examined |
duke@435 | 1508 | if (n->bottom_type()->isa_int()) { |
duke@435 | 1509 | uint start, end; |
duke@435 | 1510 | vector_opd_range(n, &start, &end); |
duke@435 | 1511 | const Type* vt = velt_type(n); |
duke@435 | 1512 | |
duke@435 | 1513 | for (uint j = start; j < end; j++) { |
duke@435 | 1514 | Node* in = n->in(j); |
duke@435 | 1515 | // Don't propagate through a type conversion |
duke@435 | 1516 | if (n->bottom_type() != in->bottom_type()) |
duke@435 | 1517 | continue; |
duke@435 | 1518 | switch(in->Opcode()) { |
duke@435 | 1519 | case Op_AddI: case Op_AddL: |
duke@435 | 1520 | case Op_SubI: case Op_SubL: |
duke@435 | 1521 | case Op_MulI: case Op_MulL: |
duke@435 | 1522 | case Op_AndI: case Op_AndL: |
duke@435 | 1523 | case Op_OrI: case Op_OrL: |
duke@435 | 1524 | case Op_XorI: case Op_XorL: |
duke@435 | 1525 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1526 | case Op_CMoveI: case Op_CMoveL: |
duke@435 | 1527 | if (in_bb(in)) { |
duke@435 | 1528 | bool same_type = true; |
duke@435 | 1529 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 1530 | Node *use = in->fast_out(k); |
duke@435 | 1531 | if (!in_bb(use) || velt_type(use) != vt) { |
duke@435 | 1532 | same_type = false; |
duke@435 | 1533 | break; |
duke@435 | 1534 | } |
duke@435 | 1535 | } |
duke@435 | 1536 | if (same_type) { |
duke@435 | 1537 | set_velt_type(in, vt); |
duke@435 | 1538 | } |
duke@435 | 1539 | } |
duke@435 | 1540 | } |
duke@435 | 1541 | } |
duke@435 | 1542 | } |
duke@435 | 1543 | } |
duke@435 | 1544 | #ifndef PRODUCT |
duke@435 | 1545 | if (TraceSuperWord && Verbose) { |
duke@435 | 1546 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1547 | Node* n = _block.at(i); |
duke@435 | 1548 | velt_type(n)->dump(); |
duke@435 | 1549 | tty->print("\t"); |
duke@435 | 1550 | n->dump(); |
duke@435 | 1551 | } |
duke@435 | 1552 | } |
duke@435 | 1553 | #endif |
duke@435 | 1554 | } |
duke@435 | 1555 | |
duke@435 | 1556 | //------------------------------memory_alignment--------------------------- |
duke@435 | 1557 | // Alignment within a vector memory reference |
duke@435 | 1558 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
duke@435 | 1559 | SWPointer p(s, this); |
duke@435 | 1560 | if (!p.valid()) { |
duke@435 | 1561 | return bottom_align; |
duke@435 | 1562 | } |
duke@435 | 1563 | int offset = p.offset_in_bytes(); |
duke@435 | 1564 | offset += iv_adjust_in_bytes; |
duke@435 | 1565 | int off_rem = offset % vector_width_in_bytes(); |
duke@435 | 1566 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); |
duke@435 | 1567 | return off_mod; |
duke@435 | 1568 | } |
duke@435 | 1569 | |
duke@435 | 1570 | //---------------------------container_type--------------------------- |
duke@435 | 1571 | // Smallest type containing range of values |
duke@435 | 1572 | const Type* SuperWord::container_type(const Type* t) { |
kvn@656 | 1573 | const Type* tp = t->make_ptr(); |
kvn@656 | 1574 | if (tp && tp->isa_aryptr()) { |
kvn@656 | 1575 | t = tp->is_aryptr()->elem(); |
duke@435 | 1576 | } |
duke@435 | 1577 | if (t->basic_type() == T_INT) { |
duke@435 | 1578 | if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; |
duke@435 | 1579 | if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; |
duke@435 | 1580 | if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; |
duke@435 | 1581 | if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; |
duke@435 | 1582 | return TypeInt::INT; |
duke@435 | 1583 | } |
duke@435 | 1584 | return t; |
duke@435 | 1585 | } |
duke@435 | 1586 | |
duke@435 | 1587 | //-------------------------vector_opd_range----------------------- |
duke@435 | 1588 | // (Start, end] half-open range defining which operands are vector |
duke@435 | 1589 | void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { |
duke@435 | 1590 | switch (n->Opcode()) { |
twisti@993 | 1591 | case Op_LoadB: case Op_LoadUS: |
duke@435 | 1592 | case Op_LoadI: case Op_LoadL: |
duke@435 | 1593 | case Op_LoadF: case Op_LoadD: |
duke@435 | 1594 | case Op_LoadP: |
duke@435 | 1595 | *start = 0; |
duke@435 | 1596 | *end = 0; |
duke@435 | 1597 | return; |
duke@435 | 1598 | case Op_StoreB: case Op_StoreC: |
duke@435 | 1599 | case Op_StoreI: case Op_StoreL: |
duke@435 | 1600 | case Op_StoreF: case Op_StoreD: |
duke@435 | 1601 | case Op_StoreP: |
duke@435 | 1602 | *start = MemNode::ValueIn; |
duke@435 | 1603 | *end = *start + 1; |
duke@435 | 1604 | return; |
duke@435 | 1605 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1606 | *start = 1; |
duke@435 | 1607 | *end = 2; |
duke@435 | 1608 | return; |
duke@435 | 1609 | case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: |
duke@435 | 1610 | *start = 2; |
duke@435 | 1611 | *end = n->req(); |
duke@435 | 1612 | return; |
duke@435 | 1613 | } |
duke@435 | 1614 | *start = 1; |
duke@435 | 1615 | *end = n->req(); // default is all operands |
duke@435 | 1616 | } |
duke@435 | 1617 | |
duke@435 | 1618 | //------------------------------in_packset--------------------------- |
duke@435 | 1619 | // Are s1 and s2 in a pack pair and ordered as s1,s2? |
duke@435 | 1620 | bool SuperWord::in_packset(Node* s1, Node* s2) { |
duke@435 | 1621 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1622 | Node_List* p = _packset.at(i); |
duke@435 | 1623 | assert(p->size() == 2, "must be"); |
duke@435 | 1624 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { |
duke@435 | 1625 | return true; |
duke@435 | 1626 | } |
duke@435 | 1627 | } |
duke@435 | 1628 | return false; |
duke@435 | 1629 | } |
duke@435 | 1630 | |
duke@435 | 1631 | //------------------------------in_pack--------------------------- |
duke@435 | 1632 | // Is s in pack p? |
duke@435 | 1633 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { |
duke@435 | 1634 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1635 | if (p->at(i) == s) { |
duke@435 | 1636 | return p; |
duke@435 | 1637 | } |
duke@435 | 1638 | } |
duke@435 | 1639 | return NULL; |
duke@435 | 1640 | } |
duke@435 | 1641 | |
duke@435 | 1642 | //------------------------------remove_pack_at--------------------------- |
duke@435 | 1643 | // Remove the pack at position pos in the packset |
duke@435 | 1644 | void SuperWord::remove_pack_at(int pos) { |
duke@435 | 1645 | Node_List* p = _packset.at(pos); |
duke@435 | 1646 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1647 | Node* s = p->at(i); |
duke@435 | 1648 | set_my_pack(s, NULL); |
duke@435 | 1649 | } |
duke@435 | 1650 | _packset.remove_at(pos); |
duke@435 | 1651 | } |
duke@435 | 1652 | |
duke@435 | 1653 | //------------------------------executed_first--------------------------- |
duke@435 | 1654 | // Return the node executed first in pack p. Uses the RPO block list |
duke@435 | 1655 | // to determine order. |
duke@435 | 1656 | Node* SuperWord::executed_first(Node_List* p) { |
duke@435 | 1657 | Node* n = p->at(0); |
duke@435 | 1658 | int n_rpo = bb_idx(n); |
duke@435 | 1659 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1660 | Node* s = p->at(i); |
duke@435 | 1661 | int s_rpo = bb_idx(s); |
duke@435 | 1662 | if (s_rpo < n_rpo) { |
duke@435 | 1663 | n = s; |
duke@435 | 1664 | n_rpo = s_rpo; |
duke@435 | 1665 | } |
duke@435 | 1666 | } |
duke@435 | 1667 | return n; |
duke@435 | 1668 | } |
duke@435 | 1669 | |
duke@435 | 1670 | //------------------------------executed_last--------------------------- |
duke@435 | 1671 | // Return the node executed last in pack p. |
duke@435 | 1672 | Node* SuperWord::executed_last(Node_List* p) { |
duke@435 | 1673 | Node* n = p->at(0); |
duke@435 | 1674 | int n_rpo = bb_idx(n); |
duke@435 | 1675 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1676 | Node* s = p->at(i); |
duke@435 | 1677 | int s_rpo = bb_idx(s); |
duke@435 | 1678 | if (s_rpo > n_rpo) { |
duke@435 | 1679 | n = s; |
duke@435 | 1680 | n_rpo = s_rpo; |
duke@435 | 1681 | } |
duke@435 | 1682 | } |
duke@435 | 1683 | return n; |
duke@435 | 1684 | } |
duke@435 | 1685 | |
duke@435 | 1686 | //----------------------------align_initial_loop_index--------------------------- |
duke@435 | 1687 | // Adjust pre-loop limit so that in main loop, a load/store reference |
duke@435 | 1688 | // to align_to_ref will be a position zero in the vector. |
duke@435 | 1689 | // (iv + k) mod vector_align == 0 |
duke@435 | 1690 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { |
duke@435 | 1691 | CountedLoopNode *main_head = lp()->as_CountedLoop(); |
duke@435 | 1692 | assert(main_head->is_main_loop(), ""); |
duke@435 | 1693 | CountedLoopEndNode* pre_end = get_pre_loop_end(main_head); |
duke@435 | 1694 | assert(pre_end != NULL, ""); |
duke@435 | 1695 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 1696 | assert(pre_opaq1->Opcode() == Op_Opaque1, ""); |
duke@435 | 1697 | Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; |
never@507 | 1698 | Node *lim0 = pre_opaq->in(1); |
duke@435 | 1699 | |
duke@435 | 1700 | // Where we put new limit calculations |
duke@435 | 1701 | Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); |
duke@435 | 1702 | |
duke@435 | 1703 | // Ensure the original loop limit is available from the |
duke@435 | 1704 | // pre-loop Opaque1 node. |
duke@435 | 1705 | Node *orig_limit = pre_opaq->original_loop_limit(); |
duke@435 | 1706 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
duke@435 | 1707 | |
duke@435 | 1708 | SWPointer align_to_ref_p(align_to_ref, this); |
duke@435 | 1709 | |
never@507 | 1710 | // Given: |
never@507 | 1711 | // lim0 == original pre loop limit |
never@507 | 1712 | // V == v_align (power of 2) |
never@507 | 1713 | // invar == extra invariant piece of the address expression |
never@507 | 1714 | // e == k [ +/- invar ] |
duke@435 | 1715 | // |
never@507 | 1716 | // When reassociating expressions involving '%' the basic rules are: |
never@507 | 1717 | // (a - b) % k == 0 => a % k == b % k |
never@507 | 1718 | // and: |
never@507 | 1719 | // (a + b) % k == 0 => a % k == (k - b) % k |
never@507 | 1720 | // |
never@507 | 1721 | // For stride > 0 && scale > 0, |
never@507 | 1722 | // Derive the new pre-loop limit "lim" such that the two constraints: |
never@507 | 1723 | // (1) lim = lim0 + N (where N is some positive integer < V) |
never@507 | 1724 | // (2) (e + lim) % V == 0 |
never@507 | 1725 | // are true. |
never@507 | 1726 | // |
never@507 | 1727 | // Substituting (1) into (2), |
never@507 | 1728 | // (e + lim0 + N) % V == 0 |
never@507 | 1729 | // solve for N: |
never@507 | 1730 | // N = (V - (e + lim0)) % V |
never@507 | 1731 | // substitute back into (1), so that new limit |
never@507 | 1732 | // lim = lim0 + (V - (e + lim0)) % V |
never@507 | 1733 | // |
never@507 | 1734 | // For stride > 0 && scale < 0 |
never@507 | 1735 | // Constraints: |
never@507 | 1736 | // lim = lim0 + N |
never@507 | 1737 | // (e - lim) % V == 0 |
never@507 | 1738 | // Solving for lim: |
never@507 | 1739 | // (e - lim0 - N) % V == 0 |
never@507 | 1740 | // N = (e - lim0) % V |
never@507 | 1741 | // lim = lim0 + (e - lim0) % V |
never@507 | 1742 | // |
never@507 | 1743 | // For stride < 0 && scale > 0 |
never@507 | 1744 | // Constraints: |
never@507 | 1745 | // lim = lim0 - N |
never@507 | 1746 | // (e + lim) % V == 0 |
never@507 | 1747 | // Solving for lim: |
never@507 | 1748 | // (e + lim0 - N) % V == 0 |
never@507 | 1749 | // N = (e + lim0) % V |
never@507 | 1750 | // lim = lim0 - (e + lim0) % V |
never@507 | 1751 | // |
never@507 | 1752 | // For stride < 0 && scale < 0 |
never@507 | 1753 | // Constraints: |
never@507 | 1754 | // lim = lim0 - N |
never@507 | 1755 | // (e - lim) % V == 0 |
never@507 | 1756 | // Solving for lim: |
never@507 | 1757 | // (e - lim0 + N) % V == 0 |
never@507 | 1758 | // N = (V - (e - lim0)) % V |
never@507 | 1759 | // lim = lim0 - (V - (e - lim0)) % V |
duke@435 | 1760 | |
never@507 | 1761 | int stride = iv_stride(); |
never@507 | 1762 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 1763 | int elt_size = align_to_ref_p.memory_size(); |
duke@435 | 1764 | int v_align = vector_width_in_bytes() / elt_size; |
duke@435 | 1765 | int k = align_to_ref_p.offset_in_bytes() / elt_size; |
duke@435 | 1766 | |
duke@435 | 1767 | Node *kn = _igvn.intcon(k); |
never@507 | 1768 | |
never@507 | 1769 | Node *e = kn; |
duke@435 | 1770 | if (align_to_ref_p.invar() != NULL) { |
never@507 | 1771 | // incorporate any extra invariant piece producing k +/- invar >>> log2(elt) |
duke@435 | 1772 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
duke@435 | 1773 | Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); |
duke@435 | 1774 | _phase->_igvn.register_new_node_with_optimizer(aref); |
duke@435 | 1775 | _phase->set_ctrl(aref, pre_ctrl); |
never@507 | 1776 | if (align_to_ref_p.negate_invar()) { |
never@507 | 1777 | e = new (_phase->C, 3) SubINode(e, aref); |
duke@435 | 1778 | } else { |
never@507 | 1779 | e = new (_phase->C, 3) AddINode(e, aref); |
duke@435 | 1780 | } |
never@507 | 1781 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1782 | _phase->set_ctrl(e, pre_ctrl); |
duke@435 | 1783 | } |
never@507 | 1784 | |
never@507 | 1785 | // compute e +/- lim0 |
never@507 | 1786 | if (scale < 0) { |
never@507 | 1787 | e = new (_phase->C, 3) SubINode(e, lim0); |
never@507 | 1788 | } else { |
never@507 | 1789 | e = new (_phase->C, 3) AddINode(e, lim0); |
never@507 | 1790 | } |
never@507 | 1791 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1792 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 1793 | |
never@507 | 1794 | if (stride * scale > 0) { |
never@507 | 1795 | // compute V - (e +/- lim0) |
never@507 | 1796 | Node* va = _igvn.intcon(v_align); |
never@507 | 1797 | e = new (_phase->C, 3) SubINode(va, e); |
never@507 | 1798 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1799 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 1800 | } |
never@507 | 1801 | // compute N = (exp) % V |
duke@435 | 1802 | Node* va_msk = _igvn.intcon(v_align - 1); |
never@507 | 1803 | Node* N = new (_phase->C, 3) AndINode(e, va_msk); |
never@507 | 1804 | _phase->_igvn.register_new_node_with_optimizer(N); |
never@507 | 1805 | _phase->set_ctrl(N, pre_ctrl); |
never@507 | 1806 | |
never@507 | 1807 | // substitute back into (1), so that new limit |
never@507 | 1808 | // lim = lim0 + N |
never@507 | 1809 | Node* lim; |
never@507 | 1810 | if (stride < 0) { |
never@507 | 1811 | lim = new (_phase->C, 3) SubINode(lim0, N); |
duke@435 | 1812 | } else { |
never@507 | 1813 | lim = new (_phase->C, 3) AddINode(lim0, N); |
duke@435 | 1814 | } |
never@507 | 1815 | _phase->_igvn.register_new_node_with_optimizer(lim); |
never@507 | 1816 | _phase->set_ctrl(lim, pre_ctrl); |
duke@435 | 1817 | Node* constrained = |
never@507 | 1818 | (stride > 0) ? (Node*) new (_phase->C,3) MinINode(lim, orig_limit) |
never@507 | 1819 | : (Node*) new (_phase->C,3) MaxINode(lim, orig_limit); |
duke@435 | 1820 | _phase->_igvn.register_new_node_with_optimizer(constrained); |
duke@435 | 1821 | _phase->set_ctrl(constrained, pre_ctrl); |
duke@435 | 1822 | _igvn.hash_delete(pre_opaq); |
duke@435 | 1823 | pre_opaq->set_req(1, constrained); |
duke@435 | 1824 | } |
duke@435 | 1825 | |
duke@435 | 1826 | //----------------------------get_pre_loop_end--------------------------- |
duke@435 | 1827 | // Find pre loop end from main loop. Returns null if none. |
duke@435 | 1828 | CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) { |
duke@435 | 1829 | Node *ctrl = cl->in(LoopNode::EntryControl); |
duke@435 | 1830 | if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL; |
duke@435 | 1831 | Node *iffm = ctrl->in(0); |
duke@435 | 1832 | if (!iffm->is_If()) return NULL; |
duke@435 | 1833 | Node *p_f = iffm->in(0); |
duke@435 | 1834 | if (!p_f->is_IfFalse()) return NULL; |
duke@435 | 1835 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; |
duke@435 | 1836 | CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
duke@435 | 1837 | if (!pre_end->loopnode()->is_pre_loop()) return NULL; |
duke@435 | 1838 | return pre_end; |
duke@435 | 1839 | } |
duke@435 | 1840 | |
duke@435 | 1841 | |
duke@435 | 1842 | //------------------------------init--------------------------- |
duke@435 | 1843 | void SuperWord::init() { |
duke@435 | 1844 | _dg.init(); |
duke@435 | 1845 | _packset.clear(); |
duke@435 | 1846 | _disjoint_ptrs.clear(); |
duke@435 | 1847 | _block.clear(); |
duke@435 | 1848 | _data_entry.clear(); |
duke@435 | 1849 | _mem_slice_head.clear(); |
duke@435 | 1850 | _mem_slice_tail.clear(); |
duke@435 | 1851 | _node_info.clear(); |
duke@435 | 1852 | _align_to_ref = NULL; |
duke@435 | 1853 | _lpt = NULL; |
duke@435 | 1854 | _lp = NULL; |
duke@435 | 1855 | _bb = NULL; |
duke@435 | 1856 | _iv = NULL; |
duke@435 | 1857 | } |
duke@435 | 1858 | |
duke@435 | 1859 | //------------------------------print_packset--------------------------- |
duke@435 | 1860 | void SuperWord::print_packset() { |
duke@435 | 1861 | #ifndef PRODUCT |
duke@435 | 1862 | tty->print_cr("packset"); |
duke@435 | 1863 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1864 | tty->print_cr("Pack: %d", i); |
duke@435 | 1865 | Node_List* p = _packset.at(i); |
duke@435 | 1866 | print_pack(p); |
duke@435 | 1867 | } |
duke@435 | 1868 | #endif |
duke@435 | 1869 | } |
duke@435 | 1870 | |
duke@435 | 1871 | //------------------------------print_pack--------------------------- |
duke@435 | 1872 | void SuperWord::print_pack(Node_List* p) { |
duke@435 | 1873 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1874 | print_stmt(p->at(i)); |
duke@435 | 1875 | } |
duke@435 | 1876 | } |
duke@435 | 1877 | |
duke@435 | 1878 | //------------------------------print_bb--------------------------- |
duke@435 | 1879 | void SuperWord::print_bb() { |
duke@435 | 1880 | #ifndef PRODUCT |
duke@435 | 1881 | tty->print_cr("\nBlock"); |
duke@435 | 1882 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1883 | Node* n = _block.at(i); |
duke@435 | 1884 | tty->print("%d ", i); |
duke@435 | 1885 | if (n) { |
duke@435 | 1886 | n->dump(); |
duke@435 | 1887 | } |
duke@435 | 1888 | } |
duke@435 | 1889 | #endif |
duke@435 | 1890 | } |
duke@435 | 1891 | |
duke@435 | 1892 | //------------------------------print_stmt--------------------------- |
duke@435 | 1893 | void SuperWord::print_stmt(Node* s) { |
duke@435 | 1894 | #ifndef PRODUCT |
duke@435 | 1895 | tty->print(" align: %d \t", alignment(s)); |
duke@435 | 1896 | s->dump(); |
duke@435 | 1897 | #endif |
duke@435 | 1898 | } |
duke@435 | 1899 | |
duke@435 | 1900 | //------------------------------blank--------------------------- |
duke@435 | 1901 | char* SuperWord::blank(uint depth) { |
duke@435 | 1902 | static char blanks[101]; |
duke@435 | 1903 | assert(depth < 101, "too deep"); |
duke@435 | 1904 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; |
duke@435 | 1905 | blanks[depth] = '\0'; |
duke@435 | 1906 | return blanks; |
duke@435 | 1907 | } |
duke@435 | 1908 | |
duke@435 | 1909 | |
duke@435 | 1910 | //==============================SWPointer=========================== |
duke@435 | 1911 | |
duke@435 | 1912 | //----------------------------SWPointer------------------------ |
duke@435 | 1913 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp) : |
duke@435 | 1914 | _mem(mem), _slp(slp), _base(NULL), _adr(NULL), |
duke@435 | 1915 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) { |
duke@435 | 1916 | |
duke@435 | 1917 | Node* adr = mem->in(MemNode::Address); |
duke@435 | 1918 | if (!adr->is_AddP()) { |
duke@435 | 1919 | assert(!valid(), "too complex"); |
duke@435 | 1920 | return; |
duke@435 | 1921 | } |
duke@435 | 1922 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) |
duke@435 | 1923 | Node* base = adr->in(AddPNode::Base); |
cfang@1493 | 1924 | //unsafe reference could not be aligned appropriately without runtime checking |
cfang@1493 | 1925 | if (base == NULL || base->bottom_type() == Type::TOP) { |
cfang@1493 | 1926 | assert(!valid(), "unsafe access"); |
cfang@1493 | 1927 | return; |
cfang@1493 | 1928 | } |
duke@435 | 1929 | for (int i = 0; i < 3; i++) { |
duke@435 | 1930 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { |
duke@435 | 1931 | assert(!valid(), "too complex"); |
duke@435 | 1932 | return; |
duke@435 | 1933 | } |
duke@435 | 1934 | adr = adr->in(AddPNode::Address); |
duke@435 | 1935 | if (base == adr || !adr->is_AddP()) { |
duke@435 | 1936 | break; // stop looking at addp's |
duke@435 | 1937 | } |
duke@435 | 1938 | } |
duke@435 | 1939 | _base = base; |
duke@435 | 1940 | _adr = adr; |
duke@435 | 1941 | assert(valid(), "Usable"); |
duke@435 | 1942 | } |
duke@435 | 1943 | |
duke@435 | 1944 | // Following is used to create a temporary object during |
duke@435 | 1945 | // the pattern match of an address expression. |
duke@435 | 1946 | SWPointer::SWPointer(SWPointer* p) : |
duke@435 | 1947 | _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), |
duke@435 | 1948 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} |
duke@435 | 1949 | |
duke@435 | 1950 | //------------------------scaled_iv_plus_offset-------------------- |
duke@435 | 1951 | // Match: k*iv + offset |
duke@435 | 1952 | // where: k is a constant that maybe zero, and |
duke@435 | 1953 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional |
duke@435 | 1954 | bool SWPointer::scaled_iv_plus_offset(Node* n) { |
duke@435 | 1955 | if (scaled_iv(n)) { |
duke@435 | 1956 | return true; |
duke@435 | 1957 | } |
duke@435 | 1958 | if (offset_plus_k(n)) { |
duke@435 | 1959 | return true; |
duke@435 | 1960 | } |
duke@435 | 1961 | int opc = n->Opcode(); |
duke@435 | 1962 | if (opc == Op_AddI) { |
duke@435 | 1963 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { |
duke@435 | 1964 | return true; |
duke@435 | 1965 | } |
duke@435 | 1966 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1967 | return true; |
duke@435 | 1968 | } |
duke@435 | 1969 | } else if (opc == Op_SubI) { |
duke@435 | 1970 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { |
duke@435 | 1971 | return true; |
duke@435 | 1972 | } |
duke@435 | 1973 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1974 | _scale *= -1; |
duke@435 | 1975 | return true; |
duke@435 | 1976 | } |
duke@435 | 1977 | } |
duke@435 | 1978 | return false; |
duke@435 | 1979 | } |
duke@435 | 1980 | |
duke@435 | 1981 | //----------------------------scaled_iv------------------------ |
duke@435 | 1982 | // Match: k*iv where k is a constant that's not zero |
duke@435 | 1983 | bool SWPointer::scaled_iv(Node* n) { |
duke@435 | 1984 | if (_scale != 0) { |
duke@435 | 1985 | return false; // already found a scale |
duke@435 | 1986 | } |
duke@435 | 1987 | if (n == iv()) { |
duke@435 | 1988 | _scale = 1; |
duke@435 | 1989 | return true; |
duke@435 | 1990 | } |
duke@435 | 1991 | int opc = n->Opcode(); |
duke@435 | 1992 | if (opc == Op_MulI) { |
duke@435 | 1993 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 1994 | _scale = n->in(2)->get_int(); |
duke@435 | 1995 | return true; |
duke@435 | 1996 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { |
duke@435 | 1997 | _scale = n->in(1)->get_int(); |
duke@435 | 1998 | return true; |
duke@435 | 1999 | } |
duke@435 | 2000 | } else if (opc == Op_LShiftI) { |
duke@435 | 2001 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 2002 | _scale = 1 << n->in(2)->get_int(); |
duke@435 | 2003 | return true; |
duke@435 | 2004 | } |
duke@435 | 2005 | } else if (opc == Op_ConvI2L) { |
duke@435 | 2006 | if (scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2007 | return true; |
duke@435 | 2008 | } |
duke@435 | 2009 | } else if (opc == Op_LShiftL) { |
duke@435 | 2010 | if (!has_iv() && _invar == NULL) { |
duke@435 | 2011 | // Need to preserve the current _offset value, so |
duke@435 | 2012 | // create a temporary object for this expression subtree. |
duke@435 | 2013 | // Hacky, so should re-engineer the address pattern match. |
duke@435 | 2014 | SWPointer tmp(this); |
duke@435 | 2015 | if (tmp.scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2016 | if (tmp._invar == NULL) { |
duke@435 | 2017 | int mult = 1 << n->in(2)->get_int(); |
duke@435 | 2018 | _scale = tmp._scale * mult; |
duke@435 | 2019 | _offset += tmp._offset * mult; |
duke@435 | 2020 | return true; |
duke@435 | 2021 | } |
duke@435 | 2022 | } |
duke@435 | 2023 | } |
duke@435 | 2024 | } |
duke@435 | 2025 | return false; |
duke@435 | 2026 | } |
duke@435 | 2027 | |
duke@435 | 2028 | //----------------------------offset_plus_k------------------------ |
duke@435 | 2029 | // Match: offset is (k [+/- invariant]) |
duke@435 | 2030 | // where k maybe zero and invariant is optional, but not both. |
duke@435 | 2031 | bool SWPointer::offset_plus_k(Node* n, bool negate) { |
duke@435 | 2032 | int opc = n->Opcode(); |
duke@435 | 2033 | if (opc == Op_ConI) { |
duke@435 | 2034 | _offset += negate ? -(n->get_int()) : n->get_int(); |
duke@435 | 2035 | return true; |
duke@435 | 2036 | } else if (opc == Op_ConL) { |
duke@435 | 2037 | // Okay if value fits into an int |
duke@435 | 2038 | const TypeLong* t = n->find_long_type(); |
duke@435 | 2039 | if (t->higher_equal(TypeLong::INT)) { |
duke@435 | 2040 | jlong loff = n->get_long(); |
duke@435 | 2041 | jint off = (jint)loff; |
duke@435 | 2042 | _offset += negate ? -off : loff; |
duke@435 | 2043 | return true; |
duke@435 | 2044 | } |
duke@435 | 2045 | return false; |
duke@435 | 2046 | } |
duke@435 | 2047 | if (_invar != NULL) return false; // already have an invariant |
duke@435 | 2048 | if (opc == Op_AddI) { |
duke@435 | 2049 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2050 | _negate_invar = negate; |
duke@435 | 2051 | _invar = n->in(1); |
duke@435 | 2052 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2053 | return true; |
duke@435 | 2054 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2055 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2056 | _negate_invar = negate; |
duke@435 | 2057 | _invar = n->in(2); |
duke@435 | 2058 | return true; |
duke@435 | 2059 | } |
duke@435 | 2060 | } |
duke@435 | 2061 | if (opc == Op_SubI) { |
duke@435 | 2062 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2063 | _negate_invar = negate; |
duke@435 | 2064 | _invar = n->in(1); |
duke@435 | 2065 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2066 | return true; |
duke@435 | 2067 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2068 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2069 | _negate_invar = !negate; |
duke@435 | 2070 | _invar = n->in(2); |
duke@435 | 2071 | return true; |
duke@435 | 2072 | } |
duke@435 | 2073 | } |
duke@435 | 2074 | if (invariant(n)) { |
duke@435 | 2075 | _negate_invar = negate; |
duke@435 | 2076 | _invar = n; |
duke@435 | 2077 | return true; |
duke@435 | 2078 | } |
duke@435 | 2079 | return false; |
duke@435 | 2080 | } |
duke@435 | 2081 | |
duke@435 | 2082 | //----------------------------print------------------------ |
duke@435 | 2083 | void SWPointer::print() { |
duke@435 | 2084 | #ifndef PRODUCT |
duke@435 | 2085 | tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n", |
duke@435 | 2086 | _base != NULL ? _base->_idx : 0, |
duke@435 | 2087 | _adr != NULL ? _adr->_idx : 0, |
duke@435 | 2088 | _scale, _offset, |
duke@435 | 2089 | _negate_invar?'-':'+', |
duke@435 | 2090 | _invar != NULL ? _invar->_idx : 0); |
duke@435 | 2091 | #endif |
duke@435 | 2092 | } |
duke@435 | 2093 | |
duke@435 | 2094 | // ========================= OrderedPair ===================== |
duke@435 | 2095 | |
duke@435 | 2096 | const OrderedPair OrderedPair::initial; |
duke@435 | 2097 | |
duke@435 | 2098 | // ========================= SWNodeInfo ===================== |
duke@435 | 2099 | |
duke@435 | 2100 | const SWNodeInfo SWNodeInfo::initial; |
duke@435 | 2101 | |
duke@435 | 2102 | |
duke@435 | 2103 | // ============================ DepGraph =========================== |
duke@435 | 2104 | |
duke@435 | 2105 | //------------------------------make_node--------------------------- |
duke@435 | 2106 | // Make a new dependence graph node for an ideal node. |
duke@435 | 2107 | DepMem* DepGraph::make_node(Node* node) { |
duke@435 | 2108 | DepMem* m = new (_arena) DepMem(node); |
duke@435 | 2109 | if (node != NULL) { |
duke@435 | 2110 | assert(_map.at_grow(node->_idx) == NULL, "one init only"); |
duke@435 | 2111 | _map.at_put_grow(node->_idx, m); |
duke@435 | 2112 | } |
duke@435 | 2113 | return m; |
duke@435 | 2114 | } |
duke@435 | 2115 | |
duke@435 | 2116 | //------------------------------make_edge--------------------------- |
duke@435 | 2117 | // Make a new dependence graph edge from dpred -> dsucc |
duke@435 | 2118 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { |
duke@435 | 2119 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); |
duke@435 | 2120 | dpred->set_out_head(e); |
duke@435 | 2121 | dsucc->set_in_head(e); |
duke@435 | 2122 | return e; |
duke@435 | 2123 | } |
duke@435 | 2124 | |
duke@435 | 2125 | // ========================== DepMem ======================== |
duke@435 | 2126 | |
duke@435 | 2127 | //------------------------------in_cnt--------------------------- |
duke@435 | 2128 | int DepMem::in_cnt() { |
duke@435 | 2129 | int ct = 0; |
duke@435 | 2130 | for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++; |
duke@435 | 2131 | return ct; |
duke@435 | 2132 | } |
duke@435 | 2133 | |
duke@435 | 2134 | //------------------------------out_cnt--------------------------- |
duke@435 | 2135 | int DepMem::out_cnt() { |
duke@435 | 2136 | int ct = 0; |
duke@435 | 2137 | for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++; |
duke@435 | 2138 | return ct; |
duke@435 | 2139 | } |
duke@435 | 2140 | |
duke@435 | 2141 | //------------------------------print----------------------------- |
duke@435 | 2142 | void DepMem::print() { |
duke@435 | 2143 | #ifndef PRODUCT |
duke@435 | 2144 | tty->print(" DepNode %d (", _node->_idx); |
duke@435 | 2145 | for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) { |
duke@435 | 2146 | Node* pred = p->pred()->node(); |
duke@435 | 2147 | tty->print(" %d", pred != NULL ? pred->_idx : 0); |
duke@435 | 2148 | } |
duke@435 | 2149 | tty->print(") ["); |
duke@435 | 2150 | for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) { |
duke@435 | 2151 | Node* succ = s->succ()->node(); |
duke@435 | 2152 | tty->print(" %d", succ != NULL ? succ->_idx : 0); |
duke@435 | 2153 | } |
duke@435 | 2154 | tty->print_cr(" ]"); |
duke@435 | 2155 | #endif |
duke@435 | 2156 | } |
duke@435 | 2157 | |
duke@435 | 2158 | // =========================== DepEdge ========================= |
duke@435 | 2159 | |
duke@435 | 2160 | //------------------------------DepPreds--------------------------- |
duke@435 | 2161 | void DepEdge::print() { |
duke@435 | 2162 | #ifndef PRODUCT |
duke@435 | 2163 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); |
duke@435 | 2164 | #endif |
duke@435 | 2165 | } |
duke@435 | 2166 | |
duke@435 | 2167 | // =========================== DepPreds ========================= |
duke@435 | 2168 | // Iterator over predecessor edges in the dependence graph. |
duke@435 | 2169 | |
duke@435 | 2170 | //------------------------------DepPreds--------------------------- |
duke@435 | 2171 | DepPreds::DepPreds(Node* n, DepGraph& dg) { |
duke@435 | 2172 | _n = n; |
duke@435 | 2173 | _done = false; |
duke@435 | 2174 | if (_n->is_Store() || _n->is_Load()) { |
duke@435 | 2175 | _next_idx = MemNode::Address; |
duke@435 | 2176 | _end_idx = n->req(); |
duke@435 | 2177 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2178 | } else if (_n->is_Mem()) { |
duke@435 | 2179 | _next_idx = 0; |
duke@435 | 2180 | _end_idx = 0; |
duke@435 | 2181 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2182 | } else { |
duke@435 | 2183 | _next_idx = 1; |
duke@435 | 2184 | _end_idx = _n->req(); |
duke@435 | 2185 | _dep_next = NULL; |
duke@435 | 2186 | } |
duke@435 | 2187 | next(); |
duke@435 | 2188 | } |
duke@435 | 2189 | |
duke@435 | 2190 | //------------------------------next--------------------------- |
duke@435 | 2191 | void DepPreds::next() { |
duke@435 | 2192 | if (_dep_next != NULL) { |
duke@435 | 2193 | _current = _dep_next->pred()->node(); |
duke@435 | 2194 | _dep_next = _dep_next->next_in(); |
duke@435 | 2195 | } else if (_next_idx < _end_idx) { |
duke@435 | 2196 | _current = _n->in(_next_idx++); |
duke@435 | 2197 | } else { |
duke@435 | 2198 | _done = true; |
duke@435 | 2199 | } |
duke@435 | 2200 | } |
duke@435 | 2201 | |
duke@435 | 2202 | // =========================== DepSuccs ========================= |
duke@435 | 2203 | // Iterator over successor edges in the dependence graph. |
duke@435 | 2204 | |
duke@435 | 2205 | //------------------------------DepSuccs--------------------------- |
duke@435 | 2206 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { |
duke@435 | 2207 | _n = n; |
duke@435 | 2208 | _done = false; |
duke@435 | 2209 | if (_n->is_Load()) { |
duke@435 | 2210 | _next_idx = 0; |
duke@435 | 2211 | _end_idx = _n->outcnt(); |
duke@435 | 2212 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2213 | } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) { |
duke@435 | 2214 | _next_idx = 0; |
duke@435 | 2215 | _end_idx = 0; |
duke@435 | 2216 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2217 | } else { |
duke@435 | 2218 | _next_idx = 0; |
duke@435 | 2219 | _end_idx = _n->outcnt(); |
duke@435 | 2220 | _dep_next = NULL; |
duke@435 | 2221 | } |
duke@435 | 2222 | next(); |
duke@435 | 2223 | } |
duke@435 | 2224 | |
duke@435 | 2225 | //-------------------------------next--------------------------- |
duke@435 | 2226 | void DepSuccs::next() { |
duke@435 | 2227 | if (_dep_next != NULL) { |
duke@435 | 2228 | _current = _dep_next->succ()->node(); |
duke@435 | 2229 | _dep_next = _dep_next->next_out(); |
duke@435 | 2230 | } else if (_next_idx < _end_idx) { |
duke@435 | 2231 | _current = _n->raw_out(_next_idx++); |
duke@435 | 2232 | } else { |
duke@435 | 2233 | _done = true; |
duke@435 | 2234 | } |
duke@435 | 2235 | } |