Sat, 01 Dec 2007 00:00:00 +0000
Initial load
duke@435 | 1 | /* |
duke@435 | 2 | * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
duke@435 | 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
duke@435 | 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
duke@435 | 21 | * have any questions. |
duke@435 | 22 | */ |
duke@435 | 23 | |
duke@435 | 24 | #include "incls/_precompiled.incl" |
duke@435 | 25 | #include "incls/_superword.cpp.incl" |
duke@435 | 26 | |
duke@435 | 27 | // |
duke@435 | 28 | // S U P E R W O R D T R A N S F O R M |
duke@435 | 29 | //============================================================================= |
duke@435 | 30 | |
duke@435 | 31 | //------------------------------SuperWord--------------------------- |
duke@435 | 32 | SuperWord::SuperWord(PhaseIdealLoop* phase) : |
duke@435 | 33 | _phase(phase), |
duke@435 | 34 | _igvn(phase->_igvn), |
duke@435 | 35 | _arena(phase->C->comp_arena()), |
duke@435 | 36 | _packset(arena(), 8, 0, NULL), // packs for the current block |
duke@435 | 37 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb |
duke@435 | 38 | _block(arena(), 8, 0, NULL), // nodes in current block |
duke@435 | 39 | _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside |
duke@435 | 40 | _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads |
duke@435 | 41 | _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails |
duke@435 | 42 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node |
duke@435 | 43 | _align_to_ref(NULL), // memory reference to align vectors to |
duke@435 | 44 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs |
duke@435 | 45 | _dg(_arena), // dependence graph |
duke@435 | 46 | _visited(arena()), // visited node set |
duke@435 | 47 | _post_visited(arena()), // post visited node set |
duke@435 | 48 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs |
duke@435 | 49 | _stk(arena(), 8, 0, NULL), // scratch stack of nodes |
duke@435 | 50 | _nlist(arena(), 8, 0, NULL), // scratch list of nodes |
duke@435 | 51 | _lpt(NULL), // loop tree node |
duke@435 | 52 | _lp(NULL), // LoopNode |
duke@435 | 53 | _bb(NULL), // basic block |
duke@435 | 54 | _iv(NULL) // induction var |
duke@435 | 55 | {} |
duke@435 | 56 | |
duke@435 | 57 | //------------------------------transform_loop--------------------------- |
duke@435 | 58 | void SuperWord::transform_loop(IdealLoopTree* lpt) { |
duke@435 | 59 | assert(lpt->_head->is_CountedLoop(), "must be"); |
duke@435 | 60 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
duke@435 | 61 | |
duke@435 | 62 | if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops |
duke@435 | 63 | |
duke@435 | 64 | // Check for no control flow in body (other than exit) |
duke@435 | 65 | Node *cl_exit = cl->loopexit(); |
duke@435 | 66 | if (cl_exit->in(0) != lpt->_head) return; |
duke@435 | 67 | |
duke@435 | 68 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) |
duke@435 | 69 | CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
duke@435 | 70 | if (pre_end == NULL) return; |
duke@435 | 71 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 72 | if (pre_opaq1->Opcode() != Op_Opaque1) return; |
duke@435 | 73 | |
duke@435 | 74 | // Do vectors exist on this architecture? |
duke@435 | 75 | if (vector_width_in_bytes() == 0) return; |
duke@435 | 76 | |
duke@435 | 77 | init(); // initialize data structures |
duke@435 | 78 | |
duke@435 | 79 | set_lpt(lpt); |
duke@435 | 80 | set_lp(cl); |
duke@435 | 81 | |
duke@435 | 82 | // For now, define one block which is the entire loop body |
duke@435 | 83 | set_bb(cl); |
duke@435 | 84 | |
duke@435 | 85 | assert(_packset.length() == 0, "packset must be empty"); |
duke@435 | 86 | SLP_extract(); |
duke@435 | 87 | } |
duke@435 | 88 | |
duke@435 | 89 | //------------------------------SLP_extract--------------------------- |
duke@435 | 90 | // Extract the superword level parallelism |
duke@435 | 91 | // |
duke@435 | 92 | // 1) A reverse post-order of nodes in the block is constructed. By scanning |
duke@435 | 93 | // this list from first to last, all definitions are visited before their uses. |
duke@435 | 94 | // |
duke@435 | 95 | // 2) A point-to-point dependence graph is constructed between memory references. |
duke@435 | 96 | // This simplies the upcoming "independence" checker. |
duke@435 | 97 | // |
duke@435 | 98 | // 3) The maximum depth in the node graph from the beginning of the block |
duke@435 | 99 | // to each node is computed. This is used to prune the graph search |
duke@435 | 100 | // in the independence checker. |
duke@435 | 101 | // |
duke@435 | 102 | // 4) For integer types, the necessary bit width is propagated backwards |
duke@435 | 103 | // from stores to allow packed operations on byte, char, and short |
duke@435 | 104 | // integers. This reverses the promotion to type "int" that javac |
duke@435 | 105 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. |
duke@435 | 106 | // |
duke@435 | 107 | // 5) One of the memory references is picked to be an aligned vector reference. |
duke@435 | 108 | // The pre-loop trip count is adjusted to align this reference in the |
duke@435 | 109 | // unrolled body. |
duke@435 | 110 | // |
duke@435 | 111 | // 6) The initial set of pack pairs is seeded with memory references. |
duke@435 | 112 | // |
duke@435 | 113 | // 7) The set of pack pairs is extended by following use->def and def->use links. |
duke@435 | 114 | // |
duke@435 | 115 | // 8) The pairs are combined into vector sized packs. |
duke@435 | 116 | // |
duke@435 | 117 | // 9) Reorder the memory slices to co-locate members of the memory packs. |
duke@435 | 118 | // |
duke@435 | 119 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, |
duke@435 | 120 | // inserting scalar promotion, vector creation from multiple scalars, and |
duke@435 | 121 | // extraction of scalar values from vectors. |
duke@435 | 122 | // |
duke@435 | 123 | void SuperWord::SLP_extract() { |
duke@435 | 124 | |
duke@435 | 125 | // Ready the block |
duke@435 | 126 | |
duke@435 | 127 | construct_bb(); |
duke@435 | 128 | |
duke@435 | 129 | dependence_graph(); |
duke@435 | 130 | |
duke@435 | 131 | compute_max_depth(); |
duke@435 | 132 | |
duke@435 | 133 | compute_vector_element_type(); |
duke@435 | 134 | |
duke@435 | 135 | // Attempt vectorization |
duke@435 | 136 | |
duke@435 | 137 | find_adjacent_refs(); |
duke@435 | 138 | |
duke@435 | 139 | extend_packlist(); |
duke@435 | 140 | |
duke@435 | 141 | combine_packs(); |
duke@435 | 142 | |
duke@435 | 143 | construct_my_pack_map(); |
duke@435 | 144 | |
duke@435 | 145 | filter_packs(); |
duke@435 | 146 | |
duke@435 | 147 | schedule(); |
duke@435 | 148 | |
duke@435 | 149 | output(); |
duke@435 | 150 | } |
duke@435 | 151 | |
duke@435 | 152 | //------------------------------find_adjacent_refs--------------------------- |
duke@435 | 153 | // Find the adjacent memory references and create pack pairs for them. |
duke@435 | 154 | // This is the initial set of packs that will then be extended by |
duke@435 | 155 | // following use->def and def->use links. The align positions are |
duke@435 | 156 | // assigned relative to the reference "align_to_ref" |
duke@435 | 157 | void SuperWord::find_adjacent_refs() { |
duke@435 | 158 | // Get list of memory operations |
duke@435 | 159 | Node_List memops; |
duke@435 | 160 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 161 | Node* n = _block.at(i); |
duke@435 | 162 | if (n->is_Mem() && in_bb(n)) { |
duke@435 | 163 | int align = memory_alignment(n->as_Mem(), 0); |
duke@435 | 164 | if (align != bottom_align) { |
duke@435 | 165 | memops.push(n); |
duke@435 | 166 | } |
duke@435 | 167 | } |
duke@435 | 168 | } |
duke@435 | 169 | if (memops.size() == 0) return; |
duke@435 | 170 | |
duke@435 | 171 | // Find a memory reference to align to. The pre-loop trip count |
duke@435 | 172 | // is modified to align this reference to a vector-aligned address |
duke@435 | 173 | find_align_to_ref(memops); |
duke@435 | 174 | if (align_to_ref() == NULL) return; |
duke@435 | 175 | |
duke@435 | 176 | SWPointer align_to_ref_p(align_to_ref(), this); |
duke@435 | 177 | int offset = align_to_ref_p.offset_in_bytes(); |
duke@435 | 178 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 179 | int vw = vector_width_in_bytes(); |
duke@435 | 180 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
duke@435 | 181 | int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
duke@435 | 182 | |
duke@435 | 183 | #ifndef PRODUCT |
duke@435 | 184 | if (TraceSuperWord) |
duke@435 | 185 | tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d", |
duke@435 | 186 | offset, iv_adjustment, align_to_ref_p.memory_size()); |
duke@435 | 187 | #endif |
duke@435 | 188 | |
duke@435 | 189 | // Set alignment relative to "align_to_ref" |
duke@435 | 190 | for (int i = memops.size() - 1; i >= 0; i--) { |
duke@435 | 191 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 192 | SWPointer p2(s, this); |
duke@435 | 193 | if (p2.comparable(align_to_ref_p)) { |
duke@435 | 194 | int align = memory_alignment(s, iv_adjustment); |
duke@435 | 195 | set_alignment(s, align); |
duke@435 | 196 | } else { |
duke@435 | 197 | memops.remove(i); |
duke@435 | 198 | } |
duke@435 | 199 | } |
duke@435 | 200 | |
duke@435 | 201 | // Create initial pack pairs of memory operations |
duke@435 | 202 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 203 | Node* s1 = memops.at(i); |
duke@435 | 204 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 205 | Node* s2 = memops.at(j); |
duke@435 | 206 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
duke@435 | 207 | int align = alignment(s1); |
duke@435 | 208 | if (stmts_can_pack(s1, s2, align)) { |
duke@435 | 209 | Node_List* pair = new Node_List(); |
duke@435 | 210 | pair->push(s1); |
duke@435 | 211 | pair->push(s2); |
duke@435 | 212 | _packset.append(pair); |
duke@435 | 213 | } |
duke@435 | 214 | } |
duke@435 | 215 | } |
duke@435 | 216 | } |
duke@435 | 217 | |
duke@435 | 218 | #ifndef PRODUCT |
duke@435 | 219 | if (TraceSuperWord) { |
duke@435 | 220 | tty->print_cr("\nAfter find_adjacent_refs"); |
duke@435 | 221 | print_packset(); |
duke@435 | 222 | } |
duke@435 | 223 | #endif |
duke@435 | 224 | } |
duke@435 | 225 | |
duke@435 | 226 | //------------------------------find_align_to_ref--------------------------- |
duke@435 | 227 | // Find a memory reference to align the loop induction variable to. |
duke@435 | 228 | // Looks first at stores then at loads, looking for a memory reference |
duke@435 | 229 | // with the largest number of references similar to it. |
duke@435 | 230 | void SuperWord::find_align_to_ref(Node_List &memops) { |
duke@435 | 231 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
duke@435 | 232 | |
duke@435 | 233 | // Count number of comparable memory ops |
duke@435 | 234 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 235 | MemNode* s1 = memops.at(i)->as_Mem(); |
duke@435 | 236 | SWPointer p1(s1, this); |
duke@435 | 237 | // Discard if pre loop can't align this reference |
duke@435 | 238 | if (!ref_is_alignable(p1)) { |
duke@435 | 239 | *cmp_ct.adr_at(i) = 0; |
duke@435 | 240 | continue; |
duke@435 | 241 | } |
duke@435 | 242 | for (uint j = i+1; j < memops.size(); j++) { |
duke@435 | 243 | MemNode* s2 = memops.at(j)->as_Mem(); |
duke@435 | 244 | if (isomorphic(s1, s2)) { |
duke@435 | 245 | SWPointer p2(s2, this); |
duke@435 | 246 | if (p1.comparable(p2)) { |
duke@435 | 247 | (*cmp_ct.adr_at(i))++; |
duke@435 | 248 | (*cmp_ct.adr_at(j))++; |
duke@435 | 249 | } |
duke@435 | 250 | } |
duke@435 | 251 | } |
duke@435 | 252 | } |
duke@435 | 253 | |
duke@435 | 254 | // Find Store (or Load) with the greatest number of "comparable" references |
duke@435 | 255 | int max_ct = 0; |
duke@435 | 256 | int max_idx = -1; |
duke@435 | 257 | int min_size = max_jint; |
duke@435 | 258 | int min_iv_offset = max_jint; |
duke@435 | 259 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 260 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 261 | if (s->is_Store()) { |
duke@435 | 262 | SWPointer p(s, this); |
duke@435 | 263 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 264 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 265 | data_size(s) == min_size && |
duke@435 | 266 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 267 | max_ct = cmp_ct.at(j); |
duke@435 | 268 | max_idx = j; |
duke@435 | 269 | min_size = data_size(s); |
duke@435 | 270 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 271 | } |
duke@435 | 272 | } |
duke@435 | 273 | } |
duke@435 | 274 | // If no stores, look at loads |
duke@435 | 275 | if (max_ct == 0) { |
duke@435 | 276 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 277 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 278 | if (s->is_Load()) { |
duke@435 | 279 | SWPointer p(s, this); |
duke@435 | 280 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 281 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 282 | data_size(s) == min_size && |
duke@435 | 283 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 284 | max_ct = cmp_ct.at(j); |
duke@435 | 285 | max_idx = j; |
duke@435 | 286 | min_size = data_size(s); |
duke@435 | 287 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 288 | } |
duke@435 | 289 | } |
duke@435 | 290 | } |
duke@435 | 291 | } |
duke@435 | 292 | |
duke@435 | 293 | if (max_ct > 0) |
duke@435 | 294 | set_align_to_ref(memops.at(max_idx)->as_Mem()); |
duke@435 | 295 | |
duke@435 | 296 | #ifndef PRODUCT |
duke@435 | 297 | if (TraceSuperWord && Verbose) { |
duke@435 | 298 | tty->print_cr("\nVector memops after find_align_to_refs"); |
duke@435 | 299 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 300 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 301 | s->dump(); |
duke@435 | 302 | } |
duke@435 | 303 | } |
duke@435 | 304 | #endif |
duke@435 | 305 | } |
duke@435 | 306 | |
duke@435 | 307 | //------------------------------ref_is_alignable--------------------------- |
duke@435 | 308 | // Can the preloop align the reference to position zero in the vector? |
duke@435 | 309 | bool SuperWord::ref_is_alignable(SWPointer& p) { |
duke@435 | 310 | if (!p.has_iv()) { |
duke@435 | 311 | return true; // no induction variable |
duke@435 | 312 | } |
duke@435 | 313 | CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop()); |
duke@435 | 314 | assert(pre_end->stride_is_con(), "pre loop stride is constant"); |
duke@435 | 315 | int preloop_stride = pre_end->stride_con(); |
duke@435 | 316 | |
duke@435 | 317 | int span = preloop_stride * p.scale_in_bytes(); |
duke@435 | 318 | |
duke@435 | 319 | // Stride one accesses are alignable. |
duke@435 | 320 | if (ABS(span) == p.memory_size()) |
duke@435 | 321 | return true; |
duke@435 | 322 | |
duke@435 | 323 | // If initial offset from start of object is computable, |
duke@435 | 324 | // compute alignment within the vector. |
duke@435 | 325 | int vw = vector_width_in_bytes(); |
duke@435 | 326 | if (vw % span == 0) { |
duke@435 | 327 | Node* init_nd = pre_end->init_trip(); |
duke@435 | 328 | if (init_nd->is_Con() && p.invar() == NULL) { |
duke@435 | 329 | int init = init_nd->bottom_type()->is_int()->get_con(); |
duke@435 | 330 | |
duke@435 | 331 | int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes(); |
duke@435 | 332 | assert(init_offset >= 0, "positive offset from object start"); |
duke@435 | 333 | |
duke@435 | 334 | if (span > 0) { |
duke@435 | 335 | return (vw - (init_offset % vw)) % span == 0; |
duke@435 | 336 | } else { |
duke@435 | 337 | assert(span < 0, "nonzero stride * scale"); |
duke@435 | 338 | return (init_offset % vw) % -span == 0; |
duke@435 | 339 | } |
duke@435 | 340 | } |
duke@435 | 341 | } |
duke@435 | 342 | return false; |
duke@435 | 343 | } |
duke@435 | 344 | |
duke@435 | 345 | //---------------------------dependence_graph--------------------------- |
duke@435 | 346 | // Construct dependency graph. |
duke@435 | 347 | // Add dependence edges to load/store nodes for memory dependence |
duke@435 | 348 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) |
duke@435 | 349 | void SuperWord::dependence_graph() { |
duke@435 | 350 | // First, assign a dependence node to each memory node |
duke@435 | 351 | for (int i = 0; i < _block.length(); i++ ) { |
duke@435 | 352 | Node *n = _block.at(i); |
duke@435 | 353 | if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) { |
duke@435 | 354 | _dg.make_node(n); |
duke@435 | 355 | } |
duke@435 | 356 | } |
duke@435 | 357 | |
duke@435 | 358 | // For each memory slice, create the dependences |
duke@435 | 359 | for (int i = 0; i < _mem_slice_head.length(); i++) { |
duke@435 | 360 | Node* n = _mem_slice_head.at(i); |
duke@435 | 361 | Node* n_tail = _mem_slice_tail.at(i); |
duke@435 | 362 | |
duke@435 | 363 | // Get slice in predecessor order (last is first) |
duke@435 | 364 | mem_slice_preds(n_tail, n, _nlist); |
duke@435 | 365 | |
duke@435 | 366 | // Make the slice dependent on the root |
duke@435 | 367 | DepMem* slice = _dg.dep(n); |
duke@435 | 368 | _dg.make_edge(_dg.root(), slice); |
duke@435 | 369 | |
duke@435 | 370 | // Create a sink for the slice |
duke@435 | 371 | DepMem* slice_sink = _dg.make_node(NULL); |
duke@435 | 372 | _dg.make_edge(slice_sink, _dg.tail()); |
duke@435 | 373 | |
duke@435 | 374 | // Now visit each pair of memory ops, creating the edges |
duke@435 | 375 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { |
duke@435 | 376 | Node* s1 = _nlist.at(j); |
duke@435 | 377 | |
duke@435 | 378 | // If no dependency yet, use slice |
duke@435 | 379 | if (_dg.dep(s1)->in_cnt() == 0) { |
duke@435 | 380 | _dg.make_edge(slice, s1); |
duke@435 | 381 | } |
duke@435 | 382 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 383 | bool sink_dependent = true; |
duke@435 | 384 | for (int k = j - 1; k >= 0; k--) { |
duke@435 | 385 | Node* s2 = _nlist.at(k); |
duke@435 | 386 | if (s1->is_Load() && s2->is_Load()) |
duke@435 | 387 | continue; |
duke@435 | 388 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 389 | |
duke@435 | 390 | int cmp = p1.cmp(p2); |
duke@435 | 391 | if (SuperWordRTDepCheck && |
duke@435 | 392 | p1.base() != p2.base() && p1.valid() && p2.valid()) { |
duke@435 | 393 | // Create a runtime check to disambiguate |
duke@435 | 394 | OrderedPair pp(p1.base(), p2.base()); |
duke@435 | 395 | _disjoint_ptrs.append_if_missing(pp); |
duke@435 | 396 | } else if (!SWPointer::not_equal(cmp)) { |
duke@435 | 397 | // Possibly same address |
duke@435 | 398 | _dg.make_edge(s1, s2); |
duke@435 | 399 | sink_dependent = false; |
duke@435 | 400 | } |
duke@435 | 401 | } |
duke@435 | 402 | if (sink_dependent) { |
duke@435 | 403 | _dg.make_edge(s1, slice_sink); |
duke@435 | 404 | } |
duke@435 | 405 | } |
duke@435 | 406 | #ifndef PRODUCT |
duke@435 | 407 | if (TraceSuperWord) { |
duke@435 | 408 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
duke@435 | 409 | for (int q = 0; q < _nlist.length(); q++) { |
duke@435 | 410 | _dg.print(_nlist.at(q)); |
duke@435 | 411 | } |
duke@435 | 412 | tty->cr(); |
duke@435 | 413 | } |
duke@435 | 414 | #endif |
duke@435 | 415 | _nlist.clear(); |
duke@435 | 416 | } |
duke@435 | 417 | |
duke@435 | 418 | #ifndef PRODUCT |
duke@435 | 419 | if (TraceSuperWord) { |
duke@435 | 420 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
duke@435 | 421 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
duke@435 | 422 | _disjoint_ptrs.at(r).print(); |
duke@435 | 423 | tty->cr(); |
duke@435 | 424 | } |
duke@435 | 425 | tty->cr(); |
duke@435 | 426 | } |
duke@435 | 427 | #endif |
duke@435 | 428 | } |
duke@435 | 429 | |
duke@435 | 430 | //---------------------------mem_slice_preds--------------------------- |
duke@435 | 431 | // Return a memory slice (node list) in predecessor order starting at "start" |
duke@435 | 432 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
duke@435 | 433 | assert(preds.length() == 0, "start empty"); |
duke@435 | 434 | Node* n = start; |
duke@435 | 435 | Node* prev = NULL; |
duke@435 | 436 | while (true) { |
duke@435 | 437 | assert(in_bb(n), "must be in block"); |
duke@435 | 438 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 439 | Node* out = n->fast_out(i); |
duke@435 | 440 | if (out->is_Load()) { |
duke@435 | 441 | if (in_bb(out)) { |
duke@435 | 442 | preds.push(out); |
duke@435 | 443 | } |
duke@435 | 444 | } else { |
duke@435 | 445 | // FIXME |
duke@435 | 446 | if (out->is_MergeMem() && !in_bb(out)) { |
duke@435 | 447 | // Either unrolling is causing a memory edge not to disappear, |
duke@435 | 448 | // or need to run igvn.optimize() again before SLP |
duke@435 | 449 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { |
duke@435 | 450 | // Ditto. Not sure what else to check further. |
duke@435 | 451 | } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) { |
duke@435 | 452 | // StoreCM has an input edge used as a precedence edge. |
duke@435 | 453 | // Maybe an issue when oop stores are vectorized. |
duke@435 | 454 | } else { |
duke@435 | 455 | assert(out == prev || prev == NULL, "no branches off of store slice"); |
duke@435 | 456 | } |
duke@435 | 457 | } |
duke@435 | 458 | } |
duke@435 | 459 | if (n == stop) break; |
duke@435 | 460 | preds.push(n); |
duke@435 | 461 | prev = n; |
duke@435 | 462 | n = n->in(MemNode::Memory); |
duke@435 | 463 | } |
duke@435 | 464 | } |
duke@435 | 465 | |
duke@435 | 466 | //------------------------------stmts_can_pack--------------------------- |
duke@435 | 467 | // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and |
duke@435 | 468 | // s1 aligned at "align" |
duke@435 | 469 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
duke@435 | 470 | if (isomorphic(s1, s2)) { |
duke@435 | 471 | if (independent(s1, s2)) { |
duke@435 | 472 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
duke@435 | 473 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
duke@435 | 474 | int s1_align = alignment(s1); |
duke@435 | 475 | int s2_align = alignment(s2); |
duke@435 | 476 | if (s1_align == top_align || s1_align == align) { |
duke@435 | 477 | if (s2_align == top_align || s2_align == align + data_size(s1)) { |
duke@435 | 478 | return true; |
duke@435 | 479 | } |
duke@435 | 480 | } |
duke@435 | 481 | } |
duke@435 | 482 | } |
duke@435 | 483 | } |
duke@435 | 484 | } |
duke@435 | 485 | return false; |
duke@435 | 486 | } |
duke@435 | 487 | |
duke@435 | 488 | //------------------------------exists_at--------------------------- |
duke@435 | 489 | // Does s exist in a pack at position pos? |
duke@435 | 490 | bool SuperWord::exists_at(Node* s, uint pos) { |
duke@435 | 491 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 492 | Node_List* p = _packset.at(i); |
duke@435 | 493 | if (p->at(pos) == s) { |
duke@435 | 494 | return true; |
duke@435 | 495 | } |
duke@435 | 496 | } |
duke@435 | 497 | return false; |
duke@435 | 498 | } |
duke@435 | 499 | |
duke@435 | 500 | //------------------------------are_adjacent_refs--------------------------- |
duke@435 | 501 | // Is s1 immediately before s2 in memory? |
duke@435 | 502 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { |
duke@435 | 503 | if (!s1->is_Mem() || !s2->is_Mem()) return false; |
duke@435 | 504 | if (!in_bb(s1) || !in_bb(s2)) return false; |
duke@435 | 505 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so |
duke@435 | 506 | // only pack memops that are in the same alias set until that's fixed. |
duke@435 | 507 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != |
duke@435 | 508 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) |
duke@435 | 509 | return false; |
duke@435 | 510 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 511 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 512 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; |
duke@435 | 513 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); |
duke@435 | 514 | return diff == data_size(s1); |
duke@435 | 515 | } |
duke@435 | 516 | |
duke@435 | 517 | //------------------------------isomorphic--------------------------- |
duke@435 | 518 | // Are s1 and s2 similar? |
duke@435 | 519 | bool SuperWord::isomorphic(Node* s1, Node* s2) { |
duke@435 | 520 | if (s1->Opcode() != s2->Opcode()) return false; |
duke@435 | 521 | if (s1->req() != s2->req()) return false; |
duke@435 | 522 | if (s1->in(0) != s2->in(0)) return false; |
duke@435 | 523 | if (velt_type(s1) != velt_type(s2)) return false; |
duke@435 | 524 | return true; |
duke@435 | 525 | } |
duke@435 | 526 | |
duke@435 | 527 | //------------------------------independent--------------------------- |
duke@435 | 528 | // Is there no data path from s1 to s2 or s2 to s1? |
duke@435 | 529 | bool SuperWord::independent(Node* s1, Node* s2) { |
duke@435 | 530 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); |
duke@435 | 531 | int d1 = depth(s1); |
duke@435 | 532 | int d2 = depth(s2); |
duke@435 | 533 | if (d1 == d2) return s1 != s2; |
duke@435 | 534 | Node* deep = d1 > d2 ? s1 : s2; |
duke@435 | 535 | Node* shallow = d1 > d2 ? s2 : s1; |
duke@435 | 536 | |
duke@435 | 537 | visited_clear(); |
duke@435 | 538 | |
duke@435 | 539 | return independent_path(shallow, deep); |
duke@435 | 540 | } |
duke@435 | 541 | |
duke@435 | 542 | //------------------------------independent_path------------------------------ |
duke@435 | 543 | // Helper for independent |
duke@435 | 544 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { |
duke@435 | 545 | if (dp >= 1000) return false; // stop deep recursion |
duke@435 | 546 | visited_set(deep); |
duke@435 | 547 | int shal_depth = depth(shallow); |
duke@435 | 548 | assert(shal_depth <= depth(deep), "must be"); |
duke@435 | 549 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { |
duke@435 | 550 | Node* pred = preds.current(); |
duke@435 | 551 | if (in_bb(pred) && !visited_test(pred)) { |
duke@435 | 552 | if (shallow == pred) { |
duke@435 | 553 | return false; |
duke@435 | 554 | } |
duke@435 | 555 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { |
duke@435 | 556 | return false; |
duke@435 | 557 | } |
duke@435 | 558 | } |
duke@435 | 559 | } |
duke@435 | 560 | return true; |
duke@435 | 561 | } |
duke@435 | 562 | |
duke@435 | 563 | //------------------------------set_alignment--------------------------- |
duke@435 | 564 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
duke@435 | 565 | set_alignment(s1, align); |
duke@435 | 566 | set_alignment(s2, align + data_size(s1)); |
duke@435 | 567 | } |
duke@435 | 568 | |
duke@435 | 569 | //------------------------------data_size--------------------------- |
duke@435 | 570 | int SuperWord::data_size(Node* s) { |
duke@435 | 571 | const Type* t = velt_type(s); |
duke@435 | 572 | BasicType bt = t->array_element_basic_type(); |
duke@435 | 573 | int bsize = type2aelembytes[bt]; |
duke@435 | 574 | assert(bsize != 0, "valid size"); |
duke@435 | 575 | return bsize; |
duke@435 | 576 | } |
duke@435 | 577 | |
duke@435 | 578 | //------------------------------extend_packlist--------------------------- |
duke@435 | 579 | // Extend packset by following use->def and def->use links from pack members. |
duke@435 | 580 | void SuperWord::extend_packlist() { |
duke@435 | 581 | bool changed; |
duke@435 | 582 | do { |
duke@435 | 583 | changed = false; |
duke@435 | 584 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 585 | Node_List* p = _packset.at(i); |
duke@435 | 586 | changed |= follow_use_defs(p); |
duke@435 | 587 | changed |= follow_def_uses(p); |
duke@435 | 588 | } |
duke@435 | 589 | } while (changed); |
duke@435 | 590 | |
duke@435 | 591 | #ifndef PRODUCT |
duke@435 | 592 | if (TraceSuperWord) { |
duke@435 | 593 | tty->print_cr("\nAfter extend_packlist"); |
duke@435 | 594 | print_packset(); |
duke@435 | 595 | } |
duke@435 | 596 | #endif |
duke@435 | 597 | } |
duke@435 | 598 | |
duke@435 | 599 | //------------------------------follow_use_defs--------------------------- |
duke@435 | 600 | // Extend the packset by visiting operand definitions of nodes in pack p |
duke@435 | 601 | bool SuperWord::follow_use_defs(Node_List* p) { |
duke@435 | 602 | Node* s1 = p->at(0); |
duke@435 | 603 | Node* s2 = p->at(1); |
duke@435 | 604 | assert(p->size() == 2, "just checking"); |
duke@435 | 605 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 606 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 607 | |
duke@435 | 608 | if (s1->is_Load()) return false; |
duke@435 | 609 | |
duke@435 | 610 | int align = alignment(s1); |
duke@435 | 611 | bool changed = false; |
duke@435 | 612 | int start = s1->is_Store() ? MemNode::ValueIn : 1; |
duke@435 | 613 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); |
duke@435 | 614 | for (int j = start; j < end; j++) { |
duke@435 | 615 | Node* t1 = s1->in(j); |
duke@435 | 616 | Node* t2 = s2->in(j); |
duke@435 | 617 | if (!in_bb(t1) || !in_bb(t2)) |
duke@435 | 618 | continue; |
duke@435 | 619 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 620 | if (est_savings(t1, t2) >= 0) { |
duke@435 | 621 | Node_List* pair = new Node_List(); |
duke@435 | 622 | pair->push(t1); |
duke@435 | 623 | pair->push(t2); |
duke@435 | 624 | _packset.append(pair); |
duke@435 | 625 | set_alignment(t1, t2, align); |
duke@435 | 626 | changed = true; |
duke@435 | 627 | } |
duke@435 | 628 | } |
duke@435 | 629 | } |
duke@435 | 630 | return changed; |
duke@435 | 631 | } |
duke@435 | 632 | |
duke@435 | 633 | //------------------------------follow_def_uses--------------------------- |
duke@435 | 634 | // Extend the packset by visiting uses of nodes in pack p |
duke@435 | 635 | bool SuperWord::follow_def_uses(Node_List* p) { |
duke@435 | 636 | bool changed = false; |
duke@435 | 637 | Node* s1 = p->at(0); |
duke@435 | 638 | Node* s2 = p->at(1); |
duke@435 | 639 | assert(p->size() == 2, "just checking"); |
duke@435 | 640 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 641 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 642 | |
duke@435 | 643 | if (s1->is_Store()) return false; |
duke@435 | 644 | |
duke@435 | 645 | int align = alignment(s1); |
duke@435 | 646 | int savings = -1; |
duke@435 | 647 | Node* u1 = NULL; |
duke@435 | 648 | Node* u2 = NULL; |
duke@435 | 649 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 650 | Node* t1 = s1->fast_out(i); |
duke@435 | 651 | if (!in_bb(t1)) continue; |
duke@435 | 652 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 653 | Node* t2 = s2->fast_out(j); |
duke@435 | 654 | if (!in_bb(t2)) continue; |
duke@435 | 655 | if (!opnd_positions_match(s1, t1, s2, t2)) |
duke@435 | 656 | continue; |
duke@435 | 657 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 658 | int my_savings = est_savings(t1, t2); |
duke@435 | 659 | if (my_savings > savings) { |
duke@435 | 660 | savings = my_savings; |
duke@435 | 661 | u1 = t1; |
duke@435 | 662 | u2 = t2; |
duke@435 | 663 | } |
duke@435 | 664 | } |
duke@435 | 665 | } |
duke@435 | 666 | } |
duke@435 | 667 | if (savings >= 0) { |
duke@435 | 668 | Node_List* pair = new Node_List(); |
duke@435 | 669 | pair->push(u1); |
duke@435 | 670 | pair->push(u2); |
duke@435 | 671 | _packset.append(pair); |
duke@435 | 672 | set_alignment(u1, u2, align); |
duke@435 | 673 | changed = true; |
duke@435 | 674 | } |
duke@435 | 675 | return changed; |
duke@435 | 676 | } |
duke@435 | 677 | |
duke@435 | 678 | //---------------------------opnd_positions_match------------------------- |
duke@435 | 679 | // Is the use of d1 in u1 at the same operand position as d2 in u2? |
duke@435 | 680 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { |
duke@435 | 681 | uint ct = u1->req(); |
duke@435 | 682 | if (ct != u2->req()) return false; |
duke@435 | 683 | uint i1 = 0; |
duke@435 | 684 | uint i2 = 0; |
duke@435 | 685 | do { |
duke@435 | 686 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
duke@435 | 687 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
duke@435 | 688 | if (i1 != i2) { |
duke@435 | 689 | return false; |
duke@435 | 690 | } |
duke@435 | 691 | } while (i1 < ct); |
duke@435 | 692 | return true; |
duke@435 | 693 | } |
duke@435 | 694 | |
duke@435 | 695 | //------------------------------est_savings--------------------------- |
duke@435 | 696 | // Estimate the savings from executing s1 and s2 as a pack |
duke@435 | 697 | int SuperWord::est_savings(Node* s1, Node* s2) { |
duke@435 | 698 | int save = 2 - 1; // 2 operations per instruction in packed form |
duke@435 | 699 | |
duke@435 | 700 | // inputs |
duke@435 | 701 | for (uint i = 1; i < s1->req(); i++) { |
duke@435 | 702 | Node* x1 = s1->in(i); |
duke@435 | 703 | Node* x2 = s2->in(i); |
duke@435 | 704 | if (x1 != x2) { |
duke@435 | 705 | if (are_adjacent_refs(x1, x2)) { |
duke@435 | 706 | save += adjacent_profit(x1, x2); |
duke@435 | 707 | } else if (!in_packset(x1, x2)) { |
duke@435 | 708 | save -= pack_cost(2); |
duke@435 | 709 | } else { |
duke@435 | 710 | save += unpack_cost(2); |
duke@435 | 711 | } |
duke@435 | 712 | } |
duke@435 | 713 | } |
duke@435 | 714 | |
duke@435 | 715 | // uses of result |
duke@435 | 716 | uint ct = 0; |
duke@435 | 717 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 718 | Node* s1_use = s1->fast_out(i); |
duke@435 | 719 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 720 | Node_List* p = _packset.at(j); |
duke@435 | 721 | if (p->at(0) == s1_use) { |
duke@435 | 722 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 723 | Node* s2_use = s2->fast_out(k); |
duke@435 | 724 | if (p->at(p->size()-1) == s2_use) { |
duke@435 | 725 | ct++; |
duke@435 | 726 | if (are_adjacent_refs(s1_use, s2_use)) { |
duke@435 | 727 | save += adjacent_profit(s1_use, s2_use); |
duke@435 | 728 | } |
duke@435 | 729 | } |
duke@435 | 730 | } |
duke@435 | 731 | } |
duke@435 | 732 | } |
duke@435 | 733 | } |
duke@435 | 734 | |
duke@435 | 735 | if (ct < s1->outcnt()) save += unpack_cost(1); |
duke@435 | 736 | if (ct < s2->outcnt()) save += unpack_cost(1); |
duke@435 | 737 | |
duke@435 | 738 | return save; |
duke@435 | 739 | } |
duke@435 | 740 | |
duke@435 | 741 | //------------------------------costs--------------------------- |
duke@435 | 742 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
duke@435 | 743 | int SuperWord::pack_cost(int ct) { return ct; } |
duke@435 | 744 | int SuperWord::unpack_cost(int ct) { return ct; } |
duke@435 | 745 | |
duke@435 | 746 | //------------------------------combine_packs--------------------------- |
duke@435 | 747 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
duke@435 | 748 | void SuperWord::combine_packs() { |
duke@435 | 749 | bool changed; |
duke@435 | 750 | do { |
duke@435 | 751 | changed = false; |
duke@435 | 752 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 753 | Node_List* p1 = _packset.at(i); |
duke@435 | 754 | if (p1 == NULL) continue; |
duke@435 | 755 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 756 | Node_List* p2 = _packset.at(j); |
duke@435 | 757 | if (p2 == NULL) continue; |
duke@435 | 758 | if (p1->at(p1->size()-1) == p2->at(0)) { |
duke@435 | 759 | for (uint k = 1; k < p2->size(); k++) { |
duke@435 | 760 | p1->push(p2->at(k)); |
duke@435 | 761 | } |
duke@435 | 762 | _packset.at_put(j, NULL); |
duke@435 | 763 | changed = true; |
duke@435 | 764 | } |
duke@435 | 765 | } |
duke@435 | 766 | } |
duke@435 | 767 | } while (changed); |
duke@435 | 768 | |
duke@435 | 769 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 770 | Node_List* p1 = _packset.at(i); |
duke@435 | 771 | if (p1 == NULL) { |
duke@435 | 772 | _packset.remove_at(i); |
duke@435 | 773 | } |
duke@435 | 774 | } |
duke@435 | 775 | |
duke@435 | 776 | #ifndef PRODUCT |
duke@435 | 777 | if (TraceSuperWord) { |
duke@435 | 778 | tty->print_cr("\nAfter combine_packs"); |
duke@435 | 779 | print_packset(); |
duke@435 | 780 | } |
duke@435 | 781 | #endif |
duke@435 | 782 | } |
duke@435 | 783 | |
duke@435 | 784 | //-----------------------------construct_my_pack_map-------------------------- |
duke@435 | 785 | // Construct the map from nodes to packs. Only valid after the |
duke@435 | 786 | // point where a node is only in one pack (after combine_packs). |
duke@435 | 787 | void SuperWord::construct_my_pack_map() { |
duke@435 | 788 | Node_List* rslt = NULL; |
duke@435 | 789 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 790 | Node_List* p = _packset.at(i); |
duke@435 | 791 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 792 | Node* s = p->at(j); |
duke@435 | 793 | assert(my_pack(s) == NULL, "only in one pack"); |
duke@435 | 794 | set_my_pack(s, p); |
duke@435 | 795 | } |
duke@435 | 796 | } |
duke@435 | 797 | } |
duke@435 | 798 | |
duke@435 | 799 | //------------------------------filter_packs--------------------------- |
duke@435 | 800 | // Remove packs that are not implemented or not profitable. |
duke@435 | 801 | void SuperWord::filter_packs() { |
duke@435 | 802 | |
duke@435 | 803 | // Remove packs that are not implemented |
duke@435 | 804 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 805 | Node_List* pk = _packset.at(i); |
duke@435 | 806 | bool impl = implemented(pk); |
duke@435 | 807 | if (!impl) { |
duke@435 | 808 | #ifndef PRODUCT |
duke@435 | 809 | if (TraceSuperWord && Verbose) { |
duke@435 | 810 | tty->print_cr("Unimplemented"); |
duke@435 | 811 | pk->at(0)->dump(); |
duke@435 | 812 | } |
duke@435 | 813 | #endif |
duke@435 | 814 | remove_pack_at(i); |
duke@435 | 815 | } |
duke@435 | 816 | } |
duke@435 | 817 | |
duke@435 | 818 | // Remove packs that are not profitable |
duke@435 | 819 | bool changed; |
duke@435 | 820 | do { |
duke@435 | 821 | changed = false; |
duke@435 | 822 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 823 | Node_List* pk = _packset.at(i); |
duke@435 | 824 | bool prof = profitable(pk); |
duke@435 | 825 | if (!prof) { |
duke@435 | 826 | #ifndef PRODUCT |
duke@435 | 827 | if (TraceSuperWord && Verbose) { |
duke@435 | 828 | tty->print_cr("Unprofitable"); |
duke@435 | 829 | pk->at(0)->dump(); |
duke@435 | 830 | } |
duke@435 | 831 | #endif |
duke@435 | 832 | remove_pack_at(i); |
duke@435 | 833 | changed = true; |
duke@435 | 834 | } |
duke@435 | 835 | } |
duke@435 | 836 | } while (changed); |
duke@435 | 837 | |
duke@435 | 838 | #ifndef PRODUCT |
duke@435 | 839 | if (TraceSuperWord) { |
duke@435 | 840 | tty->print_cr("\nAfter filter_packs"); |
duke@435 | 841 | print_packset(); |
duke@435 | 842 | tty->cr(); |
duke@435 | 843 | } |
duke@435 | 844 | #endif |
duke@435 | 845 | } |
duke@435 | 846 | |
duke@435 | 847 | //------------------------------implemented--------------------------- |
duke@435 | 848 | // Can code be generated for pack p? |
duke@435 | 849 | bool SuperWord::implemented(Node_List* p) { |
duke@435 | 850 | Node* p0 = p->at(0); |
duke@435 | 851 | int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); |
duke@435 | 852 | return vopc > 0 && Matcher::has_match_rule(vopc); |
duke@435 | 853 | } |
duke@435 | 854 | |
duke@435 | 855 | //------------------------------profitable--------------------------- |
duke@435 | 856 | // For pack p, are all operands and all uses (with in the block) vector? |
duke@435 | 857 | bool SuperWord::profitable(Node_List* p) { |
duke@435 | 858 | Node* p0 = p->at(0); |
duke@435 | 859 | uint start, end; |
duke@435 | 860 | vector_opd_range(p0, &start, &end); |
duke@435 | 861 | |
duke@435 | 862 | // Return false if some input is not vector and inside block |
duke@435 | 863 | for (uint i = start; i < end; i++) { |
duke@435 | 864 | if (!is_vector_use(p0, i)) { |
duke@435 | 865 | // For now, return false if not scalar promotion case (inputs are the same.) |
duke@435 | 866 | // Later, implement PackNode and allow differring, non-vector inputs |
duke@435 | 867 | // (maybe just the ones from outside the block.) |
duke@435 | 868 | Node* p0_def = p0->in(i); |
duke@435 | 869 | for (uint j = 1; j < p->size(); j++) { |
duke@435 | 870 | Node* use = p->at(j); |
duke@435 | 871 | Node* def = use->in(i); |
duke@435 | 872 | if (p0_def != def) |
duke@435 | 873 | return false; |
duke@435 | 874 | } |
duke@435 | 875 | } |
duke@435 | 876 | } |
duke@435 | 877 | if (!p0->is_Store()) { |
duke@435 | 878 | // For now, return false if not all uses are vector. |
duke@435 | 879 | // Later, implement ExtractNode and allow non-vector uses (maybe |
duke@435 | 880 | // just the ones outside the block.) |
duke@435 | 881 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 882 | Node* def = p->at(i); |
duke@435 | 883 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 884 | Node* use = def->fast_out(j); |
duke@435 | 885 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 886 | Node* n = use->in(k); |
duke@435 | 887 | if (def == n) { |
duke@435 | 888 | if (!is_vector_use(use, k)) { |
duke@435 | 889 | return false; |
duke@435 | 890 | } |
duke@435 | 891 | } |
duke@435 | 892 | } |
duke@435 | 893 | } |
duke@435 | 894 | } |
duke@435 | 895 | } |
duke@435 | 896 | return true; |
duke@435 | 897 | } |
duke@435 | 898 | |
duke@435 | 899 | //------------------------------schedule--------------------------- |
duke@435 | 900 | // Adjust the memory graph for the packed operations |
duke@435 | 901 | void SuperWord::schedule() { |
duke@435 | 902 | |
duke@435 | 903 | // Co-locate in the memory graph the members of each memory pack |
duke@435 | 904 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 905 | co_locate_pack(_packset.at(i)); |
duke@435 | 906 | } |
duke@435 | 907 | } |
duke@435 | 908 | |
duke@435 | 909 | //------------------------------co_locate_pack--------------------------- |
duke@435 | 910 | // Within a pack, move stores down to the last executed store, |
duke@435 | 911 | // and move loads up to the first executed load. |
duke@435 | 912 | void SuperWord::co_locate_pack(Node_List* pk) { |
duke@435 | 913 | if (pk->at(0)->is_Store()) { |
duke@435 | 914 | // Push Stores down towards last executed pack member |
duke@435 | 915 | MemNode* first = executed_first(pk)->as_Mem(); |
duke@435 | 916 | MemNode* last = executed_last(pk)->as_Mem(); |
duke@435 | 917 | MemNode* insert_pt = last; |
duke@435 | 918 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); |
duke@435 | 919 | while (true) { |
duke@435 | 920 | assert(in_bb(current), "stay in block"); |
duke@435 | 921 | Node* my_mem = current->in(MemNode::Memory); |
duke@435 | 922 | if (in_pack(current, pk)) { |
duke@435 | 923 | // Forward users of my memory state to my input memory state |
duke@435 | 924 | _igvn.hash_delete(current); |
duke@435 | 925 | _igvn.hash_delete(my_mem); |
duke@435 | 926 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
duke@435 | 927 | Node* use = current->out(i); |
duke@435 | 928 | if (use->is_Mem()) { |
duke@435 | 929 | assert(use->in(MemNode::Memory) == current, "must be"); |
duke@435 | 930 | _igvn.hash_delete(use); |
duke@435 | 931 | use->set_req(MemNode::Memory, my_mem); |
duke@435 | 932 | _igvn._worklist.push(use); |
duke@435 | 933 | --i; // deleted this edge; rescan position |
duke@435 | 934 | } |
duke@435 | 935 | } |
duke@435 | 936 | // put current immediately before insert_pt |
duke@435 | 937 | current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory)); |
duke@435 | 938 | _igvn.hash_delete(insert_pt); |
duke@435 | 939 | insert_pt->set_req(MemNode::Memory, current); |
duke@435 | 940 | _igvn._worklist.push(insert_pt); |
duke@435 | 941 | _igvn._worklist.push(current); |
duke@435 | 942 | insert_pt = current; |
duke@435 | 943 | } |
duke@435 | 944 | if (current == first) break; |
duke@435 | 945 | current = my_mem->as_Mem(); |
duke@435 | 946 | } |
duke@435 | 947 | } else if (pk->at(0)->is_Load()) { |
duke@435 | 948 | // Pull Loads up towards first executed pack member |
duke@435 | 949 | LoadNode* first = executed_first(pk)->as_Load(); |
duke@435 | 950 | Node* first_mem = first->in(MemNode::Memory); |
duke@435 | 951 | _igvn.hash_delete(first_mem); |
duke@435 | 952 | // Give each load same memory state as first |
duke@435 | 953 | for (uint i = 0; i < pk->size(); i++) { |
duke@435 | 954 | LoadNode* ld = pk->at(i)->as_Load(); |
duke@435 | 955 | _igvn.hash_delete(ld); |
duke@435 | 956 | ld->set_req(MemNode::Memory, first_mem); |
duke@435 | 957 | _igvn._worklist.push(ld); |
duke@435 | 958 | } |
duke@435 | 959 | } |
duke@435 | 960 | } |
duke@435 | 961 | |
duke@435 | 962 | //------------------------------output--------------------------- |
duke@435 | 963 | // Convert packs into vector node operations |
duke@435 | 964 | void SuperWord::output() { |
duke@435 | 965 | if (_packset.length() == 0) return; |
duke@435 | 966 | |
duke@435 | 967 | // MUST ENSURE main loop's initial value is properly aligned: |
duke@435 | 968 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 |
duke@435 | 969 | |
duke@435 | 970 | align_initial_loop_index(align_to_ref()); |
duke@435 | 971 | |
duke@435 | 972 | // Insert extract (unpack) operations for scalar uses |
duke@435 | 973 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 974 | insert_extracts(_packset.at(i)); |
duke@435 | 975 | } |
duke@435 | 976 | |
duke@435 | 977 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 978 | Node* n = _block.at(i); |
duke@435 | 979 | Node_List* p = my_pack(n); |
duke@435 | 980 | if (p && n == executed_last(p)) { |
duke@435 | 981 | uint vlen = p->size(); |
duke@435 | 982 | Node* vn = NULL; |
duke@435 | 983 | Node* low_adr = p->at(0); |
duke@435 | 984 | Node* first = executed_first(p); |
duke@435 | 985 | if (n->is_Load()) { |
duke@435 | 986 | int opc = n->Opcode(); |
duke@435 | 987 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 988 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 989 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 990 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 991 | vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); |
duke@435 | 992 | |
duke@435 | 993 | } else if (n->is_Store()) { |
duke@435 | 994 | // Promote value to be stored to vector |
duke@435 | 995 | VectorNode* val = vector_opd(p, MemNode::ValueIn); |
duke@435 | 996 | |
duke@435 | 997 | int opc = n->Opcode(); |
duke@435 | 998 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 999 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1000 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1001 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1002 | vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
duke@435 | 1003 | |
duke@435 | 1004 | } else if (n->req() == 3) { |
duke@435 | 1005 | // Promote operands to vector |
duke@435 | 1006 | Node* in1 = vector_opd(p, 1); |
duke@435 | 1007 | Node* in2 = vector_opd(p, 2); |
duke@435 | 1008 | vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); |
duke@435 | 1009 | |
duke@435 | 1010 | } else { |
duke@435 | 1011 | ShouldNotReachHere(); |
duke@435 | 1012 | } |
duke@435 | 1013 | |
duke@435 | 1014 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1015 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
duke@435 | 1016 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 1017 | Node* pm = p->at(j); |
duke@435 | 1018 | _igvn.hash_delete(pm); |
duke@435 | 1019 | _igvn.subsume_node(pm, vn); |
duke@435 | 1020 | } |
duke@435 | 1021 | _igvn._worklist.push(vn); |
duke@435 | 1022 | } |
duke@435 | 1023 | } |
duke@435 | 1024 | } |
duke@435 | 1025 | |
duke@435 | 1026 | //------------------------------vector_opd--------------------------- |
duke@435 | 1027 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) |
duke@435 | 1028 | VectorNode* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
duke@435 | 1029 | Node* p0 = p->at(0); |
duke@435 | 1030 | uint vlen = p->size(); |
duke@435 | 1031 | Node* opd = p0->in(opd_idx); |
duke@435 | 1032 | |
duke@435 | 1033 | bool same_opd = true; |
duke@435 | 1034 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1035 | Node* pi = p->at(i); |
duke@435 | 1036 | Node* in = pi->in(opd_idx); |
duke@435 | 1037 | if (opd != in) { |
duke@435 | 1038 | same_opd = false; |
duke@435 | 1039 | break; |
duke@435 | 1040 | } |
duke@435 | 1041 | } |
duke@435 | 1042 | |
duke@435 | 1043 | if (same_opd) { |
duke@435 | 1044 | if (opd->is_Vector()) { |
duke@435 | 1045 | return (VectorNode*)opd; // input is matching vector |
duke@435 | 1046 | } |
duke@435 | 1047 | // Convert scalar input to vector. Use p0's type because it's container |
duke@435 | 1048 | // maybe smaller than the operand's container. |
duke@435 | 1049 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1050 | const Type* p0_t = velt_type(p0); |
duke@435 | 1051 | if (p0_t->higher_equal(opd_t)) opd_t = p0_t; |
duke@435 | 1052 | VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t); |
duke@435 | 1053 | |
duke@435 | 1054 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1055 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
duke@435 | 1056 | return vn; |
duke@435 | 1057 | } |
duke@435 | 1058 | |
duke@435 | 1059 | // Insert pack operation |
duke@435 | 1060 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1061 | PackNode* pk = PackNode::make(_phase->C, opd, opd_t); |
duke@435 | 1062 | |
duke@435 | 1063 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1064 | Node* pi = p->at(i); |
duke@435 | 1065 | Node* in = pi->in(opd_idx); |
duke@435 | 1066 | assert(my_pack(in) == NULL, "Should already have been unpacked"); |
duke@435 | 1067 | assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type"); |
duke@435 | 1068 | pk->add_opd(in); |
duke@435 | 1069 | } |
duke@435 | 1070 | _phase->_igvn.register_new_node_with_optimizer(pk); |
duke@435 | 1071 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
duke@435 | 1072 | return pk; |
duke@435 | 1073 | } |
duke@435 | 1074 | |
duke@435 | 1075 | //------------------------------insert_extracts--------------------------- |
duke@435 | 1076 | // If a use of pack p is not a vector use, then replace the |
duke@435 | 1077 | // use with an extract operation. |
duke@435 | 1078 | void SuperWord::insert_extracts(Node_List* p) { |
duke@435 | 1079 | if (p->at(0)->is_Store()) return; |
duke@435 | 1080 | assert(_n_idx_list.is_empty(), "empty (node,index) list"); |
duke@435 | 1081 | |
duke@435 | 1082 | // Inspect each use of each pack member. For each use that is |
duke@435 | 1083 | // not a vector use, replace the use with an extract operation. |
duke@435 | 1084 | |
duke@435 | 1085 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1086 | Node* def = p->at(i); |
duke@435 | 1087 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1088 | Node* use = def->fast_out(j); |
duke@435 | 1089 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1090 | Node* n = use->in(k); |
duke@435 | 1091 | if (def == n) { |
duke@435 | 1092 | if (!is_vector_use(use, k)) { |
duke@435 | 1093 | _n_idx_list.push(use, k); |
duke@435 | 1094 | } |
duke@435 | 1095 | } |
duke@435 | 1096 | } |
duke@435 | 1097 | } |
duke@435 | 1098 | } |
duke@435 | 1099 | |
duke@435 | 1100 | while (_n_idx_list.is_nonempty()) { |
duke@435 | 1101 | Node* use = _n_idx_list.node(); |
duke@435 | 1102 | int idx = _n_idx_list.index(); |
duke@435 | 1103 | _n_idx_list.pop(); |
duke@435 | 1104 | Node* def = use->in(idx); |
duke@435 | 1105 | |
duke@435 | 1106 | // Insert extract operation |
duke@435 | 1107 | _igvn.hash_delete(def); |
duke@435 | 1108 | _igvn.hash_delete(use); |
duke@435 | 1109 | int def_pos = alignment(def) / data_size(def); |
duke@435 | 1110 | const Type* def_t = velt_type(def); |
duke@435 | 1111 | |
duke@435 | 1112 | Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); |
duke@435 | 1113 | _phase->_igvn.register_new_node_with_optimizer(ex); |
duke@435 | 1114 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
duke@435 | 1115 | use->set_req(idx, ex); |
duke@435 | 1116 | _igvn._worklist.push(def); |
duke@435 | 1117 | _igvn._worklist.push(use); |
duke@435 | 1118 | |
duke@435 | 1119 | bb_insert_after(ex, bb_idx(def)); |
duke@435 | 1120 | set_velt_type(ex, def_t); |
duke@435 | 1121 | } |
duke@435 | 1122 | } |
duke@435 | 1123 | |
duke@435 | 1124 | //------------------------------is_vector_use--------------------------- |
duke@435 | 1125 | // Is use->in(u_idx) a vector use? |
duke@435 | 1126 | bool SuperWord::is_vector_use(Node* use, int u_idx) { |
duke@435 | 1127 | Node_List* u_pk = my_pack(use); |
duke@435 | 1128 | if (u_pk == NULL) return false; |
duke@435 | 1129 | Node* def = use->in(u_idx); |
duke@435 | 1130 | Node_List* d_pk = my_pack(def); |
duke@435 | 1131 | if (d_pk == NULL) { |
duke@435 | 1132 | // check for scalar promotion |
duke@435 | 1133 | Node* n = u_pk->at(0)->in(u_idx); |
duke@435 | 1134 | for (uint i = 1; i < u_pk->size(); i++) { |
duke@435 | 1135 | if (u_pk->at(i)->in(u_idx) != n) return false; |
duke@435 | 1136 | } |
duke@435 | 1137 | return true; |
duke@435 | 1138 | } |
duke@435 | 1139 | if (u_pk->size() != d_pk->size()) |
duke@435 | 1140 | return false; |
duke@435 | 1141 | for (uint i = 0; i < u_pk->size(); i++) { |
duke@435 | 1142 | Node* ui = u_pk->at(i); |
duke@435 | 1143 | Node* di = d_pk->at(i); |
duke@435 | 1144 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) |
duke@435 | 1145 | return false; |
duke@435 | 1146 | } |
duke@435 | 1147 | return true; |
duke@435 | 1148 | } |
duke@435 | 1149 | |
duke@435 | 1150 | //------------------------------construct_bb--------------------------- |
duke@435 | 1151 | // Construct reverse postorder list of block members |
duke@435 | 1152 | void SuperWord::construct_bb() { |
duke@435 | 1153 | Node* entry = bb(); |
duke@435 | 1154 | |
duke@435 | 1155 | assert(_stk.length() == 0, "stk is empty"); |
duke@435 | 1156 | assert(_block.length() == 0, "block is empty"); |
duke@435 | 1157 | assert(_data_entry.length() == 0, "data_entry is empty"); |
duke@435 | 1158 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty"); |
duke@435 | 1159 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty"); |
duke@435 | 1160 | |
duke@435 | 1161 | // Find non-control nodes with no inputs from within block, |
duke@435 | 1162 | // create a temporary map from node _idx to bb_idx for use |
duke@435 | 1163 | // by the visited and post_visited sets, |
duke@435 | 1164 | // and count number of nodes in block. |
duke@435 | 1165 | int bb_ct = 0; |
duke@435 | 1166 | for (uint i = 0; i < lpt()->_body.size(); i++ ) { |
duke@435 | 1167 | Node *n = lpt()->_body.at(i); |
duke@435 | 1168 | set_bb_idx(n, i); // Create a temporary map |
duke@435 | 1169 | if (in_bb(n)) { |
duke@435 | 1170 | bb_ct++; |
duke@435 | 1171 | if (!n->is_CFG()) { |
duke@435 | 1172 | bool found = false; |
duke@435 | 1173 | for (uint j = 0; j < n->req(); j++) { |
duke@435 | 1174 | Node* def = n->in(j); |
duke@435 | 1175 | if (def && in_bb(def)) { |
duke@435 | 1176 | found = true; |
duke@435 | 1177 | break; |
duke@435 | 1178 | } |
duke@435 | 1179 | } |
duke@435 | 1180 | if (!found) { |
duke@435 | 1181 | assert(n != entry, "can't be entry"); |
duke@435 | 1182 | _data_entry.push(n); |
duke@435 | 1183 | } |
duke@435 | 1184 | } |
duke@435 | 1185 | } |
duke@435 | 1186 | } |
duke@435 | 1187 | |
duke@435 | 1188 | // Find memory slices (head and tail) |
duke@435 | 1189 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { |
duke@435 | 1190 | Node *n = lp()->fast_out(i); |
duke@435 | 1191 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { |
duke@435 | 1192 | Node* n_tail = n->in(LoopNode::LoopBackControl); |
duke@435 | 1193 | _mem_slice_head.push(n); |
duke@435 | 1194 | _mem_slice_tail.push(n_tail); |
duke@435 | 1195 | } |
duke@435 | 1196 | } |
duke@435 | 1197 | |
duke@435 | 1198 | // Create an RPO list of nodes in block |
duke@435 | 1199 | |
duke@435 | 1200 | visited_clear(); |
duke@435 | 1201 | post_visited_clear(); |
duke@435 | 1202 | |
duke@435 | 1203 | // Push all non-control nodes with no inputs from within block, then control entry |
duke@435 | 1204 | for (int j = 0; j < _data_entry.length(); j++) { |
duke@435 | 1205 | Node* n = _data_entry.at(j); |
duke@435 | 1206 | visited_set(n); |
duke@435 | 1207 | _stk.push(n); |
duke@435 | 1208 | } |
duke@435 | 1209 | visited_set(entry); |
duke@435 | 1210 | _stk.push(entry); |
duke@435 | 1211 | |
duke@435 | 1212 | // Do a depth first walk over out edges |
duke@435 | 1213 | int rpo_idx = bb_ct - 1; |
duke@435 | 1214 | int size; |
duke@435 | 1215 | while ((size = _stk.length()) > 0) { |
duke@435 | 1216 | Node* n = _stk.top(); // Leave node on stack |
duke@435 | 1217 | if (!visited_test_set(n)) { |
duke@435 | 1218 | // forward arc in graph |
duke@435 | 1219 | } else if (!post_visited_test(n)) { |
duke@435 | 1220 | // cross or back arc |
duke@435 | 1221 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 1222 | Node *use = n->fast_out(i); |
duke@435 | 1223 | if (in_bb(use) && !visited_test(use) && |
duke@435 | 1224 | // Don't go around backedge |
duke@435 | 1225 | (!use->is_Phi() || n == entry)) { |
duke@435 | 1226 | _stk.push(use); |
duke@435 | 1227 | } |
duke@435 | 1228 | } |
duke@435 | 1229 | if (_stk.length() == size) { |
duke@435 | 1230 | // There were no additional uses, post visit node now |
duke@435 | 1231 | _stk.pop(); // Remove node from stack |
duke@435 | 1232 | assert(rpo_idx >= 0, ""); |
duke@435 | 1233 | _block.at_put_grow(rpo_idx, n); |
duke@435 | 1234 | rpo_idx--; |
duke@435 | 1235 | post_visited_set(n); |
duke@435 | 1236 | assert(rpo_idx >= 0 || _stk.is_empty(), ""); |
duke@435 | 1237 | } |
duke@435 | 1238 | } else { |
duke@435 | 1239 | _stk.pop(); // Remove post-visited node from stack |
duke@435 | 1240 | } |
duke@435 | 1241 | } |
duke@435 | 1242 | |
duke@435 | 1243 | // Create real map of block indices for nodes |
duke@435 | 1244 | for (int j = 0; j < _block.length(); j++) { |
duke@435 | 1245 | Node* n = _block.at(j); |
duke@435 | 1246 | set_bb_idx(n, j); |
duke@435 | 1247 | } |
duke@435 | 1248 | |
duke@435 | 1249 | initialize_bb(); // Ensure extra info is allocated. |
duke@435 | 1250 | |
duke@435 | 1251 | #ifndef PRODUCT |
duke@435 | 1252 | if (TraceSuperWord) { |
duke@435 | 1253 | print_bb(); |
duke@435 | 1254 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); |
duke@435 | 1255 | for (int m = 0; m < _data_entry.length(); m++) { |
duke@435 | 1256 | tty->print("%3d ", m); |
duke@435 | 1257 | _data_entry.at(m)->dump(); |
duke@435 | 1258 | } |
duke@435 | 1259 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); |
duke@435 | 1260 | for (int m = 0; m < _mem_slice_head.length(); m++) { |
duke@435 | 1261 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); |
duke@435 | 1262 | tty->print(" "); _mem_slice_tail.at(m)->dump(); |
duke@435 | 1263 | } |
duke@435 | 1264 | } |
duke@435 | 1265 | #endif |
duke@435 | 1266 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); |
duke@435 | 1267 | } |
duke@435 | 1268 | |
duke@435 | 1269 | //------------------------------initialize_bb--------------------------- |
duke@435 | 1270 | // Initialize per node info |
duke@435 | 1271 | void SuperWord::initialize_bb() { |
duke@435 | 1272 | Node* last = _block.at(_block.length() - 1); |
duke@435 | 1273 | grow_node_info(bb_idx(last)); |
duke@435 | 1274 | } |
duke@435 | 1275 | |
duke@435 | 1276 | //------------------------------bb_insert_after--------------------------- |
duke@435 | 1277 | // Insert n into block after pos |
duke@435 | 1278 | void SuperWord::bb_insert_after(Node* n, int pos) { |
duke@435 | 1279 | int n_pos = pos + 1; |
duke@435 | 1280 | // Make room |
duke@435 | 1281 | for (int i = _block.length() - 1; i >= n_pos; i--) { |
duke@435 | 1282 | _block.at_put_grow(i+1, _block.at(i)); |
duke@435 | 1283 | } |
duke@435 | 1284 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { |
duke@435 | 1285 | _node_info.at_put_grow(j+1, _node_info.at(j)); |
duke@435 | 1286 | } |
duke@435 | 1287 | // Set value |
duke@435 | 1288 | _block.at_put_grow(n_pos, n); |
duke@435 | 1289 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); |
duke@435 | 1290 | // Adjust map from node->_idx to _block index |
duke@435 | 1291 | for (int i = n_pos; i < _block.length(); i++) { |
duke@435 | 1292 | set_bb_idx(_block.at(i), i); |
duke@435 | 1293 | } |
duke@435 | 1294 | } |
duke@435 | 1295 | |
duke@435 | 1296 | //------------------------------compute_max_depth--------------------------- |
duke@435 | 1297 | // Compute max depth for expressions from beginning of block |
duke@435 | 1298 | // Use to prune search paths during test for independence. |
duke@435 | 1299 | void SuperWord::compute_max_depth() { |
duke@435 | 1300 | int ct = 0; |
duke@435 | 1301 | bool again; |
duke@435 | 1302 | do { |
duke@435 | 1303 | again = false; |
duke@435 | 1304 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1305 | Node* n = _block.at(i); |
duke@435 | 1306 | if (!n->is_Phi()) { |
duke@435 | 1307 | int d_orig = depth(n); |
duke@435 | 1308 | int d_in = 0; |
duke@435 | 1309 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { |
duke@435 | 1310 | Node* pred = preds.current(); |
duke@435 | 1311 | if (in_bb(pred)) { |
duke@435 | 1312 | d_in = MAX2(d_in, depth(pred)); |
duke@435 | 1313 | } |
duke@435 | 1314 | } |
duke@435 | 1315 | if (d_in + 1 != d_orig) { |
duke@435 | 1316 | set_depth(n, d_in + 1); |
duke@435 | 1317 | again = true; |
duke@435 | 1318 | } |
duke@435 | 1319 | } |
duke@435 | 1320 | } |
duke@435 | 1321 | ct++; |
duke@435 | 1322 | } while (again); |
duke@435 | 1323 | #ifndef PRODUCT |
duke@435 | 1324 | if (TraceSuperWord && Verbose) |
duke@435 | 1325 | tty->print_cr("compute_max_depth iterated: %d times", ct); |
duke@435 | 1326 | #endif |
duke@435 | 1327 | } |
duke@435 | 1328 | |
duke@435 | 1329 | //-------------------------compute_vector_element_type----------------------- |
duke@435 | 1330 | // Compute necessary vector element type for expressions |
duke@435 | 1331 | // This propagates backwards a narrower integer type when the |
duke@435 | 1332 | // upper bits of the value are not needed. |
duke@435 | 1333 | // Example: char a,b,c; a = b + c; |
duke@435 | 1334 | // Normally the type of the add is integer, but for packed character |
duke@435 | 1335 | // operations the type of the add needs to be char. |
duke@435 | 1336 | void SuperWord::compute_vector_element_type() { |
duke@435 | 1337 | #ifndef PRODUCT |
duke@435 | 1338 | if (TraceSuperWord && Verbose) |
duke@435 | 1339 | tty->print_cr("\ncompute_velt_type:"); |
duke@435 | 1340 | #endif |
duke@435 | 1341 | |
duke@435 | 1342 | // Initial type |
duke@435 | 1343 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1344 | Node* n = _block.at(i); |
duke@435 | 1345 | const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type()) |
duke@435 | 1346 | : _igvn.type(n); |
duke@435 | 1347 | const Type* vt = container_type(t); |
duke@435 | 1348 | set_velt_type(n, vt); |
duke@435 | 1349 | } |
duke@435 | 1350 | |
duke@435 | 1351 | // Propagate narrowed type backwards through operations |
duke@435 | 1352 | // that don't depend on higher order bits |
duke@435 | 1353 | for (int i = _block.length() - 1; i >= 0; i--) { |
duke@435 | 1354 | Node* n = _block.at(i); |
duke@435 | 1355 | // Only integer types need be examined |
duke@435 | 1356 | if (n->bottom_type()->isa_int()) { |
duke@435 | 1357 | uint start, end; |
duke@435 | 1358 | vector_opd_range(n, &start, &end); |
duke@435 | 1359 | const Type* vt = velt_type(n); |
duke@435 | 1360 | |
duke@435 | 1361 | for (uint j = start; j < end; j++) { |
duke@435 | 1362 | Node* in = n->in(j); |
duke@435 | 1363 | // Don't propagate through a type conversion |
duke@435 | 1364 | if (n->bottom_type() != in->bottom_type()) |
duke@435 | 1365 | continue; |
duke@435 | 1366 | switch(in->Opcode()) { |
duke@435 | 1367 | case Op_AddI: case Op_AddL: |
duke@435 | 1368 | case Op_SubI: case Op_SubL: |
duke@435 | 1369 | case Op_MulI: case Op_MulL: |
duke@435 | 1370 | case Op_AndI: case Op_AndL: |
duke@435 | 1371 | case Op_OrI: case Op_OrL: |
duke@435 | 1372 | case Op_XorI: case Op_XorL: |
duke@435 | 1373 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1374 | case Op_CMoveI: case Op_CMoveL: |
duke@435 | 1375 | if (in_bb(in)) { |
duke@435 | 1376 | bool same_type = true; |
duke@435 | 1377 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 1378 | Node *use = in->fast_out(k); |
duke@435 | 1379 | if (!in_bb(use) || velt_type(use) != vt) { |
duke@435 | 1380 | same_type = false; |
duke@435 | 1381 | break; |
duke@435 | 1382 | } |
duke@435 | 1383 | } |
duke@435 | 1384 | if (same_type) { |
duke@435 | 1385 | set_velt_type(in, vt); |
duke@435 | 1386 | } |
duke@435 | 1387 | } |
duke@435 | 1388 | } |
duke@435 | 1389 | } |
duke@435 | 1390 | } |
duke@435 | 1391 | } |
duke@435 | 1392 | #ifndef PRODUCT |
duke@435 | 1393 | if (TraceSuperWord && Verbose) { |
duke@435 | 1394 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1395 | Node* n = _block.at(i); |
duke@435 | 1396 | velt_type(n)->dump(); |
duke@435 | 1397 | tty->print("\t"); |
duke@435 | 1398 | n->dump(); |
duke@435 | 1399 | } |
duke@435 | 1400 | } |
duke@435 | 1401 | #endif |
duke@435 | 1402 | } |
duke@435 | 1403 | |
duke@435 | 1404 | //------------------------------memory_alignment--------------------------- |
duke@435 | 1405 | // Alignment within a vector memory reference |
duke@435 | 1406 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
duke@435 | 1407 | SWPointer p(s, this); |
duke@435 | 1408 | if (!p.valid()) { |
duke@435 | 1409 | return bottom_align; |
duke@435 | 1410 | } |
duke@435 | 1411 | int offset = p.offset_in_bytes(); |
duke@435 | 1412 | offset += iv_adjust_in_bytes; |
duke@435 | 1413 | int off_rem = offset % vector_width_in_bytes(); |
duke@435 | 1414 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); |
duke@435 | 1415 | return off_mod; |
duke@435 | 1416 | } |
duke@435 | 1417 | |
duke@435 | 1418 | //---------------------------container_type--------------------------- |
duke@435 | 1419 | // Smallest type containing range of values |
duke@435 | 1420 | const Type* SuperWord::container_type(const Type* t) { |
duke@435 | 1421 | if (t->isa_aryptr()) { |
duke@435 | 1422 | t = t->is_aryptr()->elem(); |
duke@435 | 1423 | } |
duke@435 | 1424 | if (t->basic_type() == T_INT) { |
duke@435 | 1425 | if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; |
duke@435 | 1426 | if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; |
duke@435 | 1427 | if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; |
duke@435 | 1428 | if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; |
duke@435 | 1429 | return TypeInt::INT; |
duke@435 | 1430 | } |
duke@435 | 1431 | return t; |
duke@435 | 1432 | } |
duke@435 | 1433 | |
duke@435 | 1434 | //-------------------------vector_opd_range----------------------- |
duke@435 | 1435 | // (Start, end] half-open range defining which operands are vector |
duke@435 | 1436 | void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { |
duke@435 | 1437 | switch (n->Opcode()) { |
duke@435 | 1438 | case Op_LoadB: case Op_LoadC: |
duke@435 | 1439 | case Op_LoadI: case Op_LoadL: |
duke@435 | 1440 | case Op_LoadF: case Op_LoadD: |
duke@435 | 1441 | case Op_LoadP: |
duke@435 | 1442 | *start = 0; |
duke@435 | 1443 | *end = 0; |
duke@435 | 1444 | return; |
duke@435 | 1445 | case Op_StoreB: case Op_StoreC: |
duke@435 | 1446 | case Op_StoreI: case Op_StoreL: |
duke@435 | 1447 | case Op_StoreF: case Op_StoreD: |
duke@435 | 1448 | case Op_StoreP: |
duke@435 | 1449 | *start = MemNode::ValueIn; |
duke@435 | 1450 | *end = *start + 1; |
duke@435 | 1451 | return; |
duke@435 | 1452 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1453 | *start = 1; |
duke@435 | 1454 | *end = 2; |
duke@435 | 1455 | return; |
duke@435 | 1456 | case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: |
duke@435 | 1457 | *start = 2; |
duke@435 | 1458 | *end = n->req(); |
duke@435 | 1459 | return; |
duke@435 | 1460 | } |
duke@435 | 1461 | *start = 1; |
duke@435 | 1462 | *end = n->req(); // default is all operands |
duke@435 | 1463 | } |
duke@435 | 1464 | |
duke@435 | 1465 | //------------------------------in_packset--------------------------- |
duke@435 | 1466 | // Are s1 and s2 in a pack pair and ordered as s1,s2? |
duke@435 | 1467 | bool SuperWord::in_packset(Node* s1, Node* s2) { |
duke@435 | 1468 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1469 | Node_List* p = _packset.at(i); |
duke@435 | 1470 | assert(p->size() == 2, "must be"); |
duke@435 | 1471 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { |
duke@435 | 1472 | return true; |
duke@435 | 1473 | } |
duke@435 | 1474 | } |
duke@435 | 1475 | return false; |
duke@435 | 1476 | } |
duke@435 | 1477 | |
duke@435 | 1478 | //------------------------------in_pack--------------------------- |
duke@435 | 1479 | // Is s in pack p? |
duke@435 | 1480 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { |
duke@435 | 1481 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1482 | if (p->at(i) == s) { |
duke@435 | 1483 | return p; |
duke@435 | 1484 | } |
duke@435 | 1485 | } |
duke@435 | 1486 | return NULL; |
duke@435 | 1487 | } |
duke@435 | 1488 | |
duke@435 | 1489 | //------------------------------remove_pack_at--------------------------- |
duke@435 | 1490 | // Remove the pack at position pos in the packset |
duke@435 | 1491 | void SuperWord::remove_pack_at(int pos) { |
duke@435 | 1492 | Node_List* p = _packset.at(pos); |
duke@435 | 1493 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1494 | Node* s = p->at(i); |
duke@435 | 1495 | set_my_pack(s, NULL); |
duke@435 | 1496 | } |
duke@435 | 1497 | _packset.remove_at(pos); |
duke@435 | 1498 | } |
duke@435 | 1499 | |
duke@435 | 1500 | //------------------------------executed_first--------------------------- |
duke@435 | 1501 | // Return the node executed first in pack p. Uses the RPO block list |
duke@435 | 1502 | // to determine order. |
duke@435 | 1503 | Node* SuperWord::executed_first(Node_List* p) { |
duke@435 | 1504 | Node* n = p->at(0); |
duke@435 | 1505 | int n_rpo = bb_idx(n); |
duke@435 | 1506 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1507 | Node* s = p->at(i); |
duke@435 | 1508 | int s_rpo = bb_idx(s); |
duke@435 | 1509 | if (s_rpo < n_rpo) { |
duke@435 | 1510 | n = s; |
duke@435 | 1511 | n_rpo = s_rpo; |
duke@435 | 1512 | } |
duke@435 | 1513 | } |
duke@435 | 1514 | return n; |
duke@435 | 1515 | } |
duke@435 | 1516 | |
duke@435 | 1517 | //------------------------------executed_last--------------------------- |
duke@435 | 1518 | // Return the node executed last in pack p. |
duke@435 | 1519 | Node* SuperWord::executed_last(Node_List* p) { |
duke@435 | 1520 | Node* n = p->at(0); |
duke@435 | 1521 | int n_rpo = bb_idx(n); |
duke@435 | 1522 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1523 | Node* s = p->at(i); |
duke@435 | 1524 | int s_rpo = bb_idx(s); |
duke@435 | 1525 | if (s_rpo > n_rpo) { |
duke@435 | 1526 | n = s; |
duke@435 | 1527 | n_rpo = s_rpo; |
duke@435 | 1528 | } |
duke@435 | 1529 | } |
duke@435 | 1530 | return n; |
duke@435 | 1531 | } |
duke@435 | 1532 | |
duke@435 | 1533 | //----------------------------align_initial_loop_index--------------------------- |
duke@435 | 1534 | // Adjust pre-loop limit so that in main loop, a load/store reference |
duke@435 | 1535 | // to align_to_ref will be a position zero in the vector. |
duke@435 | 1536 | // (iv + k) mod vector_align == 0 |
duke@435 | 1537 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { |
duke@435 | 1538 | CountedLoopNode *main_head = lp()->as_CountedLoop(); |
duke@435 | 1539 | assert(main_head->is_main_loop(), ""); |
duke@435 | 1540 | CountedLoopEndNode* pre_end = get_pre_loop_end(main_head); |
duke@435 | 1541 | assert(pre_end != NULL, ""); |
duke@435 | 1542 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 1543 | assert(pre_opaq1->Opcode() == Op_Opaque1, ""); |
duke@435 | 1544 | Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; |
duke@435 | 1545 | Node *pre_limit = pre_opaq->in(1); |
duke@435 | 1546 | |
duke@435 | 1547 | // Where we put new limit calculations |
duke@435 | 1548 | Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); |
duke@435 | 1549 | |
duke@435 | 1550 | // Ensure the original loop limit is available from the |
duke@435 | 1551 | // pre-loop Opaque1 node. |
duke@435 | 1552 | Node *orig_limit = pre_opaq->original_loop_limit(); |
duke@435 | 1553 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
duke@435 | 1554 | |
duke@435 | 1555 | SWPointer align_to_ref_p(align_to_ref, this); |
duke@435 | 1556 | |
duke@435 | 1557 | // Let l0 == original pre_limit, l == new pre_limit, V == v_align |
duke@435 | 1558 | // |
duke@435 | 1559 | // For stride > 0 |
duke@435 | 1560 | // Need l such that l > l0 && (l+k)%V == 0 |
duke@435 | 1561 | // Find n such that l = (l0 + n) |
duke@435 | 1562 | // (l0 + n + k) % V == 0 |
duke@435 | 1563 | // n = [V - (l0 + k)%V]%V |
duke@435 | 1564 | // new limit = l0 + [V - (l0 + k)%V]%V |
duke@435 | 1565 | // For stride < 0 |
duke@435 | 1566 | // Need l such that l < l0 && (l+k)%V == 0 |
duke@435 | 1567 | // Find n such that l = (l0 - n) |
duke@435 | 1568 | // (l0 - n + k) % V == 0 |
duke@435 | 1569 | // n = (l0 + k)%V |
duke@435 | 1570 | // new limit = l0 - (l0 + k)%V |
duke@435 | 1571 | |
duke@435 | 1572 | int elt_size = align_to_ref_p.memory_size(); |
duke@435 | 1573 | int v_align = vector_width_in_bytes() / elt_size; |
duke@435 | 1574 | int k = align_to_ref_p.offset_in_bytes() / elt_size; |
duke@435 | 1575 | |
duke@435 | 1576 | Node *kn = _igvn.intcon(k); |
duke@435 | 1577 | Node *limk = new (_phase->C, 3) AddINode(pre_limit, kn); |
duke@435 | 1578 | _phase->_igvn.register_new_node_with_optimizer(limk); |
duke@435 | 1579 | _phase->set_ctrl(limk, pre_ctrl); |
duke@435 | 1580 | if (align_to_ref_p.invar() != NULL) { |
duke@435 | 1581 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
duke@435 | 1582 | Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); |
duke@435 | 1583 | _phase->_igvn.register_new_node_with_optimizer(aref); |
duke@435 | 1584 | _phase->set_ctrl(aref, pre_ctrl); |
duke@435 | 1585 | if (!align_to_ref_p.negate_invar()) { |
duke@435 | 1586 | limk = new (_phase->C, 3) AddINode(limk, aref); |
duke@435 | 1587 | } else { |
duke@435 | 1588 | limk = new (_phase->C, 3) SubINode(limk, aref); |
duke@435 | 1589 | } |
duke@435 | 1590 | _phase->_igvn.register_new_node_with_optimizer(limk); |
duke@435 | 1591 | _phase->set_ctrl(limk, pre_ctrl); |
duke@435 | 1592 | } |
duke@435 | 1593 | Node* va_msk = _igvn.intcon(v_align - 1); |
duke@435 | 1594 | Node* n = new (_phase->C, 3) AndINode(limk, va_msk); |
duke@435 | 1595 | _phase->_igvn.register_new_node_with_optimizer(n); |
duke@435 | 1596 | _phase->set_ctrl(n, pre_ctrl); |
duke@435 | 1597 | Node* newlim; |
duke@435 | 1598 | if (iv_stride() > 0) { |
duke@435 | 1599 | Node* va = _igvn.intcon(v_align); |
duke@435 | 1600 | Node* adj = new (_phase->C, 3) SubINode(va, n); |
duke@435 | 1601 | _phase->_igvn.register_new_node_with_optimizer(adj); |
duke@435 | 1602 | _phase->set_ctrl(adj, pre_ctrl); |
duke@435 | 1603 | Node* adj2 = new (_phase->C, 3) AndINode(adj, va_msk); |
duke@435 | 1604 | _phase->_igvn.register_new_node_with_optimizer(adj2); |
duke@435 | 1605 | _phase->set_ctrl(adj2, pre_ctrl); |
duke@435 | 1606 | newlim = new (_phase->C, 3) AddINode(pre_limit, adj2); |
duke@435 | 1607 | } else { |
duke@435 | 1608 | newlim = new (_phase->C, 3) SubINode(pre_limit, n); |
duke@435 | 1609 | } |
duke@435 | 1610 | _phase->_igvn.register_new_node_with_optimizer(newlim); |
duke@435 | 1611 | _phase->set_ctrl(newlim, pre_ctrl); |
duke@435 | 1612 | Node* constrained = |
duke@435 | 1613 | (iv_stride() > 0) ? (Node*) new (_phase->C,3) MinINode(newlim, orig_limit) |
duke@435 | 1614 | : (Node*) new (_phase->C,3) MaxINode(newlim, orig_limit); |
duke@435 | 1615 | _phase->_igvn.register_new_node_with_optimizer(constrained); |
duke@435 | 1616 | _phase->set_ctrl(constrained, pre_ctrl); |
duke@435 | 1617 | _igvn.hash_delete(pre_opaq); |
duke@435 | 1618 | pre_opaq->set_req(1, constrained); |
duke@435 | 1619 | } |
duke@435 | 1620 | |
duke@435 | 1621 | //----------------------------get_pre_loop_end--------------------------- |
duke@435 | 1622 | // Find pre loop end from main loop. Returns null if none. |
duke@435 | 1623 | CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) { |
duke@435 | 1624 | Node *ctrl = cl->in(LoopNode::EntryControl); |
duke@435 | 1625 | if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL; |
duke@435 | 1626 | Node *iffm = ctrl->in(0); |
duke@435 | 1627 | if (!iffm->is_If()) return NULL; |
duke@435 | 1628 | Node *p_f = iffm->in(0); |
duke@435 | 1629 | if (!p_f->is_IfFalse()) return NULL; |
duke@435 | 1630 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; |
duke@435 | 1631 | CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
duke@435 | 1632 | if (!pre_end->loopnode()->is_pre_loop()) return NULL; |
duke@435 | 1633 | return pre_end; |
duke@435 | 1634 | } |
duke@435 | 1635 | |
duke@435 | 1636 | |
duke@435 | 1637 | //------------------------------init--------------------------- |
duke@435 | 1638 | void SuperWord::init() { |
duke@435 | 1639 | _dg.init(); |
duke@435 | 1640 | _packset.clear(); |
duke@435 | 1641 | _disjoint_ptrs.clear(); |
duke@435 | 1642 | _block.clear(); |
duke@435 | 1643 | _data_entry.clear(); |
duke@435 | 1644 | _mem_slice_head.clear(); |
duke@435 | 1645 | _mem_slice_tail.clear(); |
duke@435 | 1646 | _node_info.clear(); |
duke@435 | 1647 | _align_to_ref = NULL; |
duke@435 | 1648 | _lpt = NULL; |
duke@435 | 1649 | _lp = NULL; |
duke@435 | 1650 | _bb = NULL; |
duke@435 | 1651 | _iv = NULL; |
duke@435 | 1652 | } |
duke@435 | 1653 | |
duke@435 | 1654 | //------------------------------print_packset--------------------------- |
duke@435 | 1655 | void SuperWord::print_packset() { |
duke@435 | 1656 | #ifndef PRODUCT |
duke@435 | 1657 | tty->print_cr("packset"); |
duke@435 | 1658 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1659 | tty->print_cr("Pack: %d", i); |
duke@435 | 1660 | Node_List* p = _packset.at(i); |
duke@435 | 1661 | print_pack(p); |
duke@435 | 1662 | } |
duke@435 | 1663 | #endif |
duke@435 | 1664 | } |
duke@435 | 1665 | |
duke@435 | 1666 | //------------------------------print_pack--------------------------- |
duke@435 | 1667 | void SuperWord::print_pack(Node_List* p) { |
duke@435 | 1668 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1669 | print_stmt(p->at(i)); |
duke@435 | 1670 | } |
duke@435 | 1671 | } |
duke@435 | 1672 | |
duke@435 | 1673 | //------------------------------print_bb--------------------------- |
duke@435 | 1674 | void SuperWord::print_bb() { |
duke@435 | 1675 | #ifndef PRODUCT |
duke@435 | 1676 | tty->print_cr("\nBlock"); |
duke@435 | 1677 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1678 | Node* n = _block.at(i); |
duke@435 | 1679 | tty->print("%d ", i); |
duke@435 | 1680 | if (n) { |
duke@435 | 1681 | n->dump(); |
duke@435 | 1682 | } |
duke@435 | 1683 | } |
duke@435 | 1684 | #endif |
duke@435 | 1685 | } |
duke@435 | 1686 | |
duke@435 | 1687 | //------------------------------print_stmt--------------------------- |
duke@435 | 1688 | void SuperWord::print_stmt(Node* s) { |
duke@435 | 1689 | #ifndef PRODUCT |
duke@435 | 1690 | tty->print(" align: %d \t", alignment(s)); |
duke@435 | 1691 | s->dump(); |
duke@435 | 1692 | #endif |
duke@435 | 1693 | } |
duke@435 | 1694 | |
duke@435 | 1695 | //------------------------------blank--------------------------- |
duke@435 | 1696 | char* SuperWord::blank(uint depth) { |
duke@435 | 1697 | static char blanks[101]; |
duke@435 | 1698 | assert(depth < 101, "too deep"); |
duke@435 | 1699 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; |
duke@435 | 1700 | blanks[depth] = '\0'; |
duke@435 | 1701 | return blanks; |
duke@435 | 1702 | } |
duke@435 | 1703 | |
duke@435 | 1704 | |
duke@435 | 1705 | //==============================SWPointer=========================== |
duke@435 | 1706 | |
duke@435 | 1707 | //----------------------------SWPointer------------------------ |
duke@435 | 1708 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp) : |
duke@435 | 1709 | _mem(mem), _slp(slp), _base(NULL), _adr(NULL), |
duke@435 | 1710 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) { |
duke@435 | 1711 | |
duke@435 | 1712 | Node* adr = mem->in(MemNode::Address); |
duke@435 | 1713 | if (!adr->is_AddP()) { |
duke@435 | 1714 | assert(!valid(), "too complex"); |
duke@435 | 1715 | return; |
duke@435 | 1716 | } |
duke@435 | 1717 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) |
duke@435 | 1718 | Node* base = adr->in(AddPNode::Base); |
duke@435 | 1719 | for (int i = 0; i < 3; i++) { |
duke@435 | 1720 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { |
duke@435 | 1721 | assert(!valid(), "too complex"); |
duke@435 | 1722 | return; |
duke@435 | 1723 | } |
duke@435 | 1724 | adr = adr->in(AddPNode::Address); |
duke@435 | 1725 | if (base == adr || !adr->is_AddP()) { |
duke@435 | 1726 | break; // stop looking at addp's |
duke@435 | 1727 | } |
duke@435 | 1728 | } |
duke@435 | 1729 | _base = base; |
duke@435 | 1730 | _adr = adr; |
duke@435 | 1731 | assert(valid(), "Usable"); |
duke@435 | 1732 | } |
duke@435 | 1733 | |
duke@435 | 1734 | // Following is used to create a temporary object during |
duke@435 | 1735 | // the pattern match of an address expression. |
duke@435 | 1736 | SWPointer::SWPointer(SWPointer* p) : |
duke@435 | 1737 | _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), |
duke@435 | 1738 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} |
duke@435 | 1739 | |
duke@435 | 1740 | //------------------------scaled_iv_plus_offset-------------------- |
duke@435 | 1741 | // Match: k*iv + offset |
duke@435 | 1742 | // where: k is a constant that maybe zero, and |
duke@435 | 1743 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional |
duke@435 | 1744 | bool SWPointer::scaled_iv_plus_offset(Node* n) { |
duke@435 | 1745 | if (scaled_iv(n)) { |
duke@435 | 1746 | return true; |
duke@435 | 1747 | } |
duke@435 | 1748 | if (offset_plus_k(n)) { |
duke@435 | 1749 | return true; |
duke@435 | 1750 | } |
duke@435 | 1751 | int opc = n->Opcode(); |
duke@435 | 1752 | if (opc == Op_AddI) { |
duke@435 | 1753 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { |
duke@435 | 1754 | return true; |
duke@435 | 1755 | } |
duke@435 | 1756 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1757 | return true; |
duke@435 | 1758 | } |
duke@435 | 1759 | } else if (opc == Op_SubI) { |
duke@435 | 1760 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { |
duke@435 | 1761 | return true; |
duke@435 | 1762 | } |
duke@435 | 1763 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1764 | _scale *= -1; |
duke@435 | 1765 | return true; |
duke@435 | 1766 | } |
duke@435 | 1767 | } |
duke@435 | 1768 | return false; |
duke@435 | 1769 | } |
duke@435 | 1770 | |
duke@435 | 1771 | //----------------------------scaled_iv------------------------ |
duke@435 | 1772 | // Match: k*iv where k is a constant that's not zero |
duke@435 | 1773 | bool SWPointer::scaled_iv(Node* n) { |
duke@435 | 1774 | if (_scale != 0) { |
duke@435 | 1775 | return false; // already found a scale |
duke@435 | 1776 | } |
duke@435 | 1777 | if (n == iv()) { |
duke@435 | 1778 | _scale = 1; |
duke@435 | 1779 | return true; |
duke@435 | 1780 | } |
duke@435 | 1781 | int opc = n->Opcode(); |
duke@435 | 1782 | if (opc == Op_MulI) { |
duke@435 | 1783 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 1784 | _scale = n->in(2)->get_int(); |
duke@435 | 1785 | return true; |
duke@435 | 1786 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { |
duke@435 | 1787 | _scale = n->in(1)->get_int(); |
duke@435 | 1788 | return true; |
duke@435 | 1789 | } |
duke@435 | 1790 | } else if (opc == Op_LShiftI) { |
duke@435 | 1791 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 1792 | _scale = 1 << n->in(2)->get_int(); |
duke@435 | 1793 | return true; |
duke@435 | 1794 | } |
duke@435 | 1795 | } else if (opc == Op_ConvI2L) { |
duke@435 | 1796 | if (scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 1797 | return true; |
duke@435 | 1798 | } |
duke@435 | 1799 | } else if (opc == Op_LShiftL) { |
duke@435 | 1800 | if (!has_iv() && _invar == NULL) { |
duke@435 | 1801 | // Need to preserve the current _offset value, so |
duke@435 | 1802 | // create a temporary object for this expression subtree. |
duke@435 | 1803 | // Hacky, so should re-engineer the address pattern match. |
duke@435 | 1804 | SWPointer tmp(this); |
duke@435 | 1805 | if (tmp.scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 1806 | if (tmp._invar == NULL) { |
duke@435 | 1807 | int mult = 1 << n->in(2)->get_int(); |
duke@435 | 1808 | _scale = tmp._scale * mult; |
duke@435 | 1809 | _offset += tmp._offset * mult; |
duke@435 | 1810 | return true; |
duke@435 | 1811 | } |
duke@435 | 1812 | } |
duke@435 | 1813 | } |
duke@435 | 1814 | } |
duke@435 | 1815 | return false; |
duke@435 | 1816 | } |
duke@435 | 1817 | |
duke@435 | 1818 | //----------------------------offset_plus_k------------------------ |
duke@435 | 1819 | // Match: offset is (k [+/- invariant]) |
duke@435 | 1820 | // where k maybe zero and invariant is optional, but not both. |
duke@435 | 1821 | bool SWPointer::offset_plus_k(Node* n, bool negate) { |
duke@435 | 1822 | int opc = n->Opcode(); |
duke@435 | 1823 | if (opc == Op_ConI) { |
duke@435 | 1824 | _offset += negate ? -(n->get_int()) : n->get_int(); |
duke@435 | 1825 | return true; |
duke@435 | 1826 | } else if (opc == Op_ConL) { |
duke@435 | 1827 | // Okay if value fits into an int |
duke@435 | 1828 | const TypeLong* t = n->find_long_type(); |
duke@435 | 1829 | if (t->higher_equal(TypeLong::INT)) { |
duke@435 | 1830 | jlong loff = n->get_long(); |
duke@435 | 1831 | jint off = (jint)loff; |
duke@435 | 1832 | _offset += negate ? -off : loff; |
duke@435 | 1833 | return true; |
duke@435 | 1834 | } |
duke@435 | 1835 | return false; |
duke@435 | 1836 | } |
duke@435 | 1837 | if (_invar != NULL) return false; // already have an invariant |
duke@435 | 1838 | if (opc == Op_AddI) { |
duke@435 | 1839 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 1840 | _negate_invar = negate; |
duke@435 | 1841 | _invar = n->in(1); |
duke@435 | 1842 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 1843 | return true; |
duke@435 | 1844 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 1845 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 1846 | _negate_invar = negate; |
duke@435 | 1847 | _invar = n->in(2); |
duke@435 | 1848 | return true; |
duke@435 | 1849 | } |
duke@435 | 1850 | } |
duke@435 | 1851 | if (opc == Op_SubI) { |
duke@435 | 1852 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 1853 | _negate_invar = negate; |
duke@435 | 1854 | _invar = n->in(1); |
duke@435 | 1855 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 1856 | return true; |
duke@435 | 1857 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 1858 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 1859 | _negate_invar = !negate; |
duke@435 | 1860 | _invar = n->in(2); |
duke@435 | 1861 | return true; |
duke@435 | 1862 | } |
duke@435 | 1863 | } |
duke@435 | 1864 | if (invariant(n)) { |
duke@435 | 1865 | _negate_invar = negate; |
duke@435 | 1866 | _invar = n; |
duke@435 | 1867 | return true; |
duke@435 | 1868 | } |
duke@435 | 1869 | return false; |
duke@435 | 1870 | } |
duke@435 | 1871 | |
duke@435 | 1872 | //----------------------------print------------------------ |
duke@435 | 1873 | void SWPointer::print() { |
duke@435 | 1874 | #ifndef PRODUCT |
duke@435 | 1875 | tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n", |
duke@435 | 1876 | _base != NULL ? _base->_idx : 0, |
duke@435 | 1877 | _adr != NULL ? _adr->_idx : 0, |
duke@435 | 1878 | _scale, _offset, |
duke@435 | 1879 | _negate_invar?'-':'+', |
duke@435 | 1880 | _invar != NULL ? _invar->_idx : 0); |
duke@435 | 1881 | #endif |
duke@435 | 1882 | } |
duke@435 | 1883 | |
duke@435 | 1884 | // ========================= OrderedPair ===================== |
duke@435 | 1885 | |
duke@435 | 1886 | const OrderedPair OrderedPair::initial; |
duke@435 | 1887 | |
duke@435 | 1888 | // ========================= SWNodeInfo ===================== |
duke@435 | 1889 | |
duke@435 | 1890 | const SWNodeInfo SWNodeInfo::initial; |
duke@435 | 1891 | |
duke@435 | 1892 | |
duke@435 | 1893 | // ============================ DepGraph =========================== |
duke@435 | 1894 | |
duke@435 | 1895 | //------------------------------make_node--------------------------- |
duke@435 | 1896 | // Make a new dependence graph node for an ideal node. |
duke@435 | 1897 | DepMem* DepGraph::make_node(Node* node) { |
duke@435 | 1898 | DepMem* m = new (_arena) DepMem(node); |
duke@435 | 1899 | if (node != NULL) { |
duke@435 | 1900 | assert(_map.at_grow(node->_idx) == NULL, "one init only"); |
duke@435 | 1901 | _map.at_put_grow(node->_idx, m); |
duke@435 | 1902 | } |
duke@435 | 1903 | return m; |
duke@435 | 1904 | } |
duke@435 | 1905 | |
duke@435 | 1906 | //------------------------------make_edge--------------------------- |
duke@435 | 1907 | // Make a new dependence graph edge from dpred -> dsucc |
duke@435 | 1908 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { |
duke@435 | 1909 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); |
duke@435 | 1910 | dpred->set_out_head(e); |
duke@435 | 1911 | dsucc->set_in_head(e); |
duke@435 | 1912 | return e; |
duke@435 | 1913 | } |
duke@435 | 1914 | |
duke@435 | 1915 | // ========================== DepMem ======================== |
duke@435 | 1916 | |
duke@435 | 1917 | //------------------------------in_cnt--------------------------- |
duke@435 | 1918 | int DepMem::in_cnt() { |
duke@435 | 1919 | int ct = 0; |
duke@435 | 1920 | for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++; |
duke@435 | 1921 | return ct; |
duke@435 | 1922 | } |
duke@435 | 1923 | |
duke@435 | 1924 | //------------------------------out_cnt--------------------------- |
duke@435 | 1925 | int DepMem::out_cnt() { |
duke@435 | 1926 | int ct = 0; |
duke@435 | 1927 | for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++; |
duke@435 | 1928 | return ct; |
duke@435 | 1929 | } |
duke@435 | 1930 | |
duke@435 | 1931 | //------------------------------print----------------------------- |
duke@435 | 1932 | void DepMem::print() { |
duke@435 | 1933 | #ifndef PRODUCT |
duke@435 | 1934 | tty->print(" DepNode %d (", _node->_idx); |
duke@435 | 1935 | for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) { |
duke@435 | 1936 | Node* pred = p->pred()->node(); |
duke@435 | 1937 | tty->print(" %d", pred != NULL ? pred->_idx : 0); |
duke@435 | 1938 | } |
duke@435 | 1939 | tty->print(") ["); |
duke@435 | 1940 | for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) { |
duke@435 | 1941 | Node* succ = s->succ()->node(); |
duke@435 | 1942 | tty->print(" %d", succ != NULL ? succ->_idx : 0); |
duke@435 | 1943 | } |
duke@435 | 1944 | tty->print_cr(" ]"); |
duke@435 | 1945 | #endif |
duke@435 | 1946 | } |
duke@435 | 1947 | |
duke@435 | 1948 | // =========================== DepEdge ========================= |
duke@435 | 1949 | |
duke@435 | 1950 | //------------------------------DepPreds--------------------------- |
duke@435 | 1951 | void DepEdge::print() { |
duke@435 | 1952 | #ifndef PRODUCT |
duke@435 | 1953 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); |
duke@435 | 1954 | #endif |
duke@435 | 1955 | } |
duke@435 | 1956 | |
duke@435 | 1957 | // =========================== DepPreds ========================= |
duke@435 | 1958 | // Iterator over predecessor edges in the dependence graph. |
duke@435 | 1959 | |
duke@435 | 1960 | //------------------------------DepPreds--------------------------- |
duke@435 | 1961 | DepPreds::DepPreds(Node* n, DepGraph& dg) { |
duke@435 | 1962 | _n = n; |
duke@435 | 1963 | _done = false; |
duke@435 | 1964 | if (_n->is_Store() || _n->is_Load()) { |
duke@435 | 1965 | _next_idx = MemNode::Address; |
duke@435 | 1966 | _end_idx = n->req(); |
duke@435 | 1967 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 1968 | } else if (_n->is_Mem()) { |
duke@435 | 1969 | _next_idx = 0; |
duke@435 | 1970 | _end_idx = 0; |
duke@435 | 1971 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 1972 | } else { |
duke@435 | 1973 | _next_idx = 1; |
duke@435 | 1974 | _end_idx = _n->req(); |
duke@435 | 1975 | _dep_next = NULL; |
duke@435 | 1976 | } |
duke@435 | 1977 | next(); |
duke@435 | 1978 | } |
duke@435 | 1979 | |
duke@435 | 1980 | //------------------------------next--------------------------- |
duke@435 | 1981 | void DepPreds::next() { |
duke@435 | 1982 | if (_dep_next != NULL) { |
duke@435 | 1983 | _current = _dep_next->pred()->node(); |
duke@435 | 1984 | _dep_next = _dep_next->next_in(); |
duke@435 | 1985 | } else if (_next_idx < _end_idx) { |
duke@435 | 1986 | _current = _n->in(_next_idx++); |
duke@435 | 1987 | } else { |
duke@435 | 1988 | _done = true; |
duke@435 | 1989 | } |
duke@435 | 1990 | } |
duke@435 | 1991 | |
duke@435 | 1992 | // =========================== DepSuccs ========================= |
duke@435 | 1993 | // Iterator over successor edges in the dependence graph. |
duke@435 | 1994 | |
duke@435 | 1995 | //------------------------------DepSuccs--------------------------- |
duke@435 | 1996 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { |
duke@435 | 1997 | _n = n; |
duke@435 | 1998 | _done = false; |
duke@435 | 1999 | if (_n->is_Load()) { |
duke@435 | 2000 | _next_idx = 0; |
duke@435 | 2001 | _end_idx = _n->outcnt(); |
duke@435 | 2002 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2003 | } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) { |
duke@435 | 2004 | _next_idx = 0; |
duke@435 | 2005 | _end_idx = 0; |
duke@435 | 2006 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2007 | } else { |
duke@435 | 2008 | _next_idx = 0; |
duke@435 | 2009 | _end_idx = _n->outcnt(); |
duke@435 | 2010 | _dep_next = NULL; |
duke@435 | 2011 | } |
duke@435 | 2012 | next(); |
duke@435 | 2013 | } |
duke@435 | 2014 | |
duke@435 | 2015 | //-------------------------------next--------------------------- |
duke@435 | 2016 | void DepSuccs::next() { |
duke@435 | 2017 | if (_dep_next != NULL) { |
duke@435 | 2018 | _current = _dep_next->succ()->node(); |
duke@435 | 2019 | _dep_next = _dep_next->next_out(); |
duke@435 | 2020 | } else if (_next_idx < _end_idx) { |
duke@435 | 2021 | _current = _n->raw_out(_next_idx++); |
duke@435 | 2022 | } else { |
duke@435 | 2023 | _done = true; |
duke@435 | 2024 | } |
duke@435 | 2025 | } |