Mon, 25 Feb 2008 15:05:44 -0800
6633953: type2aelembytes{T_ADDRESS} should be 8 bytes in 64 bit VM
Summary: T_ADDRESS size is defined as 'int' size (4 bytes) but C2 use it for raw pointers and as memory type for StoreP and LoadP nodes.
Reviewed-by: jrose
duke@435 | 1 | /* |
duke@435 | 2 | * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
duke@435 | 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
duke@435 | 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
duke@435 | 21 | * have any questions. |
duke@435 | 22 | */ |
duke@435 | 23 | |
duke@435 | 24 | #include "incls/_precompiled.incl" |
duke@435 | 25 | #include "incls/_superword.cpp.incl" |
duke@435 | 26 | |
duke@435 | 27 | // |
duke@435 | 28 | // S U P E R W O R D T R A N S F O R M |
duke@435 | 29 | //============================================================================= |
duke@435 | 30 | |
duke@435 | 31 | //------------------------------SuperWord--------------------------- |
duke@435 | 32 | SuperWord::SuperWord(PhaseIdealLoop* phase) : |
duke@435 | 33 | _phase(phase), |
duke@435 | 34 | _igvn(phase->_igvn), |
duke@435 | 35 | _arena(phase->C->comp_arena()), |
duke@435 | 36 | _packset(arena(), 8, 0, NULL), // packs for the current block |
duke@435 | 37 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb |
duke@435 | 38 | _block(arena(), 8, 0, NULL), // nodes in current block |
duke@435 | 39 | _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside |
duke@435 | 40 | _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads |
duke@435 | 41 | _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails |
duke@435 | 42 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node |
duke@435 | 43 | _align_to_ref(NULL), // memory reference to align vectors to |
duke@435 | 44 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs |
duke@435 | 45 | _dg(_arena), // dependence graph |
duke@435 | 46 | _visited(arena()), // visited node set |
duke@435 | 47 | _post_visited(arena()), // post visited node set |
duke@435 | 48 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs |
duke@435 | 49 | _stk(arena(), 8, 0, NULL), // scratch stack of nodes |
duke@435 | 50 | _nlist(arena(), 8, 0, NULL), // scratch list of nodes |
duke@435 | 51 | _lpt(NULL), // loop tree node |
duke@435 | 52 | _lp(NULL), // LoopNode |
duke@435 | 53 | _bb(NULL), // basic block |
duke@435 | 54 | _iv(NULL) // induction var |
duke@435 | 55 | {} |
duke@435 | 56 | |
duke@435 | 57 | //------------------------------transform_loop--------------------------- |
duke@435 | 58 | void SuperWord::transform_loop(IdealLoopTree* lpt) { |
duke@435 | 59 | assert(lpt->_head->is_CountedLoop(), "must be"); |
duke@435 | 60 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
duke@435 | 61 | |
duke@435 | 62 | if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops |
duke@435 | 63 | |
duke@435 | 64 | // Check for no control flow in body (other than exit) |
duke@435 | 65 | Node *cl_exit = cl->loopexit(); |
duke@435 | 66 | if (cl_exit->in(0) != lpt->_head) return; |
duke@435 | 67 | |
duke@435 | 68 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) |
duke@435 | 69 | CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
duke@435 | 70 | if (pre_end == NULL) return; |
duke@435 | 71 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 72 | if (pre_opaq1->Opcode() != Op_Opaque1) return; |
duke@435 | 73 | |
duke@435 | 74 | // Do vectors exist on this architecture? |
duke@435 | 75 | if (vector_width_in_bytes() == 0) return; |
duke@435 | 76 | |
duke@435 | 77 | init(); // initialize data structures |
duke@435 | 78 | |
duke@435 | 79 | set_lpt(lpt); |
duke@435 | 80 | set_lp(cl); |
duke@435 | 81 | |
duke@435 | 82 | // For now, define one block which is the entire loop body |
duke@435 | 83 | set_bb(cl); |
duke@435 | 84 | |
duke@435 | 85 | assert(_packset.length() == 0, "packset must be empty"); |
duke@435 | 86 | SLP_extract(); |
duke@435 | 87 | } |
duke@435 | 88 | |
duke@435 | 89 | //------------------------------SLP_extract--------------------------- |
duke@435 | 90 | // Extract the superword level parallelism |
duke@435 | 91 | // |
duke@435 | 92 | // 1) A reverse post-order of nodes in the block is constructed. By scanning |
duke@435 | 93 | // this list from first to last, all definitions are visited before their uses. |
duke@435 | 94 | // |
duke@435 | 95 | // 2) A point-to-point dependence graph is constructed between memory references. |
duke@435 | 96 | // This simplies the upcoming "independence" checker. |
duke@435 | 97 | // |
duke@435 | 98 | // 3) The maximum depth in the node graph from the beginning of the block |
duke@435 | 99 | // to each node is computed. This is used to prune the graph search |
duke@435 | 100 | // in the independence checker. |
duke@435 | 101 | // |
duke@435 | 102 | // 4) For integer types, the necessary bit width is propagated backwards |
duke@435 | 103 | // from stores to allow packed operations on byte, char, and short |
duke@435 | 104 | // integers. This reverses the promotion to type "int" that javac |
duke@435 | 105 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. |
duke@435 | 106 | // |
duke@435 | 107 | // 5) One of the memory references is picked to be an aligned vector reference. |
duke@435 | 108 | // The pre-loop trip count is adjusted to align this reference in the |
duke@435 | 109 | // unrolled body. |
duke@435 | 110 | // |
duke@435 | 111 | // 6) The initial set of pack pairs is seeded with memory references. |
duke@435 | 112 | // |
duke@435 | 113 | // 7) The set of pack pairs is extended by following use->def and def->use links. |
duke@435 | 114 | // |
duke@435 | 115 | // 8) The pairs are combined into vector sized packs. |
duke@435 | 116 | // |
duke@435 | 117 | // 9) Reorder the memory slices to co-locate members of the memory packs. |
duke@435 | 118 | // |
duke@435 | 119 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, |
duke@435 | 120 | // inserting scalar promotion, vector creation from multiple scalars, and |
duke@435 | 121 | // extraction of scalar values from vectors. |
duke@435 | 122 | // |
duke@435 | 123 | void SuperWord::SLP_extract() { |
duke@435 | 124 | |
duke@435 | 125 | // Ready the block |
duke@435 | 126 | |
duke@435 | 127 | construct_bb(); |
duke@435 | 128 | |
duke@435 | 129 | dependence_graph(); |
duke@435 | 130 | |
duke@435 | 131 | compute_max_depth(); |
duke@435 | 132 | |
duke@435 | 133 | compute_vector_element_type(); |
duke@435 | 134 | |
duke@435 | 135 | // Attempt vectorization |
duke@435 | 136 | |
duke@435 | 137 | find_adjacent_refs(); |
duke@435 | 138 | |
duke@435 | 139 | extend_packlist(); |
duke@435 | 140 | |
duke@435 | 141 | combine_packs(); |
duke@435 | 142 | |
duke@435 | 143 | construct_my_pack_map(); |
duke@435 | 144 | |
duke@435 | 145 | filter_packs(); |
duke@435 | 146 | |
duke@435 | 147 | schedule(); |
duke@435 | 148 | |
duke@435 | 149 | output(); |
duke@435 | 150 | } |
duke@435 | 151 | |
duke@435 | 152 | //------------------------------find_adjacent_refs--------------------------- |
duke@435 | 153 | // Find the adjacent memory references and create pack pairs for them. |
duke@435 | 154 | // This is the initial set of packs that will then be extended by |
duke@435 | 155 | // following use->def and def->use links. The align positions are |
duke@435 | 156 | // assigned relative to the reference "align_to_ref" |
duke@435 | 157 | void SuperWord::find_adjacent_refs() { |
duke@435 | 158 | // Get list of memory operations |
duke@435 | 159 | Node_List memops; |
duke@435 | 160 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 161 | Node* n = _block.at(i); |
kvn@464 | 162 | if (n->is_Mem() && in_bb(n) && |
kvn@464 | 163 | is_java_primitive(n->as_Mem()->memory_type())) { |
duke@435 | 164 | int align = memory_alignment(n->as_Mem(), 0); |
duke@435 | 165 | if (align != bottom_align) { |
duke@435 | 166 | memops.push(n); |
duke@435 | 167 | } |
duke@435 | 168 | } |
duke@435 | 169 | } |
duke@435 | 170 | if (memops.size() == 0) return; |
duke@435 | 171 | |
duke@435 | 172 | // Find a memory reference to align to. The pre-loop trip count |
duke@435 | 173 | // is modified to align this reference to a vector-aligned address |
duke@435 | 174 | find_align_to_ref(memops); |
duke@435 | 175 | if (align_to_ref() == NULL) return; |
duke@435 | 176 | |
duke@435 | 177 | SWPointer align_to_ref_p(align_to_ref(), this); |
duke@435 | 178 | int offset = align_to_ref_p.offset_in_bytes(); |
duke@435 | 179 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 180 | int vw = vector_width_in_bytes(); |
duke@435 | 181 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
duke@435 | 182 | int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
duke@435 | 183 | |
duke@435 | 184 | #ifndef PRODUCT |
duke@435 | 185 | if (TraceSuperWord) |
duke@435 | 186 | tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d", |
duke@435 | 187 | offset, iv_adjustment, align_to_ref_p.memory_size()); |
duke@435 | 188 | #endif |
duke@435 | 189 | |
duke@435 | 190 | // Set alignment relative to "align_to_ref" |
duke@435 | 191 | for (int i = memops.size() - 1; i >= 0; i--) { |
duke@435 | 192 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 193 | SWPointer p2(s, this); |
duke@435 | 194 | if (p2.comparable(align_to_ref_p)) { |
duke@435 | 195 | int align = memory_alignment(s, iv_adjustment); |
duke@435 | 196 | set_alignment(s, align); |
duke@435 | 197 | } else { |
duke@435 | 198 | memops.remove(i); |
duke@435 | 199 | } |
duke@435 | 200 | } |
duke@435 | 201 | |
duke@435 | 202 | // Create initial pack pairs of memory operations |
duke@435 | 203 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 204 | Node* s1 = memops.at(i); |
duke@435 | 205 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 206 | Node* s2 = memops.at(j); |
duke@435 | 207 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
duke@435 | 208 | int align = alignment(s1); |
duke@435 | 209 | if (stmts_can_pack(s1, s2, align)) { |
duke@435 | 210 | Node_List* pair = new Node_List(); |
duke@435 | 211 | pair->push(s1); |
duke@435 | 212 | pair->push(s2); |
duke@435 | 213 | _packset.append(pair); |
duke@435 | 214 | } |
duke@435 | 215 | } |
duke@435 | 216 | } |
duke@435 | 217 | } |
duke@435 | 218 | |
duke@435 | 219 | #ifndef PRODUCT |
duke@435 | 220 | if (TraceSuperWord) { |
duke@435 | 221 | tty->print_cr("\nAfter find_adjacent_refs"); |
duke@435 | 222 | print_packset(); |
duke@435 | 223 | } |
duke@435 | 224 | #endif |
duke@435 | 225 | } |
duke@435 | 226 | |
duke@435 | 227 | //------------------------------find_align_to_ref--------------------------- |
duke@435 | 228 | // Find a memory reference to align the loop induction variable to. |
duke@435 | 229 | // Looks first at stores then at loads, looking for a memory reference |
duke@435 | 230 | // with the largest number of references similar to it. |
duke@435 | 231 | void SuperWord::find_align_to_ref(Node_List &memops) { |
duke@435 | 232 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
duke@435 | 233 | |
duke@435 | 234 | // Count number of comparable memory ops |
duke@435 | 235 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 236 | MemNode* s1 = memops.at(i)->as_Mem(); |
duke@435 | 237 | SWPointer p1(s1, this); |
duke@435 | 238 | // Discard if pre loop can't align this reference |
duke@435 | 239 | if (!ref_is_alignable(p1)) { |
duke@435 | 240 | *cmp_ct.adr_at(i) = 0; |
duke@435 | 241 | continue; |
duke@435 | 242 | } |
duke@435 | 243 | for (uint j = i+1; j < memops.size(); j++) { |
duke@435 | 244 | MemNode* s2 = memops.at(j)->as_Mem(); |
duke@435 | 245 | if (isomorphic(s1, s2)) { |
duke@435 | 246 | SWPointer p2(s2, this); |
duke@435 | 247 | if (p1.comparable(p2)) { |
duke@435 | 248 | (*cmp_ct.adr_at(i))++; |
duke@435 | 249 | (*cmp_ct.adr_at(j))++; |
duke@435 | 250 | } |
duke@435 | 251 | } |
duke@435 | 252 | } |
duke@435 | 253 | } |
duke@435 | 254 | |
duke@435 | 255 | // Find Store (or Load) with the greatest number of "comparable" references |
duke@435 | 256 | int max_ct = 0; |
duke@435 | 257 | int max_idx = -1; |
duke@435 | 258 | int min_size = max_jint; |
duke@435 | 259 | int min_iv_offset = max_jint; |
duke@435 | 260 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 261 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 262 | if (s->is_Store()) { |
duke@435 | 263 | SWPointer p(s, this); |
duke@435 | 264 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 265 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 266 | data_size(s) == min_size && |
duke@435 | 267 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 268 | max_ct = cmp_ct.at(j); |
duke@435 | 269 | max_idx = j; |
duke@435 | 270 | min_size = data_size(s); |
duke@435 | 271 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 272 | } |
duke@435 | 273 | } |
duke@435 | 274 | } |
duke@435 | 275 | // If no stores, look at loads |
duke@435 | 276 | if (max_ct == 0) { |
duke@435 | 277 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 278 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 279 | if (s->is_Load()) { |
duke@435 | 280 | SWPointer p(s, this); |
duke@435 | 281 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 282 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 283 | data_size(s) == min_size && |
duke@435 | 284 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 285 | max_ct = cmp_ct.at(j); |
duke@435 | 286 | max_idx = j; |
duke@435 | 287 | min_size = data_size(s); |
duke@435 | 288 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 289 | } |
duke@435 | 290 | } |
duke@435 | 291 | } |
duke@435 | 292 | } |
duke@435 | 293 | |
duke@435 | 294 | if (max_ct > 0) |
duke@435 | 295 | set_align_to_ref(memops.at(max_idx)->as_Mem()); |
duke@435 | 296 | |
duke@435 | 297 | #ifndef PRODUCT |
duke@435 | 298 | if (TraceSuperWord && Verbose) { |
duke@435 | 299 | tty->print_cr("\nVector memops after find_align_to_refs"); |
duke@435 | 300 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 301 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 302 | s->dump(); |
duke@435 | 303 | } |
duke@435 | 304 | } |
duke@435 | 305 | #endif |
duke@435 | 306 | } |
duke@435 | 307 | |
duke@435 | 308 | //------------------------------ref_is_alignable--------------------------- |
duke@435 | 309 | // Can the preloop align the reference to position zero in the vector? |
duke@435 | 310 | bool SuperWord::ref_is_alignable(SWPointer& p) { |
duke@435 | 311 | if (!p.has_iv()) { |
duke@435 | 312 | return true; // no induction variable |
duke@435 | 313 | } |
duke@435 | 314 | CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop()); |
duke@435 | 315 | assert(pre_end->stride_is_con(), "pre loop stride is constant"); |
duke@435 | 316 | int preloop_stride = pre_end->stride_con(); |
duke@435 | 317 | |
duke@435 | 318 | int span = preloop_stride * p.scale_in_bytes(); |
duke@435 | 319 | |
duke@435 | 320 | // Stride one accesses are alignable. |
duke@435 | 321 | if (ABS(span) == p.memory_size()) |
duke@435 | 322 | return true; |
duke@435 | 323 | |
duke@435 | 324 | // If initial offset from start of object is computable, |
duke@435 | 325 | // compute alignment within the vector. |
duke@435 | 326 | int vw = vector_width_in_bytes(); |
duke@435 | 327 | if (vw % span == 0) { |
duke@435 | 328 | Node* init_nd = pre_end->init_trip(); |
duke@435 | 329 | if (init_nd->is_Con() && p.invar() == NULL) { |
duke@435 | 330 | int init = init_nd->bottom_type()->is_int()->get_con(); |
duke@435 | 331 | |
duke@435 | 332 | int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes(); |
duke@435 | 333 | assert(init_offset >= 0, "positive offset from object start"); |
duke@435 | 334 | |
duke@435 | 335 | if (span > 0) { |
duke@435 | 336 | return (vw - (init_offset % vw)) % span == 0; |
duke@435 | 337 | } else { |
duke@435 | 338 | assert(span < 0, "nonzero stride * scale"); |
duke@435 | 339 | return (init_offset % vw) % -span == 0; |
duke@435 | 340 | } |
duke@435 | 341 | } |
duke@435 | 342 | } |
duke@435 | 343 | return false; |
duke@435 | 344 | } |
duke@435 | 345 | |
duke@435 | 346 | //---------------------------dependence_graph--------------------------- |
duke@435 | 347 | // Construct dependency graph. |
duke@435 | 348 | // Add dependence edges to load/store nodes for memory dependence |
duke@435 | 349 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) |
duke@435 | 350 | void SuperWord::dependence_graph() { |
duke@435 | 351 | // First, assign a dependence node to each memory node |
duke@435 | 352 | for (int i = 0; i < _block.length(); i++ ) { |
duke@435 | 353 | Node *n = _block.at(i); |
duke@435 | 354 | if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) { |
duke@435 | 355 | _dg.make_node(n); |
duke@435 | 356 | } |
duke@435 | 357 | } |
duke@435 | 358 | |
duke@435 | 359 | // For each memory slice, create the dependences |
duke@435 | 360 | for (int i = 0; i < _mem_slice_head.length(); i++) { |
duke@435 | 361 | Node* n = _mem_slice_head.at(i); |
duke@435 | 362 | Node* n_tail = _mem_slice_tail.at(i); |
duke@435 | 363 | |
duke@435 | 364 | // Get slice in predecessor order (last is first) |
duke@435 | 365 | mem_slice_preds(n_tail, n, _nlist); |
duke@435 | 366 | |
duke@435 | 367 | // Make the slice dependent on the root |
duke@435 | 368 | DepMem* slice = _dg.dep(n); |
duke@435 | 369 | _dg.make_edge(_dg.root(), slice); |
duke@435 | 370 | |
duke@435 | 371 | // Create a sink for the slice |
duke@435 | 372 | DepMem* slice_sink = _dg.make_node(NULL); |
duke@435 | 373 | _dg.make_edge(slice_sink, _dg.tail()); |
duke@435 | 374 | |
duke@435 | 375 | // Now visit each pair of memory ops, creating the edges |
duke@435 | 376 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { |
duke@435 | 377 | Node* s1 = _nlist.at(j); |
duke@435 | 378 | |
duke@435 | 379 | // If no dependency yet, use slice |
duke@435 | 380 | if (_dg.dep(s1)->in_cnt() == 0) { |
duke@435 | 381 | _dg.make_edge(slice, s1); |
duke@435 | 382 | } |
duke@435 | 383 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 384 | bool sink_dependent = true; |
duke@435 | 385 | for (int k = j - 1; k >= 0; k--) { |
duke@435 | 386 | Node* s2 = _nlist.at(k); |
duke@435 | 387 | if (s1->is_Load() && s2->is_Load()) |
duke@435 | 388 | continue; |
duke@435 | 389 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 390 | |
duke@435 | 391 | int cmp = p1.cmp(p2); |
duke@435 | 392 | if (SuperWordRTDepCheck && |
duke@435 | 393 | p1.base() != p2.base() && p1.valid() && p2.valid()) { |
duke@435 | 394 | // Create a runtime check to disambiguate |
duke@435 | 395 | OrderedPair pp(p1.base(), p2.base()); |
duke@435 | 396 | _disjoint_ptrs.append_if_missing(pp); |
duke@435 | 397 | } else if (!SWPointer::not_equal(cmp)) { |
duke@435 | 398 | // Possibly same address |
duke@435 | 399 | _dg.make_edge(s1, s2); |
duke@435 | 400 | sink_dependent = false; |
duke@435 | 401 | } |
duke@435 | 402 | } |
duke@435 | 403 | if (sink_dependent) { |
duke@435 | 404 | _dg.make_edge(s1, slice_sink); |
duke@435 | 405 | } |
duke@435 | 406 | } |
duke@435 | 407 | #ifndef PRODUCT |
duke@435 | 408 | if (TraceSuperWord) { |
duke@435 | 409 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
duke@435 | 410 | for (int q = 0; q < _nlist.length(); q++) { |
duke@435 | 411 | _dg.print(_nlist.at(q)); |
duke@435 | 412 | } |
duke@435 | 413 | tty->cr(); |
duke@435 | 414 | } |
duke@435 | 415 | #endif |
duke@435 | 416 | _nlist.clear(); |
duke@435 | 417 | } |
duke@435 | 418 | |
duke@435 | 419 | #ifndef PRODUCT |
duke@435 | 420 | if (TraceSuperWord) { |
duke@435 | 421 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
duke@435 | 422 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
duke@435 | 423 | _disjoint_ptrs.at(r).print(); |
duke@435 | 424 | tty->cr(); |
duke@435 | 425 | } |
duke@435 | 426 | tty->cr(); |
duke@435 | 427 | } |
duke@435 | 428 | #endif |
duke@435 | 429 | } |
duke@435 | 430 | |
duke@435 | 431 | //---------------------------mem_slice_preds--------------------------- |
duke@435 | 432 | // Return a memory slice (node list) in predecessor order starting at "start" |
duke@435 | 433 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
duke@435 | 434 | assert(preds.length() == 0, "start empty"); |
duke@435 | 435 | Node* n = start; |
duke@435 | 436 | Node* prev = NULL; |
duke@435 | 437 | while (true) { |
duke@435 | 438 | assert(in_bb(n), "must be in block"); |
duke@435 | 439 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 440 | Node* out = n->fast_out(i); |
duke@435 | 441 | if (out->is_Load()) { |
duke@435 | 442 | if (in_bb(out)) { |
duke@435 | 443 | preds.push(out); |
duke@435 | 444 | } |
duke@435 | 445 | } else { |
duke@435 | 446 | // FIXME |
duke@435 | 447 | if (out->is_MergeMem() && !in_bb(out)) { |
duke@435 | 448 | // Either unrolling is causing a memory edge not to disappear, |
duke@435 | 449 | // or need to run igvn.optimize() again before SLP |
duke@435 | 450 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { |
duke@435 | 451 | // Ditto. Not sure what else to check further. |
duke@435 | 452 | } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) { |
duke@435 | 453 | // StoreCM has an input edge used as a precedence edge. |
duke@435 | 454 | // Maybe an issue when oop stores are vectorized. |
duke@435 | 455 | } else { |
duke@435 | 456 | assert(out == prev || prev == NULL, "no branches off of store slice"); |
duke@435 | 457 | } |
duke@435 | 458 | } |
duke@435 | 459 | } |
duke@435 | 460 | if (n == stop) break; |
duke@435 | 461 | preds.push(n); |
duke@435 | 462 | prev = n; |
duke@435 | 463 | n = n->in(MemNode::Memory); |
duke@435 | 464 | } |
duke@435 | 465 | } |
duke@435 | 466 | |
duke@435 | 467 | //------------------------------stmts_can_pack--------------------------- |
duke@435 | 468 | // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and |
duke@435 | 469 | // s1 aligned at "align" |
duke@435 | 470 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
duke@435 | 471 | if (isomorphic(s1, s2)) { |
duke@435 | 472 | if (independent(s1, s2)) { |
duke@435 | 473 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
duke@435 | 474 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
duke@435 | 475 | int s1_align = alignment(s1); |
duke@435 | 476 | int s2_align = alignment(s2); |
duke@435 | 477 | if (s1_align == top_align || s1_align == align) { |
duke@435 | 478 | if (s2_align == top_align || s2_align == align + data_size(s1)) { |
duke@435 | 479 | return true; |
duke@435 | 480 | } |
duke@435 | 481 | } |
duke@435 | 482 | } |
duke@435 | 483 | } |
duke@435 | 484 | } |
duke@435 | 485 | } |
duke@435 | 486 | return false; |
duke@435 | 487 | } |
duke@435 | 488 | |
duke@435 | 489 | //------------------------------exists_at--------------------------- |
duke@435 | 490 | // Does s exist in a pack at position pos? |
duke@435 | 491 | bool SuperWord::exists_at(Node* s, uint pos) { |
duke@435 | 492 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 493 | Node_List* p = _packset.at(i); |
duke@435 | 494 | if (p->at(pos) == s) { |
duke@435 | 495 | return true; |
duke@435 | 496 | } |
duke@435 | 497 | } |
duke@435 | 498 | return false; |
duke@435 | 499 | } |
duke@435 | 500 | |
duke@435 | 501 | //------------------------------are_adjacent_refs--------------------------- |
duke@435 | 502 | // Is s1 immediately before s2 in memory? |
duke@435 | 503 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { |
duke@435 | 504 | if (!s1->is_Mem() || !s2->is_Mem()) return false; |
duke@435 | 505 | if (!in_bb(s1) || !in_bb(s2)) return false; |
duke@435 | 506 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so |
duke@435 | 507 | // only pack memops that are in the same alias set until that's fixed. |
duke@435 | 508 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != |
duke@435 | 509 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) |
duke@435 | 510 | return false; |
duke@435 | 511 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 512 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 513 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; |
duke@435 | 514 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); |
duke@435 | 515 | return diff == data_size(s1); |
duke@435 | 516 | } |
duke@435 | 517 | |
duke@435 | 518 | //------------------------------isomorphic--------------------------- |
duke@435 | 519 | // Are s1 and s2 similar? |
duke@435 | 520 | bool SuperWord::isomorphic(Node* s1, Node* s2) { |
duke@435 | 521 | if (s1->Opcode() != s2->Opcode()) return false; |
duke@435 | 522 | if (s1->req() != s2->req()) return false; |
duke@435 | 523 | if (s1->in(0) != s2->in(0)) return false; |
duke@435 | 524 | if (velt_type(s1) != velt_type(s2)) return false; |
duke@435 | 525 | return true; |
duke@435 | 526 | } |
duke@435 | 527 | |
duke@435 | 528 | //------------------------------independent--------------------------- |
duke@435 | 529 | // Is there no data path from s1 to s2 or s2 to s1? |
duke@435 | 530 | bool SuperWord::independent(Node* s1, Node* s2) { |
duke@435 | 531 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); |
duke@435 | 532 | int d1 = depth(s1); |
duke@435 | 533 | int d2 = depth(s2); |
duke@435 | 534 | if (d1 == d2) return s1 != s2; |
duke@435 | 535 | Node* deep = d1 > d2 ? s1 : s2; |
duke@435 | 536 | Node* shallow = d1 > d2 ? s2 : s1; |
duke@435 | 537 | |
duke@435 | 538 | visited_clear(); |
duke@435 | 539 | |
duke@435 | 540 | return independent_path(shallow, deep); |
duke@435 | 541 | } |
duke@435 | 542 | |
duke@435 | 543 | //------------------------------independent_path------------------------------ |
duke@435 | 544 | // Helper for independent |
duke@435 | 545 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { |
duke@435 | 546 | if (dp >= 1000) return false; // stop deep recursion |
duke@435 | 547 | visited_set(deep); |
duke@435 | 548 | int shal_depth = depth(shallow); |
duke@435 | 549 | assert(shal_depth <= depth(deep), "must be"); |
duke@435 | 550 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { |
duke@435 | 551 | Node* pred = preds.current(); |
duke@435 | 552 | if (in_bb(pred) && !visited_test(pred)) { |
duke@435 | 553 | if (shallow == pred) { |
duke@435 | 554 | return false; |
duke@435 | 555 | } |
duke@435 | 556 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { |
duke@435 | 557 | return false; |
duke@435 | 558 | } |
duke@435 | 559 | } |
duke@435 | 560 | } |
duke@435 | 561 | return true; |
duke@435 | 562 | } |
duke@435 | 563 | |
duke@435 | 564 | //------------------------------set_alignment--------------------------- |
duke@435 | 565 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
duke@435 | 566 | set_alignment(s1, align); |
duke@435 | 567 | set_alignment(s2, align + data_size(s1)); |
duke@435 | 568 | } |
duke@435 | 569 | |
duke@435 | 570 | //------------------------------data_size--------------------------- |
duke@435 | 571 | int SuperWord::data_size(Node* s) { |
duke@435 | 572 | const Type* t = velt_type(s); |
duke@435 | 573 | BasicType bt = t->array_element_basic_type(); |
kvn@464 | 574 | int bsize = type2aelembytes(bt); |
duke@435 | 575 | assert(bsize != 0, "valid size"); |
duke@435 | 576 | return bsize; |
duke@435 | 577 | } |
duke@435 | 578 | |
duke@435 | 579 | //------------------------------extend_packlist--------------------------- |
duke@435 | 580 | // Extend packset by following use->def and def->use links from pack members. |
duke@435 | 581 | void SuperWord::extend_packlist() { |
duke@435 | 582 | bool changed; |
duke@435 | 583 | do { |
duke@435 | 584 | changed = false; |
duke@435 | 585 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 586 | Node_List* p = _packset.at(i); |
duke@435 | 587 | changed |= follow_use_defs(p); |
duke@435 | 588 | changed |= follow_def_uses(p); |
duke@435 | 589 | } |
duke@435 | 590 | } while (changed); |
duke@435 | 591 | |
duke@435 | 592 | #ifndef PRODUCT |
duke@435 | 593 | if (TraceSuperWord) { |
duke@435 | 594 | tty->print_cr("\nAfter extend_packlist"); |
duke@435 | 595 | print_packset(); |
duke@435 | 596 | } |
duke@435 | 597 | #endif |
duke@435 | 598 | } |
duke@435 | 599 | |
duke@435 | 600 | //------------------------------follow_use_defs--------------------------- |
duke@435 | 601 | // Extend the packset by visiting operand definitions of nodes in pack p |
duke@435 | 602 | bool SuperWord::follow_use_defs(Node_List* p) { |
duke@435 | 603 | Node* s1 = p->at(0); |
duke@435 | 604 | Node* s2 = p->at(1); |
duke@435 | 605 | assert(p->size() == 2, "just checking"); |
duke@435 | 606 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 607 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 608 | |
duke@435 | 609 | if (s1->is_Load()) return false; |
duke@435 | 610 | |
duke@435 | 611 | int align = alignment(s1); |
duke@435 | 612 | bool changed = false; |
duke@435 | 613 | int start = s1->is_Store() ? MemNode::ValueIn : 1; |
duke@435 | 614 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); |
duke@435 | 615 | for (int j = start; j < end; j++) { |
duke@435 | 616 | Node* t1 = s1->in(j); |
duke@435 | 617 | Node* t2 = s2->in(j); |
duke@435 | 618 | if (!in_bb(t1) || !in_bb(t2)) |
duke@435 | 619 | continue; |
duke@435 | 620 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 621 | if (est_savings(t1, t2) >= 0) { |
duke@435 | 622 | Node_List* pair = new Node_List(); |
duke@435 | 623 | pair->push(t1); |
duke@435 | 624 | pair->push(t2); |
duke@435 | 625 | _packset.append(pair); |
duke@435 | 626 | set_alignment(t1, t2, align); |
duke@435 | 627 | changed = true; |
duke@435 | 628 | } |
duke@435 | 629 | } |
duke@435 | 630 | } |
duke@435 | 631 | return changed; |
duke@435 | 632 | } |
duke@435 | 633 | |
duke@435 | 634 | //------------------------------follow_def_uses--------------------------- |
duke@435 | 635 | // Extend the packset by visiting uses of nodes in pack p |
duke@435 | 636 | bool SuperWord::follow_def_uses(Node_List* p) { |
duke@435 | 637 | bool changed = false; |
duke@435 | 638 | Node* s1 = p->at(0); |
duke@435 | 639 | Node* s2 = p->at(1); |
duke@435 | 640 | assert(p->size() == 2, "just checking"); |
duke@435 | 641 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 642 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 643 | |
duke@435 | 644 | if (s1->is_Store()) return false; |
duke@435 | 645 | |
duke@435 | 646 | int align = alignment(s1); |
duke@435 | 647 | int savings = -1; |
duke@435 | 648 | Node* u1 = NULL; |
duke@435 | 649 | Node* u2 = NULL; |
duke@435 | 650 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 651 | Node* t1 = s1->fast_out(i); |
duke@435 | 652 | if (!in_bb(t1)) continue; |
duke@435 | 653 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 654 | Node* t2 = s2->fast_out(j); |
duke@435 | 655 | if (!in_bb(t2)) continue; |
duke@435 | 656 | if (!opnd_positions_match(s1, t1, s2, t2)) |
duke@435 | 657 | continue; |
duke@435 | 658 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 659 | int my_savings = est_savings(t1, t2); |
duke@435 | 660 | if (my_savings > savings) { |
duke@435 | 661 | savings = my_savings; |
duke@435 | 662 | u1 = t1; |
duke@435 | 663 | u2 = t2; |
duke@435 | 664 | } |
duke@435 | 665 | } |
duke@435 | 666 | } |
duke@435 | 667 | } |
duke@435 | 668 | if (savings >= 0) { |
duke@435 | 669 | Node_List* pair = new Node_List(); |
duke@435 | 670 | pair->push(u1); |
duke@435 | 671 | pair->push(u2); |
duke@435 | 672 | _packset.append(pair); |
duke@435 | 673 | set_alignment(u1, u2, align); |
duke@435 | 674 | changed = true; |
duke@435 | 675 | } |
duke@435 | 676 | return changed; |
duke@435 | 677 | } |
duke@435 | 678 | |
duke@435 | 679 | //---------------------------opnd_positions_match------------------------- |
duke@435 | 680 | // Is the use of d1 in u1 at the same operand position as d2 in u2? |
duke@435 | 681 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { |
duke@435 | 682 | uint ct = u1->req(); |
duke@435 | 683 | if (ct != u2->req()) return false; |
duke@435 | 684 | uint i1 = 0; |
duke@435 | 685 | uint i2 = 0; |
duke@435 | 686 | do { |
duke@435 | 687 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
duke@435 | 688 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
duke@435 | 689 | if (i1 != i2) { |
duke@435 | 690 | return false; |
duke@435 | 691 | } |
duke@435 | 692 | } while (i1 < ct); |
duke@435 | 693 | return true; |
duke@435 | 694 | } |
duke@435 | 695 | |
duke@435 | 696 | //------------------------------est_savings--------------------------- |
duke@435 | 697 | // Estimate the savings from executing s1 and s2 as a pack |
duke@435 | 698 | int SuperWord::est_savings(Node* s1, Node* s2) { |
duke@435 | 699 | int save = 2 - 1; // 2 operations per instruction in packed form |
duke@435 | 700 | |
duke@435 | 701 | // inputs |
duke@435 | 702 | for (uint i = 1; i < s1->req(); i++) { |
duke@435 | 703 | Node* x1 = s1->in(i); |
duke@435 | 704 | Node* x2 = s2->in(i); |
duke@435 | 705 | if (x1 != x2) { |
duke@435 | 706 | if (are_adjacent_refs(x1, x2)) { |
duke@435 | 707 | save += adjacent_profit(x1, x2); |
duke@435 | 708 | } else if (!in_packset(x1, x2)) { |
duke@435 | 709 | save -= pack_cost(2); |
duke@435 | 710 | } else { |
duke@435 | 711 | save += unpack_cost(2); |
duke@435 | 712 | } |
duke@435 | 713 | } |
duke@435 | 714 | } |
duke@435 | 715 | |
duke@435 | 716 | // uses of result |
duke@435 | 717 | uint ct = 0; |
duke@435 | 718 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 719 | Node* s1_use = s1->fast_out(i); |
duke@435 | 720 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 721 | Node_List* p = _packset.at(j); |
duke@435 | 722 | if (p->at(0) == s1_use) { |
duke@435 | 723 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 724 | Node* s2_use = s2->fast_out(k); |
duke@435 | 725 | if (p->at(p->size()-1) == s2_use) { |
duke@435 | 726 | ct++; |
duke@435 | 727 | if (are_adjacent_refs(s1_use, s2_use)) { |
duke@435 | 728 | save += adjacent_profit(s1_use, s2_use); |
duke@435 | 729 | } |
duke@435 | 730 | } |
duke@435 | 731 | } |
duke@435 | 732 | } |
duke@435 | 733 | } |
duke@435 | 734 | } |
duke@435 | 735 | |
duke@435 | 736 | if (ct < s1->outcnt()) save += unpack_cost(1); |
duke@435 | 737 | if (ct < s2->outcnt()) save += unpack_cost(1); |
duke@435 | 738 | |
duke@435 | 739 | return save; |
duke@435 | 740 | } |
duke@435 | 741 | |
duke@435 | 742 | //------------------------------costs--------------------------- |
duke@435 | 743 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
duke@435 | 744 | int SuperWord::pack_cost(int ct) { return ct; } |
duke@435 | 745 | int SuperWord::unpack_cost(int ct) { return ct; } |
duke@435 | 746 | |
duke@435 | 747 | //------------------------------combine_packs--------------------------- |
duke@435 | 748 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
duke@435 | 749 | void SuperWord::combine_packs() { |
duke@435 | 750 | bool changed; |
duke@435 | 751 | do { |
duke@435 | 752 | changed = false; |
duke@435 | 753 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 754 | Node_List* p1 = _packset.at(i); |
duke@435 | 755 | if (p1 == NULL) continue; |
duke@435 | 756 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 757 | Node_List* p2 = _packset.at(j); |
duke@435 | 758 | if (p2 == NULL) continue; |
duke@435 | 759 | if (p1->at(p1->size()-1) == p2->at(0)) { |
duke@435 | 760 | for (uint k = 1; k < p2->size(); k++) { |
duke@435 | 761 | p1->push(p2->at(k)); |
duke@435 | 762 | } |
duke@435 | 763 | _packset.at_put(j, NULL); |
duke@435 | 764 | changed = true; |
duke@435 | 765 | } |
duke@435 | 766 | } |
duke@435 | 767 | } |
duke@435 | 768 | } while (changed); |
duke@435 | 769 | |
duke@435 | 770 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 771 | Node_List* p1 = _packset.at(i); |
duke@435 | 772 | if (p1 == NULL) { |
duke@435 | 773 | _packset.remove_at(i); |
duke@435 | 774 | } |
duke@435 | 775 | } |
duke@435 | 776 | |
duke@435 | 777 | #ifndef PRODUCT |
duke@435 | 778 | if (TraceSuperWord) { |
duke@435 | 779 | tty->print_cr("\nAfter combine_packs"); |
duke@435 | 780 | print_packset(); |
duke@435 | 781 | } |
duke@435 | 782 | #endif |
duke@435 | 783 | } |
duke@435 | 784 | |
duke@435 | 785 | //-----------------------------construct_my_pack_map-------------------------- |
duke@435 | 786 | // Construct the map from nodes to packs. Only valid after the |
duke@435 | 787 | // point where a node is only in one pack (after combine_packs). |
duke@435 | 788 | void SuperWord::construct_my_pack_map() { |
duke@435 | 789 | Node_List* rslt = NULL; |
duke@435 | 790 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 791 | Node_List* p = _packset.at(i); |
duke@435 | 792 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 793 | Node* s = p->at(j); |
duke@435 | 794 | assert(my_pack(s) == NULL, "only in one pack"); |
duke@435 | 795 | set_my_pack(s, p); |
duke@435 | 796 | } |
duke@435 | 797 | } |
duke@435 | 798 | } |
duke@435 | 799 | |
duke@435 | 800 | //------------------------------filter_packs--------------------------- |
duke@435 | 801 | // Remove packs that are not implemented or not profitable. |
duke@435 | 802 | void SuperWord::filter_packs() { |
duke@435 | 803 | |
duke@435 | 804 | // Remove packs that are not implemented |
duke@435 | 805 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 806 | Node_List* pk = _packset.at(i); |
duke@435 | 807 | bool impl = implemented(pk); |
duke@435 | 808 | if (!impl) { |
duke@435 | 809 | #ifndef PRODUCT |
duke@435 | 810 | if (TraceSuperWord && Verbose) { |
duke@435 | 811 | tty->print_cr("Unimplemented"); |
duke@435 | 812 | pk->at(0)->dump(); |
duke@435 | 813 | } |
duke@435 | 814 | #endif |
duke@435 | 815 | remove_pack_at(i); |
duke@435 | 816 | } |
duke@435 | 817 | } |
duke@435 | 818 | |
duke@435 | 819 | // Remove packs that are not profitable |
duke@435 | 820 | bool changed; |
duke@435 | 821 | do { |
duke@435 | 822 | changed = false; |
duke@435 | 823 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 824 | Node_List* pk = _packset.at(i); |
duke@435 | 825 | bool prof = profitable(pk); |
duke@435 | 826 | if (!prof) { |
duke@435 | 827 | #ifndef PRODUCT |
duke@435 | 828 | if (TraceSuperWord && Verbose) { |
duke@435 | 829 | tty->print_cr("Unprofitable"); |
duke@435 | 830 | pk->at(0)->dump(); |
duke@435 | 831 | } |
duke@435 | 832 | #endif |
duke@435 | 833 | remove_pack_at(i); |
duke@435 | 834 | changed = true; |
duke@435 | 835 | } |
duke@435 | 836 | } |
duke@435 | 837 | } while (changed); |
duke@435 | 838 | |
duke@435 | 839 | #ifndef PRODUCT |
duke@435 | 840 | if (TraceSuperWord) { |
duke@435 | 841 | tty->print_cr("\nAfter filter_packs"); |
duke@435 | 842 | print_packset(); |
duke@435 | 843 | tty->cr(); |
duke@435 | 844 | } |
duke@435 | 845 | #endif |
duke@435 | 846 | } |
duke@435 | 847 | |
duke@435 | 848 | //------------------------------implemented--------------------------- |
duke@435 | 849 | // Can code be generated for pack p? |
duke@435 | 850 | bool SuperWord::implemented(Node_List* p) { |
duke@435 | 851 | Node* p0 = p->at(0); |
duke@435 | 852 | int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); |
duke@435 | 853 | return vopc > 0 && Matcher::has_match_rule(vopc); |
duke@435 | 854 | } |
duke@435 | 855 | |
duke@435 | 856 | //------------------------------profitable--------------------------- |
duke@435 | 857 | // For pack p, are all operands and all uses (with in the block) vector? |
duke@435 | 858 | bool SuperWord::profitable(Node_List* p) { |
duke@435 | 859 | Node* p0 = p->at(0); |
duke@435 | 860 | uint start, end; |
duke@435 | 861 | vector_opd_range(p0, &start, &end); |
duke@435 | 862 | |
duke@435 | 863 | // Return false if some input is not vector and inside block |
duke@435 | 864 | for (uint i = start; i < end; i++) { |
duke@435 | 865 | if (!is_vector_use(p0, i)) { |
duke@435 | 866 | // For now, return false if not scalar promotion case (inputs are the same.) |
duke@435 | 867 | // Later, implement PackNode and allow differring, non-vector inputs |
duke@435 | 868 | // (maybe just the ones from outside the block.) |
duke@435 | 869 | Node* p0_def = p0->in(i); |
duke@435 | 870 | for (uint j = 1; j < p->size(); j++) { |
duke@435 | 871 | Node* use = p->at(j); |
duke@435 | 872 | Node* def = use->in(i); |
duke@435 | 873 | if (p0_def != def) |
duke@435 | 874 | return false; |
duke@435 | 875 | } |
duke@435 | 876 | } |
duke@435 | 877 | } |
duke@435 | 878 | if (!p0->is_Store()) { |
duke@435 | 879 | // For now, return false if not all uses are vector. |
duke@435 | 880 | // Later, implement ExtractNode and allow non-vector uses (maybe |
duke@435 | 881 | // just the ones outside the block.) |
duke@435 | 882 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 883 | Node* def = p->at(i); |
duke@435 | 884 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 885 | Node* use = def->fast_out(j); |
duke@435 | 886 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 887 | Node* n = use->in(k); |
duke@435 | 888 | if (def == n) { |
duke@435 | 889 | if (!is_vector_use(use, k)) { |
duke@435 | 890 | return false; |
duke@435 | 891 | } |
duke@435 | 892 | } |
duke@435 | 893 | } |
duke@435 | 894 | } |
duke@435 | 895 | } |
duke@435 | 896 | } |
duke@435 | 897 | return true; |
duke@435 | 898 | } |
duke@435 | 899 | |
duke@435 | 900 | //------------------------------schedule--------------------------- |
duke@435 | 901 | // Adjust the memory graph for the packed operations |
duke@435 | 902 | void SuperWord::schedule() { |
duke@435 | 903 | |
duke@435 | 904 | // Co-locate in the memory graph the members of each memory pack |
duke@435 | 905 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 906 | co_locate_pack(_packset.at(i)); |
duke@435 | 907 | } |
duke@435 | 908 | } |
duke@435 | 909 | |
duke@435 | 910 | //------------------------------co_locate_pack--------------------------- |
duke@435 | 911 | // Within a pack, move stores down to the last executed store, |
duke@435 | 912 | // and move loads up to the first executed load. |
duke@435 | 913 | void SuperWord::co_locate_pack(Node_List* pk) { |
duke@435 | 914 | if (pk->at(0)->is_Store()) { |
duke@435 | 915 | // Push Stores down towards last executed pack member |
duke@435 | 916 | MemNode* first = executed_first(pk)->as_Mem(); |
duke@435 | 917 | MemNode* last = executed_last(pk)->as_Mem(); |
duke@435 | 918 | MemNode* insert_pt = last; |
duke@435 | 919 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); |
duke@435 | 920 | while (true) { |
duke@435 | 921 | assert(in_bb(current), "stay in block"); |
duke@435 | 922 | Node* my_mem = current->in(MemNode::Memory); |
duke@435 | 923 | if (in_pack(current, pk)) { |
duke@435 | 924 | // Forward users of my memory state to my input memory state |
duke@435 | 925 | _igvn.hash_delete(current); |
duke@435 | 926 | _igvn.hash_delete(my_mem); |
duke@435 | 927 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
duke@435 | 928 | Node* use = current->out(i); |
duke@435 | 929 | if (use->is_Mem()) { |
duke@435 | 930 | assert(use->in(MemNode::Memory) == current, "must be"); |
duke@435 | 931 | _igvn.hash_delete(use); |
duke@435 | 932 | use->set_req(MemNode::Memory, my_mem); |
duke@435 | 933 | _igvn._worklist.push(use); |
duke@435 | 934 | --i; // deleted this edge; rescan position |
duke@435 | 935 | } |
duke@435 | 936 | } |
duke@435 | 937 | // put current immediately before insert_pt |
duke@435 | 938 | current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory)); |
duke@435 | 939 | _igvn.hash_delete(insert_pt); |
duke@435 | 940 | insert_pt->set_req(MemNode::Memory, current); |
duke@435 | 941 | _igvn._worklist.push(insert_pt); |
duke@435 | 942 | _igvn._worklist.push(current); |
duke@435 | 943 | insert_pt = current; |
duke@435 | 944 | } |
duke@435 | 945 | if (current == first) break; |
duke@435 | 946 | current = my_mem->as_Mem(); |
duke@435 | 947 | } |
duke@435 | 948 | } else if (pk->at(0)->is_Load()) { |
duke@435 | 949 | // Pull Loads up towards first executed pack member |
duke@435 | 950 | LoadNode* first = executed_first(pk)->as_Load(); |
duke@435 | 951 | Node* first_mem = first->in(MemNode::Memory); |
duke@435 | 952 | _igvn.hash_delete(first_mem); |
duke@435 | 953 | // Give each load same memory state as first |
duke@435 | 954 | for (uint i = 0; i < pk->size(); i++) { |
duke@435 | 955 | LoadNode* ld = pk->at(i)->as_Load(); |
duke@435 | 956 | _igvn.hash_delete(ld); |
duke@435 | 957 | ld->set_req(MemNode::Memory, first_mem); |
duke@435 | 958 | _igvn._worklist.push(ld); |
duke@435 | 959 | } |
duke@435 | 960 | } |
duke@435 | 961 | } |
duke@435 | 962 | |
duke@435 | 963 | //------------------------------output--------------------------- |
duke@435 | 964 | // Convert packs into vector node operations |
duke@435 | 965 | void SuperWord::output() { |
duke@435 | 966 | if (_packset.length() == 0) return; |
duke@435 | 967 | |
duke@435 | 968 | // MUST ENSURE main loop's initial value is properly aligned: |
duke@435 | 969 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 |
duke@435 | 970 | |
duke@435 | 971 | align_initial_loop_index(align_to_ref()); |
duke@435 | 972 | |
duke@435 | 973 | // Insert extract (unpack) operations for scalar uses |
duke@435 | 974 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 975 | insert_extracts(_packset.at(i)); |
duke@435 | 976 | } |
duke@435 | 977 | |
duke@435 | 978 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 979 | Node* n = _block.at(i); |
duke@435 | 980 | Node_List* p = my_pack(n); |
duke@435 | 981 | if (p && n == executed_last(p)) { |
duke@435 | 982 | uint vlen = p->size(); |
duke@435 | 983 | Node* vn = NULL; |
duke@435 | 984 | Node* low_adr = p->at(0); |
duke@435 | 985 | Node* first = executed_first(p); |
duke@435 | 986 | if (n->is_Load()) { |
duke@435 | 987 | int opc = n->Opcode(); |
duke@435 | 988 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 989 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 990 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 991 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 992 | vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); |
duke@435 | 993 | |
duke@435 | 994 | } else if (n->is_Store()) { |
duke@435 | 995 | // Promote value to be stored to vector |
duke@435 | 996 | VectorNode* val = vector_opd(p, MemNode::ValueIn); |
duke@435 | 997 | |
duke@435 | 998 | int opc = n->Opcode(); |
duke@435 | 999 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1000 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1001 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1002 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1003 | vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
duke@435 | 1004 | |
duke@435 | 1005 | } else if (n->req() == 3) { |
duke@435 | 1006 | // Promote operands to vector |
duke@435 | 1007 | Node* in1 = vector_opd(p, 1); |
duke@435 | 1008 | Node* in2 = vector_opd(p, 2); |
duke@435 | 1009 | vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); |
duke@435 | 1010 | |
duke@435 | 1011 | } else { |
duke@435 | 1012 | ShouldNotReachHere(); |
duke@435 | 1013 | } |
duke@435 | 1014 | |
duke@435 | 1015 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1016 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
duke@435 | 1017 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 1018 | Node* pm = p->at(j); |
duke@435 | 1019 | _igvn.hash_delete(pm); |
duke@435 | 1020 | _igvn.subsume_node(pm, vn); |
duke@435 | 1021 | } |
duke@435 | 1022 | _igvn._worklist.push(vn); |
duke@435 | 1023 | } |
duke@435 | 1024 | } |
duke@435 | 1025 | } |
duke@435 | 1026 | |
duke@435 | 1027 | //------------------------------vector_opd--------------------------- |
duke@435 | 1028 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) |
duke@435 | 1029 | VectorNode* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
duke@435 | 1030 | Node* p0 = p->at(0); |
duke@435 | 1031 | uint vlen = p->size(); |
duke@435 | 1032 | Node* opd = p0->in(opd_idx); |
duke@435 | 1033 | |
duke@435 | 1034 | bool same_opd = true; |
duke@435 | 1035 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1036 | Node* pi = p->at(i); |
duke@435 | 1037 | Node* in = pi->in(opd_idx); |
duke@435 | 1038 | if (opd != in) { |
duke@435 | 1039 | same_opd = false; |
duke@435 | 1040 | break; |
duke@435 | 1041 | } |
duke@435 | 1042 | } |
duke@435 | 1043 | |
duke@435 | 1044 | if (same_opd) { |
duke@435 | 1045 | if (opd->is_Vector()) { |
duke@435 | 1046 | return (VectorNode*)opd; // input is matching vector |
duke@435 | 1047 | } |
duke@435 | 1048 | // Convert scalar input to vector. Use p0's type because it's container |
duke@435 | 1049 | // maybe smaller than the operand's container. |
duke@435 | 1050 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1051 | const Type* p0_t = velt_type(p0); |
duke@435 | 1052 | if (p0_t->higher_equal(opd_t)) opd_t = p0_t; |
duke@435 | 1053 | VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t); |
duke@435 | 1054 | |
duke@435 | 1055 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1056 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
duke@435 | 1057 | return vn; |
duke@435 | 1058 | } |
duke@435 | 1059 | |
duke@435 | 1060 | // Insert pack operation |
duke@435 | 1061 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1062 | PackNode* pk = PackNode::make(_phase->C, opd, opd_t); |
duke@435 | 1063 | |
duke@435 | 1064 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1065 | Node* pi = p->at(i); |
duke@435 | 1066 | Node* in = pi->in(opd_idx); |
duke@435 | 1067 | assert(my_pack(in) == NULL, "Should already have been unpacked"); |
duke@435 | 1068 | assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type"); |
duke@435 | 1069 | pk->add_opd(in); |
duke@435 | 1070 | } |
duke@435 | 1071 | _phase->_igvn.register_new_node_with_optimizer(pk); |
duke@435 | 1072 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
duke@435 | 1073 | return pk; |
duke@435 | 1074 | } |
duke@435 | 1075 | |
duke@435 | 1076 | //------------------------------insert_extracts--------------------------- |
duke@435 | 1077 | // If a use of pack p is not a vector use, then replace the |
duke@435 | 1078 | // use with an extract operation. |
duke@435 | 1079 | void SuperWord::insert_extracts(Node_List* p) { |
duke@435 | 1080 | if (p->at(0)->is_Store()) return; |
duke@435 | 1081 | assert(_n_idx_list.is_empty(), "empty (node,index) list"); |
duke@435 | 1082 | |
duke@435 | 1083 | // Inspect each use of each pack member. For each use that is |
duke@435 | 1084 | // not a vector use, replace the use with an extract operation. |
duke@435 | 1085 | |
duke@435 | 1086 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1087 | Node* def = p->at(i); |
duke@435 | 1088 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1089 | Node* use = def->fast_out(j); |
duke@435 | 1090 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1091 | Node* n = use->in(k); |
duke@435 | 1092 | if (def == n) { |
duke@435 | 1093 | if (!is_vector_use(use, k)) { |
duke@435 | 1094 | _n_idx_list.push(use, k); |
duke@435 | 1095 | } |
duke@435 | 1096 | } |
duke@435 | 1097 | } |
duke@435 | 1098 | } |
duke@435 | 1099 | } |
duke@435 | 1100 | |
duke@435 | 1101 | while (_n_idx_list.is_nonempty()) { |
duke@435 | 1102 | Node* use = _n_idx_list.node(); |
duke@435 | 1103 | int idx = _n_idx_list.index(); |
duke@435 | 1104 | _n_idx_list.pop(); |
duke@435 | 1105 | Node* def = use->in(idx); |
duke@435 | 1106 | |
duke@435 | 1107 | // Insert extract operation |
duke@435 | 1108 | _igvn.hash_delete(def); |
duke@435 | 1109 | _igvn.hash_delete(use); |
duke@435 | 1110 | int def_pos = alignment(def) / data_size(def); |
duke@435 | 1111 | const Type* def_t = velt_type(def); |
duke@435 | 1112 | |
duke@435 | 1113 | Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); |
duke@435 | 1114 | _phase->_igvn.register_new_node_with_optimizer(ex); |
duke@435 | 1115 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
duke@435 | 1116 | use->set_req(idx, ex); |
duke@435 | 1117 | _igvn._worklist.push(def); |
duke@435 | 1118 | _igvn._worklist.push(use); |
duke@435 | 1119 | |
duke@435 | 1120 | bb_insert_after(ex, bb_idx(def)); |
duke@435 | 1121 | set_velt_type(ex, def_t); |
duke@435 | 1122 | } |
duke@435 | 1123 | } |
duke@435 | 1124 | |
duke@435 | 1125 | //------------------------------is_vector_use--------------------------- |
duke@435 | 1126 | // Is use->in(u_idx) a vector use? |
duke@435 | 1127 | bool SuperWord::is_vector_use(Node* use, int u_idx) { |
duke@435 | 1128 | Node_List* u_pk = my_pack(use); |
duke@435 | 1129 | if (u_pk == NULL) return false; |
duke@435 | 1130 | Node* def = use->in(u_idx); |
duke@435 | 1131 | Node_List* d_pk = my_pack(def); |
duke@435 | 1132 | if (d_pk == NULL) { |
duke@435 | 1133 | // check for scalar promotion |
duke@435 | 1134 | Node* n = u_pk->at(0)->in(u_idx); |
duke@435 | 1135 | for (uint i = 1; i < u_pk->size(); i++) { |
duke@435 | 1136 | if (u_pk->at(i)->in(u_idx) != n) return false; |
duke@435 | 1137 | } |
duke@435 | 1138 | return true; |
duke@435 | 1139 | } |
duke@435 | 1140 | if (u_pk->size() != d_pk->size()) |
duke@435 | 1141 | return false; |
duke@435 | 1142 | for (uint i = 0; i < u_pk->size(); i++) { |
duke@435 | 1143 | Node* ui = u_pk->at(i); |
duke@435 | 1144 | Node* di = d_pk->at(i); |
duke@435 | 1145 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) |
duke@435 | 1146 | return false; |
duke@435 | 1147 | } |
duke@435 | 1148 | return true; |
duke@435 | 1149 | } |
duke@435 | 1150 | |
duke@435 | 1151 | //------------------------------construct_bb--------------------------- |
duke@435 | 1152 | // Construct reverse postorder list of block members |
duke@435 | 1153 | void SuperWord::construct_bb() { |
duke@435 | 1154 | Node* entry = bb(); |
duke@435 | 1155 | |
duke@435 | 1156 | assert(_stk.length() == 0, "stk is empty"); |
duke@435 | 1157 | assert(_block.length() == 0, "block is empty"); |
duke@435 | 1158 | assert(_data_entry.length() == 0, "data_entry is empty"); |
duke@435 | 1159 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty"); |
duke@435 | 1160 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty"); |
duke@435 | 1161 | |
duke@435 | 1162 | // Find non-control nodes with no inputs from within block, |
duke@435 | 1163 | // create a temporary map from node _idx to bb_idx for use |
duke@435 | 1164 | // by the visited and post_visited sets, |
duke@435 | 1165 | // and count number of nodes in block. |
duke@435 | 1166 | int bb_ct = 0; |
duke@435 | 1167 | for (uint i = 0; i < lpt()->_body.size(); i++ ) { |
duke@435 | 1168 | Node *n = lpt()->_body.at(i); |
duke@435 | 1169 | set_bb_idx(n, i); // Create a temporary map |
duke@435 | 1170 | if (in_bb(n)) { |
duke@435 | 1171 | bb_ct++; |
duke@435 | 1172 | if (!n->is_CFG()) { |
duke@435 | 1173 | bool found = false; |
duke@435 | 1174 | for (uint j = 0; j < n->req(); j++) { |
duke@435 | 1175 | Node* def = n->in(j); |
duke@435 | 1176 | if (def && in_bb(def)) { |
duke@435 | 1177 | found = true; |
duke@435 | 1178 | break; |
duke@435 | 1179 | } |
duke@435 | 1180 | } |
duke@435 | 1181 | if (!found) { |
duke@435 | 1182 | assert(n != entry, "can't be entry"); |
duke@435 | 1183 | _data_entry.push(n); |
duke@435 | 1184 | } |
duke@435 | 1185 | } |
duke@435 | 1186 | } |
duke@435 | 1187 | } |
duke@435 | 1188 | |
duke@435 | 1189 | // Find memory slices (head and tail) |
duke@435 | 1190 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { |
duke@435 | 1191 | Node *n = lp()->fast_out(i); |
duke@435 | 1192 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { |
duke@435 | 1193 | Node* n_tail = n->in(LoopNode::LoopBackControl); |
duke@435 | 1194 | _mem_slice_head.push(n); |
duke@435 | 1195 | _mem_slice_tail.push(n_tail); |
duke@435 | 1196 | } |
duke@435 | 1197 | } |
duke@435 | 1198 | |
duke@435 | 1199 | // Create an RPO list of nodes in block |
duke@435 | 1200 | |
duke@435 | 1201 | visited_clear(); |
duke@435 | 1202 | post_visited_clear(); |
duke@435 | 1203 | |
duke@435 | 1204 | // Push all non-control nodes with no inputs from within block, then control entry |
duke@435 | 1205 | for (int j = 0; j < _data_entry.length(); j++) { |
duke@435 | 1206 | Node* n = _data_entry.at(j); |
duke@435 | 1207 | visited_set(n); |
duke@435 | 1208 | _stk.push(n); |
duke@435 | 1209 | } |
duke@435 | 1210 | visited_set(entry); |
duke@435 | 1211 | _stk.push(entry); |
duke@435 | 1212 | |
duke@435 | 1213 | // Do a depth first walk over out edges |
duke@435 | 1214 | int rpo_idx = bb_ct - 1; |
duke@435 | 1215 | int size; |
duke@435 | 1216 | while ((size = _stk.length()) > 0) { |
duke@435 | 1217 | Node* n = _stk.top(); // Leave node on stack |
duke@435 | 1218 | if (!visited_test_set(n)) { |
duke@435 | 1219 | // forward arc in graph |
duke@435 | 1220 | } else if (!post_visited_test(n)) { |
duke@435 | 1221 | // cross or back arc |
duke@435 | 1222 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 1223 | Node *use = n->fast_out(i); |
duke@435 | 1224 | if (in_bb(use) && !visited_test(use) && |
duke@435 | 1225 | // Don't go around backedge |
duke@435 | 1226 | (!use->is_Phi() || n == entry)) { |
duke@435 | 1227 | _stk.push(use); |
duke@435 | 1228 | } |
duke@435 | 1229 | } |
duke@435 | 1230 | if (_stk.length() == size) { |
duke@435 | 1231 | // There were no additional uses, post visit node now |
duke@435 | 1232 | _stk.pop(); // Remove node from stack |
duke@435 | 1233 | assert(rpo_idx >= 0, ""); |
duke@435 | 1234 | _block.at_put_grow(rpo_idx, n); |
duke@435 | 1235 | rpo_idx--; |
duke@435 | 1236 | post_visited_set(n); |
duke@435 | 1237 | assert(rpo_idx >= 0 || _stk.is_empty(), ""); |
duke@435 | 1238 | } |
duke@435 | 1239 | } else { |
duke@435 | 1240 | _stk.pop(); // Remove post-visited node from stack |
duke@435 | 1241 | } |
duke@435 | 1242 | } |
duke@435 | 1243 | |
duke@435 | 1244 | // Create real map of block indices for nodes |
duke@435 | 1245 | for (int j = 0; j < _block.length(); j++) { |
duke@435 | 1246 | Node* n = _block.at(j); |
duke@435 | 1247 | set_bb_idx(n, j); |
duke@435 | 1248 | } |
duke@435 | 1249 | |
duke@435 | 1250 | initialize_bb(); // Ensure extra info is allocated. |
duke@435 | 1251 | |
duke@435 | 1252 | #ifndef PRODUCT |
duke@435 | 1253 | if (TraceSuperWord) { |
duke@435 | 1254 | print_bb(); |
duke@435 | 1255 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); |
duke@435 | 1256 | for (int m = 0; m < _data_entry.length(); m++) { |
duke@435 | 1257 | tty->print("%3d ", m); |
duke@435 | 1258 | _data_entry.at(m)->dump(); |
duke@435 | 1259 | } |
duke@435 | 1260 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); |
duke@435 | 1261 | for (int m = 0; m < _mem_slice_head.length(); m++) { |
duke@435 | 1262 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); |
duke@435 | 1263 | tty->print(" "); _mem_slice_tail.at(m)->dump(); |
duke@435 | 1264 | } |
duke@435 | 1265 | } |
duke@435 | 1266 | #endif |
duke@435 | 1267 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); |
duke@435 | 1268 | } |
duke@435 | 1269 | |
duke@435 | 1270 | //------------------------------initialize_bb--------------------------- |
duke@435 | 1271 | // Initialize per node info |
duke@435 | 1272 | void SuperWord::initialize_bb() { |
duke@435 | 1273 | Node* last = _block.at(_block.length() - 1); |
duke@435 | 1274 | grow_node_info(bb_idx(last)); |
duke@435 | 1275 | } |
duke@435 | 1276 | |
duke@435 | 1277 | //------------------------------bb_insert_after--------------------------- |
duke@435 | 1278 | // Insert n into block after pos |
duke@435 | 1279 | void SuperWord::bb_insert_after(Node* n, int pos) { |
duke@435 | 1280 | int n_pos = pos + 1; |
duke@435 | 1281 | // Make room |
duke@435 | 1282 | for (int i = _block.length() - 1; i >= n_pos; i--) { |
duke@435 | 1283 | _block.at_put_grow(i+1, _block.at(i)); |
duke@435 | 1284 | } |
duke@435 | 1285 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { |
duke@435 | 1286 | _node_info.at_put_grow(j+1, _node_info.at(j)); |
duke@435 | 1287 | } |
duke@435 | 1288 | // Set value |
duke@435 | 1289 | _block.at_put_grow(n_pos, n); |
duke@435 | 1290 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); |
duke@435 | 1291 | // Adjust map from node->_idx to _block index |
duke@435 | 1292 | for (int i = n_pos; i < _block.length(); i++) { |
duke@435 | 1293 | set_bb_idx(_block.at(i), i); |
duke@435 | 1294 | } |
duke@435 | 1295 | } |
duke@435 | 1296 | |
duke@435 | 1297 | //------------------------------compute_max_depth--------------------------- |
duke@435 | 1298 | // Compute max depth for expressions from beginning of block |
duke@435 | 1299 | // Use to prune search paths during test for independence. |
duke@435 | 1300 | void SuperWord::compute_max_depth() { |
duke@435 | 1301 | int ct = 0; |
duke@435 | 1302 | bool again; |
duke@435 | 1303 | do { |
duke@435 | 1304 | again = false; |
duke@435 | 1305 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1306 | Node* n = _block.at(i); |
duke@435 | 1307 | if (!n->is_Phi()) { |
duke@435 | 1308 | int d_orig = depth(n); |
duke@435 | 1309 | int d_in = 0; |
duke@435 | 1310 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { |
duke@435 | 1311 | Node* pred = preds.current(); |
duke@435 | 1312 | if (in_bb(pred)) { |
duke@435 | 1313 | d_in = MAX2(d_in, depth(pred)); |
duke@435 | 1314 | } |
duke@435 | 1315 | } |
duke@435 | 1316 | if (d_in + 1 != d_orig) { |
duke@435 | 1317 | set_depth(n, d_in + 1); |
duke@435 | 1318 | again = true; |
duke@435 | 1319 | } |
duke@435 | 1320 | } |
duke@435 | 1321 | } |
duke@435 | 1322 | ct++; |
duke@435 | 1323 | } while (again); |
duke@435 | 1324 | #ifndef PRODUCT |
duke@435 | 1325 | if (TraceSuperWord && Verbose) |
duke@435 | 1326 | tty->print_cr("compute_max_depth iterated: %d times", ct); |
duke@435 | 1327 | #endif |
duke@435 | 1328 | } |
duke@435 | 1329 | |
duke@435 | 1330 | //-------------------------compute_vector_element_type----------------------- |
duke@435 | 1331 | // Compute necessary vector element type for expressions |
duke@435 | 1332 | // This propagates backwards a narrower integer type when the |
duke@435 | 1333 | // upper bits of the value are not needed. |
duke@435 | 1334 | // Example: char a,b,c; a = b + c; |
duke@435 | 1335 | // Normally the type of the add is integer, but for packed character |
duke@435 | 1336 | // operations the type of the add needs to be char. |
duke@435 | 1337 | void SuperWord::compute_vector_element_type() { |
duke@435 | 1338 | #ifndef PRODUCT |
duke@435 | 1339 | if (TraceSuperWord && Verbose) |
duke@435 | 1340 | tty->print_cr("\ncompute_velt_type:"); |
duke@435 | 1341 | #endif |
duke@435 | 1342 | |
duke@435 | 1343 | // Initial type |
duke@435 | 1344 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1345 | Node* n = _block.at(i); |
duke@435 | 1346 | const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type()) |
duke@435 | 1347 | : _igvn.type(n); |
duke@435 | 1348 | const Type* vt = container_type(t); |
duke@435 | 1349 | set_velt_type(n, vt); |
duke@435 | 1350 | } |
duke@435 | 1351 | |
duke@435 | 1352 | // Propagate narrowed type backwards through operations |
duke@435 | 1353 | // that don't depend on higher order bits |
duke@435 | 1354 | for (int i = _block.length() - 1; i >= 0; i--) { |
duke@435 | 1355 | Node* n = _block.at(i); |
duke@435 | 1356 | // Only integer types need be examined |
duke@435 | 1357 | if (n->bottom_type()->isa_int()) { |
duke@435 | 1358 | uint start, end; |
duke@435 | 1359 | vector_opd_range(n, &start, &end); |
duke@435 | 1360 | const Type* vt = velt_type(n); |
duke@435 | 1361 | |
duke@435 | 1362 | for (uint j = start; j < end; j++) { |
duke@435 | 1363 | Node* in = n->in(j); |
duke@435 | 1364 | // Don't propagate through a type conversion |
duke@435 | 1365 | if (n->bottom_type() != in->bottom_type()) |
duke@435 | 1366 | continue; |
duke@435 | 1367 | switch(in->Opcode()) { |
duke@435 | 1368 | case Op_AddI: case Op_AddL: |
duke@435 | 1369 | case Op_SubI: case Op_SubL: |
duke@435 | 1370 | case Op_MulI: case Op_MulL: |
duke@435 | 1371 | case Op_AndI: case Op_AndL: |
duke@435 | 1372 | case Op_OrI: case Op_OrL: |
duke@435 | 1373 | case Op_XorI: case Op_XorL: |
duke@435 | 1374 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1375 | case Op_CMoveI: case Op_CMoveL: |
duke@435 | 1376 | if (in_bb(in)) { |
duke@435 | 1377 | bool same_type = true; |
duke@435 | 1378 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 1379 | Node *use = in->fast_out(k); |
duke@435 | 1380 | if (!in_bb(use) || velt_type(use) != vt) { |
duke@435 | 1381 | same_type = false; |
duke@435 | 1382 | break; |
duke@435 | 1383 | } |
duke@435 | 1384 | } |
duke@435 | 1385 | if (same_type) { |
duke@435 | 1386 | set_velt_type(in, vt); |
duke@435 | 1387 | } |
duke@435 | 1388 | } |
duke@435 | 1389 | } |
duke@435 | 1390 | } |
duke@435 | 1391 | } |
duke@435 | 1392 | } |
duke@435 | 1393 | #ifndef PRODUCT |
duke@435 | 1394 | if (TraceSuperWord && Verbose) { |
duke@435 | 1395 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1396 | Node* n = _block.at(i); |
duke@435 | 1397 | velt_type(n)->dump(); |
duke@435 | 1398 | tty->print("\t"); |
duke@435 | 1399 | n->dump(); |
duke@435 | 1400 | } |
duke@435 | 1401 | } |
duke@435 | 1402 | #endif |
duke@435 | 1403 | } |
duke@435 | 1404 | |
duke@435 | 1405 | //------------------------------memory_alignment--------------------------- |
duke@435 | 1406 | // Alignment within a vector memory reference |
duke@435 | 1407 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
duke@435 | 1408 | SWPointer p(s, this); |
duke@435 | 1409 | if (!p.valid()) { |
duke@435 | 1410 | return bottom_align; |
duke@435 | 1411 | } |
duke@435 | 1412 | int offset = p.offset_in_bytes(); |
duke@435 | 1413 | offset += iv_adjust_in_bytes; |
duke@435 | 1414 | int off_rem = offset % vector_width_in_bytes(); |
duke@435 | 1415 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); |
duke@435 | 1416 | return off_mod; |
duke@435 | 1417 | } |
duke@435 | 1418 | |
duke@435 | 1419 | //---------------------------container_type--------------------------- |
duke@435 | 1420 | // Smallest type containing range of values |
duke@435 | 1421 | const Type* SuperWord::container_type(const Type* t) { |
duke@435 | 1422 | if (t->isa_aryptr()) { |
duke@435 | 1423 | t = t->is_aryptr()->elem(); |
duke@435 | 1424 | } |
duke@435 | 1425 | if (t->basic_type() == T_INT) { |
duke@435 | 1426 | if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; |
duke@435 | 1427 | if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; |
duke@435 | 1428 | if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; |
duke@435 | 1429 | if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; |
duke@435 | 1430 | return TypeInt::INT; |
duke@435 | 1431 | } |
duke@435 | 1432 | return t; |
duke@435 | 1433 | } |
duke@435 | 1434 | |
duke@435 | 1435 | //-------------------------vector_opd_range----------------------- |
duke@435 | 1436 | // (Start, end] half-open range defining which operands are vector |
duke@435 | 1437 | void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { |
duke@435 | 1438 | switch (n->Opcode()) { |
duke@435 | 1439 | case Op_LoadB: case Op_LoadC: |
duke@435 | 1440 | case Op_LoadI: case Op_LoadL: |
duke@435 | 1441 | case Op_LoadF: case Op_LoadD: |
duke@435 | 1442 | case Op_LoadP: |
duke@435 | 1443 | *start = 0; |
duke@435 | 1444 | *end = 0; |
duke@435 | 1445 | return; |
duke@435 | 1446 | case Op_StoreB: case Op_StoreC: |
duke@435 | 1447 | case Op_StoreI: case Op_StoreL: |
duke@435 | 1448 | case Op_StoreF: case Op_StoreD: |
duke@435 | 1449 | case Op_StoreP: |
duke@435 | 1450 | *start = MemNode::ValueIn; |
duke@435 | 1451 | *end = *start + 1; |
duke@435 | 1452 | return; |
duke@435 | 1453 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1454 | *start = 1; |
duke@435 | 1455 | *end = 2; |
duke@435 | 1456 | return; |
duke@435 | 1457 | case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: |
duke@435 | 1458 | *start = 2; |
duke@435 | 1459 | *end = n->req(); |
duke@435 | 1460 | return; |
duke@435 | 1461 | } |
duke@435 | 1462 | *start = 1; |
duke@435 | 1463 | *end = n->req(); // default is all operands |
duke@435 | 1464 | } |
duke@435 | 1465 | |
duke@435 | 1466 | //------------------------------in_packset--------------------------- |
duke@435 | 1467 | // Are s1 and s2 in a pack pair and ordered as s1,s2? |
duke@435 | 1468 | bool SuperWord::in_packset(Node* s1, Node* s2) { |
duke@435 | 1469 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1470 | Node_List* p = _packset.at(i); |
duke@435 | 1471 | assert(p->size() == 2, "must be"); |
duke@435 | 1472 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { |
duke@435 | 1473 | return true; |
duke@435 | 1474 | } |
duke@435 | 1475 | } |
duke@435 | 1476 | return false; |
duke@435 | 1477 | } |
duke@435 | 1478 | |
duke@435 | 1479 | //------------------------------in_pack--------------------------- |
duke@435 | 1480 | // Is s in pack p? |
duke@435 | 1481 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { |
duke@435 | 1482 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1483 | if (p->at(i) == s) { |
duke@435 | 1484 | return p; |
duke@435 | 1485 | } |
duke@435 | 1486 | } |
duke@435 | 1487 | return NULL; |
duke@435 | 1488 | } |
duke@435 | 1489 | |
duke@435 | 1490 | //------------------------------remove_pack_at--------------------------- |
duke@435 | 1491 | // Remove the pack at position pos in the packset |
duke@435 | 1492 | void SuperWord::remove_pack_at(int pos) { |
duke@435 | 1493 | Node_List* p = _packset.at(pos); |
duke@435 | 1494 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1495 | Node* s = p->at(i); |
duke@435 | 1496 | set_my_pack(s, NULL); |
duke@435 | 1497 | } |
duke@435 | 1498 | _packset.remove_at(pos); |
duke@435 | 1499 | } |
duke@435 | 1500 | |
duke@435 | 1501 | //------------------------------executed_first--------------------------- |
duke@435 | 1502 | // Return the node executed first in pack p. Uses the RPO block list |
duke@435 | 1503 | // to determine order. |
duke@435 | 1504 | Node* SuperWord::executed_first(Node_List* p) { |
duke@435 | 1505 | Node* n = p->at(0); |
duke@435 | 1506 | int n_rpo = bb_idx(n); |
duke@435 | 1507 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1508 | Node* s = p->at(i); |
duke@435 | 1509 | int s_rpo = bb_idx(s); |
duke@435 | 1510 | if (s_rpo < n_rpo) { |
duke@435 | 1511 | n = s; |
duke@435 | 1512 | n_rpo = s_rpo; |
duke@435 | 1513 | } |
duke@435 | 1514 | } |
duke@435 | 1515 | return n; |
duke@435 | 1516 | } |
duke@435 | 1517 | |
duke@435 | 1518 | //------------------------------executed_last--------------------------- |
duke@435 | 1519 | // Return the node executed last in pack p. |
duke@435 | 1520 | Node* SuperWord::executed_last(Node_List* p) { |
duke@435 | 1521 | Node* n = p->at(0); |
duke@435 | 1522 | int n_rpo = bb_idx(n); |
duke@435 | 1523 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1524 | Node* s = p->at(i); |
duke@435 | 1525 | int s_rpo = bb_idx(s); |
duke@435 | 1526 | if (s_rpo > n_rpo) { |
duke@435 | 1527 | n = s; |
duke@435 | 1528 | n_rpo = s_rpo; |
duke@435 | 1529 | } |
duke@435 | 1530 | } |
duke@435 | 1531 | return n; |
duke@435 | 1532 | } |
duke@435 | 1533 | |
duke@435 | 1534 | //----------------------------align_initial_loop_index--------------------------- |
duke@435 | 1535 | // Adjust pre-loop limit so that in main loop, a load/store reference |
duke@435 | 1536 | // to align_to_ref will be a position zero in the vector. |
duke@435 | 1537 | // (iv + k) mod vector_align == 0 |
duke@435 | 1538 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { |
duke@435 | 1539 | CountedLoopNode *main_head = lp()->as_CountedLoop(); |
duke@435 | 1540 | assert(main_head->is_main_loop(), ""); |
duke@435 | 1541 | CountedLoopEndNode* pre_end = get_pre_loop_end(main_head); |
duke@435 | 1542 | assert(pre_end != NULL, ""); |
duke@435 | 1543 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 1544 | assert(pre_opaq1->Opcode() == Op_Opaque1, ""); |
duke@435 | 1545 | Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; |
duke@435 | 1546 | Node *pre_limit = pre_opaq->in(1); |
duke@435 | 1547 | |
duke@435 | 1548 | // Where we put new limit calculations |
duke@435 | 1549 | Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); |
duke@435 | 1550 | |
duke@435 | 1551 | // Ensure the original loop limit is available from the |
duke@435 | 1552 | // pre-loop Opaque1 node. |
duke@435 | 1553 | Node *orig_limit = pre_opaq->original_loop_limit(); |
duke@435 | 1554 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
duke@435 | 1555 | |
duke@435 | 1556 | SWPointer align_to_ref_p(align_to_ref, this); |
duke@435 | 1557 | |
duke@435 | 1558 | // Let l0 == original pre_limit, l == new pre_limit, V == v_align |
duke@435 | 1559 | // |
duke@435 | 1560 | // For stride > 0 |
duke@435 | 1561 | // Need l such that l > l0 && (l+k)%V == 0 |
duke@435 | 1562 | // Find n such that l = (l0 + n) |
duke@435 | 1563 | // (l0 + n + k) % V == 0 |
duke@435 | 1564 | // n = [V - (l0 + k)%V]%V |
duke@435 | 1565 | // new limit = l0 + [V - (l0 + k)%V]%V |
duke@435 | 1566 | // For stride < 0 |
duke@435 | 1567 | // Need l such that l < l0 && (l+k)%V == 0 |
duke@435 | 1568 | // Find n such that l = (l0 - n) |
duke@435 | 1569 | // (l0 - n + k) % V == 0 |
duke@435 | 1570 | // n = (l0 + k)%V |
duke@435 | 1571 | // new limit = l0 - (l0 + k)%V |
duke@435 | 1572 | |
duke@435 | 1573 | int elt_size = align_to_ref_p.memory_size(); |
duke@435 | 1574 | int v_align = vector_width_in_bytes() / elt_size; |
duke@435 | 1575 | int k = align_to_ref_p.offset_in_bytes() / elt_size; |
duke@435 | 1576 | |
duke@435 | 1577 | Node *kn = _igvn.intcon(k); |
duke@435 | 1578 | Node *limk = new (_phase->C, 3) AddINode(pre_limit, kn); |
duke@435 | 1579 | _phase->_igvn.register_new_node_with_optimizer(limk); |
duke@435 | 1580 | _phase->set_ctrl(limk, pre_ctrl); |
duke@435 | 1581 | if (align_to_ref_p.invar() != NULL) { |
duke@435 | 1582 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
duke@435 | 1583 | Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); |
duke@435 | 1584 | _phase->_igvn.register_new_node_with_optimizer(aref); |
duke@435 | 1585 | _phase->set_ctrl(aref, pre_ctrl); |
duke@435 | 1586 | if (!align_to_ref_p.negate_invar()) { |
duke@435 | 1587 | limk = new (_phase->C, 3) AddINode(limk, aref); |
duke@435 | 1588 | } else { |
duke@435 | 1589 | limk = new (_phase->C, 3) SubINode(limk, aref); |
duke@435 | 1590 | } |
duke@435 | 1591 | _phase->_igvn.register_new_node_with_optimizer(limk); |
duke@435 | 1592 | _phase->set_ctrl(limk, pre_ctrl); |
duke@435 | 1593 | } |
duke@435 | 1594 | Node* va_msk = _igvn.intcon(v_align - 1); |
duke@435 | 1595 | Node* n = new (_phase->C, 3) AndINode(limk, va_msk); |
duke@435 | 1596 | _phase->_igvn.register_new_node_with_optimizer(n); |
duke@435 | 1597 | _phase->set_ctrl(n, pre_ctrl); |
duke@435 | 1598 | Node* newlim; |
duke@435 | 1599 | if (iv_stride() > 0) { |
duke@435 | 1600 | Node* va = _igvn.intcon(v_align); |
duke@435 | 1601 | Node* adj = new (_phase->C, 3) SubINode(va, n); |
duke@435 | 1602 | _phase->_igvn.register_new_node_with_optimizer(adj); |
duke@435 | 1603 | _phase->set_ctrl(adj, pre_ctrl); |
duke@435 | 1604 | Node* adj2 = new (_phase->C, 3) AndINode(adj, va_msk); |
duke@435 | 1605 | _phase->_igvn.register_new_node_with_optimizer(adj2); |
duke@435 | 1606 | _phase->set_ctrl(adj2, pre_ctrl); |
duke@435 | 1607 | newlim = new (_phase->C, 3) AddINode(pre_limit, adj2); |
duke@435 | 1608 | } else { |
duke@435 | 1609 | newlim = new (_phase->C, 3) SubINode(pre_limit, n); |
duke@435 | 1610 | } |
duke@435 | 1611 | _phase->_igvn.register_new_node_with_optimizer(newlim); |
duke@435 | 1612 | _phase->set_ctrl(newlim, pre_ctrl); |
duke@435 | 1613 | Node* constrained = |
duke@435 | 1614 | (iv_stride() > 0) ? (Node*) new (_phase->C,3) MinINode(newlim, orig_limit) |
duke@435 | 1615 | : (Node*) new (_phase->C,3) MaxINode(newlim, orig_limit); |
duke@435 | 1616 | _phase->_igvn.register_new_node_with_optimizer(constrained); |
duke@435 | 1617 | _phase->set_ctrl(constrained, pre_ctrl); |
duke@435 | 1618 | _igvn.hash_delete(pre_opaq); |
duke@435 | 1619 | pre_opaq->set_req(1, constrained); |
duke@435 | 1620 | } |
duke@435 | 1621 | |
duke@435 | 1622 | //----------------------------get_pre_loop_end--------------------------- |
duke@435 | 1623 | // Find pre loop end from main loop. Returns null if none. |
duke@435 | 1624 | CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) { |
duke@435 | 1625 | Node *ctrl = cl->in(LoopNode::EntryControl); |
duke@435 | 1626 | if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL; |
duke@435 | 1627 | Node *iffm = ctrl->in(0); |
duke@435 | 1628 | if (!iffm->is_If()) return NULL; |
duke@435 | 1629 | Node *p_f = iffm->in(0); |
duke@435 | 1630 | if (!p_f->is_IfFalse()) return NULL; |
duke@435 | 1631 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; |
duke@435 | 1632 | CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
duke@435 | 1633 | if (!pre_end->loopnode()->is_pre_loop()) return NULL; |
duke@435 | 1634 | return pre_end; |
duke@435 | 1635 | } |
duke@435 | 1636 | |
duke@435 | 1637 | |
duke@435 | 1638 | //------------------------------init--------------------------- |
duke@435 | 1639 | void SuperWord::init() { |
duke@435 | 1640 | _dg.init(); |
duke@435 | 1641 | _packset.clear(); |
duke@435 | 1642 | _disjoint_ptrs.clear(); |
duke@435 | 1643 | _block.clear(); |
duke@435 | 1644 | _data_entry.clear(); |
duke@435 | 1645 | _mem_slice_head.clear(); |
duke@435 | 1646 | _mem_slice_tail.clear(); |
duke@435 | 1647 | _node_info.clear(); |
duke@435 | 1648 | _align_to_ref = NULL; |
duke@435 | 1649 | _lpt = NULL; |
duke@435 | 1650 | _lp = NULL; |
duke@435 | 1651 | _bb = NULL; |
duke@435 | 1652 | _iv = NULL; |
duke@435 | 1653 | } |
duke@435 | 1654 | |
duke@435 | 1655 | //------------------------------print_packset--------------------------- |
duke@435 | 1656 | void SuperWord::print_packset() { |
duke@435 | 1657 | #ifndef PRODUCT |
duke@435 | 1658 | tty->print_cr("packset"); |
duke@435 | 1659 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1660 | tty->print_cr("Pack: %d", i); |
duke@435 | 1661 | Node_List* p = _packset.at(i); |
duke@435 | 1662 | print_pack(p); |
duke@435 | 1663 | } |
duke@435 | 1664 | #endif |
duke@435 | 1665 | } |
duke@435 | 1666 | |
duke@435 | 1667 | //------------------------------print_pack--------------------------- |
duke@435 | 1668 | void SuperWord::print_pack(Node_List* p) { |
duke@435 | 1669 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1670 | print_stmt(p->at(i)); |
duke@435 | 1671 | } |
duke@435 | 1672 | } |
duke@435 | 1673 | |
duke@435 | 1674 | //------------------------------print_bb--------------------------- |
duke@435 | 1675 | void SuperWord::print_bb() { |
duke@435 | 1676 | #ifndef PRODUCT |
duke@435 | 1677 | tty->print_cr("\nBlock"); |
duke@435 | 1678 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1679 | Node* n = _block.at(i); |
duke@435 | 1680 | tty->print("%d ", i); |
duke@435 | 1681 | if (n) { |
duke@435 | 1682 | n->dump(); |
duke@435 | 1683 | } |
duke@435 | 1684 | } |
duke@435 | 1685 | #endif |
duke@435 | 1686 | } |
duke@435 | 1687 | |
duke@435 | 1688 | //------------------------------print_stmt--------------------------- |
duke@435 | 1689 | void SuperWord::print_stmt(Node* s) { |
duke@435 | 1690 | #ifndef PRODUCT |
duke@435 | 1691 | tty->print(" align: %d \t", alignment(s)); |
duke@435 | 1692 | s->dump(); |
duke@435 | 1693 | #endif |
duke@435 | 1694 | } |
duke@435 | 1695 | |
duke@435 | 1696 | //------------------------------blank--------------------------- |
duke@435 | 1697 | char* SuperWord::blank(uint depth) { |
duke@435 | 1698 | static char blanks[101]; |
duke@435 | 1699 | assert(depth < 101, "too deep"); |
duke@435 | 1700 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; |
duke@435 | 1701 | blanks[depth] = '\0'; |
duke@435 | 1702 | return blanks; |
duke@435 | 1703 | } |
duke@435 | 1704 | |
duke@435 | 1705 | |
duke@435 | 1706 | //==============================SWPointer=========================== |
duke@435 | 1707 | |
duke@435 | 1708 | //----------------------------SWPointer------------------------ |
duke@435 | 1709 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp) : |
duke@435 | 1710 | _mem(mem), _slp(slp), _base(NULL), _adr(NULL), |
duke@435 | 1711 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) { |
duke@435 | 1712 | |
duke@435 | 1713 | Node* adr = mem->in(MemNode::Address); |
duke@435 | 1714 | if (!adr->is_AddP()) { |
duke@435 | 1715 | assert(!valid(), "too complex"); |
duke@435 | 1716 | return; |
duke@435 | 1717 | } |
duke@435 | 1718 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) |
duke@435 | 1719 | Node* base = adr->in(AddPNode::Base); |
duke@435 | 1720 | for (int i = 0; i < 3; i++) { |
duke@435 | 1721 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { |
duke@435 | 1722 | assert(!valid(), "too complex"); |
duke@435 | 1723 | return; |
duke@435 | 1724 | } |
duke@435 | 1725 | adr = adr->in(AddPNode::Address); |
duke@435 | 1726 | if (base == adr || !adr->is_AddP()) { |
duke@435 | 1727 | break; // stop looking at addp's |
duke@435 | 1728 | } |
duke@435 | 1729 | } |
duke@435 | 1730 | _base = base; |
duke@435 | 1731 | _adr = adr; |
duke@435 | 1732 | assert(valid(), "Usable"); |
duke@435 | 1733 | } |
duke@435 | 1734 | |
duke@435 | 1735 | // Following is used to create a temporary object during |
duke@435 | 1736 | // the pattern match of an address expression. |
duke@435 | 1737 | SWPointer::SWPointer(SWPointer* p) : |
duke@435 | 1738 | _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), |
duke@435 | 1739 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} |
duke@435 | 1740 | |
duke@435 | 1741 | //------------------------scaled_iv_plus_offset-------------------- |
duke@435 | 1742 | // Match: k*iv + offset |
duke@435 | 1743 | // where: k is a constant that maybe zero, and |
duke@435 | 1744 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional |
duke@435 | 1745 | bool SWPointer::scaled_iv_plus_offset(Node* n) { |
duke@435 | 1746 | if (scaled_iv(n)) { |
duke@435 | 1747 | return true; |
duke@435 | 1748 | } |
duke@435 | 1749 | if (offset_plus_k(n)) { |
duke@435 | 1750 | return true; |
duke@435 | 1751 | } |
duke@435 | 1752 | int opc = n->Opcode(); |
duke@435 | 1753 | if (opc == Op_AddI) { |
duke@435 | 1754 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { |
duke@435 | 1755 | return true; |
duke@435 | 1756 | } |
duke@435 | 1757 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1758 | return true; |
duke@435 | 1759 | } |
duke@435 | 1760 | } else if (opc == Op_SubI) { |
duke@435 | 1761 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { |
duke@435 | 1762 | return true; |
duke@435 | 1763 | } |
duke@435 | 1764 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1765 | _scale *= -1; |
duke@435 | 1766 | return true; |
duke@435 | 1767 | } |
duke@435 | 1768 | } |
duke@435 | 1769 | return false; |
duke@435 | 1770 | } |
duke@435 | 1771 | |
duke@435 | 1772 | //----------------------------scaled_iv------------------------ |
duke@435 | 1773 | // Match: k*iv where k is a constant that's not zero |
duke@435 | 1774 | bool SWPointer::scaled_iv(Node* n) { |
duke@435 | 1775 | if (_scale != 0) { |
duke@435 | 1776 | return false; // already found a scale |
duke@435 | 1777 | } |
duke@435 | 1778 | if (n == iv()) { |
duke@435 | 1779 | _scale = 1; |
duke@435 | 1780 | return true; |
duke@435 | 1781 | } |
duke@435 | 1782 | int opc = n->Opcode(); |
duke@435 | 1783 | if (opc == Op_MulI) { |
duke@435 | 1784 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 1785 | _scale = n->in(2)->get_int(); |
duke@435 | 1786 | return true; |
duke@435 | 1787 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { |
duke@435 | 1788 | _scale = n->in(1)->get_int(); |
duke@435 | 1789 | return true; |
duke@435 | 1790 | } |
duke@435 | 1791 | } else if (opc == Op_LShiftI) { |
duke@435 | 1792 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 1793 | _scale = 1 << n->in(2)->get_int(); |
duke@435 | 1794 | return true; |
duke@435 | 1795 | } |
duke@435 | 1796 | } else if (opc == Op_ConvI2L) { |
duke@435 | 1797 | if (scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 1798 | return true; |
duke@435 | 1799 | } |
duke@435 | 1800 | } else if (opc == Op_LShiftL) { |
duke@435 | 1801 | if (!has_iv() && _invar == NULL) { |
duke@435 | 1802 | // Need to preserve the current _offset value, so |
duke@435 | 1803 | // create a temporary object for this expression subtree. |
duke@435 | 1804 | // Hacky, so should re-engineer the address pattern match. |
duke@435 | 1805 | SWPointer tmp(this); |
duke@435 | 1806 | if (tmp.scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 1807 | if (tmp._invar == NULL) { |
duke@435 | 1808 | int mult = 1 << n->in(2)->get_int(); |
duke@435 | 1809 | _scale = tmp._scale * mult; |
duke@435 | 1810 | _offset += tmp._offset * mult; |
duke@435 | 1811 | return true; |
duke@435 | 1812 | } |
duke@435 | 1813 | } |
duke@435 | 1814 | } |
duke@435 | 1815 | } |
duke@435 | 1816 | return false; |
duke@435 | 1817 | } |
duke@435 | 1818 | |
duke@435 | 1819 | //----------------------------offset_plus_k------------------------ |
duke@435 | 1820 | // Match: offset is (k [+/- invariant]) |
duke@435 | 1821 | // where k maybe zero and invariant is optional, but not both. |
duke@435 | 1822 | bool SWPointer::offset_plus_k(Node* n, bool negate) { |
duke@435 | 1823 | int opc = n->Opcode(); |
duke@435 | 1824 | if (opc == Op_ConI) { |
duke@435 | 1825 | _offset += negate ? -(n->get_int()) : n->get_int(); |
duke@435 | 1826 | return true; |
duke@435 | 1827 | } else if (opc == Op_ConL) { |
duke@435 | 1828 | // Okay if value fits into an int |
duke@435 | 1829 | const TypeLong* t = n->find_long_type(); |
duke@435 | 1830 | if (t->higher_equal(TypeLong::INT)) { |
duke@435 | 1831 | jlong loff = n->get_long(); |
duke@435 | 1832 | jint off = (jint)loff; |
duke@435 | 1833 | _offset += negate ? -off : loff; |
duke@435 | 1834 | return true; |
duke@435 | 1835 | } |
duke@435 | 1836 | return false; |
duke@435 | 1837 | } |
duke@435 | 1838 | if (_invar != NULL) return false; // already have an invariant |
duke@435 | 1839 | if (opc == Op_AddI) { |
duke@435 | 1840 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 1841 | _negate_invar = negate; |
duke@435 | 1842 | _invar = n->in(1); |
duke@435 | 1843 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 1844 | return true; |
duke@435 | 1845 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 1846 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 1847 | _negate_invar = negate; |
duke@435 | 1848 | _invar = n->in(2); |
duke@435 | 1849 | return true; |
duke@435 | 1850 | } |
duke@435 | 1851 | } |
duke@435 | 1852 | if (opc == Op_SubI) { |
duke@435 | 1853 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 1854 | _negate_invar = negate; |
duke@435 | 1855 | _invar = n->in(1); |
duke@435 | 1856 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 1857 | return true; |
duke@435 | 1858 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 1859 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 1860 | _negate_invar = !negate; |
duke@435 | 1861 | _invar = n->in(2); |
duke@435 | 1862 | return true; |
duke@435 | 1863 | } |
duke@435 | 1864 | } |
duke@435 | 1865 | if (invariant(n)) { |
duke@435 | 1866 | _negate_invar = negate; |
duke@435 | 1867 | _invar = n; |
duke@435 | 1868 | return true; |
duke@435 | 1869 | } |
duke@435 | 1870 | return false; |
duke@435 | 1871 | } |
duke@435 | 1872 | |
duke@435 | 1873 | //----------------------------print------------------------ |
duke@435 | 1874 | void SWPointer::print() { |
duke@435 | 1875 | #ifndef PRODUCT |
duke@435 | 1876 | tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n", |
duke@435 | 1877 | _base != NULL ? _base->_idx : 0, |
duke@435 | 1878 | _adr != NULL ? _adr->_idx : 0, |
duke@435 | 1879 | _scale, _offset, |
duke@435 | 1880 | _negate_invar?'-':'+', |
duke@435 | 1881 | _invar != NULL ? _invar->_idx : 0); |
duke@435 | 1882 | #endif |
duke@435 | 1883 | } |
duke@435 | 1884 | |
duke@435 | 1885 | // ========================= OrderedPair ===================== |
duke@435 | 1886 | |
duke@435 | 1887 | const OrderedPair OrderedPair::initial; |
duke@435 | 1888 | |
duke@435 | 1889 | // ========================= SWNodeInfo ===================== |
duke@435 | 1890 | |
duke@435 | 1891 | const SWNodeInfo SWNodeInfo::initial; |
duke@435 | 1892 | |
duke@435 | 1893 | |
duke@435 | 1894 | // ============================ DepGraph =========================== |
duke@435 | 1895 | |
duke@435 | 1896 | //------------------------------make_node--------------------------- |
duke@435 | 1897 | // Make a new dependence graph node for an ideal node. |
duke@435 | 1898 | DepMem* DepGraph::make_node(Node* node) { |
duke@435 | 1899 | DepMem* m = new (_arena) DepMem(node); |
duke@435 | 1900 | if (node != NULL) { |
duke@435 | 1901 | assert(_map.at_grow(node->_idx) == NULL, "one init only"); |
duke@435 | 1902 | _map.at_put_grow(node->_idx, m); |
duke@435 | 1903 | } |
duke@435 | 1904 | return m; |
duke@435 | 1905 | } |
duke@435 | 1906 | |
duke@435 | 1907 | //------------------------------make_edge--------------------------- |
duke@435 | 1908 | // Make a new dependence graph edge from dpred -> dsucc |
duke@435 | 1909 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { |
duke@435 | 1910 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); |
duke@435 | 1911 | dpred->set_out_head(e); |
duke@435 | 1912 | dsucc->set_in_head(e); |
duke@435 | 1913 | return e; |
duke@435 | 1914 | } |
duke@435 | 1915 | |
duke@435 | 1916 | // ========================== DepMem ======================== |
duke@435 | 1917 | |
duke@435 | 1918 | //------------------------------in_cnt--------------------------- |
duke@435 | 1919 | int DepMem::in_cnt() { |
duke@435 | 1920 | int ct = 0; |
duke@435 | 1921 | for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++; |
duke@435 | 1922 | return ct; |
duke@435 | 1923 | } |
duke@435 | 1924 | |
duke@435 | 1925 | //------------------------------out_cnt--------------------------- |
duke@435 | 1926 | int DepMem::out_cnt() { |
duke@435 | 1927 | int ct = 0; |
duke@435 | 1928 | for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++; |
duke@435 | 1929 | return ct; |
duke@435 | 1930 | } |
duke@435 | 1931 | |
duke@435 | 1932 | //------------------------------print----------------------------- |
duke@435 | 1933 | void DepMem::print() { |
duke@435 | 1934 | #ifndef PRODUCT |
duke@435 | 1935 | tty->print(" DepNode %d (", _node->_idx); |
duke@435 | 1936 | for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) { |
duke@435 | 1937 | Node* pred = p->pred()->node(); |
duke@435 | 1938 | tty->print(" %d", pred != NULL ? pred->_idx : 0); |
duke@435 | 1939 | } |
duke@435 | 1940 | tty->print(") ["); |
duke@435 | 1941 | for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) { |
duke@435 | 1942 | Node* succ = s->succ()->node(); |
duke@435 | 1943 | tty->print(" %d", succ != NULL ? succ->_idx : 0); |
duke@435 | 1944 | } |
duke@435 | 1945 | tty->print_cr(" ]"); |
duke@435 | 1946 | #endif |
duke@435 | 1947 | } |
duke@435 | 1948 | |
duke@435 | 1949 | // =========================== DepEdge ========================= |
duke@435 | 1950 | |
duke@435 | 1951 | //------------------------------DepPreds--------------------------- |
duke@435 | 1952 | void DepEdge::print() { |
duke@435 | 1953 | #ifndef PRODUCT |
duke@435 | 1954 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); |
duke@435 | 1955 | #endif |
duke@435 | 1956 | } |
duke@435 | 1957 | |
duke@435 | 1958 | // =========================== DepPreds ========================= |
duke@435 | 1959 | // Iterator over predecessor edges in the dependence graph. |
duke@435 | 1960 | |
duke@435 | 1961 | //------------------------------DepPreds--------------------------- |
duke@435 | 1962 | DepPreds::DepPreds(Node* n, DepGraph& dg) { |
duke@435 | 1963 | _n = n; |
duke@435 | 1964 | _done = false; |
duke@435 | 1965 | if (_n->is_Store() || _n->is_Load()) { |
duke@435 | 1966 | _next_idx = MemNode::Address; |
duke@435 | 1967 | _end_idx = n->req(); |
duke@435 | 1968 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 1969 | } else if (_n->is_Mem()) { |
duke@435 | 1970 | _next_idx = 0; |
duke@435 | 1971 | _end_idx = 0; |
duke@435 | 1972 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 1973 | } else { |
duke@435 | 1974 | _next_idx = 1; |
duke@435 | 1975 | _end_idx = _n->req(); |
duke@435 | 1976 | _dep_next = NULL; |
duke@435 | 1977 | } |
duke@435 | 1978 | next(); |
duke@435 | 1979 | } |
duke@435 | 1980 | |
duke@435 | 1981 | //------------------------------next--------------------------- |
duke@435 | 1982 | void DepPreds::next() { |
duke@435 | 1983 | if (_dep_next != NULL) { |
duke@435 | 1984 | _current = _dep_next->pred()->node(); |
duke@435 | 1985 | _dep_next = _dep_next->next_in(); |
duke@435 | 1986 | } else if (_next_idx < _end_idx) { |
duke@435 | 1987 | _current = _n->in(_next_idx++); |
duke@435 | 1988 | } else { |
duke@435 | 1989 | _done = true; |
duke@435 | 1990 | } |
duke@435 | 1991 | } |
duke@435 | 1992 | |
duke@435 | 1993 | // =========================== DepSuccs ========================= |
duke@435 | 1994 | // Iterator over successor edges in the dependence graph. |
duke@435 | 1995 | |
duke@435 | 1996 | //------------------------------DepSuccs--------------------------- |
duke@435 | 1997 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { |
duke@435 | 1998 | _n = n; |
duke@435 | 1999 | _done = false; |
duke@435 | 2000 | if (_n->is_Load()) { |
duke@435 | 2001 | _next_idx = 0; |
duke@435 | 2002 | _end_idx = _n->outcnt(); |
duke@435 | 2003 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2004 | } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) { |
duke@435 | 2005 | _next_idx = 0; |
duke@435 | 2006 | _end_idx = 0; |
duke@435 | 2007 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2008 | } else { |
duke@435 | 2009 | _next_idx = 0; |
duke@435 | 2010 | _end_idx = _n->outcnt(); |
duke@435 | 2011 | _dep_next = NULL; |
duke@435 | 2012 | } |
duke@435 | 2013 | next(); |
duke@435 | 2014 | } |
duke@435 | 2015 | |
duke@435 | 2016 | //-------------------------------next--------------------------- |
duke@435 | 2017 | void DepSuccs::next() { |
duke@435 | 2018 | if (_dep_next != NULL) { |
duke@435 | 2019 | _current = _dep_next->succ()->node(); |
duke@435 | 2020 | _dep_next = _dep_next->next_out(); |
duke@435 | 2021 | } else if (_next_idx < _end_idx) { |
duke@435 | 2022 | _current = _n->raw_out(_next_idx++); |
duke@435 | 2023 | } else { |
duke@435 | 2024 | _done = true; |
duke@435 | 2025 | } |
duke@435 | 2026 | } |