Wed, 22 Aug 2012 11:55:40 -0700
7192963: assert(_in[req-1] == this) failed: Must pass arg count to 'new'
Summary: Fixed Pack node generation. Not vectorize shift instructions if count is not the same for all shifts and if count is vector.
Reviewed-by: twisti
duke@435 | 1 | /* |
kvn@3882 | 2 | * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | */ |
duke@435 | 23 | |
stefank@2314 | 24 | #include "precompiled.hpp" |
stefank@2314 | 25 | #include "compiler/compileLog.hpp" |
stefank@2314 | 26 | #include "libadt/vectset.hpp" |
stefank@2314 | 27 | #include "memory/allocation.inline.hpp" |
stefank@2314 | 28 | #include "opto/addnode.hpp" |
stefank@2314 | 29 | #include "opto/callnode.hpp" |
stefank@2314 | 30 | #include "opto/divnode.hpp" |
stefank@2314 | 31 | #include "opto/matcher.hpp" |
stefank@2314 | 32 | #include "opto/memnode.hpp" |
stefank@2314 | 33 | #include "opto/mulnode.hpp" |
stefank@2314 | 34 | #include "opto/opcodes.hpp" |
stefank@2314 | 35 | #include "opto/superword.hpp" |
stefank@2314 | 36 | #include "opto/vectornode.hpp" |
duke@435 | 37 | |
duke@435 | 38 | // |
duke@435 | 39 | // S U P E R W O R D T R A N S F O R M |
duke@435 | 40 | //============================================================================= |
duke@435 | 41 | |
duke@435 | 42 | //------------------------------SuperWord--------------------------- |
duke@435 | 43 | SuperWord::SuperWord(PhaseIdealLoop* phase) : |
duke@435 | 44 | _phase(phase), |
duke@435 | 45 | _igvn(phase->_igvn), |
duke@435 | 46 | _arena(phase->C->comp_arena()), |
duke@435 | 47 | _packset(arena(), 8, 0, NULL), // packs for the current block |
duke@435 | 48 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb |
duke@435 | 49 | _block(arena(), 8, 0, NULL), // nodes in current block |
duke@435 | 50 | _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside |
duke@435 | 51 | _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads |
duke@435 | 52 | _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails |
duke@435 | 53 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node |
duke@435 | 54 | _align_to_ref(NULL), // memory reference to align vectors to |
duke@435 | 55 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs |
duke@435 | 56 | _dg(_arena), // dependence graph |
duke@435 | 57 | _visited(arena()), // visited node set |
duke@435 | 58 | _post_visited(arena()), // post visited node set |
duke@435 | 59 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs |
duke@435 | 60 | _stk(arena(), 8, 0, NULL), // scratch stack of nodes |
duke@435 | 61 | _nlist(arena(), 8, 0, NULL), // scratch list of nodes |
duke@435 | 62 | _lpt(NULL), // loop tree node |
duke@435 | 63 | _lp(NULL), // LoopNode |
duke@435 | 64 | _bb(NULL), // basic block |
duke@435 | 65 | _iv(NULL) // induction var |
duke@435 | 66 | {} |
duke@435 | 67 | |
duke@435 | 68 | //------------------------------transform_loop--------------------------- |
duke@435 | 69 | void SuperWord::transform_loop(IdealLoopTree* lpt) { |
kvn@3882 | 70 | assert(UseSuperWord, "should be"); |
kvn@3882 | 71 | // Do vectors exist on this architecture? |
kvn@3882 | 72 | if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; |
kvn@3882 | 73 | |
duke@435 | 74 | assert(lpt->_head->is_CountedLoop(), "must be"); |
duke@435 | 75 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
duke@435 | 76 | |
kvn@3048 | 77 | if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop |
kvn@3048 | 78 | |
duke@435 | 79 | if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops |
duke@435 | 80 | |
duke@435 | 81 | // Check for no control flow in body (other than exit) |
duke@435 | 82 | Node *cl_exit = cl->loopexit(); |
duke@435 | 83 | if (cl_exit->in(0) != lpt->_head) return; |
duke@435 | 84 | |
never@540 | 85 | // Make sure the are no extra control users of the loop backedge |
never@540 | 86 | if (cl->back_control()->outcnt() != 1) { |
never@540 | 87 | return; |
never@540 | 88 | } |
never@540 | 89 | |
duke@435 | 90 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) |
duke@435 | 91 | CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
duke@435 | 92 | if (pre_end == NULL) return; |
duke@435 | 93 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 94 | if (pre_opaq1->Opcode() != Op_Opaque1) return; |
duke@435 | 95 | |
duke@435 | 96 | init(); // initialize data structures |
duke@435 | 97 | |
duke@435 | 98 | set_lpt(lpt); |
duke@435 | 99 | set_lp(cl); |
duke@435 | 100 | |
kvn@3882 | 101 | // For now, define one block which is the entire loop body |
duke@435 | 102 | set_bb(cl); |
duke@435 | 103 | |
duke@435 | 104 | assert(_packset.length() == 0, "packset must be empty"); |
duke@435 | 105 | SLP_extract(); |
duke@435 | 106 | } |
duke@435 | 107 | |
duke@435 | 108 | //------------------------------SLP_extract--------------------------- |
duke@435 | 109 | // Extract the superword level parallelism |
duke@435 | 110 | // |
duke@435 | 111 | // 1) A reverse post-order of nodes in the block is constructed. By scanning |
duke@435 | 112 | // this list from first to last, all definitions are visited before their uses. |
duke@435 | 113 | // |
duke@435 | 114 | // 2) A point-to-point dependence graph is constructed between memory references. |
duke@435 | 115 | // This simplies the upcoming "independence" checker. |
duke@435 | 116 | // |
duke@435 | 117 | // 3) The maximum depth in the node graph from the beginning of the block |
duke@435 | 118 | // to each node is computed. This is used to prune the graph search |
duke@435 | 119 | // in the independence checker. |
duke@435 | 120 | // |
duke@435 | 121 | // 4) For integer types, the necessary bit width is propagated backwards |
duke@435 | 122 | // from stores to allow packed operations on byte, char, and short |
duke@435 | 123 | // integers. This reverses the promotion to type "int" that javac |
duke@435 | 124 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. |
duke@435 | 125 | // |
duke@435 | 126 | // 5) One of the memory references is picked to be an aligned vector reference. |
duke@435 | 127 | // The pre-loop trip count is adjusted to align this reference in the |
duke@435 | 128 | // unrolled body. |
duke@435 | 129 | // |
duke@435 | 130 | // 6) The initial set of pack pairs is seeded with memory references. |
duke@435 | 131 | // |
duke@435 | 132 | // 7) The set of pack pairs is extended by following use->def and def->use links. |
duke@435 | 133 | // |
duke@435 | 134 | // 8) The pairs are combined into vector sized packs. |
duke@435 | 135 | // |
duke@435 | 136 | // 9) Reorder the memory slices to co-locate members of the memory packs. |
duke@435 | 137 | // |
duke@435 | 138 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, |
duke@435 | 139 | // inserting scalar promotion, vector creation from multiple scalars, and |
duke@435 | 140 | // extraction of scalar values from vectors. |
duke@435 | 141 | // |
duke@435 | 142 | void SuperWord::SLP_extract() { |
duke@435 | 143 | |
duke@435 | 144 | // Ready the block |
duke@435 | 145 | |
duke@435 | 146 | construct_bb(); |
duke@435 | 147 | |
duke@435 | 148 | dependence_graph(); |
duke@435 | 149 | |
duke@435 | 150 | compute_max_depth(); |
duke@435 | 151 | |
duke@435 | 152 | compute_vector_element_type(); |
duke@435 | 153 | |
duke@435 | 154 | // Attempt vectorization |
duke@435 | 155 | |
duke@435 | 156 | find_adjacent_refs(); |
duke@435 | 157 | |
duke@435 | 158 | extend_packlist(); |
duke@435 | 159 | |
duke@435 | 160 | combine_packs(); |
duke@435 | 161 | |
duke@435 | 162 | construct_my_pack_map(); |
duke@435 | 163 | |
duke@435 | 164 | filter_packs(); |
duke@435 | 165 | |
duke@435 | 166 | schedule(); |
duke@435 | 167 | |
duke@435 | 168 | output(); |
duke@435 | 169 | } |
duke@435 | 170 | |
duke@435 | 171 | //------------------------------find_adjacent_refs--------------------------- |
duke@435 | 172 | // Find the adjacent memory references and create pack pairs for them. |
duke@435 | 173 | // This is the initial set of packs that will then be extended by |
duke@435 | 174 | // following use->def and def->use links. The align positions are |
duke@435 | 175 | // assigned relative to the reference "align_to_ref" |
duke@435 | 176 | void SuperWord::find_adjacent_refs() { |
duke@435 | 177 | // Get list of memory operations |
duke@435 | 178 | Node_List memops; |
duke@435 | 179 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 180 | Node* n = _block.at(i); |
kvn@3882 | 181 | if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && |
kvn@464 | 182 | is_java_primitive(n->as_Mem()->memory_type())) { |
duke@435 | 183 | int align = memory_alignment(n->as_Mem(), 0); |
duke@435 | 184 | if (align != bottom_align) { |
duke@435 | 185 | memops.push(n); |
duke@435 | 186 | } |
duke@435 | 187 | } |
duke@435 | 188 | } |
duke@435 | 189 | |
kvn@3882 | 190 | Node_List align_to_refs; |
kvn@3882 | 191 | int best_iv_adjustment = 0; |
kvn@3882 | 192 | MemNode* best_align_to_mem_ref = NULL; |
duke@435 | 193 | |
kvn@3882 | 194 | while (memops.size() != 0) { |
kvn@3882 | 195 | // Find a memory reference to align to. |
kvn@3882 | 196 | MemNode* mem_ref = find_align_to_ref(memops); |
kvn@3882 | 197 | if (mem_ref == NULL) break; |
kvn@3882 | 198 | align_to_refs.push(mem_ref); |
kvn@3882 | 199 | int iv_adjustment = get_iv_adjustment(mem_ref); |
duke@435 | 200 | |
kvn@3882 | 201 | if (best_align_to_mem_ref == NULL) { |
kvn@3882 | 202 | // Set memory reference which is the best from all memory operations |
kvn@3882 | 203 | // to be used for alignment. The pre-loop trip count is modified to align |
kvn@3882 | 204 | // this reference to a vector-aligned address. |
kvn@3882 | 205 | best_align_to_mem_ref = mem_ref; |
kvn@3882 | 206 | best_iv_adjustment = iv_adjustment; |
kvn@3882 | 207 | } |
duke@435 | 208 | |
kvn@3882 | 209 | SWPointer align_to_ref_p(mem_ref, this); |
kvn@3882 | 210 | // Set alignment relative to "align_to_ref" for all related memory operations. |
kvn@3882 | 211 | for (int i = memops.size() - 1; i >= 0; i--) { |
kvn@3882 | 212 | MemNode* s = memops.at(i)->as_Mem(); |
kvn@3882 | 213 | if (isomorphic(s, mem_ref)) { |
kvn@3882 | 214 | SWPointer p2(s, this); |
kvn@3882 | 215 | if (p2.comparable(align_to_ref_p)) { |
kvn@3882 | 216 | int align = memory_alignment(s, iv_adjustment); |
kvn@3882 | 217 | set_alignment(s, align); |
duke@435 | 218 | } |
duke@435 | 219 | } |
duke@435 | 220 | } |
kvn@3882 | 221 | |
kvn@3882 | 222 | // Create initial pack pairs of memory operations for which |
kvn@3882 | 223 | // alignment is set and vectors will be aligned. |
kvn@3882 | 224 | bool create_pack = true; |
kvn@3886 | 225 | if (memory_alignment(mem_ref, best_iv_adjustment) == 0) { |
kvn@3886 | 226 | if (!Matcher::misaligned_vectors_ok()) { |
kvn@3886 | 227 | int vw = vector_width(mem_ref); |
kvn@3886 | 228 | int vw_best = vector_width(best_align_to_mem_ref); |
kvn@3886 | 229 | if (vw > vw_best) { |
kvn@3886 | 230 | // Do not vectorize a memory access with more elements per vector |
kvn@3886 | 231 | // if unaligned memory access is not allowed because number of |
kvn@3886 | 232 | // iterations in pre-loop will be not enough to align it. |
kvn@3886 | 233 | create_pack = false; |
kvn@3886 | 234 | } |
kvn@3886 | 235 | } |
kvn@3886 | 236 | } else { |
kvn@3882 | 237 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { |
kvn@3882 | 238 | // Can't allow vectorization of unaligned memory accesses with the |
kvn@3882 | 239 | // same type since it could be overlapped accesses to the same array. |
kvn@3882 | 240 | create_pack = false; |
kvn@3882 | 241 | } else { |
kvn@3882 | 242 | // Allow independent (different type) unaligned memory operations |
kvn@3882 | 243 | // if HW supports them. |
kvn@3882 | 244 | if (!Matcher::misaligned_vectors_ok()) { |
kvn@3882 | 245 | create_pack = false; |
kvn@3882 | 246 | } else { |
kvn@3882 | 247 | // Check if packs of the same memory type but |
kvn@3882 | 248 | // with a different alignment were created before. |
kvn@3882 | 249 | for (uint i = 0; i < align_to_refs.size(); i++) { |
kvn@3882 | 250 | MemNode* mr = align_to_refs.at(i)->as_Mem(); |
kvn@3882 | 251 | if (same_velt_type(mr, mem_ref) && |
kvn@3882 | 252 | memory_alignment(mr, iv_adjustment) != 0) |
kvn@3882 | 253 | create_pack = false; |
kvn@3882 | 254 | } |
kvn@3882 | 255 | } |
kvn@3882 | 256 | } |
kvn@3882 | 257 | } |
kvn@3882 | 258 | if (create_pack) { |
kvn@3882 | 259 | for (uint i = 0; i < memops.size(); i++) { |
kvn@3882 | 260 | Node* s1 = memops.at(i); |
kvn@3882 | 261 | int align = alignment(s1); |
kvn@3882 | 262 | if (align == top_align) continue; |
kvn@3882 | 263 | for (uint j = 0; j < memops.size(); j++) { |
kvn@3882 | 264 | Node* s2 = memops.at(j); |
kvn@3882 | 265 | if (alignment(s2) == top_align) continue; |
kvn@3882 | 266 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
kvn@3882 | 267 | if (stmts_can_pack(s1, s2, align)) { |
kvn@3882 | 268 | Node_List* pair = new Node_List(); |
kvn@3882 | 269 | pair->push(s1); |
kvn@3882 | 270 | pair->push(s2); |
kvn@3882 | 271 | _packset.append(pair); |
kvn@3882 | 272 | } |
kvn@3882 | 273 | } |
kvn@3882 | 274 | } |
kvn@3882 | 275 | } |
kvn@3882 | 276 | } else { // Don't create unaligned pack |
kvn@3882 | 277 | // First, remove remaining memory ops of the same type from the list. |
kvn@3882 | 278 | for (int i = memops.size() - 1; i >= 0; i--) { |
kvn@3882 | 279 | MemNode* s = memops.at(i)->as_Mem(); |
kvn@3882 | 280 | if (same_velt_type(s, mem_ref)) { |
kvn@3882 | 281 | memops.remove(i); |
kvn@3882 | 282 | } |
kvn@3882 | 283 | } |
kvn@3882 | 284 | |
kvn@3882 | 285 | // Second, remove already constructed packs of the same type. |
kvn@3882 | 286 | for (int i = _packset.length() - 1; i >= 0; i--) { |
kvn@3882 | 287 | Node_List* p = _packset.at(i); |
kvn@3882 | 288 | MemNode* s = p->at(0)->as_Mem(); |
kvn@3882 | 289 | if (same_velt_type(s, mem_ref)) { |
kvn@3882 | 290 | remove_pack_at(i); |
kvn@3882 | 291 | } |
kvn@3882 | 292 | } |
kvn@3882 | 293 | |
kvn@3882 | 294 | // If needed find the best memory reference for loop alignment again. |
kvn@3882 | 295 | if (same_velt_type(mem_ref, best_align_to_mem_ref)) { |
kvn@3882 | 296 | // Put memory ops from remaining packs back on memops list for |
kvn@3882 | 297 | // the best alignment search. |
kvn@3882 | 298 | uint orig_msize = memops.size(); |
kvn@3882 | 299 | for (int i = 0; i < _packset.length(); i++) { |
kvn@3882 | 300 | Node_List* p = _packset.at(i); |
kvn@3882 | 301 | MemNode* s = p->at(0)->as_Mem(); |
kvn@3882 | 302 | assert(!same_velt_type(s, mem_ref), "sanity"); |
kvn@3882 | 303 | memops.push(s); |
kvn@3882 | 304 | } |
kvn@3882 | 305 | MemNode* best_align_to_mem_ref = find_align_to_ref(memops); |
kvn@3882 | 306 | if (best_align_to_mem_ref == NULL) break; |
kvn@3882 | 307 | best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref); |
kvn@3882 | 308 | // Restore list. |
kvn@3882 | 309 | while (memops.size() > orig_msize) |
kvn@3882 | 310 | (void)memops.pop(); |
kvn@3882 | 311 | } |
kvn@3882 | 312 | } // unaligned memory accesses |
kvn@3882 | 313 | |
kvn@3882 | 314 | // Remove used mem nodes. |
kvn@3882 | 315 | for (int i = memops.size() - 1; i >= 0; i--) { |
kvn@3882 | 316 | MemNode* m = memops.at(i)->as_Mem(); |
kvn@3882 | 317 | if (alignment(m) != top_align) { |
kvn@3882 | 318 | memops.remove(i); |
kvn@3882 | 319 | } |
kvn@3882 | 320 | } |
kvn@3882 | 321 | |
kvn@3882 | 322 | } // while (memops.size() != 0 |
kvn@3882 | 323 | set_align_to_ref(best_align_to_mem_ref); |
duke@435 | 324 | |
duke@435 | 325 | #ifndef PRODUCT |
duke@435 | 326 | if (TraceSuperWord) { |
duke@435 | 327 | tty->print_cr("\nAfter find_adjacent_refs"); |
duke@435 | 328 | print_packset(); |
duke@435 | 329 | } |
duke@435 | 330 | #endif |
duke@435 | 331 | } |
duke@435 | 332 | |
duke@435 | 333 | //------------------------------find_align_to_ref--------------------------- |
duke@435 | 334 | // Find a memory reference to align the loop induction variable to. |
duke@435 | 335 | // Looks first at stores then at loads, looking for a memory reference |
duke@435 | 336 | // with the largest number of references similar to it. |
kvn@3882 | 337 | MemNode* SuperWord::find_align_to_ref(Node_List &memops) { |
duke@435 | 338 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
duke@435 | 339 | |
duke@435 | 340 | // Count number of comparable memory ops |
duke@435 | 341 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 342 | MemNode* s1 = memops.at(i)->as_Mem(); |
duke@435 | 343 | SWPointer p1(s1, this); |
duke@435 | 344 | // Discard if pre loop can't align this reference |
duke@435 | 345 | if (!ref_is_alignable(p1)) { |
duke@435 | 346 | *cmp_ct.adr_at(i) = 0; |
duke@435 | 347 | continue; |
duke@435 | 348 | } |
duke@435 | 349 | for (uint j = i+1; j < memops.size(); j++) { |
duke@435 | 350 | MemNode* s2 = memops.at(j)->as_Mem(); |
duke@435 | 351 | if (isomorphic(s1, s2)) { |
duke@435 | 352 | SWPointer p2(s2, this); |
duke@435 | 353 | if (p1.comparable(p2)) { |
duke@435 | 354 | (*cmp_ct.adr_at(i))++; |
duke@435 | 355 | (*cmp_ct.adr_at(j))++; |
duke@435 | 356 | } |
duke@435 | 357 | } |
duke@435 | 358 | } |
duke@435 | 359 | } |
duke@435 | 360 | |
kvn@3882 | 361 | // Find Store (or Load) with the greatest number of "comparable" references, |
kvn@3882 | 362 | // biggest vector size, smallest data size and smallest iv offset. |
duke@435 | 363 | int max_ct = 0; |
kvn@3882 | 364 | int max_vw = 0; |
duke@435 | 365 | int max_idx = -1; |
duke@435 | 366 | int min_size = max_jint; |
duke@435 | 367 | int min_iv_offset = max_jint; |
duke@435 | 368 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 369 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 370 | if (s->is_Store()) { |
kvn@3886 | 371 | int vw = vector_width_in_bytes(s); |
kvn@3882 | 372 | assert(vw > 1, "sanity"); |
duke@435 | 373 | SWPointer p(s, this); |
kvn@3882 | 374 | if (cmp_ct.at(j) > max_ct || |
kvn@3882 | 375 | cmp_ct.at(j) == max_ct && |
kvn@3882 | 376 | (vw > max_vw || |
kvn@3882 | 377 | vw == max_vw && |
kvn@3882 | 378 | (data_size(s) < min_size || |
kvn@3882 | 379 | data_size(s) == min_size && |
kvn@3882 | 380 | (p.offset_in_bytes() < min_iv_offset)))) { |
duke@435 | 381 | max_ct = cmp_ct.at(j); |
kvn@3882 | 382 | max_vw = vw; |
duke@435 | 383 | max_idx = j; |
duke@435 | 384 | min_size = data_size(s); |
duke@435 | 385 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 386 | } |
duke@435 | 387 | } |
duke@435 | 388 | } |
duke@435 | 389 | // If no stores, look at loads |
duke@435 | 390 | if (max_ct == 0) { |
duke@435 | 391 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 392 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 393 | if (s->is_Load()) { |
kvn@3886 | 394 | int vw = vector_width_in_bytes(s); |
kvn@3882 | 395 | assert(vw > 1, "sanity"); |
duke@435 | 396 | SWPointer p(s, this); |
kvn@3882 | 397 | if (cmp_ct.at(j) > max_ct || |
kvn@3882 | 398 | cmp_ct.at(j) == max_ct && |
kvn@3882 | 399 | (vw > max_vw || |
kvn@3882 | 400 | vw == max_vw && |
kvn@3882 | 401 | (data_size(s) < min_size || |
kvn@3882 | 402 | data_size(s) == min_size && |
kvn@3882 | 403 | (p.offset_in_bytes() < min_iv_offset)))) { |
duke@435 | 404 | max_ct = cmp_ct.at(j); |
kvn@3882 | 405 | max_vw = vw; |
duke@435 | 406 | max_idx = j; |
duke@435 | 407 | min_size = data_size(s); |
duke@435 | 408 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 409 | } |
duke@435 | 410 | } |
duke@435 | 411 | } |
duke@435 | 412 | } |
duke@435 | 413 | |
kvn@3882 | 414 | #ifdef ASSERT |
duke@435 | 415 | if (TraceSuperWord && Verbose) { |
duke@435 | 416 | tty->print_cr("\nVector memops after find_align_to_refs"); |
duke@435 | 417 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 418 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 419 | s->dump(); |
duke@435 | 420 | } |
duke@435 | 421 | } |
duke@435 | 422 | #endif |
kvn@3882 | 423 | |
kvn@3882 | 424 | if (max_ct > 0) { |
kvn@3882 | 425 | #ifdef ASSERT |
kvn@3882 | 426 | if (TraceSuperWord) { |
kvn@3882 | 427 | tty->print("\nVector align to node: "); |
kvn@3882 | 428 | memops.at(max_idx)->as_Mem()->dump(); |
kvn@3882 | 429 | } |
kvn@3882 | 430 | #endif |
kvn@3882 | 431 | return memops.at(max_idx)->as_Mem(); |
kvn@3882 | 432 | } |
kvn@3882 | 433 | return NULL; |
duke@435 | 434 | } |
duke@435 | 435 | |
duke@435 | 436 | //------------------------------ref_is_alignable--------------------------- |
duke@435 | 437 | // Can the preloop align the reference to position zero in the vector? |
duke@435 | 438 | bool SuperWord::ref_is_alignable(SWPointer& p) { |
duke@435 | 439 | if (!p.has_iv()) { |
duke@435 | 440 | return true; // no induction variable |
duke@435 | 441 | } |
duke@435 | 442 | CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop()); |
duke@435 | 443 | assert(pre_end->stride_is_con(), "pre loop stride is constant"); |
duke@435 | 444 | int preloop_stride = pre_end->stride_con(); |
duke@435 | 445 | |
duke@435 | 446 | int span = preloop_stride * p.scale_in_bytes(); |
duke@435 | 447 | |
duke@435 | 448 | // Stride one accesses are alignable. |
duke@435 | 449 | if (ABS(span) == p.memory_size()) |
duke@435 | 450 | return true; |
duke@435 | 451 | |
duke@435 | 452 | // If initial offset from start of object is computable, |
duke@435 | 453 | // compute alignment within the vector. |
kvn@3886 | 454 | int vw = vector_width_in_bytes(p.mem()); |
kvn@3882 | 455 | assert(vw > 1, "sanity"); |
duke@435 | 456 | if (vw % span == 0) { |
duke@435 | 457 | Node* init_nd = pre_end->init_trip(); |
duke@435 | 458 | if (init_nd->is_Con() && p.invar() == NULL) { |
duke@435 | 459 | int init = init_nd->bottom_type()->is_int()->get_con(); |
duke@435 | 460 | |
duke@435 | 461 | int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes(); |
duke@435 | 462 | assert(init_offset >= 0, "positive offset from object start"); |
duke@435 | 463 | |
duke@435 | 464 | if (span > 0) { |
duke@435 | 465 | return (vw - (init_offset % vw)) % span == 0; |
duke@435 | 466 | } else { |
duke@435 | 467 | assert(span < 0, "nonzero stride * scale"); |
duke@435 | 468 | return (init_offset % vw) % -span == 0; |
duke@435 | 469 | } |
duke@435 | 470 | } |
duke@435 | 471 | } |
duke@435 | 472 | return false; |
duke@435 | 473 | } |
duke@435 | 474 | |
kvn@3882 | 475 | //---------------------------get_iv_adjustment--------------------------- |
kvn@3882 | 476 | // Calculate loop's iv adjustment for this memory ops. |
kvn@3882 | 477 | int SuperWord::get_iv_adjustment(MemNode* mem_ref) { |
kvn@3882 | 478 | SWPointer align_to_ref_p(mem_ref, this); |
kvn@3882 | 479 | int offset = align_to_ref_p.offset_in_bytes(); |
kvn@3882 | 480 | int scale = align_to_ref_p.scale_in_bytes(); |
kvn@3886 | 481 | int vw = vector_width_in_bytes(mem_ref); |
kvn@3882 | 482 | assert(vw > 1, "sanity"); |
kvn@3882 | 483 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
kvn@3882 | 484 | int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
kvn@3882 | 485 | |
kvn@3882 | 486 | #ifndef PRODUCT |
kvn@3882 | 487 | if (TraceSuperWord) |
kvn@3882 | 488 | tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", |
kvn@3882 | 489 | offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw); |
kvn@3882 | 490 | #endif |
kvn@3882 | 491 | return iv_adjustment; |
kvn@3882 | 492 | } |
kvn@3882 | 493 | |
duke@435 | 494 | //---------------------------dependence_graph--------------------------- |
duke@435 | 495 | // Construct dependency graph. |
duke@435 | 496 | // Add dependence edges to load/store nodes for memory dependence |
duke@435 | 497 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) |
duke@435 | 498 | void SuperWord::dependence_graph() { |
duke@435 | 499 | // First, assign a dependence node to each memory node |
duke@435 | 500 | for (int i = 0; i < _block.length(); i++ ) { |
duke@435 | 501 | Node *n = _block.at(i); |
duke@435 | 502 | if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) { |
duke@435 | 503 | _dg.make_node(n); |
duke@435 | 504 | } |
duke@435 | 505 | } |
duke@435 | 506 | |
duke@435 | 507 | // For each memory slice, create the dependences |
duke@435 | 508 | for (int i = 0; i < _mem_slice_head.length(); i++) { |
duke@435 | 509 | Node* n = _mem_slice_head.at(i); |
duke@435 | 510 | Node* n_tail = _mem_slice_tail.at(i); |
duke@435 | 511 | |
duke@435 | 512 | // Get slice in predecessor order (last is first) |
duke@435 | 513 | mem_slice_preds(n_tail, n, _nlist); |
duke@435 | 514 | |
duke@435 | 515 | // Make the slice dependent on the root |
duke@435 | 516 | DepMem* slice = _dg.dep(n); |
duke@435 | 517 | _dg.make_edge(_dg.root(), slice); |
duke@435 | 518 | |
duke@435 | 519 | // Create a sink for the slice |
duke@435 | 520 | DepMem* slice_sink = _dg.make_node(NULL); |
duke@435 | 521 | _dg.make_edge(slice_sink, _dg.tail()); |
duke@435 | 522 | |
duke@435 | 523 | // Now visit each pair of memory ops, creating the edges |
duke@435 | 524 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { |
duke@435 | 525 | Node* s1 = _nlist.at(j); |
duke@435 | 526 | |
duke@435 | 527 | // If no dependency yet, use slice |
duke@435 | 528 | if (_dg.dep(s1)->in_cnt() == 0) { |
duke@435 | 529 | _dg.make_edge(slice, s1); |
duke@435 | 530 | } |
duke@435 | 531 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 532 | bool sink_dependent = true; |
duke@435 | 533 | for (int k = j - 1; k >= 0; k--) { |
duke@435 | 534 | Node* s2 = _nlist.at(k); |
duke@435 | 535 | if (s1->is_Load() && s2->is_Load()) |
duke@435 | 536 | continue; |
duke@435 | 537 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 538 | |
duke@435 | 539 | int cmp = p1.cmp(p2); |
duke@435 | 540 | if (SuperWordRTDepCheck && |
duke@435 | 541 | p1.base() != p2.base() && p1.valid() && p2.valid()) { |
duke@435 | 542 | // Create a runtime check to disambiguate |
duke@435 | 543 | OrderedPair pp(p1.base(), p2.base()); |
duke@435 | 544 | _disjoint_ptrs.append_if_missing(pp); |
duke@435 | 545 | } else if (!SWPointer::not_equal(cmp)) { |
duke@435 | 546 | // Possibly same address |
duke@435 | 547 | _dg.make_edge(s1, s2); |
duke@435 | 548 | sink_dependent = false; |
duke@435 | 549 | } |
duke@435 | 550 | } |
duke@435 | 551 | if (sink_dependent) { |
duke@435 | 552 | _dg.make_edge(s1, slice_sink); |
duke@435 | 553 | } |
duke@435 | 554 | } |
duke@435 | 555 | #ifndef PRODUCT |
duke@435 | 556 | if (TraceSuperWord) { |
duke@435 | 557 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
duke@435 | 558 | for (int q = 0; q < _nlist.length(); q++) { |
duke@435 | 559 | _dg.print(_nlist.at(q)); |
duke@435 | 560 | } |
duke@435 | 561 | tty->cr(); |
duke@435 | 562 | } |
duke@435 | 563 | #endif |
duke@435 | 564 | _nlist.clear(); |
duke@435 | 565 | } |
duke@435 | 566 | |
duke@435 | 567 | #ifndef PRODUCT |
duke@435 | 568 | if (TraceSuperWord) { |
duke@435 | 569 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
duke@435 | 570 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
duke@435 | 571 | _disjoint_ptrs.at(r).print(); |
duke@435 | 572 | tty->cr(); |
duke@435 | 573 | } |
duke@435 | 574 | tty->cr(); |
duke@435 | 575 | } |
duke@435 | 576 | #endif |
duke@435 | 577 | } |
duke@435 | 578 | |
duke@435 | 579 | //---------------------------mem_slice_preds--------------------------- |
duke@435 | 580 | // Return a memory slice (node list) in predecessor order starting at "start" |
duke@435 | 581 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
duke@435 | 582 | assert(preds.length() == 0, "start empty"); |
duke@435 | 583 | Node* n = start; |
duke@435 | 584 | Node* prev = NULL; |
duke@435 | 585 | while (true) { |
duke@435 | 586 | assert(in_bb(n), "must be in block"); |
duke@435 | 587 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 588 | Node* out = n->fast_out(i); |
duke@435 | 589 | if (out->is_Load()) { |
duke@435 | 590 | if (in_bb(out)) { |
duke@435 | 591 | preds.push(out); |
duke@435 | 592 | } |
duke@435 | 593 | } else { |
duke@435 | 594 | // FIXME |
duke@435 | 595 | if (out->is_MergeMem() && !in_bb(out)) { |
duke@435 | 596 | // Either unrolling is causing a memory edge not to disappear, |
duke@435 | 597 | // or need to run igvn.optimize() again before SLP |
duke@435 | 598 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { |
duke@435 | 599 | // Ditto. Not sure what else to check further. |
cfang@1102 | 600 | } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { |
duke@435 | 601 | // StoreCM has an input edge used as a precedence edge. |
duke@435 | 602 | // Maybe an issue when oop stores are vectorized. |
duke@435 | 603 | } else { |
duke@435 | 604 | assert(out == prev || prev == NULL, "no branches off of store slice"); |
duke@435 | 605 | } |
duke@435 | 606 | } |
duke@435 | 607 | } |
duke@435 | 608 | if (n == stop) break; |
duke@435 | 609 | preds.push(n); |
duke@435 | 610 | prev = n; |
duke@435 | 611 | n = n->in(MemNode::Memory); |
duke@435 | 612 | } |
duke@435 | 613 | } |
duke@435 | 614 | |
duke@435 | 615 | //------------------------------stmts_can_pack--------------------------- |
twisti@1040 | 616 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and |
duke@435 | 617 | // s1 aligned at "align" |
duke@435 | 618 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
cfang@1422 | 619 | |
cfang@1422 | 620 | // Do not use superword for non-primitives |
kvn@3882 | 621 | BasicType bt1 = velt_basic_type(s1); |
kvn@3882 | 622 | BasicType bt2 = velt_basic_type(s2); |
kvn@3882 | 623 | if(!is_java_primitive(bt1) || !is_java_primitive(bt2)) |
cfang@1422 | 624 | return false; |
kvn@3882 | 625 | if (Matcher::max_vector_size(bt1) < 2) { |
kvn@3882 | 626 | return false; // No vectors for this type |
kvn@3882 | 627 | } |
cfang@1422 | 628 | |
duke@435 | 629 | if (isomorphic(s1, s2)) { |
duke@435 | 630 | if (independent(s1, s2)) { |
duke@435 | 631 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
duke@435 | 632 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
duke@435 | 633 | int s1_align = alignment(s1); |
duke@435 | 634 | int s2_align = alignment(s2); |
duke@435 | 635 | if (s1_align == top_align || s1_align == align) { |
duke@435 | 636 | if (s2_align == top_align || s2_align == align + data_size(s1)) { |
duke@435 | 637 | return true; |
duke@435 | 638 | } |
duke@435 | 639 | } |
duke@435 | 640 | } |
duke@435 | 641 | } |
duke@435 | 642 | } |
duke@435 | 643 | } |
duke@435 | 644 | return false; |
duke@435 | 645 | } |
duke@435 | 646 | |
duke@435 | 647 | //------------------------------exists_at--------------------------- |
duke@435 | 648 | // Does s exist in a pack at position pos? |
duke@435 | 649 | bool SuperWord::exists_at(Node* s, uint pos) { |
duke@435 | 650 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 651 | Node_List* p = _packset.at(i); |
duke@435 | 652 | if (p->at(pos) == s) { |
duke@435 | 653 | return true; |
duke@435 | 654 | } |
duke@435 | 655 | } |
duke@435 | 656 | return false; |
duke@435 | 657 | } |
duke@435 | 658 | |
duke@435 | 659 | //------------------------------are_adjacent_refs--------------------------- |
duke@435 | 660 | // Is s1 immediately before s2 in memory? |
duke@435 | 661 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { |
duke@435 | 662 | if (!s1->is_Mem() || !s2->is_Mem()) return false; |
duke@435 | 663 | if (!in_bb(s1) || !in_bb(s2)) return false; |
never@1940 | 664 | |
never@1940 | 665 | // Do not use superword for non-primitives |
never@1940 | 666 | if (!is_java_primitive(s1->as_Mem()->memory_type()) || |
never@1940 | 667 | !is_java_primitive(s2->as_Mem()->memory_type())) { |
never@1940 | 668 | return false; |
never@1940 | 669 | } |
never@1940 | 670 | |
duke@435 | 671 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so |
duke@435 | 672 | // only pack memops that are in the same alias set until that's fixed. |
duke@435 | 673 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != |
duke@435 | 674 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) |
duke@435 | 675 | return false; |
duke@435 | 676 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 677 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 678 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; |
duke@435 | 679 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); |
duke@435 | 680 | return diff == data_size(s1); |
duke@435 | 681 | } |
duke@435 | 682 | |
duke@435 | 683 | //------------------------------isomorphic--------------------------- |
duke@435 | 684 | // Are s1 and s2 similar? |
duke@435 | 685 | bool SuperWord::isomorphic(Node* s1, Node* s2) { |
duke@435 | 686 | if (s1->Opcode() != s2->Opcode()) return false; |
duke@435 | 687 | if (s1->req() != s2->req()) return false; |
duke@435 | 688 | if (s1->in(0) != s2->in(0)) return false; |
kvn@3882 | 689 | if (!same_velt_type(s1, s2)) return false; |
duke@435 | 690 | return true; |
duke@435 | 691 | } |
duke@435 | 692 | |
duke@435 | 693 | //------------------------------independent--------------------------- |
duke@435 | 694 | // Is there no data path from s1 to s2 or s2 to s1? |
duke@435 | 695 | bool SuperWord::independent(Node* s1, Node* s2) { |
duke@435 | 696 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); |
duke@435 | 697 | int d1 = depth(s1); |
duke@435 | 698 | int d2 = depth(s2); |
duke@435 | 699 | if (d1 == d2) return s1 != s2; |
duke@435 | 700 | Node* deep = d1 > d2 ? s1 : s2; |
duke@435 | 701 | Node* shallow = d1 > d2 ? s2 : s1; |
duke@435 | 702 | |
duke@435 | 703 | visited_clear(); |
duke@435 | 704 | |
duke@435 | 705 | return independent_path(shallow, deep); |
duke@435 | 706 | } |
duke@435 | 707 | |
duke@435 | 708 | //------------------------------independent_path------------------------------ |
duke@435 | 709 | // Helper for independent |
duke@435 | 710 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { |
duke@435 | 711 | if (dp >= 1000) return false; // stop deep recursion |
duke@435 | 712 | visited_set(deep); |
duke@435 | 713 | int shal_depth = depth(shallow); |
duke@435 | 714 | assert(shal_depth <= depth(deep), "must be"); |
duke@435 | 715 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { |
duke@435 | 716 | Node* pred = preds.current(); |
duke@435 | 717 | if (in_bb(pred) && !visited_test(pred)) { |
duke@435 | 718 | if (shallow == pred) { |
duke@435 | 719 | return false; |
duke@435 | 720 | } |
duke@435 | 721 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { |
duke@435 | 722 | return false; |
duke@435 | 723 | } |
duke@435 | 724 | } |
duke@435 | 725 | } |
duke@435 | 726 | return true; |
duke@435 | 727 | } |
duke@435 | 728 | |
duke@435 | 729 | //------------------------------set_alignment--------------------------- |
duke@435 | 730 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
duke@435 | 731 | set_alignment(s1, align); |
kvn@3882 | 732 | if (align == top_align || align == bottom_align) { |
kvn@3882 | 733 | set_alignment(s2, align); |
kvn@3882 | 734 | } else { |
kvn@3882 | 735 | set_alignment(s2, align + data_size(s1)); |
kvn@3882 | 736 | } |
duke@435 | 737 | } |
duke@435 | 738 | |
duke@435 | 739 | //------------------------------data_size--------------------------- |
duke@435 | 740 | int SuperWord::data_size(Node* s) { |
kvn@3882 | 741 | int bsize = type2aelembytes(velt_basic_type(s)); |
duke@435 | 742 | assert(bsize != 0, "valid size"); |
duke@435 | 743 | return bsize; |
duke@435 | 744 | } |
duke@435 | 745 | |
duke@435 | 746 | //------------------------------extend_packlist--------------------------- |
duke@435 | 747 | // Extend packset by following use->def and def->use links from pack members. |
duke@435 | 748 | void SuperWord::extend_packlist() { |
duke@435 | 749 | bool changed; |
duke@435 | 750 | do { |
duke@435 | 751 | changed = false; |
duke@435 | 752 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 753 | Node_List* p = _packset.at(i); |
duke@435 | 754 | changed |= follow_use_defs(p); |
duke@435 | 755 | changed |= follow_def_uses(p); |
duke@435 | 756 | } |
duke@435 | 757 | } while (changed); |
duke@435 | 758 | |
duke@435 | 759 | #ifndef PRODUCT |
duke@435 | 760 | if (TraceSuperWord) { |
duke@435 | 761 | tty->print_cr("\nAfter extend_packlist"); |
duke@435 | 762 | print_packset(); |
duke@435 | 763 | } |
duke@435 | 764 | #endif |
duke@435 | 765 | } |
duke@435 | 766 | |
duke@435 | 767 | //------------------------------follow_use_defs--------------------------- |
duke@435 | 768 | // Extend the packset by visiting operand definitions of nodes in pack p |
duke@435 | 769 | bool SuperWord::follow_use_defs(Node_List* p) { |
kvn@3882 | 770 | assert(p->size() == 2, "just checking"); |
duke@435 | 771 | Node* s1 = p->at(0); |
duke@435 | 772 | Node* s2 = p->at(1); |
duke@435 | 773 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 774 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 775 | |
duke@435 | 776 | if (s1->is_Load()) return false; |
duke@435 | 777 | |
duke@435 | 778 | int align = alignment(s1); |
duke@435 | 779 | bool changed = false; |
duke@435 | 780 | int start = s1->is_Store() ? MemNode::ValueIn : 1; |
duke@435 | 781 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); |
duke@435 | 782 | for (int j = start; j < end; j++) { |
duke@435 | 783 | Node* t1 = s1->in(j); |
duke@435 | 784 | Node* t2 = s2->in(j); |
duke@435 | 785 | if (!in_bb(t1) || !in_bb(t2)) |
duke@435 | 786 | continue; |
duke@435 | 787 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 788 | if (est_savings(t1, t2) >= 0) { |
duke@435 | 789 | Node_List* pair = new Node_List(); |
duke@435 | 790 | pair->push(t1); |
duke@435 | 791 | pair->push(t2); |
duke@435 | 792 | _packset.append(pair); |
duke@435 | 793 | set_alignment(t1, t2, align); |
duke@435 | 794 | changed = true; |
duke@435 | 795 | } |
duke@435 | 796 | } |
duke@435 | 797 | } |
duke@435 | 798 | return changed; |
duke@435 | 799 | } |
duke@435 | 800 | |
duke@435 | 801 | //------------------------------follow_def_uses--------------------------- |
duke@435 | 802 | // Extend the packset by visiting uses of nodes in pack p |
duke@435 | 803 | bool SuperWord::follow_def_uses(Node_List* p) { |
duke@435 | 804 | bool changed = false; |
duke@435 | 805 | Node* s1 = p->at(0); |
duke@435 | 806 | Node* s2 = p->at(1); |
duke@435 | 807 | assert(p->size() == 2, "just checking"); |
duke@435 | 808 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 809 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 810 | |
duke@435 | 811 | if (s1->is_Store()) return false; |
duke@435 | 812 | |
duke@435 | 813 | int align = alignment(s1); |
duke@435 | 814 | int savings = -1; |
duke@435 | 815 | Node* u1 = NULL; |
duke@435 | 816 | Node* u2 = NULL; |
duke@435 | 817 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 818 | Node* t1 = s1->fast_out(i); |
duke@435 | 819 | if (!in_bb(t1)) continue; |
duke@435 | 820 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 821 | Node* t2 = s2->fast_out(j); |
duke@435 | 822 | if (!in_bb(t2)) continue; |
duke@435 | 823 | if (!opnd_positions_match(s1, t1, s2, t2)) |
duke@435 | 824 | continue; |
duke@435 | 825 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 826 | int my_savings = est_savings(t1, t2); |
duke@435 | 827 | if (my_savings > savings) { |
duke@435 | 828 | savings = my_savings; |
duke@435 | 829 | u1 = t1; |
duke@435 | 830 | u2 = t2; |
duke@435 | 831 | } |
duke@435 | 832 | } |
duke@435 | 833 | } |
duke@435 | 834 | } |
duke@435 | 835 | if (savings >= 0) { |
duke@435 | 836 | Node_List* pair = new Node_List(); |
duke@435 | 837 | pair->push(u1); |
duke@435 | 838 | pair->push(u2); |
duke@435 | 839 | _packset.append(pair); |
duke@435 | 840 | set_alignment(u1, u2, align); |
duke@435 | 841 | changed = true; |
duke@435 | 842 | } |
duke@435 | 843 | return changed; |
duke@435 | 844 | } |
duke@435 | 845 | |
duke@435 | 846 | //---------------------------opnd_positions_match------------------------- |
duke@435 | 847 | // Is the use of d1 in u1 at the same operand position as d2 in u2? |
duke@435 | 848 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { |
duke@435 | 849 | uint ct = u1->req(); |
duke@435 | 850 | if (ct != u2->req()) return false; |
duke@435 | 851 | uint i1 = 0; |
duke@435 | 852 | uint i2 = 0; |
duke@435 | 853 | do { |
duke@435 | 854 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
duke@435 | 855 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
duke@435 | 856 | if (i1 != i2) { |
kvn@3882 | 857 | if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) { |
kvn@3882 | 858 | // Further analysis relies on operands position matching. |
kvn@3882 | 859 | u2->swap_edges(i1, i2); |
kvn@3882 | 860 | } else { |
kvn@3882 | 861 | return false; |
kvn@3882 | 862 | } |
duke@435 | 863 | } |
duke@435 | 864 | } while (i1 < ct); |
duke@435 | 865 | return true; |
duke@435 | 866 | } |
duke@435 | 867 | |
duke@435 | 868 | //------------------------------est_savings--------------------------- |
duke@435 | 869 | // Estimate the savings from executing s1 and s2 as a pack |
duke@435 | 870 | int SuperWord::est_savings(Node* s1, Node* s2) { |
kvn@3882 | 871 | int save_in = 2 - 1; // 2 operations per instruction in packed form |
duke@435 | 872 | |
duke@435 | 873 | // inputs |
duke@435 | 874 | for (uint i = 1; i < s1->req(); i++) { |
duke@435 | 875 | Node* x1 = s1->in(i); |
duke@435 | 876 | Node* x2 = s2->in(i); |
duke@435 | 877 | if (x1 != x2) { |
duke@435 | 878 | if (are_adjacent_refs(x1, x2)) { |
kvn@3882 | 879 | save_in += adjacent_profit(x1, x2); |
duke@435 | 880 | } else if (!in_packset(x1, x2)) { |
kvn@3882 | 881 | save_in -= pack_cost(2); |
duke@435 | 882 | } else { |
kvn@3882 | 883 | save_in += unpack_cost(2); |
duke@435 | 884 | } |
duke@435 | 885 | } |
duke@435 | 886 | } |
duke@435 | 887 | |
duke@435 | 888 | // uses of result |
duke@435 | 889 | uint ct = 0; |
kvn@3882 | 890 | int save_use = 0; |
duke@435 | 891 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 892 | Node* s1_use = s1->fast_out(i); |
duke@435 | 893 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 894 | Node_List* p = _packset.at(j); |
duke@435 | 895 | if (p->at(0) == s1_use) { |
duke@435 | 896 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 897 | Node* s2_use = s2->fast_out(k); |
duke@435 | 898 | if (p->at(p->size()-1) == s2_use) { |
duke@435 | 899 | ct++; |
duke@435 | 900 | if (are_adjacent_refs(s1_use, s2_use)) { |
kvn@3882 | 901 | save_use += adjacent_profit(s1_use, s2_use); |
duke@435 | 902 | } |
duke@435 | 903 | } |
duke@435 | 904 | } |
duke@435 | 905 | } |
duke@435 | 906 | } |
duke@435 | 907 | } |
duke@435 | 908 | |
kvn@3882 | 909 | if (ct < s1->outcnt()) save_use += unpack_cost(1); |
kvn@3882 | 910 | if (ct < s2->outcnt()) save_use += unpack_cost(1); |
duke@435 | 911 | |
kvn@3882 | 912 | return MAX2(save_in, save_use); |
duke@435 | 913 | } |
duke@435 | 914 | |
duke@435 | 915 | //------------------------------costs--------------------------- |
duke@435 | 916 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
duke@435 | 917 | int SuperWord::pack_cost(int ct) { return ct; } |
duke@435 | 918 | int SuperWord::unpack_cost(int ct) { return ct; } |
duke@435 | 919 | |
duke@435 | 920 | //------------------------------combine_packs--------------------------- |
duke@435 | 921 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
duke@435 | 922 | void SuperWord::combine_packs() { |
kvn@3882 | 923 | bool changed = true; |
kvn@3882 | 924 | // Combine packs regardless max vector size. |
kvn@3882 | 925 | while (changed) { |
duke@435 | 926 | changed = false; |
duke@435 | 927 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 928 | Node_List* p1 = _packset.at(i); |
duke@435 | 929 | if (p1 == NULL) continue; |
duke@435 | 930 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 931 | Node_List* p2 = _packset.at(j); |
duke@435 | 932 | if (p2 == NULL) continue; |
kvn@3882 | 933 | if (i == j) continue; |
duke@435 | 934 | if (p1->at(p1->size()-1) == p2->at(0)) { |
duke@435 | 935 | for (uint k = 1; k < p2->size(); k++) { |
duke@435 | 936 | p1->push(p2->at(k)); |
duke@435 | 937 | } |
duke@435 | 938 | _packset.at_put(j, NULL); |
duke@435 | 939 | changed = true; |
duke@435 | 940 | } |
duke@435 | 941 | } |
duke@435 | 942 | } |
kvn@3882 | 943 | } |
duke@435 | 944 | |
kvn@3882 | 945 | // Split packs which have size greater then max vector size. |
kvn@3882 | 946 | for (int i = 0; i < _packset.length(); i++) { |
kvn@3882 | 947 | Node_List* p1 = _packset.at(i); |
kvn@3882 | 948 | if (p1 != NULL) { |
kvn@3882 | 949 | BasicType bt = velt_basic_type(p1->at(0)); |
kvn@3882 | 950 | uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector |
kvn@3882 | 951 | assert(is_power_of_2(max_vlen), "sanity"); |
kvn@3882 | 952 | uint psize = p1->size(); |
kvn@3882 | 953 | if (!is_power_of_2(psize)) { |
kvn@3882 | 954 | // Skip pack which can't be vector. |
kvn@3882 | 955 | // case1: for(...) { a[i] = i; } elements values are different (i+x) |
kvn@3882 | 956 | // case2: for(...) { a[i] = b[i+1]; } can't align both, load and store |
kvn@3882 | 957 | _packset.at_put(i, NULL); |
kvn@3882 | 958 | continue; |
kvn@3882 | 959 | } |
kvn@3882 | 960 | if (psize > max_vlen) { |
kvn@3882 | 961 | Node_List* pack = new Node_List(); |
kvn@3882 | 962 | for (uint j = 0; j < psize; j++) { |
kvn@3882 | 963 | pack->push(p1->at(j)); |
kvn@3882 | 964 | if (pack->size() >= max_vlen) { |
kvn@3882 | 965 | assert(is_power_of_2(pack->size()), "sanity"); |
kvn@3882 | 966 | _packset.append(pack); |
kvn@3882 | 967 | pack = new Node_List(); |
kvn@3882 | 968 | } |
kvn@3882 | 969 | } |
kvn@3882 | 970 | _packset.at_put(i, NULL); |
kvn@3882 | 971 | } |
kvn@3882 | 972 | } |
kvn@3882 | 973 | } |
kvn@3882 | 974 | |
kvn@3882 | 975 | // Compress list. |
duke@435 | 976 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 977 | Node_List* p1 = _packset.at(i); |
duke@435 | 978 | if (p1 == NULL) { |
duke@435 | 979 | _packset.remove_at(i); |
duke@435 | 980 | } |
duke@435 | 981 | } |
duke@435 | 982 | |
duke@435 | 983 | #ifndef PRODUCT |
duke@435 | 984 | if (TraceSuperWord) { |
duke@435 | 985 | tty->print_cr("\nAfter combine_packs"); |
duke@435 | 986 | print_packset(); |
duke@435 | 987 | } |
duke@435 | 988 | #endif |
duke@435 | 989 | } |
duke@435 | 990 | |
duke@435 | 991 | //-----------------------------construct_my_pack_map-------------------------- |
duke@435 | 992 | // Construct the map from nodes to packs. Only valid after the |
duke@435 | 993 | // point where a node is only in one pack (after combine_packs). |
duke@435 | 994 | void SuperWord::construct_my_pack_map() { |
duke@435 | 995 | Node_List* rslt = NULL; |
duke@435 | 996 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 997 | Node_List* p = _packset.at(i); |
duke@435 | 998 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 999 | Node* s = p->at(j); |
duke@435 | 1000 | assert(my_pack(s) == NULL, "only in one pack"); |
duke@435 | 1001 | set_my_pack(s, p); |
duke@435 | 1002 | } |
duke@435 | 1003 | } |
duke@435 | 1004 | } |
duke@435 | 1005 | |
duke@435 | 1006 | //------------------------------filter_packs--------------------------- |
duke@435 | 1007 | // Remove packs that are not implemented or not profitable. |
duke@435 | 1008 | void SuperWord::filter_packs() { |
duke@435 | 1009 | |
duke@435 | 1010 | // Remove packs that are not implemented |
duke@435 | 1011 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 1012 | Node_List* pk = _packset.at(i); |
duke@435 | 1013 | bool impl = implemented(pk); |
duke@435 | 1014 | if (!impl) { |
duke@435 | 1015 | #ifndef PRODUCT |
duke@435 | 1016 | if (TraceSuperWord && Verbose) { |
duke@435 | 1017 | tty->print_cr("Unimplemented"); |
duke@435 | 1018 | pk->at(0)->dump(); |
duke@435 | 1019 | } |
duke@435 | 1020 | #endif |
duke@435 | 1021 | remove_pack_at(i); |
duke@435 | 1022 | } |
duke@435 | 1023 | } |
duke@435 | 1024 | |
duke@435 | 1025 | // Remove packs that are not profitable |
duke@435 | 1026 | bool changed; |
duke@435 | 1027 | do { |
duke@435 | 1028 | changed = false; |
duke@435 | 1029 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 1030 | Node_List* pk = _packset.at(i); |
duke@435 | 1031 | bool prof = profitable(pk); |
duke@435 | 1032 | if (!prof) { |
duke@435 | 1033 | #ifndef PRODUCT |
duke@435 | 1034 | if (TraceSuperWord && Verbose) { |
duke@435 | 1035 | tty->print_cr("Unprofitable"); |
duke@435 | 1036 | pk->at(0)->dump(); |
duke@435 | 1037 | } |
duke@435 | 1038 | #endif |
duke@435 | 1039 | remove_pack_at(i); |
duke@435 | 1040 | changed = true; |
duke@435 | 1041 | } |
duke@435 | 1042 | } |
duke@435 | 1043 | } while (changed); |
duke@435 | 1044 | |
duke@435 | 1045 | #ifndef PRODUCT |
duke@435 | 1046 | if (TraceSuperWord) { |
duke@435 | 1047 | tty->print_cr("\nAfter filter_packs"); |
duke@435 | 1048 | print_packset(); |
duke@435 | 1049 | tty->cr(); |
duke@435 | 1050 | } |
duke@435 | 1051 | #endif |
duke@435 | 1052 | } |
duke@435 | 1053 | |
duke@435 | 1054 | //------------------------------implemented--------------------------- |
duke@435 | 1055 | // Can code be generated for pack p? |
duke@435 | 1056 | bool SuperWord::implemented(Node_List* p) { |
duke@435 | 1057 | Node* p0 = p->at(0); |
kvn@4006 | 1058 | return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0)); |
kvn@4006 | 1059 | } |
kvn@4006 | 1060 | |
kvn@4006 | 1061 | //------------------------------same_inputs-------------------------- |
kvn@4006 | 1062 | // For pack p, are all idx operands the same? |
kvn@4006 | 1063 | static bool same_inputs(Node_List* p, int idx) { |
kvn@4006 | 1064 | Node* p0 = p->at(0); |
kvn@4006 | 1065 | uint vlen = p->size(); |
kvn@4006 | 1066 | Node* p0_def = p0->in(idx); |
kvn@4006 | 1067 | for (uint i = 1; i < vlen; i++) { |
kvn@4006 | 1068 | Node* pi = p->at(i); |
kvn@4006 | 1069 | Node* pi_def = pi->in(idx); |
kvn@4006 | 1070 | if (p0_def != pi_def) |
kvn@4006 | 1071 | return false; |
kvn@4004 | 1072 | } |
kvn@4006 | 1073 | return true; |
duke@435 | 1074 | } |
duke@435 | 1075 | |
duke@435 | 1076 | //------------------------------profitable--------------------------- |
duke@435 | 1077 | // For pack p, are all operands and all uses (with in the block) vector? |
duke@435 | 1078 | bool SuperWord::profitable(Node_List* p) { |
duke@435 | 1079 | Node* p0 = p->at(0); |
duke@435 | 1080 | uint start, end; |
kvn@4006 | 1081 | VectorNode::vector_operands(p0, &start, &end); |
duke@435 | 1082 | |
duke@435 | 1083 | // Return false if some input is not vector and inside block |
duke@435 | 1084 | for (uint i = start; i < end; i++) { |
duke@435 | 1085 | if (!is_vector_use(p0, i)) { |
duke@435 | 1086 | // For now, return false if not scalar promotion case (inputs are the same.) |
twisti@1040 | 1087 | // Later, implement PackNode and allow differing, non-vector inputs |
duke@435 | 1088 | // (maybe just the ones from outside the block.) |
kvn@4006 | 1089 | if (!same_inputs(p, i)) { |
kvn@4006 | 1090 | return false; |
duke@435 | 1091 | } |
duke@435 | 1092 | } |
duke@435 | 1093 | } |
kvn@4006 | 1094 | if (VectorNode::is_shift(p0)) { |
kvn@4006 | 1095 | // For now, return false if shift count is vector because |
kvn@4006 | 1096 | // hw does not support it. |
kvn@4006 | 1097 | if (is_vector_use(p0, 2)) |
kvn@4006 | 1098 | return false; |
kvn@4006 | 1099 | // For the same reason return false if different shift counts. |
kvn@4006 | 1100 | if (!same_inputs(p, 2)) |
kvn@4006 | 1101 | return false; |
kvn@4006 | 1102 | } |
duke@435 | 1103 | if (!p0->is_Store()) { |
duke@435 | 1104 | // For now, return false if not all uses are vector. |
duke@435 | 1105 | // Later, implement ExtractNode and allow non-vector uses (maybe |
duke@435 | 1106 | // just the ones outside the block.) |
duke@435 | 1107 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1108 | Node* def = p->at(i); |
duke@435 | 1109 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1110 | Node* use = def->fast_out(j); |
duke@435 | 1111 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1112 | Node* n = use->in(k); |
duke@435 | 1113 | if (def == n) { |
duke@435 | 1114 | if (!is_vector_use(use, k)) { |
duke@435 | 1115 | return false; |
duke@435 | 1116 | } |
duke@435 | 1117 | } |
duke@435 | 1118 | } |
duke@435 | 1119 | } |
duke@435 | 1120 | } |
duke@435 | 1121 | } |
duke@435 | 1122 | return true; |
duke@435 | 1123 | } |
duke@435 | 1124 | |
duke@435 | 1125 | //------------------------------schedule--------------------------- |
duke@435 | 1126 | // Adjust the memory graph for the packed operations |
duke@435 | 1127 | void SuperWord::schedule() { |
duke@435 | 1128 | |
duke@435 | 1129 | // Co-locate in the memory graph the members of each memory pack |
duke@435 | 1130 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1131 | co_locate_pack(_packset.at(i)); |
duke@435 | 1132 | } |
duke@435 | 1133 | } |
duke@435 | 1134 | |
cfang@1102 | 1135 | //-------------------------------remove_and_insert------------------- |
kvn@3882 | 1136 | // Remove "current" from its current position in the memory graph and insert |
kvn@3882 | 1137 | // it after the appropriate insertion point (lip or uip). |
cfang@1102 | 1138 | void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, |
cfang@1102 | 1139 | Node *uip, Unique_Node_List &sched_before) { |
cfang@1102 | 1140 | Node* my_mem = current->in(MemNode::Memory); |
kvn@3882 | 1141 | bool sched_up = sched_before.member(current); |
cfang@1102 | 1142 | |
kvn@3882 | 1143 | // remove current_store from its current position in the memmory graph |
cfang@1102 | 1144 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 1145 | Node* use = current->out(i); |
cfang@1102 | 1146 | if (use->is_Mem()) { |
cfang@1102 | 1147 | assert(use->in(MemNode::Memory) == current, "must be"); |
cfang@1102 | 1148 | if (use == prev) { // connect prev to my_mem |
kvn@3882 | 1149 | _igvn.replace_input_of(use, MemNode::Memory, my_mem); |
kvn@3882 | 1150 | --i; //deleted this edge; rescan position |
cfang@1102 | 1151 | } else if (sched_before.member(use)) { |
kvn@3882 | 1152 | if (!sched_up) { // Will be moved together with current |
kvn@3882 | 1153 | _igvn.replace_input_of(use, MemNode::Memory, uip); |
kvn@3882 | 1154 | --i; //deleted this edge; rescan position |
kvn@3882 | 1155 | } |
cfang@1102 | 1156 | } else { |
kvn@3882 | 1157 | if (sched_up) { // Will be moved together with current |
kvn@3882 | 1158 | _igvn.replace_input_of(use, MemNode::Memory, lip); |
kvn@3882 | 1159 | --i; //deleted this edge; rescan position |
kvn@3882 | 1160 | } |
cfang@1102 | 1161 | } |
cfang@1102 | 1162 | } |
cfang@1102 | 1163 | } |
cfang@1102 | 1164 | |
cfang@1102 | 1165 | Node *insert_pt = sched_up ? uip : lip; |
cfang@1102 | 1166 | |
cfang@1102 | 1167 | // all uses of insert_pt's memory state should use current's instead |
cfang@1102 | 1168 | for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { |
cfang@1102 | 1169 | Node* use = insert_pt->out(i); |
cfang@1102 | 1170 | if (use->is_Mem()) { |
cfang@1102 | 1171 | assert(use->in(MemNode::Memory) == insert_pt, "must be"); |
kvn@3847 | 1172 | _igvn.replace_input_of(use, MemNode::Memory, current); |
cfang@1102 | 1173 | --i; //deleted this edge; rescan position |
cfang@1102 | 1174 | } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { |
cfang@1102 | 1175 | uint pos; //lip (lower insert point) must be the last one in the memory slice |
cfang@1102 | 1176 | for (pos=1; pos < use->req(); pos++) { |
cfang@1102 | 1177 | if (use->in(pos) == insert_pt) break; |
cfang@1102 | 1178 | } |
kvn@3847 | 1179 | _igvn.replace_input_of(use, pos, current); |
cfang@1102 | 1180 | --i; |
cfang@1102 | 1181 | } |
cfang@1102 | 1182 | } |
cfang@1102 | 1183 | |
cfang@1102 | 1184 | //connect current to insert_pt |
kvn@3882 | 1185 | _igvn.replace_input_of(current, MemNode::Memory, insert_pt); |
cfang@1102 | 1186 | } |
cfang@1102 | 1187 | |
cfang@1102 | 1188 | //------------------------------co_locate_pack---------------------------------- |
cfang@1102 | 1189 | // To schedule a store pack, we need to move any sandwiched memory ops either before |
cfang@1102 | 1190 | // or after the pack, based upon dependence information: |
cfang@1102 | 1191 | // (1) If any store in the pack depends on the sandwiched memory op, the |
cfang@1102 | 1192 | // sandwiched memory op must be scheduled BEFORE the pack; |
cfang@1102 | 1193 | // (2) If a sandwiched memory op depends on any store in the pack, the |
cfang@1102 | 1194 | // sandwiched memory op must be scheduled AFTER the pack; |
cfang@1102 | 1195 | // (3) If a sandwiched memory op (say, memA) depends on another sandwiched |
cfang@1102 | 1196 | // memory op (say memB), memB must be scheduled before memA. So, if memA is |
cfang@1102 | 1197 | // scheduled before the pack, memB must also be scheduled before the pack; |
cfang@1102 | 1198 | // (4) If there is no dependence restriction for a sandwiched memory op, we simply |
cfang@1102 | 1199 | // schedule this store AFTER the pack |
cfang@1102 | 1200 | // (5) We know there is no dependence cycle, so there in no other case; |
cfang@1102 | 1201 | // (6) Finally, all memory ops in another single pack should be moved in the same direction. |
cfang@1102 | 1202 | // |
cfang@1387 | 1203 | // To schedule a load pack, we use the memory state of either the first or the last load in |
cfang@1387 | 1204 | // the pack, based on the dependence constraint. |
duke@435 | 1205 | void SuperWord::co_locate_pack(Node_List* pk) { |
duke@435 | 1206 | if (pk->at(0)->is_Store()) { |
duke@435 | 1207 | MemNode* first = executed_first(pk)->as_Mem(); |
duke@435 | 1208 | MemNode* last = executed_last(pk)->as_Mem(); |
cfang@1102 | 1209 | Unique_Node_List schedule_before_pack; |
cfang@1102 | 1210 | Unique_Node_List memops; |
cfang@1102 | 1211 | |
duke@435 | 1212 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1213 | MemNode* previous = last; |
duke@435 | 1214 | while (true) { |
duke@435 | 1215 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1216 | memops.push(previous); |
cfang@1102 | 1217 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 1218 | Node* use = current->out(i); |
cfang@1102 | 1219 | if (use->is_Mem() && use != previous) |
cfang@1102 | 1220 | memops.push(use); |
cfang@1102 | 1221 | } |
kvn@3882 | 1222 | if (current == first) break; |
cfang@1102 | 1223 | previous = current; |
cfang@1102 | 1224 | current = current->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1225 | } |
cfang@1102 | 1226 | |
cfang@1102 | 1227 | // determine which memory operations should be scheduled before the pack |
cfang@1102 | 1228 | for (uint i = 1; i < memops.size(); i++) { |
cfang@1102 | 1229 | Node *s1 = memops.at(i); |
cfang@1102 | 1230 | if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { |
cfang@1102 | 1231 | for (uint j = 0; j< i; j++) { |
cfang@1102 | 1232 | Node *s2 = memops.at(j); |
cfang@1102 | 1233 | if (!independent(s1, s2)) { |
cfang@1102 | 1234 | if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { |
kvn@3882 | 1235 | schedule_before_pack.push(s1); // s1 must be scheduled before |
cfang@1102 | 1236 | Node_List* mem_pk = my_pack(s1); |
cfang@1102 | 1237 | if (mem_pk != NULL) { |
cfang@1102 | 1238 | for (uint ii = 0; ii < mem_pk->size(); ii++) { |
kvn@3882 | 1239 | Node* s = mem_pk->at(ii); // follow partner |
cfang@1102 | 1240 | if (memops.member(s) && !schedule_before_pack.member(s)) |
cfang@1102 | 1241 | schedule_before_pack.push(s); |
cfang@1102 | 1242 | } |
cfang@1102 | 1243 | } |
kvn@3882 | 1244 | break; |
cfang@1102 | 1245 | } |
cfang@1102 | 1246 | } |
cfang@1102 | 1247 | } |
cfang@1102 | 1248 | } |
cfang@1102 | 1249 | } |
cfang@1102 | 1250 | |
kvn@3882 | 1251 | Node* upper_insert_pt = first->in(MemNode::Memory); |
kvn@3882 | 1252 | // Following code moves loads connected to upper_insert_pt below aliased stores. |
kvn@3882 | 1253 | // Collect such loads here and reconnect them back to upper_insert_pt later. |
kvn@3882 | 1254 | memops.clear(); |
kvn@3882 | 1255 | for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) { |
kvn@3882 | 1256 | Node* use = upper_insert_pt->out(i); |
kvn@3882 | 1257 | if (!use->is_Store()) |
kvn@3882 | 1258 | memops.push(use); |
kvn@3882 | 1259 | } |
kvn@3882 | 1260 | |
cfang@1102 | 1261 | MemNode* lower_insert_pt = last; |
cfang@1102 | 1262 | previous = last; //previous store in pk |
cfang@1102 | 1263 | current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1264 | |
kvn@3882 | 1265 | // start scheduling from "last" to "first" |
cfang@1102 | 1266 | while (true) { |
cfang@1102 | 1267 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1268 | assert(in_pack(previous, pk), "previous stays in pack"); |
duke@435 | 1269 | Node* my_mem = current->in(MemNode::Memory); |
cfang@1102 | 1270 | |
duke@435 | 1271 | if (in_pack(current, pk)) { |
cfang@1102 | 1272 | // Forward users of my memory state (except "previous) to my input memory state |
duke@435 | 1273 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
duke@435 | 1274 | Node* use = current->out(i); |
cfang@1102 | 1275 | if (use->is_Mem() && use != previous) { |
duke@435 | 1276 | assert(use->in(MemNode::Memory) == current, "must be"); |
cfang@1102 | 1277 | if (schedule_before_pack.member(use)) { |
kvn@3847 | 1278 | _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt); |
cfang@1102 | 1279 | } else { |
kvn@3847 | 1280 | _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt); |
cfang@1102 | 1281 | } |
duke@435 | 1282 | --i; // deleted this edge; rescan position |
duke@435 | 1283 | } |
duke@435 | 1284 | } |
cfang@1102 | 1285 | previous = current; |
cfang@1102 | 1286 | } else { // !in_pack(current, pk) ==> a sandwiched store |
cfang@1102 | 1287 | remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); |
duke@435 | 1288 | } |
cfang@1102 | 1289 | |
duke@435 | 1290 | if (current == first) break; |
duke@435 | 1291 | current = my_mem->as_Mem(); |
cfang@1102 | 1292 | } // end while |
kvn@3882 | 1293 | |
kvn@3882 | 1294 | // Reconnect loads back to upper_insert_pt. |
kvn@3882 | 1295 | for (uint i = 0; i < memops.size(); i++) { |
kvn@3882 | 1296 | Node *ld = memops.at(i); |
kvn@3882 | 1297 | if (ld->in(MemNode::Memory) != upper_insert_pt) { |
kvn@3882 | 1298 | _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt); |
kvn@3882 | 1299 | } |
kvn@3882 | 1300 | } |
cfang@1102 | 1301 | } else if (pk->at(0)->is_Load()) { //load |
cfang@1387 | 1302 | // all loads in the pack should have the same memory state. By default, |
cfang@1387 | 1303 | // we use the memory state of the last load. However, if any load could |
cfang@1387 | 1304 | // not be moved down due to the dependence constraint, we use the memory |
cfang@1387 | 1305 | // state of the first load. |
cfang@1387 | 1306 | Node* last_mem = executed_last(pk)->in(MemNode::Memory); |
cfang@1387 | 1307 | Node* first_mem = executed_first(pk)->in(MemNode::Memory); |
cfang@1387 | 1308 | bool schedule_last = true; |
cfang@1387 | 1309 | for (uint i = 0; i < pk->size(); i++) { |
cfang@1387 | 1310 | Node* ld = pk->at(i); |
cfang@1387 | 1311 | for (Node* current = last_mem; current != ld->in(MemNode::Memory); |
cfang@1387 | 1312 | current=current->in(MemNode::Memory)) { |
cfang@1387 | 1313 | assert(current != first_mem, "corrupted memory graph"); |
cfang@1387 | 1314 | if(current->is_Mem() && !independent(current, ld)){ |
cfang@1387 | 1315 | schedule_last = false; // a later store depends on this load |
cfang@1387 | 1316 | break; |
cfang@1387 | 1317 | } |
cfang@1387 | 1318 | } |
cfang@1387 | 1319 | } |
cfang@1387 | 1320 | |
cfang@1387 | 1321 | Node* mem_input = schedule_last ? last_mem : first_mem; |
cfang@1387 | 1322 | _igvn.hash_delete(mem_input); |
cfang@1387 | 1323 | // Give each load the same memory state |
duke@435 | 1324 | for (uint i = 0; i < pk->size(); i++) { |
duke@435 | 1325 | LoadNode* ld = pk->at(i)->as_Load(); |
kvn@3847 | 1326 | _igvn.replace_input_of(ld, MemNode::Memory, mem_input); |
duke@435 | 1327 | } |
duke@435 | 1328 | } |
duke@435 | 1329 | } |
duke@435 | 1330 | |
duke@435 | 1331 | //------------------------------output--------------------------- |
duke@435 | 1332 | // Convert packs into vector node operations |
duke@435 | 1333 | void SuperWord::output() { |
duke@435 | 1334 | if (_packset.length() == 0) return; |
duke@435 | 1335 | |
kvn@2727 | 1336 | #ifndef PRODUCT |
kvn@2727 | 1337 | if (TraceLoopOpts) { |
kvn@2727 | 1338 | tty->print("SuperWord "); |
kvn@2727 | 1339 | lpt()->dump_head(); |
kvn@2727 | 1340 | } |
kvn@2727 | 1341 | #endif |
kvn@2727 | 1342 | |
duke@435 | 1343 | // MUST ENSURE main loop's initial value is properly aligned: |
duke@435 | 1344 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 |
duke@435 | 1345 | |
duke@435 | 1346 | align_initial_loop_index(align_to_ref()); |
duke@435 | 1347 | |
duke@435 | 1348 | // Insert extract (unpack) operations for scalar uses |
duke@435 | 1349 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1350 | insert_extracts(_packset.at(i)); |
duke@435 | 1351 | } |
duke@435 | 1352 | |
duke@435 | 1353 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1354 | Node* n = _block.at(i); |
duke@435 | 1355 | Node_List* p = my_pack(n); |
duke@435 | 1356 | if (p && n == executed_last(p)) { |
duke@435 | 1357 | uint vlen = p->size(); |
duke@435 | 1358 | Node* vn = NULL; |
duke@435 | 1359 | Node* low_adr = p->at(0); |
duke@435 | 1360 | Node* first = executed_first(p); |
kvn@3882 | 1361 | int opc = n->Opcode(); |
duke@435 | 1362 | if (n->is_Load()) { |
duke@435 | 1363 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1364 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1365 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1366 | const TypePtr* atyp = n->adr_type(); |
kvn@3882 | 1367 | vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); |
duke@435 | 1368 | } else if (n->is_Store()) { |
duke@435 | 1369 | // Promote value to be stored to vector |
kvn@3040 | 1370 | Node* val = vector_opd(p, MemNode::ValueIn); |
duke@435 | 1371 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1372 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1373 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1374 | const TypePtr* atyp = n->adr_type(); |
kvn@3882 | 1375 | vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
duke@435 | 1376 | } else if (n->req() == 3) { |
duke@435 | 1377 | // Promote operands to vector |
duke@435 | 1378 | Node* in1 = vector_opd(p, 1); |
duke@435 | 1379 | Node* in2 = vector_opd(p, 2); |
kvn@4001 | 1380 | if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) { |
kvn@4001 | 1381 | // Move invariant vector input into second position to avoid register spilling. |
kvn@4001 | 1382 | Node* tmp = in1; |
kvn@4001 | 1383 | in1 = in2; |
kvn@4001 | 1384 | in2 = tmp; |
kvn@4001 | 1385 | } |
kvn@3882 | 1386 | vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n)); |
duke@435 | 1387 | } else { |
duke@435 | 1388 | ShouldNotReachHere(); |
duke@435 | 1389 | } |
kvn@3882 | 1390 | assert(vn != NULL, "sanity"); |
duke@435 | 1391 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1392 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
duke@435 | 1393 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 1394 | Node* pm = p->at(j); |
kvn@1976 | 1395 | _igvn.replace_node(pm, vn); |
duke@435 | 1396 | } |
duke@435 | 1397 | _igvn._worklist.push(vn); |
kvn@3882 | 1398 | #ifdef ASSERT |
kvn@3886 | 1399 | if (TraceNewVectors) { |
kvn@3882 | 1400 | tty->print("new Vector node: "); |
kvn@3882 | 1401 | vn->dump(); |
kvn@3882 | 1402 | } |
kvn@3882 | 1403 | #endif |
duke@435 | 1404 | } |
duke@435 | 1405 | } |
duke@435 | 1406 | } |
duke@435 | 1407 | |
duke@435 | 1408 | //------------------------------vector_opd--------------------------- |
duke@435 | 1409 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) |
kvn@3040 | 1410 | Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
duke@435 | 1411 | Node* p0 = p->at(0); |
duke@435 | 1412 | uint vlen = p->size(); |
duke@435 | 1413 | Node* opd = p0->in(opd_idx); |
duke@435 | 1414 | |
kvn@4006 | 1415 | if (same_inputs(p, opd_idx)) { |
kvn@3882 | 1416 | if (opd->is_Vector() || opd->is_LoadVector()) { |
kvn@4004 | 1417 | assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector"); |
kvn@3040 | 1418 | return opd; // input is matching vector |
duke@435 | 1419 | } |
kvn@4001 | 1420 | if ((opd_idx == 2) && VectorNode::is_shift(p0)) { |
kvn@4001 | 1421 | // No vector is needed for shift count. |
kvn@4001 | 1422 | // Vector instructions do not mask shift count, do it here. |
kvn@4001 | 1423 | Compile* C = _phase->C; |
kvn@4001 | 1424 | Node* cnt = opd; |
kvn@4001 | 1425 | juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1); |
kvn@4001 | 1426 | const TypeInt* t = opd->find_int_type(); |
kvn@4001 | 1427 | if (t != NULL && t->is_con()) { |
kvn@4001 | 1428 | juint shift = t->get_con(); |
kvn@4001 | 1429 | if (shift > mask) { // Unsigned cmp |
kvn@4001 | 1430 | cnt = ConNode::make(C, TypeInt::make(shift & mask)); |
kvn@4001 | 1431 | } |
kvn@4001 | 1432 | } else { |
kvn@4001 | 1433 | if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { |
kvn@4001 | 1434 | cnt = ConNode::make(C, TypeInt::make(mask)); |
kvn@4001 | 1435 | _phase->_igvn.register_new_node_with_optimizer(cnt); |
kvn@4001 | 1436 | cnt = new (C, 3) AndINode(opd, cnt); |
kvn@4001 | 1437 | _phase->_igvn.register_new_node_with_optimizer(cnt); |
kvn@4001 | 1438 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); |
kvn@4001 | 1439 | } |
kvn@4001 | 1440 | assert(opd->bottom_type()->isa_int(), "int type only"); |
kvn@4001 | 1441 | // Move non constant shift count into XMM register. |
kvn@4001 | 1442 | cnt = new (_phase->C, 2) MoveI2FNode(cnt); |
kvn@4001 | 1443 | } |
kvn@4001 | 1444 | if (cnt != opd) { |
kvn@4001 | 1445 | _phase->_igvn.register_new_node_with_optimizer(cnt); |
kvn@4001 | 1446 | _phase->set_ctrl(cnt, _phase->get_ctrl(opd)); |
kvn@4001 | 1447 | } |
kvn@4001 | 1448 | return cnt; |
kvn@4001 | 1449 | } |
kvn@3882 | 1450 | assert(!opd->is_StoreVector(), "such vector is not expected here"); |
kvn@3748 | 1451 | // Convert scalar input to vector with the same number of elements as |
kvn@3748 | 1452 | // p0's vector. Use p0's type because size of operand's container in |
kvn@3748 | 1453 | // vector should match p0's size regardless operand's size. |
kvn@3748 | 1454 | const Type* p0_t = velt_type(p0); |
kvn@3748 | 1455 | VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t); |
duke@435 | 1456 | |
duke@435 | 1457 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1458 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
kvn@3882 | 1459 | #ifdef ASSERT |
kvn@3886 | 1460 | if (TraceNewVectors) { |
kvn@3882 | 1461 | tty->print("new Vector node: "); |
kvn@3882 | 1462 | vn->dump(); |
kvn@3882 | 1463 | } |
kvn@3882 | 1464 | #endif |
duke@435 | 1465 | return vn; |
duke@435 | 1466 | } |
duke@435 | 1467 | |
duke@435 | 1468 | // Insert pack operation |
kvn@3882 | 1469 | BasicType bt = velt_basic_type(p0); |
kvn@3882 | 1470 | PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt); |
kvn@3748 | 1471 | DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); ) |
duke@435 | 1472 | |
duke@435 | 1473 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1474 | Node* pi = p->at(i); |
duke@435 | 1475 | Node* in = pi->in(opd_idx); |
duke@435 | 1476 | assert(my_pack(in) == NULL, "Should already have been unpacked"); |
kvn@3748 | 1477 | assert(opd_bt == in->bottom_type()->basic_type(), "all same type"); |
kvn@4006 | 1478 | pk->add_opd(in); |
duke@435 | 1479 | } |
duke@435 | 1480 | _phase->_igvn.register_new_node_with_optimizer(pk); |
duke@435 | 1481 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
kvn@3882 | 1482 | #ifdef ASSERT |
kvn@3886 | 1483 | if (TraceNewVectors) { |
kvn@3886 | 1484 | tty->print("new Vector node: "); |
kvn@3882 | 1485 | pk->dump(); |
kvn@3882 | 1486 | } |
kvn@3882 | 1487 | #endif |
duke@435 | 1488 | return pk; |
duke@435 | 1489 | } |
duke@435 | 1490 | |
duke@435 | 1491 | //------------------------------insert_extracts--------------------------- |
duke@435 | 1492 | // If a use of pack p is not a vector use, then replace the |
duke@435 | 1493 | // use with an extract operation. |
duke@435 | 1494 | void SuperWord::insert_extracts(Node_List* p) { |
duke@435 | 1495 | if (p->at(0)->is_Store()) return; |
duke@435 | 1496 | assert(_n_idx_list.is_empty(), "empty (node,index) list"); |
duke@435 | 1497 | |
duke@435 | 1498 | // Inspect each use of each pack member. For each use that is |
duke@435 | 1499 | // not a vector use, replace the use with an extract operation. |
duke@435 | 1500 | |
duke@435 | 1501 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1502 | Node* def = p->at(i); |
duke@435 | 1503 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1504 | Node* use = def->fast_out(j); |
duke@435 | 1505 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1506 | Node* n = use->in(k); |
duke@435 | 1507 | if (def == n) { |
duke@435 | 1508 | if (!is_vector_use(use, k)) { |
duke@435 | 1509 | _n_idx_list.push(use, k); |
duke@435 | 1510 | } |
duke@435 | 1511 | } |
duke@435 | 1512 | } |
duke@435 | 1513 | } |
duke@435 | 1514 | } |
duke@435 | 1515 | |
duke@435 | 1516 | while (_n_idx_list.is_nonempty()) { |
duke@435 | 1517 | Node* use = _n_idx_list.node(); |
duke@435 | 1518 | int idx = _n_idx_list.index(); |
duke@435 | 1519 | _n_idx_list.pop(); |
duke@435 | 1520 | Node* def = use->in(idx); |
duke@435 | 1521 | |
duke@435 | 1522 | // Insert extract operation |
duke@435 | 1523 | _igvn.hash_delete(def); |
duke@435 | 1524 | int def_pos = alignment(def) / data_size(def); |
duke@435 | 1525 | |
kvn@3882 | 1526 | Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def)); |
duke@435 | 1527 | _phase->_igvn.register_new_node_with_optimizer(ex); |
duke@435 | 1528 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
kvn@3847 | 1529 | _igvn.replace_input_of(use, idx, ex); |
duke@435 | 1530 | _igvn._worklist.push(def); |
duke@435 | 1531 | |
duke@435 | 1532 | bb_insert_after(ex, bb_idx(def)); |
kvn@3882 | 1533 | set_velt_type(ex, velt_type(def)); |
duke@435 | 1534 | } |
duke@435 | 1535 | } |
duke@435 | 1536 | |
duke@435 | 1537 | //------------------------------is_vector_use--------------------------- |
duke@435 | 1538 | // Is use->in(u_idx) a vector use? |
duke@435 | 1539 | bool SuperWord::is_vector_use(Node* use, int u_idx) { |
duke@435 | 1540 | Node_List* u_pk = my_pack(use); |
duke@435 | 1541 | if (u_pk == NULL) return false; |
duke@435 | 1542 | Node* def = use->in(u_idx); |
duke@435 | 1543 | Node_List* d_pk = my_pack(def); |
duke@435 | 1544 | if (d_pk == NULL) { |
duke@435 | 1545 | // check for scalar promotion |
duke@435 | 1546 | Node* n = u_pk->at(0)->in(u_idx); |
duke@435 | 1547 | for (uint i = 1; i < u_pk->size(); i++) { |
duke@435 | 1548 | if (u_pk->at(i)->in(u_idx) != n) return false; |
duke@435 | 1549 | } |
duke@435 | 1550 | return true; |
duke@435 | 1551 | } |
duke@435 | 1552 | if (u_pk->size() != d_pk->size()) |
duke@435 | 1553 | return false; |
duke@435 | 1554 | for (uint i = 0; i < u_pk->size(); i++) { |
duke@435 | 1555 | Node* ui = u_pk->at(i); |
duke@435 | 1556 | Node* di = d_pk->at(i); |
duke@435 | 1557 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) |
duke@435 | 1558 | return false; |
duke@435 | 1559 | } |
duke@435 | 1560 | return true; |
duke@435 | 1561 | } |
duke@435 | 1562 | |
duke@435 | 1563 | //------------------------------construct_bb--------------------------- |
duke@435 | 1564 | // Construct reverse postorder list of block members |
duke@435 | 1565 | void SuperWord::construct_bb() { |
duke@435 | 1566 | Node* entry = bb(); |
duke@435 | 1567 | |
duke@435 | 1568 | assert(_stk.length() == 0, "stk is empty"); |
duke@435 | 1569 | assert(_block.length() == 0, "block is empty"); |
duke@435 | 1570 | assert(_data_entry.length() == 0, "data_entry is empty"); |
duke@435 | 1571 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty"); |
duke@435 | 1572 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty"); |
duke@435 | 1573 | |
duke@435 | 1574 | // Find non-control nodes with no inputs from within block, |
duke@435 | 1575 | // create a temporary map from node _idx to bb_idx for use |
duke@435 | 1576 | // by the visited and post_visited sets, |
duke@435 | 1577 | // and count number of nodes in block. |
duke@435 | 1578 | int bb_ct = 0; |
duke@435 | 1579 | for (uint i = 0; i < lpt()->_body.size(); i++ ) { |
duke@435 | 1580 | Node *n = lpt()->_body.at(i); |
duke@435 | 1581 | set_bb_idx(n, i); // Create a temporary map |
duke@435 | 1582 | if (in_bb(n)) { |
duke@435 | 1583 | bb_ct++; |
duke@435 | 1584 | if (!n->is_CFG()) { |
duke@435 | 1585 | bool found = false; |
duke@435 | 1586 | for (uint j = 0; j < n->req(); j++) { |
duke@435 | 1587 | Node* def = n->in(j); |
duke@435 | 1588 | if (def && in_bb(def)) { |
duke@435 | 1589 | found = true; |
duke@435 | 1590 | break; |
duke@435 | 1591 | } |
duke@435 | 1592 | } |
duke@435 | 1593 | if (!found) { |
duke@435 | 1594 | assert(n != entry, "can't be entry"); |
duke@435 | 1595 | _data_entry.push(n); |
duke@435 | 1596 | } |
duke@435 | 1597 | } |
duke@435 | 1598 | } |
duke@435 | 1599 | } |
duke@435 | 1600 | |
duke@435 | 1601 | // Find memory slices (head and tail) |
duke@435 | 1602 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { |
duke@435 | 1603 | Node *n = lp()->fast_out(i); |
duke@435 | 1604 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { |
duke@435 | 1605 | Node* n_tail = n->in(LoopNode::LoopBackControl); |
kvn@688 | 1606 | if (n_tail != n->in(LoopNode::EntryControl)) { |
kvn@688 | 1607 | _mem_slice_head.push(n); |
kvn@688 | 1608 | _mem_slice_tail.push(n_tail); |
kvn@688 | 1609 | } |
duke@435 | 1610 | } |
duke@435 | 1611 | } |
duke@435 | 1612 | |
duke@435 | 1613 | // Create an RPO list of nodes in block |
duke@435 | 1614 | |
duke@435 | 1615 | visited_clear(); |
duke@435 | 1616 | post_visited_clear(); |
duke@435 | 1617 | |
duke@435 | 1618 | // Push all non-control nodes with no inputs from within block, then control entry |
duke@435 | 1619 | for (int j = 0; j < _data_entry.length(); j++) { |
duke@435 | 1620 | Node* n = _data_entry.at(j); |
duke@435 | 1621 | visited_set(n); |
duke@435 | 1622 | _stk.push(n); |
duke@435 | 1623 | } |
duke@435 | 1624 | visited_set(entry); |
duke@435 | 1625 | _stk.push(entry); |
duke@435 | 1626 | |
duke@435 | 1627 | // Do a depth first walk over out edges |
duke@435 | 1628 | int rpo_idx = bb_ct - 1; |
duke@435 | 1629 | int size; |
duke@435 | 1630 | while ((size = _stk.length()) > 0) { |
duke@435 | 1631 | Node* n = _stk.top(); // Leave node on stack |
duke@435 | 1632 | if (!visited_test_set(n)) { |
duke@435 | 1633 | // forward arc in graph |
duke@435 | 1634 | } else if (!post_visited_test(n)) { |
duke@435 | 1635 | // cross or back arc |
duke@435 | 1636 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 1637 | Node *use = n->fast_out(i); |
duke@435 | 1638 | if (in_bb(use) && !visited_test(use) && |
duke@435 | 1639 | // Don't go around backedge |
duke@435 | 1640 | (!use->is_Phi() || n == entry)) { |
duke@435 | 1641 | _stk.push(use); |
duke@435 | 1642 | } |
duke@435 | 1643 | } |
duke@435 | 1644 | if (_stk.length() == size) { |
duke@435 | 1645 | // There were no additional uses, post visit node now |
duke@435 | 1646 | _stk.pop(); // Remove node from stack |
duke@435 | 1647 | assert(rpo_idx >= 0, ""); |
duke@435 | 1648 | _block.at_put_grow(rpo_idx, n); |
duke@435 | 1649 | rpo_idx--; |
duke@435 | 1650 | post_visited_set(n); |
duke@435 | 1651 | assert(rpo_idx >= 0 || _stk.is_empty(), ""); |
duke@435 | 1652 | } |
duke@435 | 1653 | } else { |
duke@435 | 1654 | _stk.pop(); // Remove post-visited node from stack |
duke@435 | 1655 | } |
duke@435 | 1656 | } |
duke@435 | 1657 | |
duke@435 | 1658 | // Create real map of block indices for nodes |
duke@435 | 1659 | for (int j = 0; j < _block.length(); j++) { |
duke@435 | 1660 | Node* n = _block.at(j); |
duke@435 | 1661 | set_bb_idx(n, j); |
duke@435 | 1662 | } |
duke@435 | 1663 | |
duke@435 | 1664 | initialize_bb(); // Ensure extra info is allocated. |
duke@435 | 1665 | |
duke@435 | 1666 | #ifndef PRODUCT |
duke@435 | 1667 | if (TraceSuperWord) { |
duke@435 | 1668 | print_bb(); |
duke@435 | 1669 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); |
duke@435 | 1670 | for (int m = 0; m < _data_entry.length(); m++) { |
duke@435 | 1671 | tty->print("%3d ", m); |
duke@435 | 1672 | _data_entry.at(m)->dump(); |
duke@435 | 1673 | } |
duke@435 | 1674 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); |
duke@435 | 1675 | for (int m = 0; m < _mem_slice_head.length(); m++) { |
duke@435 | 1676 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); |
duke@435 | 1677 | tty->print(" "); _mem_slice_tail.at(m)->dump(); |
duke@435 | 1678 | } |
duke@435 | 1679 | } |
duke@435 | 1680 | #endif |
duke@435 | 1681 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); |
duke@435 | 1682 | } |
duke@435 | 1683 | |
duke@435 | 1684 | //------------------------------initialize_bb--------------------------- |
duke@435 | 1685 | // Initialize per node info |
duke@435 | 1686 | void SuperWord::initialize_bb() { |
duke@435 | 1687 | Node* last = _block.at(_block.length() - 1); |
duke@435 | 1688 | grow_node_info(bb_idx(last)); |
duke@435 | 1689 | } |
duke@435 | 1690 | |
duke@435 | 1691 | //------------------------------bb_insert_after--------------------------- |
duke@435 | 1692 | // Insert n into block after pos |
duke@435 | 1693 | void SuperWord::bb_insert_after(Node* n, int pos) { |
duke@435 | 1694 | int n_pos = pos + 1; |
duke@435 | 1695 | // Make room |
duke@435 | 1696 | for (int i = _block.length() - 1; i >= n_pos; i--) { |
duke@435 | 1697 | _block.at_put_grow(i+1, _block.at(i)); |
duke@435 | 1698 | } |
duke@435 | 1699 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { |
duke@435 | 1700 | _node_info.at_put_grow(j+1, _node_info.at(j)); |
duke@435 | 1701 | } |
duke@435 | 1702 | // Set value |
duke@435 | 1703 | _block.at_put_grow(n_pos, n); |
duke@435 | 1704 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); |
duke@435 | 1705 | // Adjust map from node->_idx to _block index |
duke@435 | 1706 | for (int i = n_pos; i < _block.length(); i++) { |
duke@435 | 1707 | set_bb_idx(_block.at(i), i); |
duke@435 | 1708 | } |
duke@435 | 1709 | } |
duke@435 | 1710 | |
duke@435 | 1711 | //------------------------------compute_max_depth--------------------------- |
duke@435 | 1712 | // Compute max depth for expressions from beginning of block |
duke@435 | 1713 | // Use to prune search paths during test for independence. |
duke@435 | 1714 | void SuperWord::compute_max_depth() { |
duke@435 | 1715 | int ct = 0; |
duke@435 | 1716 | bool again; |
duke@435 | 1717 | do { |
duke@435 | 1718 | again = false; |
duke@435 | 1719 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1720 | Node* n = _block.at(i); |
duke@435 | 1721 | if (!n->is_Phi()) { |
duke@435 | 1722 | int d_orig = depth(n); |
duke@435 | 1723 | int d_in = 0; |
duke@435 | 1724 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { |
duke@435 | 1725 | Node* pred = preds.current(); |
duke@435 | 1726 | if (in_bb(pred)) { |
duke@435 | 1727 | d_in = MAX2(d_in, depth(pred)); |
duke@435 | 1728 | } |
duke@435 | 1729 | } |
duke@435 | 1730 | if (d_in + 1 != d_orig) { |
duke@435 | 1731 | set_depth(n, d_in + 1); |
duke@435 | 1732 | again = true; |
duke@435 | 1733 | } |
duke@435 | 1734 | } |
duke@435 | 1735 | } |
duke@435 | 1736 | ct++; |
duke@435 | 1737 | } while (again); |
duke@435 | 1738 | #ifndef PRODUCT |
duke@435 | 1739 | if (TraceSuperWord && Verbose) |
duke@435 | 1740 | tty->print_cr("compute_max_depth iterated: %d times", ct); |
duke@435 | 1741 | #endif |
duke@435 | 1742 | } |
duke@435 | 1743 | |
duke@435 | 1744 | //-------------------------compute_vector_element_type----------------------- |
duke@435 | 1745 | // Compute necessary vector element type for expressions |
duke@435 | 1746 | // This propagates backwards a narrower integer type when the |
duke@435 | 1747 | // upper bits of the value are not needed. |
duke@435 | 1748 | // Example: char a,b,c; a = b + c; |
duke@435 | 1749 | // Normally the type of the add is integer, but for packed character |
duke@435 | 1750 | // operations the type of the add needs to be char. |
duke@435 | 1751 | void SuperWord::compute_vector_element_type() { |
duke@435 | 1752 | #ifndef PRODUCT |
duke@435 | 1753 | if (TraceSuperWord && Verbose) |
duke@435 | 1754 | tty->print_cr("\ncompute_velt_type:"); |
duke@435 | 1755 | #endif |
duke@435 | 1756 | |
duke@435 | 1757 | // Initial type |
duke@435 | 1758 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1759 | Node* n = _block.at(i); |
kvn@3882 | 1760 | set_velt_type(n, container_type(n)); |
duke@435 | 1761 | } |
duke@435 | 1762 | |
duke@435 | 1763 | // Propagate narrowed type backwards through operations |
duke@435 | 1764 | // that don't depend on higher order bits |
duke@435 | 1765 | for (int i = _block.length() - 1; i >= 0; i--) { |
duke@435 | 1766 | Node* n = _block.at(i); |
duke@435 | 1767 | // Only integer types need be examined |
kvn@4001 | 1768 | const Type* vt = velt_type(n); |
kvn@4001 | 1769 | if (vt->basic_type() == T_INT) { |
duke@435 | 1770 | uint start, end; |
kvn@4006 | 1771 | VectorNode::vector_operands(n, &start, &end); |
duke@435 | 1772 | const Type* vt = velt_type(n); |
duke@435 | 1773 | |
duke@435 | 1774 | for (uint j = start; j < end; j++) { |
duke@435 | 1775 | Node* in = n->in(j); |
kvn@4001 | 1776 | // Don't propagate through a memory |
kvn@4001 | 1777 | if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT && |
kvn@4001 | 1778 | data_size(n) < data_size(in)) { |
kvn@4001 | 1779 | bool same_type = true; |
kvn@4001 | 1780 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
kvn@4001 | 1781 | Node *use = in->fast_out(k); |
kvn@4001 | 1782 | if (!in_bb(use) || !same_velt_type(use, n)) { |
kvn@4001 | 1783 | same_type = false; |
kvn@4001 | 1784 | break; |
duke@435 | 1785 | } |
kvn@4001 | 1786 | } |
kvn@4001 | 1787 | if (same_type) { |
kvn@4001 | 1788 | set_velt_type(in, vt); |
duke@435 | 1789 | } |
duke@435 | 1790 | } |
duke@435 | 1791 | } |
duke@435 | 1792 | } |
duke@435 | 1793 | } |
duke@435 | 1794 | #ifndef PRODUCT |
duke@435 | 1795 | if (TraceSuperWord && Verbose) { |
duke@435 | 1796 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1797 | Node* n = _block.at(i); |
duke@435 | 1798 | velt_type(n)->dump(); |
duke@435 | 1799 | tty->print("\t"); |
duke@435 | 1800 | n->dump(); |
duke@435 | 1801 | } |
duke@435 | 1802 | } |
duke@435 | 1803 | #endif |
duke@435 | 1804 | } |
duke@435 | 1805 | |
duke@435 | 1806 | //------------------------------memory_alignment--------------------------- |
duke@435 | 1807 | // Alignment within a vector memory reference |
duke@435 | 1808 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
duke@435 | 1809 | SWPointer p(s, this); |
duke@435 | 1810 | if (!p.valid()) { |
duke@435 | 1811 | return bottom_align; |
duke@435 | 1812 | } |
kvn@3886 | 1813 | int vw = vector_width_in_bytes(s); |
kvn@3882 | 1814 | if (vw < 2) { |
kvn@3882 | 1815 | return bottom_align; // No vectors for this type |
kvn@3882 | 1816 | } |
duke@435 | 1817 | int offset = p.offset_in_bytes(); |
duke@435 | 1818 | offset += iv_adjust_in_bytes; |
kvn@3882 | 1819 | int off_rem = offset % vw; |
kvn@3882 | 1820 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; |
duke@435 | 1821 | return off_mod; |
duke@435 | 1822 | } |
duke@435 | 1823 | |
duke@435 | 1824 | //---------------------------container_type--------------------------- |
duke@435 | 1825 | // Smallest type containing range of values |
kvn@3882 | 1826 | const Type* SuperWord::container_type(Node* n) { |
kvn@3882 | 1827 | if (n->is_Mem()) { |
kvn@3882 | 1828 | return Type::get_const_basic_type(n->as_Mem()->memory_type()); |
duke@435 | 1829 | } |
kvn@3882 | 1830 | const Type* t = _igvn.type(n); |
duke@435 | 1831 | if (t->basic_type() == T_INT) { |
kvn@4001 | 1832 | // A narrow type of arithmetic operations will be determined by |
kvn@4001 | 1833 | // propagating the type of memory operations. |
duke@435 | 1834 | return TypeInt::INT; |
duke@435 | 1835 | } |
duke@435 | 1836 | return t; |
duke@435 | 1837 | } |
duke@435 | 1838 | |
kvn@3882 | 1839 | bool SuperWord::same_velt_type(Node* n1, Node* n2) { |
kvn@3882 | 1840 | const Type* vt1 = velt_type(n1); |
kvn@3882 | 1841 | const Type* vt2 = velt_type(n1); |
kvn@3882 | 1842 | if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) { |
kvn@3882 | 1843 | // Compare vectors element sizes for integer types. |
kvn@3882 | 1844 | return data_size(n1) == data_size(n2); |
kvn@3882 | 1845 | } |
kvn@3882 | 1846 | return vt1 == vt2; |
kvn@3882 | 1847 | } |
kvn@3882 | 1848 | |
duke@435 | 1849 | //------------------------------in_packset--------------------------- |
duke@435 | 1850 | // Are s1 and s2 in a pack pair and ordered as s1,s2? |
duke@435 | 1851 | bool SuperWord::in_packset(Node* s1, Node* s2) { |
duke@435 | 1852 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1853 | Node_List* p = _packset.at(i); |
duke@435 | 1854 | assert(p->size() == 2, "must be"); |
duke@435 | 1855 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { |
duke@435 | 1856 | return true; |
duke@435 | 1857 | } |
duke@435 | 1858 | } |
duke@435 | 1859 | return false; |
duke@435 | 1860 | } |
duke@435 | 1861 | |
duke@435 | 1862 | //------------------------------in_pack--------------------------- |
duke@435 | 1863 | // Is s in pack p? |
duke@435 | 1864 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { |
duke@435 | 1865 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1866 | if (p->at(i) == s) { |
duke@435 | 1867 | return p; |
duke@435 | 1868 | } |
duke@435 | 1869 | } |
duke@435 | 1870 | return NULL; |
duke@435 | 1871 | } |
duke@435 | 1872 | |
duke@435 | 1873 | //------------------------------remove_pack_at--------------------------- |
duke@435 | 1874 | // Remove the pack at position pos in the packset |
duke@435 | 1875 | void SuperWord::remove_pack_at(int pos) { |
duke@435 | 1876 | Node_List* p = _packset.at(pos); |
duke@435 | 1877 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1878 | Node* s = p->at(i); |
duke@435 | 1879 | set_my_pack(s, NULL); |
duke@435 | 1880 | } |
duke@435 | 1881 | _packset.remove_at(pos); |
duke@435 | 1882 | } |
duke@435 | 1883 | |
duke@435 | 1884 | //------------------------------executed_first--------------------------- |
duke@435 | 1885 | // Return the node executed first in pack p. Uses the RPO block list |
duke@435 | 1886 | // to determine order. |
duke@435 | 1887 | Node* SuperWord::executed_first(Node_List* p) { |
duke@435 | 1888 | Node* n = p->at(0); |
duke@435 | 1889 | int n_rpo = bb_idx(n); |
duke@435 | 1890 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1891 | Node* s = p->at(i); |
duke@435 | 1892 | int s_rpo = bb_idx(s); |
duke@435 | 1893 | if (s_rpo < n_rpo) { |
duke@435 | 1894 | n = s; |
duke@435 | 1895 | n_rpo = s_rpo; |
duke@435 | 1896 | } |
duke@435 | 1897 | } |
duke@435 | 1898 | return n; |
duke@435 | 1899 | } |
duke@435 | 1900 | |
duke@435 | 1901 | //------------------------------executed_last--------------------------- |
duke@435 | 1902 | // Return the node executed last in pack p. |
duke@435 | 1903 | Node* SuperWord::executed_last(Node_List* p) { |
duke@435 | 1904 | Node* n = p->at(0); |
duke@435 | 1905 | int n_rpo = bb_idx(n); |
duke@435 | 1906 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1907 | Node* s = p->at(i); |
duke@435 | 1908 | int s_rpo = bb_idx(s); |
duke@435 | 1909 | if (s_rpo > n_rpo) { |
duke@435 | 1910 | n = s; |
duke@435 | 1911 | n_rpo = s_rpo; |
duke@435 | 1912 | } |
duke@435 | 1913 | } |
duke@435 | 1914 | return n; |
duke@435 | 1915 | } |
duke@435 | 1916 | |
duke@435 | 1917 | //----------------------------align_initial_loop_index--------------------------- |
duke@435 | 1918 | // Adjust pre-loop limit so that in main loop, a load/store reference |
duke@435 | 1919 | // to align_to_ref will be a position zero in the vector. |
duke@435 | 1920 | // (iv + k) mod vector_align == 0 |
duke@435 | 1921 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { |
duke@435 | 1922 | CountedLoopNode *main_head = lp()->as_CountedLoop(); |
duke@435 | 1923 | assert(main_head->is_main_loop(), ""); |
duke@435 | 1924 | CountedLoopEndNode* pre_end = get_pre_loop_end(main_head); |
duke@435 | 1925 | assert(pre_end != NULL, ""); |
duke@435 | 1926 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 1927 | assert(pre_opaq1->Opcode() == Op_Opaque1, ""); |
duke@435 | 1928 | Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; |
never@507 | 1929 | Node *lim0 = pre_opaq->in(1); |
duke@435 | 1930 | |
duke@435 | 1931 | // Where we put new limit calculations |
duke@435 | 1932 | Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); |
duke@435 | 1933 | |
duke@435 | 1934 | // Ensure the original loop limit is available from the |
duke@435 | 1935 | // pre-loop Opaque1 node. |
duke@435 | 1936 | Node *orig_limit = pre_opaq->original_loop_limit(); |
duke@435 | 1937 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
duke@435 | 1938 | |
duke@435 | 1939 | SWPointer align_to_ref_p(align_to_ref, this); |
kvn@3882 | 1940 | assert(align_to_ref_p.valid(), "sanity"); |
duke@435 | 1941 | |
never@507 | 1942 | // Given: |
never@507 | 1943 | // lim0 == original pre loop limit |
never@507 | 1944 | // V == v_align (power of 2) |
never@507 | 1945 | // invar == extra invariant piece of the address expression |
kvn@4001 | 1946 | // e == offset [ +/- invar ] |
duke@435 | 1947 | // |
never@507 | 1948 | // When reassociating expressions involving '%' the basic rules are: |
never@507 | 1949 | // (a - b) % k == 0 => a % k == b % k |
never@507 | 1950 | // and: |
never@507 | 1951 | // (a + b) % k == 0 => a % k == (k - b) % k |
never@507 | 1952 | // |
never@507 | 1953 | // For stride > 0 && scale > 0, |
never@507 | 1954 | // Derive the new pre-loop limit "lim" such that the two constraints: |
never@507 | 1955 | // (1) lim = lim0 + N (where N is some positive integer < V) |
never@507 | 1956 | // (2) (e + lim) % V == 0 |
never@507 | 1957 | // are true. |
never@507 | 1958 | // |
never@507 | 1959 | // Substituting (1) into (2), |
never@507 | 1960 | // (e + lim0 + N) % V == 0 |
never@507 | 1961 | // solve for N: |
never@507 | 1962 | // N = (V - (e + lim0)) % V |
never@507 | 1963 | // substitute back into (1), so that new limit |
never@507 | 1964 | // lim = lim0 + (V - (e + lim0)) % V |
never@507 | 1965 | // |
never@507 | 1966 | // For stride > 0 && scale < 0 |
never@507 | 1967 | // Constraints: |
never@507 | 1968 | // lim = lim0 + N |
never@507 | 1969 | // (e - lim) % V == 0 |
never@507 | 1970 | // Solving for lim: |
never@507 | 1971 | // (e - lim0 - N) % V == 0 |
never@507 | 1972 | // N = (e - lim0) % V |
never@507 | 1973 | // lim = lim0 + (e - lim0) % V |
never@507 | 1974 | // |
never@507 | 1975 | // For stride < 0 && scale > 0 |
never@507 | 1976 | // Constraints: |
never@507 | 1977 | // lim = lim0 - N |
never@507 | 1978 | // (e + lim) % V == 0 |
never@507 | 1979 | // Solving for lim: |
never@507 | 1980 | // (e + lim0 - N) % V == 0 |
never@507 | 1981 | // N = (e + lim0) % V |
never@507 | 1982 | // lim = lim0 - (e + lim0) % V |
never@507 | 1983 | // |
never@507 | 1984 | // For stride < 0 && scale < 0 |
never@507 | 1985 | // Constraints: |
never@507 | 1986 | // lim = lim0 - N |
never@507 | 1987 | // (e - lim) % V == 0 |
never@507 | 1988 | // Solving for lim: |
never@507 | 1989 | // (e - lim0 + N) % V == 0 |
never@507 | 1990 | // N = (V - (e - lim0)) % V |
never@507 | 1991 | // lim = lim0 - (V - (e - lim0)) % V |
duke@435 | 1992 | |
kvn@3886 | 1993 | int vw = vector_width_in_bytes(align_to_ref); |
never@507 | 1994 | int stride = iv_stride(); |
never@507 | 1995 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 1996 | int elt_size = align_to_ref_p.memory_size(); |
kvn@3882 | 1997 | int v_align = vw / elt_size; |
kvn@3886 | 1998 | assert(v_align > 1, "sanity"); |
kvn@4001 | 1999 | int offset = align_to_ref_p.offset_in_bytes() / elt_size; |
kvn@4001 | 2000 | Node *offsn = _igvn.intcon(offset); |
duke@435 | 2001 | |
kvn@4001 | 2002 | Node *e = offsn; |
duke@435 | 2003 | if (align_to_ref_p.invar() != NULL) { |
kvn@4001 | 2004 | // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt) |
duke@435 | 2005 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
duke@435 | 2006 | Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); |
duke@435 | 2007 | _phase->_igvn.register_new_node_with_optimizer(aref); |
duke@435 | 2008 | _phase->set_ctrl(aref, pre_ctrl); |
never@507 | 2009 | if (align_to_ref_p.negate_invar()) { |
never@507 | 2010 | e = new (_phase->C, 3) SubINode(e, aref); |
duke@435 | 2011 | } else { |
never@507 | 2012 | e = new (_phase->C, 3) AddINode(e, aref); |
duke@435 | 2013 | } |
never@507 | 2014 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 2015 | _phase->set_ctrl(e, pre_ctrl); |
duke@435 | 2016 | } |
kvn@3882 | 2017 | if (vw > ObjectAlignmentInBytes) { |
kvn@3882 | 2018 | // incorporate base e +/- base && Mask >>> log2(elt) |
kvn@3882 | 2019 | Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base()); |
kvn@3882 | 2020 | _phase->_igvn.register_new_node_with_optimizer(xbase); |
kvn@4001 | 2021 | #ifdef _LP64 |
kvn@4001 | 2022 | xbase = new (_phase->C, 2) ConvL2INode(xbase); |
kvn@4001 | 2023 | _phase->_igvn.register_new_node_with_optimizer(xbase); |
kvn@4001 | 2024 | #endif |
kvn@4001 | 2025 | Node* mask = _igvn.intcon(vw-1); |
kvn@4001 | 2026 | Node* masked_xbase = new (_phase->C, 3) AndINode(xbase, mask); |
kvn@3882 | 2027 | _phase->_igvn.register_new_node_with_optimizer(masked_xbase); |
kvn@3882 | 2028 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
kvn@3882 | 2029 | Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt); |
kvn@3882 | 2030 | _phase->_igvn.register_new_node_with_optimizer(bref); |
kvn@3882 | 2031 | _phase->set_ctrl(bref, pre_ctrl); |
kvn@3882 | 2032 | e = new (_phase->C, 3) AddINode(e, bref); |
kvn@3882 | 2033 | _phase->_igvn.register_new_node_with_optimizer(e); |
kvn@3882 | 2034 | _phase->set_ctrl(e, pre_ctrl); |
kvn@3882 | 2035 | } |
never@507 | 2036 | |
never@507 | 2037 | // compute e +/- lim0 |
never@507 | 2038 | if (scale < 0) { |
never@507 | 2039 | e = new (_phase->C, 3) SubINode(e, lim0); |
never@507 | 2040 | } else { |
never@507 | 2041 | e = new (_phase->C, 3) AddINode(e, lim0); |
never@507 | 2042 | } |
never@507 | 2043 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 2044 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 2045 | |
never@507 | 2046 | if (stride * scale > 0) { |
never@507 | 2047 | // compute V - (e +/- lim0) |
never@507 | 2048 | Node* va = _igvn.intcon(v_align); |
never@507 | 2049 | e = new (_phase->C, 3) SubINode(va, e); |
never@507 | 2050 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 2051 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 2052 | } |
never@507 | 2053 | // compute N = (exp) % V |
duke@435 | 2054 | Node* va_msk = _igvn.intcon(v_align - 1); |
never@507 | 2055 | Node* N = new (_phase->C, 3) AndINode(e, va_msk); |
never@507 | 2056 | _phase->_igvn.register_new_node_with_optimizer(N); |
never@507 | 2057 | _phase->set_ctrl(N, pre_ctrl); |
never@507 | 2058 | |
never@507 | 2059 | // substitute back into (1), so that new limit |
never@507 | 2060 | // lim = lim0 + N |
never@507 | 2061 | Node* lim; |
never@507 | 2062 | if (stride < 0) { |
never@507 | 2063 | lim = new (_phase->C, 3) SubINode(lim0, N); |
duke@435 | 2064 | } else { |
never@507 | 2065 | lim = new (_phase->C, 3) AddINode(lim0, N); |
duke@435 | 2066 | } |
never@507 | 2067 | _phase->_igvn.register_new_node_with_optimizer(lim); |
never@507 | 2068 | _phase->set_ctrl(lim, pre_ctrl); |
duke@435 | 2069 | Node* constrained = |
never@507 | 2070 | (stride > 0) ? (Node*) new (_phase->C,3) MinINode(lim, orig_limit) |
never@507 | 2071 | : (Node*) new (_phase->C,3) MaxINode(lim, orig_limit); |
duke@435 | 2072 | _phase->_igvn.register_new_node_with_optimizer(constrained); |
duke@435 | 2073 | _phase->set_ctrl(constrained, pre_ctrl); |
duke@435 | 2074 | _igvn.hash_delete(pre_opaq); |
duke@435 | 2075 | pre_opaq->set_req(1, constrained); |
duke@435 | 2076 | } |
duke@435 | 2077 | |
duke@435 | 2078 | //----------------------------get_pre_loop_end--------------------------- |
duke@435 | 2079 | // Find pre loop end from main loop. Returns null if none. |
duke@435 | 2080 | CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) { |
duke@435 | 2081 | Node *ctrl = cl->in(LoopNode::EntryControl); |
duke@435 | 2082 | if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL; |
duke@435 | 2083 | Node *iffm = ctrl->in(0); |
duke@435 | 2084 | if (!iffm->is_If()) return NULL; |
duke@435 | 2085 | Node *p_f = iffm->in(0); |
duke@435 | 2086 | if (!p_f->is_IfFalse()) return NULL; |
duke@435 | 2087 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; |
duke@435 | 2088 | CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
duke@435 | 2089 | if (!pre_end->loopnode()->is_pre_loop()) return NULL; |
duke@435 | 2090 | return pre_end; |
duke@435 | 2091 | } |
duke@435 | 2092 | |
duke@435 | 2093 | |
duke@435 | 2094 | //------------------------------init--------------------------- |
duke@435 | 2095 | void SuperWord::init() { |
duke@435 | 2096 | _dg.init(); |
duke@435 | 2097 | _packset.clear(); |
duke@435 | 2098 | _disjoint_ptrs.clear(); |
duke@435 | 2099 | _block.clear(); |
duke@435 | 2100 | _data_entry.clear(); |
duke@435 | 2101 | _mem_slice_head.clear(); |
duke@435 | 2102 | _mem_slice_tail.clear(); |
duke@435 | 2103 | _node_info.clear(); |
duke@435 | 2104 | _align_to_ref = NULL; |
duke@435 | 2105 | _lpt = NULL; |
duke@435 | 2106 | _lp = NULL; |
duke@435 | 2107 | _bb = NULL; |
duke@435 | 2108 | _iv = NULL; |
duke@435 | 2109 | } |
duke@435 | 2110 | |
duke@435 | 2111 | //------------------------------print_packset--------------------------- |
duke@435 | 2112 | void SuperWord::print_packset() { |
duke@435 | 2113 | #ifndef PRODUCT |
duke@435 | 2114 | tty->print_cr("packset"); |
duke@435 | 2115 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 2116 | tty->print_cr("Pack: %d", i); |
duke@435 | 2117 | Node_List* p = _packset.at(i); |
duke@435 | 2118 | print_pack(p); |
duke@435 | 2119 | } |
duke@435 | 2120 | #endif |
duke@435 | 2121 | } |
duke@435 | 2122 | |
duke@435 | 2123 | //------------------------------print_pack--------------------------- |
duke@435 | 2124 | void SuperWord::print_pack(Node_List* p) { |
duke@435 | 2125 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 2126 | print_stmt(p->at(i)); |
duke@435 | 2127 | } |
duke@435 | 2128 | } |
duke@435 | 2129 | |
duke@435 | 2130 | //------------------------------print_bb--------------------------- |
duke@435 | 2131 | void SuperWord::print_bb() { |
duke@435 | 2132 | #ifndef PRODUCT |
duke@435 | 2133 | tty->print_cr("\nBlock"); |
duke@435 | 2134 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 2135 | Node* n = _block.at(i); |
duke@435 | 2136 | tty->print("%d ", i); |
duke@435 | 2137 | if (n) { |
duke@435 | 2138 | n->dump(); |
duke@435 | 2139 | } |
duke@435 | 2140 | } |
duke@435 | 2141 | #endif |
duke@435 | 2142 | } |
duke@435 | 2143 | |
duke@435 | 2144 | //------------------------------print_stmt--------------------------- |
duke@435 | 2145 | void SuperWord::print_stmt(Node* s) { |
duke@435 | 2146 | #ifndef PRODUCT |
duke@435 | 2147 | tty->print(" align: %d \t", alignment(s)); |
duke@435 | 2148 | s->dump(); |
duke@435 | 2149 | #endif |
duke@435 | 2150 | } |
duke@435 | 2151 | |
duke@435 | 2152 | //------------------------------blank--------------------------- |
duke@435 | 2153 | char* SuperWord::blank(uint depth) { |
duke@435 | 2154 | static char blanks[101]; |
duke@435 | 2155 | assert(depth < 101, "too deep"); |
duke@435 | 2156 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; |
duke@435 | 2157 | blanks[depth] = '\0'; |
duke@435 | 2158 | return blanks; |
duke@435 | 2159 | } |
duke@435 | 2160 | |
duke@435 | 2161 | |
duke@435 | 2162 | //==============================SWPointer=========================== |
duke@435 | 2163 | |
duke@435 | 2164 | //----------------------------SWPointer------------------------ |
duke@435 | 2165 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp) : |
duke@435 | 2166 | _mem(mem), _slp(slp), _base(NULL), _adr(NULL), |
duke@435 | 2167 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) { |
duke@435 | 2168 | |
duke@435 | 2169 | Node* adr = mem->in(MemNode::Address); |
duke@435 | 2170 | if (!adr->is_AddP()) { |
duke@435 | 2171 | assert(!valid(), "too complex"); |
duke@435 | 2172 | return; |
duke@435 | 2173 | } |
duke@435 | 2174 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) |
duke@435 | 2175 | Node* base = adr->in(AddPNode::Base); |
cfang@1493 | 2176 | //unsafe reference could not be aligned appropriately without runtime checking |
cfang@1493 | 2177 | if (base == NULL || base->bottom_type() == Type::TOP) { |
cfang@1493 | 2178 | assert(!valid(), "unsafe access"); |
cfang@1493 | 2179 | return; |
cfang@1493 | 2180 | } |
duke@435 | 2181 | for (int i = 0; i < 3; i++) { |
duke@435 | 2182 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { |
duke@435 | 2183 | assert(!valid(), "too complex"); |
duke@435 | 2184 | return; |
duke@435 | 2185 | } |
duke@435 | 2186 | adr = adr->in(AddPNode::Address); |
duke@435 | 2187 | if (base == adr || !adr->is_AddP()) { |
duke@435 | 2188 | break; // stop looking at addp's |
duke@435 | 2189 | } |
duke@435 | 2190 | } |
duke@435 | 2191 | _base = base; |
duke@435 | 2192 | _adr = adr; |
duke@435 | 2193 | assert(valid(), "Usable"); |
duke@435 | 2194 | } |
duke@435 | 2195 | |
duke@435 | 2196 | // Following is used to create a temporary object during |
duke@435 | 2197 | // the pattern match of an address expression. |
duke@435 | 2198 | SWPointer::SWPointer(SWPointer* p) : |
duke@435 | 2199 | _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), |
duke@435 | 2200 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} |
duke@435 | 2201 | |
duke@435 | 2202 | //------------------------scaled_iv_plus_offset-------------------- |
duke@435 | 2203 | // Match: k*iv + offset |
duke@435 | 2204 | // where: k is a constant that maybe zero, and |
duke@435 | 2205 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional |
duke@435 | 2206 | bool SWPointer::scaled_iv_plus_offset(Node* n) { |
duke@435 | 2207 | if (scaled_iv(n)) { |
duke@435 | 2208 | return true; |
duke@435 | 2209 | } |
duke@435 | 2210 | if (offset_plus_k(n)) { |
duke@435 | 2211 | return true; |
duke@435 | 2212 | } |
duke@435 | 2213 | int opc = n->Opcode(); |
duke@435 | 2214 | if (opc == Op_AddI) { |
duke@435 | 2215 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { |
duke@435 | 2216 | return true; |
duke@435 | 2217 | } |
duke@435 | 2218 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 2219 | return true; |
duke@435 | 2220 | } |
duke@435 | 2221 | } else if (opc == Op_SubI) { |
duke@435 | 2222 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { |
duke@435 | 2223 | return true; |
duke@435 | 2224 | } |
duke@435 | 2225 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 2226 | _scale *= -1; |
duke@435 | 2227 | return true; |
duke@435 | 2228 | } |
duke@435 | 2229 | } |
duke@435 | 2230 | return false; |
duke@435 | 2231 | } |
duke@435 | 2232 | |
duke@435 | 2233 | //----------------------------scaled_iv------------------------ |
duke@435 | 2234 | // Match: k*iv where k is a constant that's not zero |
duke@435 | 2235 | bool SWPointer::scaled_iv(Node* n) { |
duke@435 | 2236 | if (_scale != 0) { |
duke@435 | 2237 | return false; // already found a scale |
duke@435 | 2238 | } |
duke@435 | 2239 | if (n == iv()) { |
duke@435 | 2240 | _scale = 1; |
duke@435 | 2241 | return true; |
duke@435 | 2242 | } |
duke@435 | 2243 | int opc = n->Opcode(); |
duke@435 | 2244 | if (opc == Op_MulI) { |
duke@435 | 2245 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 2246 | _scale = n->in(2)->get_int(); |
duke@435 | 2247 | return true; |
duke@435 | 2248 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { |
duke@435 | 2249 | _scale = n->in(1)->get_int(); |
duke@435 | 2250 | return true; |
duke@435 | 2251 | } |
duke@435 | 2252 | } else if (opc == Op_LShiftI) { |
duke@435 | 2253 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 2254 | _scale = 1 << n->in(2)->get_int(); |
duke@435 | 2255 | return true; |
duke@435 | 2256 | } |
duke@435 | 2257 | } else if (opc == Op_ConvI2L) { |
duke@435 | 2258 | if (scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2259 | return true; |
duke@435 | 2260 | } |
duke@435 | 2261 | } else if (opc == Op_LShiftL) { |
duke@435 | 2262 | if (!has_iv() && _invar == NULL) { |
duke@435 | 2263 | // Need to preserve the current _offset value, so |
duke@435 | 2264 | // create a temporary object for this expression subtree. |
duke@435 | 2265 | // Hacky, so should re-engineer the address pattern match. |
duke@435 | 2266 | SWPointer tmp(this); |
duke@435 | 2267 | if (tmp.scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2268 | if (tmp._invar == NULL) { |
duke@435 | 2269 | int mult = 1 << n->in(2)->get_int(); |
duke@435 | 2270 | _scale = tmp._scale * mult; |
duke@435 | 2271 | _offset += tmp._offset * mult; |
duke@435 | 2272 | return true; |
duke@435 | 2273 | } |
duke@435 | 2274 | } |
duke@435 | 2275 | } |
duke@435 | 2276 | } |
duke@435 | 2277 | return false; |
duke@435 | 2278 | } |
duke@435 | 2279 | |
duke@435 | 2280 | //----------------------------offset_plus_k------------------------ |
duke@435 | 2281 | // Match: offset is (k [+/- invariant]) |
duke@435 | 2282 | // where k maybe zero and invariant is optional, but not both. |
duke@435 | 2283 | bool SWPointer::offset_plus_k(Node* n, bool negate) { |
duke@435 | 2284 | int opc = n->Opcode(); |
duke@435 | 2285 | if (opc == Op_ConI) { |
duke@435 | 2286 | _offset += negate ? -(n->get_int()) : n->get_int(); |
duke@435 | 2287 | return true; |
duke@435 | 2288 | } else if (opc == Op_ConL) { |
duke@435 | 2289 | // Okay if value fits into an int |
duke@435 | 2290 | const TypeLong* t = n->find_long_type(); |
duke@435 | 2291 | if (t->higher_equal(TypeLong::INT)) { |
duke@435 | 2292 | jlong loff = n->get_long(); |
duke@435 | 2293 | jint off = (jint)loff; |
duke@435 | 2294 | _offset += negate ? -off : loff; |
duke@435 | 2295 | return true; |
duke@435 | 2296 | } |
duke@435 | 2297 | return false; |
duke@435 | 2298 | } |
duke@435 | 2299 | if (_invar != NULL) return false; // already have an invariant |
duke@435 | 2300 | if (opc == Op_AddI) { |
duke@435 | 2301 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2302 | _negate_invar = negate; |
duke@435 | 2303 | _invar = n->in(1); |
duke@435 | 2304 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2305 | return true; |
duke@435 | 2306 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2307 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2308 | _negate_invar = negate; |
duke@435 | 2309 | _invar = n->in(2); |
duke@435 | 2310 | return true; |
duke@435 | 2311 | } |
duke@435 | 2312 | } |
duke@435 | 2313 | if (opc == Op_SubI) { |
duke@435 | 2314 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2315 | _negate_invar = negate; |
duke@435 | 2316 | _invar = n->in(1); |
duke@435 | 2317 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2318 | return true; |
duke@435 | 2319 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2320 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2321 | _negate_invar = !negate; |
duke@435 | 2322 | _invar = n->in(2); |
duke@435 | 2323 | return true; |
duke@435 | 2324 | } |
duke@435 | 2325 | } |
duke@435 | 2326 | if (invariant(n)) { |
duke@435 | 2327 | _negate_invar = negate; |
duke@435 | 2328 | _invar = n; |
duke@435 | 2329 | return true; |
duke@435 | 2330 | } |
duke@435 | 2331 | return false; |
duke@435 | 2332 | } |
duke@435 | 2333 | |
duke@435 | 2334 | //----------------------------print------------------------ |
duke@435 | 2335 | void SWPointer::print() { |
duke@435 | 2336 | #ifndef PRODUCT |
duke@435 | 2337 | tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n", |
duke@435 | 2338 | _base != NULL ? _base->_idx : 0, |
duke@435 | 2339 | _adr != NULL ? _adr->_idx : 0, |
duke@435 | 2340 | _scale, _offset, |
duke@435 | 2341 | _negate_invar?'-':'+', |
duke@435 | 2342 | _invar != NULL ? _invar->_idx : 0); |
duke@435 | 2343 | #endif |
duke@435 | 2344 | } |
duke@435 | 2345 | |
duke@435 | 2346 | // ========================= OrderedPair ===================== |
duke@435 | 2347 | |
duke@435 | 2348 | const OrderedPair OrderedPair::initial; |
duke@435 | 2349 | |
duke@435 | 2350 | // ========================= SWNodeInfo ===================== |
duke@435 | 2351 | |
duke@435 | 2352 | const SWNodeInfo SWNodeInfo::initial; |
duke@435 | 2353 | |
duke@435 | 2354 | |
duke@435 | 2355 | // ============================ DepGraph =========================== |
duke@435 | 2356 | |
duke@435 | 2357 | //------------------------------make_node--------------------------- |
duke@435 | 2358 | // Make a new dependence graph node for an ideal node. |
duke@435 | 2359 | DepMem* DepGraph::make_node(Node* node) { |
duke@435 | 2360 | DepMem* m = new (_arena) DepMem(node); |
duke@435 | 2361 | if (node != NULL) { |
duke@435 | 2362 | assert(_map.at_grow(node->_idx) == NULL, "one init only"); |
duke@435 | 2363 | _map.at_put_grow(node->_idx, m); |
duke@435 | 2364 | } |
duke@435 | 2365 | return m; |
duke@435 | 2366 | } |
duke@435 | 2367 | |
duke@435 | 2368 | //------------------------------make_edge--------------------------- |
duke@435 | 2369 | // Make a new dependence graph edge from dpred -> dsucc |
duke@435 | 2370 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { |
duke@435 | 2371 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); |
duke@435 | 2372 | dpred->set_out_head(e); |
duke@435 | 2373 | dsucc->set_in_head(e); |
duke@435 | 2374 | return e; |
duke@435 | 2375 | } |
duke@435 | 2376 | |
duke@435 | 2377 | // ========================== DepMem ======================== |
duke@435 | 2378 | |
duke@435 | 2379 | //------------------------------in_cnt--------------------------- |
duke@435 | 2380 | int DepMem::in_cnt() { |
duke@435 | 2381 | int ct = 0; |
duke@435 | 2382 | for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++; |
duke@435 | 2383 | return ct; |
duke@435 | 2384 | } |
duke@435 | 2385 | |
duke@435 | 2386 | //------------------------------out_cnt--------------------------- |
duke@435 | 2387 | int DepMem::out_cnt() { |
duke@435 | 2388 | int ct = 0; |
duke@435 | 2389 | for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++; |
duke@435 | 2390 | return ct; |
duke@435 | 2391 | } |
duke@435 | 2392 | |
duke@435 | 2393 | //------------------------------print----------------------------- |
duke@435 | 2394 | void DepMem::print() { |
duke@435 | 2395 | #ifndef PRODUCT |
duke@435 | 2396 | tty->print(" DepNode %d (", _node->_idx); |
duke@435 | 2397 | for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) { |
duke@435 | 2398 | Node* pred = p->pred()->node(); |
duke@435 | 2399 | tty->print(" %d", pred != NULL ? pred->_idx : 0); |
duke@435 | 2400 | } |
duke@435 | 2401 | tty->print(") ["); |
duke@435 | 2402 | for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) { |
duke@435 | 2403 | Node* succ = s->succ()->node(); |
duke@435 | 2404 | tty->print(" %d", succ != NULL ? succ->_idx : 0); |
duke@435 | 2405 | } |
duke@435 | 2406 | tty->print_cr(" ]"); |
duke@435 | 2407 | #endif |
duke@435 | 2408 | } |
duke@435 | 2409 | |
duke@435 | 2410 | // =========================== DepEdge ========================= |
duke@435 | 2411 | |
duke@435 | 2412 | //------------------------------DepPreds--------------------------- |
duke@435 | 2413 | void DepEdge::print() { |
duke@435 | 2414 | #ifndef PRODUCT |
duke@435 | 2415 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); |
duke@435 | 2416 | #endif |
duke@435 | 2417 | } |
duke@435 | 2418 | |
duke@435 | 2419 | // =========================== DepPreds ========================= |
duke@435 | 2420 | // Iterator over predecessor edges in the dependence graph. |
duke@435 | 2421 | |
duke@435 | 2422 | //------------------------------DepPreds--------------------------- |
duke@435 | 2423 | DepPreds::DepPreds(Node* n, DepGraph& dg) { |
duke@435 | 2424 | _n = n; |
duke@435 | 2425 | _done = false; |
duke@435 | 2426 | if (_n->is_Store() || _n->is_Load()) { |
duke@435 | 2427 | _next_idx = MemNode::Address; |
duke@435 | 2428 | _end_idx = n->req(); |
duke@435 | 2429 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2430 | } else if (_n->is_Mem()) { |
duke@435 | 2431 | _next_idx = 0; |
duke@435 | 2432 | _end_idx = 0; |
duke@435 | 2433 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2434 | } else { |
duke@435 | 2435 | _next_idx = 1; |
duke@435 | 2436 | _end_idx = _n->req(); |
duke@435 | 2437 | _dep_next = NULL; |
duke@435 | 2438 | } |
duke@435 | 2439 | next(); |
duke@435 | 2440 | } |
duke@435 | 2441 | |
duke@435 | 2442 | //------------------------------next--------------------------- |
duke@435 | 2443 | void DepPreds::next() { |
duke@435 | 2444 | if (_dep_next != NULL) { |
duke@435 | 2445 | _current = _dep_next->pred()->node(); |
duke@435 | 2446 | _dep_next = _dep_next->next_in(); |
duke@435 | 2447 | } else if (_next_idx < _end_idx) { |
duke@435 | 2448 | _current = _n->in(_next_idx++); |
duke@435 | 2449 | } else { |
duke@435 | 2450 | _done = true; |
duke@435 | 2451 | } |
duke@435 | 2452 | } |
duke@435 | 2453 | |
duke@435 | 2454 | // =========================== DepSuccs ========================= |
duke@435 | 2455 | // Iterator over successor edges in the dependence graph. |
duke@435 | 2456 | |
duke@435 | 2457 | //------------------------------DepSuccs--------------------------- |
duke@435 | 2458 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { |
duke@435 | 2459 | _n = n; |
duke@435 | 2460 | _done = false; |
duke@435 | 2461 | if (_n->is_Load()) { |
duke@435 | 2462 | _next_idx = 0; |
duke@435 | 2463 | _end_idx = _n->outcnt(); |
duke@435 | 2464 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2465 | } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) { |
duke@435 | 2466 | _next_idx = 0; |
duke@435 | 2467 | _end_idx = 0; |
duke@435 | 2468 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2469 | } else { |
duke@435 | 2470 | _next_idx = 0; |
duke@435 | 2471 | _end_idx = _n->outcnt(); |
duke@435 | 2472 | _dep_next = NULL; |
duke@435 | 2473 | } |
duke@435 | 2474 | next(); |
duke@435 | 2475 | } |
duke@435 | 2476 | |
duke@435 | 2477 | //-------------------------------next--------------------------- |
duke@435 | 2478 | void DepSuccs::next() { |
duke@435 | 2479 | if (_dep_next != NULL) { |
duke@435 | 2480 | _current = _dep_next->succ()->node(); |
duke@435 | 2481 | _dep_next = _dep_next->next_out(); |
duke@435 | 2482 | } else if (_next_idx < _end_idx) { |
duke@435 | 2483 | _current = _n->raw_out(_next_idx++); |
duke@435 | 2484 | } else { |
duke@435 | 2485 | _done = true; |
duke@435 | 2486 | } |
duke@435 | 2487 | } |