Thu, 16 Feb 2012 17:12:49 -0800
7145346: VerifyStackAtCalls is broken
Summary: Replace call_epilog() encoding with macroassembler use. Moved duplicated code to x86.ad. Fixed return_addr() definition.
Reviewed-by: never
duke@435 | 1 | /* |
never@1940 | 2 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. |
duke@435 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
duke@435 | 4 | * |
duke@435 | 5 | * This code is free software; you can redistribute it and/or modify it |
duke@435 | 6 | * under the terms of the GNU General Public License version 2 only, as |
duke@435 | 7 | * published by the Free Software Foundation. |
duke@435 | 8 | * |
duke@435 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
duke@435 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
duke@435 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
duke@435 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
duke@435 | 13 | * accompanied this code). |
duke@435 | 14 | * |
duke@435 | 15 | * You should have received a copy of the GNU General Public License version |
duke@435 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
duke@435 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
duke@435 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
duke@435 | 22 | */ |
duke@435 | 23 | |
stefank@2314 | 24 | #include "precompiled.hpp" |
stefank@2314 | 25 | #include "compiler/compileLog.hpp" |
stefank@2314 | 26 | #include "libadt/vectset.hpp" |
stefank@2314 | 27 | #include "memory/allocation.inline.hpp" |
stefank@2314 | 28 | #include "opto/addnode.hpp" |
stefank@2314 | 29 | #include "opto/callnode.hpp" |
stefank@2314 | 30 | #include "opto/divnode.hpp" |
stefank@2314 | 31 | #include "opto/matcher.hpp" |
stefank@2314 | 32 | #include "opto/memnode.hpp" |
stefank@2314 | 33 | #include "opto/mulnode.hpp" |
stefank@2314 | 34 | #include "opto/opcodes.hpp" |
stefank@2314 | 35 | #include "opto/superword.hpp" |
stefank@2314 | 36 | #include "opto/vectornode.hpp" |
duke@435 | 37 | |
duke@435 | 38 | // |
duke@435 | 39 | // S U P E R W O R D T R A N S F O R M |
duke@435 | 40 | //============================================================================= |
duke@435 | 41 | |
duke@435 | 42 | //------------------------------SuperWord--------------------------- |
duke@435 | 43 | SuperWord::SuperWord(PhaseIdealLoop* phase) : |
duke@435 | 44 | _phase(phase), |
duke@435 | 45 | _igvn(phase->_igvn), |
duke@435 | 46 | _arena(phase->C->comp_arena()), |
duke@435 | 47 | _packset(arena(), 8, 0, NULL), // packs for the current block |
duke@435 | 48 | _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb |
duke@435 | 49 | _block(arena(), 8, 0, NULL), // nodes in current block |
duke@435 | 50 | _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside |
duke@435 | 51 | _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads |
duke@435 | 52 | _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails |
duke@435 | 53 | _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node |
duke@435 | 54 | _align_to_ref(NULL), // memory reference to align vectors to |
duke@435 | 55 | _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs |
duke@435 | 56 | _dg(_arena), // dependence graph |
duke@435 | 57 | _visited(arena()), // visited node set |
duke@435 | 58 | _post_visited(arena()), // post visited node set |
duke@435 | 59 | _n_idx_list(arena(), 8), // scratch list of (node,index) pairs |
duke@435 | 60 | _stk(arena(), 8, 0, NULL), // scratch stack of nodes |
duke@435 | 61 | _nlist(arena(), 8, 0, NULL), // scratch list of nodes |
duke@435 | 62 | _lpt(NULL), // loop tree node |
duke@435 | 63 | _lp(NULL), // LoopNode |
duke@435 | 64 | _bb(NULL), // basic block |
duke@435 | 65 | _iv(NULL) // induction var |
duke@435 | 66 | {} |
duke@435 | 67 | |
duke@435 | 68 | //------------------------------transform_loop--------------------------- |
duke@435 | 69 | void SuperWord::transform_loop(IdealLoopTree* lpt) { |
duke@435 | 70 | assert(lpt->_head->is_CountedLoop(), "must be"); |
duke@435 | 71 | CountedLoopNode *cl = lpt->_head->as_CountedLoop(); |
duke@435 | 72 | |
kvn@3048 | 73 | if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop |
kvn@3048 | 74 | |
duke@435 | 75 | if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops |
duke@435 | 76 | |
duke@435 | 77 | // Check for no control flow in body (other than exit) |
duke@435 | 78 | Node *cl_exit = cl->loopexit(); |
duke@435 | 79 | if (cl_exit->in(0) != lpt->_head) return; |
duke@435 | 80 | |
never@540 | 81 | // Make sure the are no extra control users of the loop backedge |
never@540 | 82 | if (cl->back_control()->outcnt() != 1) { |
never@540 | 83 | return; |
never@540 | 84 | } |
never@540 | 85 | |
duke@435 | 86 | // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit)))) |
duke@435 | 87 | CountedLoopEndNode* pre_end = get_pre_loop_end(cl); |
duke@435 | 88 | if (pre_end == NULL) return; |
duke@435 | 89 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 90 | if (pre_opaq1->Opcode() != Op_Opaque1) return; |
duke@435 | 91 | |
duke@435 | 92 | // Do vectors exist on this architecture? |
duke@435 | 93 | if (vector_width_in_bytes() == 0) return; |
duke@435 | 94 | |
duke@435 | 95 | init(); // initialize data structures |
duke@435 | 96 | |
duke@435 | 97 | set_lpt(lpt); |
duke@435 | 98 | set_lp(cl); |
duke@435 | 99 | |
duke@435 | 100 | // For now, define one block which is the entire loop body |
duke@435 | 101 | set_bb(cl); |
duke@435 | 102 | |
duke@435 | 103 | assert(_packset.length() == 0, "packset must be empty"); |
duke@435 | 104 | SLP_extract(); |
duke@435 | 105 | } |
duke@435 | 106 | |
duke@435 | 107 | //------------------------------SLP_extract--------------------------- |
duke@435 | 108 | // Extract the superword level parallelism |
duke@435 | 109 | // |
duke@435 | 110 | // 1) A reverse post-order of nodes in the block is constructed. By scanning |
duke@435 | 111 | // this list from first to last, all definitions are visited before their uses. |
duke@435 | 112 | // |
duke@435 | 113 | // 2) A point-to-point dependence graph is constructed between memory references. |
duke@435 | 114 | // This simplies the upcoming "independence" checker. |
duke@435 | 115 | // |
duke@435 | 116 | // 3) The maximum depth in the node graph from the beginning of the block |
duke@435 | 117 | // to each node is computed. This is used to prune the graph search |
duke@435 | 118 | // in the independence checker. |
duke@435 | 119 | // |
duke@435 | 120 | // 4) For integer types, the necessary bit width is propagated backwards |
duke@435 | 121 | // from stores to allow packed operations on byte, char, and short |
duke@435 | 122 | // integers. This reverses the promotion to type "int" that javac |
duke@435 | 123 | // did for operations like: char c1,c2,c3; c1 = c2 + c3. |
duke@435 | 124 | // |
duke@435 | 125 | // 5) One of the memory references is picked to be an aligned vector reference. |
duke@435 | 126 | // The pre-loop trip count is adjusted to align this reference in the |
duke@435 | 127 | // unrolled body. |
duke@435 | 128 | // |
duke@435 | 129 | // 6) The initial set of pack pairs is seeded with memory references. |
duke@435 | 130 | // |
duke@435 | 131 | // 7) The set of pack pairs is extended by following use->def and def->use links. |
duke@435 | 132 | // |
duke@435 | 133 | // 8) The pairs are combined into vector sized packs. |
duke@435 | 134 | // |
duke@435 | 135 | // 9) Reorder the memory slices to co-locate members of the memory packs. |
duke@435 | 136 | // |
duke@435 | 137 | // 10) Generate ideal vector nodes for the final set of packs and where necessary, |
duke@435 | 138 | // inserting scalar promotion, vector creation from multiple scalars, and |
duke@435 | 139 | // extraction of scalar values from vectors. |
duke@435 | 140 | // |
duke@435 | 141 | void SuperWord::SLP_extract() { |
duke@435 | 142 | |
duke@435 | 143 | // Ready the block |
duke@435 | 144 | |
duke@435 | 145 | construct_bb(); |
duke@435 | 146 | |
duke@435 | 147 | dependence_graph(); |
duke@435 | 148 | |
duke@435 | 149 | compute_max_depth(); |
duke@435 | 150 | |
duke@435 | 151 | compute_vector_element_type(); |
duke@435 | 152 | |
duke@435 | 153 | // Attempt vectorization |
duke@435 | 154 | |
duke@435 | 155 | find_adjacent_refs(); |
duke@435 | 156 | |
duke@435 | 157 | extend_packlist(); |
duke@435 | 158 | |
duke@435 | 159 | combine_packs(); |
duke@435 | 160 | |
duke@435 | 161 | construct_my_pack_map(); |
duke@435 | 162 | |
duke@435 | 163 | filter_packs(); |
duke@435 | 164 | |
duke@435 | 165 | schedule(); |
duke@435 | 166 | |
duke@435 | 167 | output(); |
duke@435 | 168 | } |
duke@435 | 169 | |
duke@435 | 170 | //------------------------------find_adjacent_refs--------------------------- |
duke@435 | 171 | // Find the adjacent memory references and create pack pairs for them. |
duke@435 | 172 | // This is the initial set of packs that will then be extended by |
duke@435 | 173 | // following use->def and def->use links. The align positions are |
duke@435 | 174 | // assigned relative to the reference "align_to_ref" |
duke@435 | 175 | void SuperWord::find_adjacent_refs() { |
duke@435 | 176 | // Get list of memory operations |
duke@435 | 177 | Node_List memops; |
duke@435 | 178 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 179 | Node* n = _block.at(i); |
kvn@464 | 180 | if (n->is_Mem() && in_bb(n) && |
kvn@464 | 181 | is_java_primitive(n->as_Mem()->memory_type())) { |
duke@435 | 182 | int align = memory_alignment(n->as_Mem(), 0); |
duke@435 | 183 | if (align != bottom_align) { |
duke@435 | 184 | memops.push(n); |
duke@435 | 185 | } |
duke@435 | 186 | } |
duke@435 | 187 | } |
duke@435 | 188 | if (memops.size() == 0) return; |
duke@435 | 189 | |
duke@435 | 190 | // Find a memory reference to align to. The pre-loop trip count |
duke@435 | 191 | // is modified to align this reference to a vector-aligned address |
duke@435 | 192 | find_align_to_ref(memops); |
duke@435 | 193 | if (align_to_ref() == NULL) return; |
duke@435 | 194 | |
duke@435 | 195 | SWPointer align_to_ref_p(align_to_ref(), this); |
duke@435 | 196 | int offset = align_to_ref_p.offset_in_bytes(); |
duke@435 | 197 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 198 | int vw = vector_width_in_bytes(); |
duke@435 | 199 | int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
duke@435 | 200 | int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
duke@435 | 201 | |
duke@435 | 202 | #ifndef PRODUCT |
duke@435 | 203 | if (TraceSuperWord) |
never@507 | 204 | tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d", |
never@507 | 205 | offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride()); |
duke@435 | 206 | #endif |
duke@435 | 207 | |
duke@435 | 208 | // Set alignment relative to "align_to_ref" |
duke@435 | 209 | for (int i = memops.size() - 1; i >= 0; i--) { |
duke@435 | 210 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 211 | SWPointer p2(s, this); |
duke@435 | 212 | if (p2.comparable(align_to_ref_p)) { |
duke@435 | 213 | int align = memory_alignment(s, iv_adjustment); |
duke@435 | 214 | set_alignment(s, align); |
duke@435 | 215 | } else { |
duke@435 | 216 | memops.remove(i); |
duke@435 | 217 | } |
duke@435 | 218 | } |
duke@435 | 219 | |
duke@435 | 220 | // Create initial pack pairs of memory operations |
duke@435 | 221 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 222 | Node* s1 = memops.at(i); |
duke@435 | 223 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 224 | Node* s2 = memops.at(j); |
duke@435 | 225 | if (s1 != s2 && are_adjacent_refs(s1, s2)) { |
duke@435 | 226 | int align = alignment(s1); |
duke@435 | 227 | if (stmts_can_pack(s1, s2, align)) { |
duke@435 | 228 | Node_List* pair = new Node_List(); |
duke@435 | 229 | pair->push(s1); |
duke@435 | 230 | pair->push(s2); |
duke@435 | 231 | _packset.append(pair); |
duke@435 | 232 | } |
duke@435 | 233 | } |
duke@435 | 234 | } |
duke@435 | 235 | } |
duke@435 | 236 | |
duke@435 | 237 | #ifndef PRODUCT |
duke@435 | 238 | if (TraceSuperWord) { |
duke@435 | 239 | tty->print_cr("\nAfter find_adjacent_refs"); |
duke@435 | 240 | print_packset(); |
duke@435 | 241 | } |
duke@435 | 242 | #endif |
duke@435 | 243 | } |
duke@435 | 244 | |
duke@435 | 245 | //------------------------------find_align_to_ref--------------------------- |
duke@435 | 246 | // Find a memory reference to align the loop induction variable to. |
duke@435 | 247 | // Looks first at stores then at loads, looking for a memory reference |
duke@435 | 248 | // with the largest number of references similar to it. |
duke@435 | 249 | void SuperWord::find_align_to_ref(Node_List &memops) { |
duke@435 | 250 | GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0); |
duke@435 | 251 | |
duke@435 | 252 | // Count number of comparable memory ops |
duke@435 | 253 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 254 | MemNode* s1 = memops.at(i)->as_Mem(); |
duke@435 | 255 | SWPointer p1(s1, this); |
duke@435 | 256 | // Discard if pre loop can't align this reference |
duke@435 | 257 | if (!ref_is_alignable(p1)) { |
duke@435 | 258 | *cmp_ct.adr_at(i) = 0; |
duke@435 | 259 | continue; |
duke@435 | 260 | } |
duke@435 | 261 | for (uint j = i+1; j < memops.size(); j++) { |
duke@435 | 262 | MemNode* s2 = memops.at(j)->as_Mem(); |
duke@435 | 263 | if (isomorphic(s1, s2)) { |
duke@435 | 264 | SWPointer p2(s2, this); |
duke@435 | 265 | if (p1.comparable(p2)) { |
duke@435 | 266 | (*cmp_ct.adr_at(i))++; |
duke@435 | 267 | (*cmp_ct.adr_at(j))++; |
duke@435 | 268 | } |
duke@435 | 269 | } |
duke@435 | 270 | } |
duke@435 | 271 | } |
duke@435 | 272 | |
duke@435 | 273 | // Find Store (or Load) with the greatest number of "comparable" references |
duke@435 | 274 | int max_ct = 0; |
duke@435 | 275 | int max_idx = -1; |
duke@435 | 276 | int min_size = max_jint; |
duke@435 | 277 | int min_iv_offset = max_jint; |
duke@435 | 278 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 279 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 280 | if (s->is_Store()) { |
duke@435 | 281 | SWPointer p(s, this); |
duke@435 | 282 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 283 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 284 | data_size(s) == min_size && |
duke@435 | 285 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 286 | max_ct = cmp_ct.at(j); |
duke@435 | 287 | max_idx = j; |
duke@435 | 288 | min_size = data_size(s); |
duke@435 | 289 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 290 | } |
duke@435 | 291 | } |
duke@435 | 292 | } |
duke@435 | 293 | // If no stores, look at loads |
duke@435 | 294 | if (max_ct == 0) { |
duke@435 | 295 | for (uint j = 0; j < memops.size(); j++) { |
duke@435 | 296 | MemNode* s = memops.at(j)->as_Mem(); |
duke@435 | 297 | if (s->is_Load()) { |
duke@435 | 298 | SWPointer p(s, this); |
duke@435 | 299 | if (cmp_ct.at(j) > max_ct || |
duke@435 | 300 | cmp_ct.at(j) == max_ct && (data_size(s) < min_size || |
duke@435 | 301 | data_size(s) == min_size && |
duke@435 | 302 | p.offset_in_bytes() < min_iv_offset)) { |
duke@435 | 303 | max_ct = cmp_ct.at(j); |
duke@435 | 304 | max_idx = j; |
duke@435 | 305 | min_size = data_size(s); |
duke@435 | 306 | min_iv_offset = p.offset_in_bytes(); |
duke@435 | 307 | } |
duke@435 | 308 | } |
duke@435 | 309 | } |
duke@435 | 310 | } |
duke@435 | 311 | |
duke@435 | 312 | if (max_ct > 0) |
duke@435 | 313 | set_align_to_ref(memops.at(max_idx)->as_Mem()); |
duke@435 | 314 | |
duke@435 | 315 | #ifndef PRODUCT |
duke@435 | 316 | if (TraceSuperWord && Verbose) { |
duke@435 | 317 | tty->print_cr("\nVector memops after find_align_to_refs"); |
duke@435 | 318 | for (uint i = 0; i < memops.size(); i++) { |
duke@435 | 319 | MemNode* s = memops.at(i)->as_Mem(); |
duke@435 | 320 | s->dump(); |
duke@435 | 321 | } |
duke@435 | 322 | } |
duke@435 | 323 | #endif |
duke@435 | 324 | } |
duke@435 | 325 | |
duke@435 | 326 | //------------------------------ref_is_alignable--------------------------- |
duke@435 | 327 | // Can the preloop align the reference to position zero in the vector? |
duke@435 | 328 | bool SuperWord::ref_is_alignable(SWPointer& p) { |
duke@435 | 329 | if (!p.has_iv()) { |
duke@435 | 330 | return true; // no induction variable |
duke@435 | 331 | } |
duke@435 | 332 | CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop()); |
duke@435 | 333 | assert(pre_end->stride_is_con(), "pre loop stride is constant"); |
duke@435 | 334 | int preloop_stride = pre_end->stride_con(); |
duke@435 | 335 | |
duke@435 | 336 | int span = preloop_stride * p.scale_in_bytes(); |
duke@435 | 337 | |
duke@435 | 338 | // Stride one accesses are alignable. |
duke@435 | 339 | if (ABS(span) == p.memory_size()) |
duke@435 | 340 | return true; |
duke@435 | 341 | |
duke@435 | 342 | // If initial offset from start of object is computable, |
duke@435 | 343 | // compute alignment within the vector. |
duke@435 | 344 | int vw = vector_width_in_bytes(); |
duke@435 | 345 | if (vw % span == 0) { |
duke@435 | 346 | Node* init_nd = pre_end->init_trip(); |
duke@435 | 347 | if (init_nd->is_Con() && p.invar() == NULL) { |
duke@435 | 348 | int init = init_nd->bottom_type()->is_int()->get_con(); |
duke@435 | 349 | |
duke@435 | 350 | int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes(); |
duke@435 | 351 | assert(init_offset >= 0, "positive offset from object start"); |
duke@435 | 352 | |
duke@435 | 353 | if (span > 0) { |
duke@435 | 354 | return (vw - (init_offset % vw)) % span == 0; |
duke@435 | 355 | } else { |
duke@435 | 356 | assert(span < 0, "nonzero stride * scale"); |
duke@435 | 357 | return (init_offset % vw) % -span == 0; |
duke@435 | 358 | } |
duke@435 | 359 | } |
duke@435 | 360 | } |
duke@435 | 361 | return false; |
duke@435 | 362 | } |
duke@435 | 363 | |
duke@435 | 364 | //---------------------------dependence_graph--------------------------- |
duke@435 | 365 | // Construct dependency graph. |
duke@435 | 366 | // Add dependence edges to load/store nodes for memory dependence |
duke@435 | 367 | // A.out()->DependNode.in(1) and DependNode.out()->B.prec(x) |
duke@435 | 368 | void SuperWord::dependence_graph() { |
duke@435 | 369 | // First, assign a dependence node to each memory node |
duke@435 | 370 | for (int i = 0; i < _block.length(); i++ ) { |
duke@435 | 371 | Node *n = _block.at(i); |
duke@435 | 372 | if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) { |
duke@435 | 373 | _dg.make_node(n); |
duke@435 | 374 | } |
duke@435 | 375 | } |
duke@435 | 376 | |
duke@435 | 377 | // For each memory slice, create the dependences |
duke@435 | 378 | for (int i = 0; i < _mem_slice_head.length(); i++) { |
duke@435 | 379 | Node* n = _mem_slice_head.at(i); |
duke@435 | 380 | Node* n_tail = _mem_slice_tail.at(i); |
duke@435 | 381 | |
duke@435 | 382 | // Get slice in predecessor order (last is first) |
duke@435 | 383 | mem_slice_preds(n_tail, n, _nlist); |
duke@435 | 384 | |
duke@435 | 385 | // Make the slice dependent on the root |
duke@435 | 386 | DepMem* slice = _dg.dep(n); |
duke@435 | 387 | _dg.make_edge(_dg.root(), slice); |
duke@435 | 388 | |
duke@435 | 389 | // Create a sink for the slice |
duke@435 | 390 | DepMem* slice_sink = _dg.make_node(NULL); |
duke@435 | 391 | _dg.make_edge(slice_sink, _dg.tail()); |
duke@435 | 392 | |
duke@435 | 393 | // Now visit each pair of memory ops, creating the edges |
duke@435 | 394 | for (int j = _nlist.length() - 1; j >= 0 ; j--) { |
duke@435 | 395 | Node* s1 = _nlist.at(j); |
duke@435 | 396 | |
duke@435 | 397 | // If no dependency yet, use slice |
duke@435 | 398 | if (_dg.dep(s1)->in_cnt() == 0) { |
duke@435 | 399 | _dg.make_edge(slice, s1); |
duke@435 | 400 | } |
duke@435 | 401 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 402 | bool sink_dependent = true; |
duke@435 | 403 | for (int k = j - 1; k >= 0; k--) { |
duke@435 | 404 | Node* s2 = _nlist.at(k); |
duke@435 | 405 | if (s1->is_Load() && s2->is_Load()) |
duke@435 | 406 | continue; |
duke@435 | 407 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 408 | |
duke@435 | 409 | int cmp = p1.cmp(p2); |
duke@435 | 410 | if (SuperWordRTDepCheck && |
duke@435 | 411 | p1.base() != p2.base() && p1.valid() && p2.valid()) { |
duke@435 | 412 | // Create a runtime check to disambiguate |
duke@435 | 413 | OrderedPair pp(p1.base(), p2.base()); |
duke@435 | 414 | _disjoint_ptrs.append_if_missing(pp); |
duke@435 | 415 | } else if (!SWPointer::not_equal(cmp)) { |
duke@435 | 416 | // Possibly same address |
duke@435 | 417 | _dg.make_edge(s1, s2); |
duke@435 | 418 | sink_dependent = false; |
duke@435 | 419 | } |
duke@435 | 420 | } |
duke@435 | 421 | if (sink_dependent) { |
duke@435 | 422 | _dg.make_edge(s1, slice_sink); |
duke@435 | 423 | } |
duke@435 | 424 | } |
duke@435 | 425 | #ifndef PRODUCT |
duke@435 | 426 | if (TraceSuperWord) { |
duke@435 | 427 | tty->print_cr("\nDependence graph for slice: %d", n->_idx); |
duke@435 | 428 | for (int q = 0; q < _nlist.length(); q++) { |
duke@435 | 429 | _dg.print(_nlist.at(q)); |
duke@435 | 430 | } |
duke@435 | 431 | tty->cr(); |
duke@435 | 432 | } |
duke@435 | 433 | #endif |
duke@435 | 434 | _nlist.clear(); |
duke@435 | 435 | } |
duke@435 | 436 | |
duke@435 | 437 | #ifndef PRODUCT |
duke@435 | 438 | if (TraceSuperWord) { |
duke@435 | 439 | tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE"); |
duke@435 | 440 | for (int r = 0; r < _disjoint_ptrs.length(); r++) { |
duke@435 | 441 | _disjoint_ptrs.at(r).print(); |
duke@435 | 442 | tty->cr(); |
duke@435 | 443 | } |
duke@435 | 444 | tty->cr(); |
duke@435 | 445 | } |
duke@435 | 446 | #endif |
duke@435 | 447 | } |
duke@435 | 448 | |
duke@435 | 449 | //---------------------------mem_slice_preds--------------------------- |
duke@435 | 450 | // Return a memory slice (node list) in predecessor order starting at "start" |
duke@435 | 451 | void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) { |
duke@435 | 452 | assert(preds.length() == 0, "start empty"); |
duke@435 | 453 | Node* n = start; |
duke@435 | 454 | Node* prev = NULL; |
duke@435 | 455 | while (true) { |
duke@435 | 456 | assert(in_bb(n), "must be in block"); |
duke@435 | 457 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 458 | Node* out = n->fast_out(i); |
duke@435 | 459 | if (out->is_Load()) { |
duke@435 | 460 | if (in_bb(out)) { |
duke@435 | 461 | preds.push(out); |
duke@435 | 462 | } |
duke@435 | 463 | } else { |
duke@435 | 464 | // FIXME |
duke@435 | 465 | if (out->is_MergeMem() && !in_bb(out)) { |
duke@435 | 466 | // Either unrolling is causing a memory edge not to disappear, |
duke@435 | 467 | // or need to run igvn.optimize() again before SLP |
duke@435 | 468 | } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { |
duke@435 | 469 | // Ditto. Not sure what else to check further. |
cfang@1102 | 470 | } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { |
duke@435 | 471 | // StoreCM has an input edge used as a precedence edge. |
duke@435 | 472 | // Maybe an issue when oop stores are vectorized. |
duke@435 | 473 | } else { |
duke@435 | 474 | assert(out == prev || prev == NULL, "no branches off of store slice"); |
duke@435 | 475 | } |
duke@435 | 476 | } |
duke@435 | 477 | } |
duke@435 | 478 | if (n == stop) break; |
duke@435 | 479 | preds.push(n); |
duke@435 | 480 | prev = n; |
duke@435 | 481 | n = n->in(MemNode::Memory); |
duke@435 | 482 | } |
duke@435 | 483 | } |
duke@435 | 484 | |
duke@435 | 485 | //------------------------------stmts_can_pack--------------------------- |
twisti@1040 | 486 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and |
duke@435 | 487 | // s1 aligned at "align" |
duke@435 | 488 | bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { |
cfang@1422 | 489 | |
cfang@1422 | 490 | // Do not use superword for non-primitives |
cfang@1422 | 491 | if((s1->is_Mem() && !is_java_primitive(s1->as_Mem()->memory_type())) || |
cfang@1422 | 492 | (s2->is_Mem() && !is_java_primitive(s2->as_Mem()->memory_type()))) |
cfang@1422 | 493 | return false; |
cfang@1422 | 494 | |
duke@435 | 495 | if (isomorphic(s1, s2)) { |
duke@435 | 496 | if (independent(s1, s2)) { |
duke@435 | 497 | if (!exists_at(s1, 0) && !exists_at(s2, 1)) { |
duke@435 | 498 | if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) { |
duke@435 | 499 | int s1_align = alignment(s1); |
duke@435 | 500 | int s2_align = alignment(s2); |
duke@435 | 501 | if (s1_align == top_align || s1_align == align) { |
duke@435 | 502 | if (s2_align == top_align || s2_align == align + data_size(s1)) { |
duke@435 | 503 | return true; |
duke@435 | 504 | } |
duke@435 | 505 | } |
duke@435 | 506 | } |
duke@435 | 507 | } |
duke@435 | 508 | } |
duke@435 | 509 | } |
duke@435 | 510 | return false; |
duke@435 | 511 | } |
duke@435 | 512 | |
duke@435 | 513 | //------------------------------exists_at--------------------------- |
duke@435 | 514 | // Does s exist in a pack at position pos? |
duke@435 | 515 | bool SuperWord::exists_at(Node* s, uint pos) { |
duke@435 | 516 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 517 | Node_List* p = _packset.at(i); |
duke@435 | 518 | if (p->at(pos) == s) { |
duke@435 | 519 | return true; |
duke@435 | 520 | } |
duke@435 | 521 | } |
duke@435 | 522 | return false; |
duke@435 | 523 | } |
duke@435 | 524 | |
duke@435 | 525 | //------------------------------are_adjacent_refs--------------------------- |
duke@435 | 526 | // Is s1 immediately before s2 in memory? |
duke@435 | 527 | bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) { |
duke@435 | 528 | if (!s1->is_Mem() || !s2->is_Mem()) return false; |
duke@435 | 529 | if (!in_bb(s1) || !in_bb(s2)) return false; |
never@1940 | 530 | |
never@1940 | 531 | // Do not use superword for non-primitives |
never@1940 | 532 | if (!is_java_primitive(s1->as_Mem()->memory_type()) || |
never@1940 | 533 | !is_java_primitive(s2->as_Mem()->memory_type())) { |
never@1940 | 534 | return false; |
never@1940 | 535 | } |
never@1940 | 536 | |
duke@435 | 537 | // FIXME - co_locate_pack fails on Stores in different mem-slices, so |
duke@435 | 538 | // only pack memops that are in the same alias set until that's fixed. |
duke@435 | 539 | if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) != |
duke@435 | 540 | _phase->C->get_alias_index(s2->as_Mem()->adr_type())) |
duke@435 | 541 | return false; |
duke@435 | 542 | SWPointer p1(s1->as_Mem(), this); |
duke@435 | 543 | SWPointer p2(s2->as_Mem(), this); |
duke@435 | 544 | if (p1.base() != p2.base() || !p1.comparable(p2)) return false; |
duke@435 | 545 | int diff = p2.offset_in_bytes() - p1.offset_in_bytes(); |
duke@435 | 546 | return diff == data_size(s1); |
duke@435 | 547 | } |
duke@435 | 548 | |
duke@435 | 549 | //------------------------------isomorphic--------------------------- |
duke@435 | 550 | // Are s1 and s2 similar? |
duke@435 | 551 | bool SuperWord::isomorphic(Node* s1, Node* s2) { |
duke@435 | 552 | if (s1->Opcode() != s2->Opcode()) return false; |
duke@435 | 553 | if (s1->req() != s2->req()) return false; |
duke@435 | 554 | if (s1->in(0) != s2->in(0)) return false; |
duke@435 | 555 | if (velt_type(s1) != velt_type(s2)) return false; |
duke@435 | 556 | return true; |
duke@435 | 557 | } |
duke@435 | 558 | |
duke@435 | 559 | //------------------------------independent--------------------------- |
duke@435 | 560 | // Is there no data path from s1 to s2 or s2 to s1? |
duke@435 | 561 | bool SuperWord::independent(Node* s1, Node* s2) { |
duke@435 | 562 | // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first"); |
duke@435 | 563 | int d1 = depth(s1); |
duke@435 | 564 | int d2 = depth(s2); |
duke@435 | 565 | if (d1 == d2) return s1 != s2; |
duke@435 | 566 | Node* deep = d1 > d2 ? s1 : s2; |
duke@435 | 567 | Node* shallow = d1 > d2 ? s2 : s1; |
duke@435 | 568 | |
duke@435 | 569 | visited_clear(); |
duke@435 | 570 | |
duke@435 | 571 | return independent_path(shallow, deep); |
duke@435 | 572 | } |
duke@435 | 573 | |
duke@435 | 574 | //------------------------------independent_path------------------------------ |
duke@435 | 575 | // Helper for independent |
duke@435 | 576 | bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) { |
duke@435 | 577 | if (dp >= 1000) return false; // stop deep recursion |
duke@435 | 578 | visited_set(deep); |
duke@435 | 579 | int shal_depth = depth(shallow); |
duke@435 | 580 | assert(shal_depth <= depth(deep), "must be"); |
duke@435 | 581 | for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) { |
duke@435 | 582 | Node* pred = preds.current(); |
duke@435 | 583 | if (in_bb(pred) && !visited_test(pred)) { |
duke@435 | 584 | if (shallow == pred) { |
duke@435 | 585 | return false; |
duke@435 | 586 | } |
duke@435 | 587 | if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) { |
duke@435 | 588 | return false; |
duke@435 | 589 | } |
duke@435 | 590 | } |
duke@435 | 591 | } |
duke@435 | 592 | return true; |
duke@435 | 593 | } |
duke@435 | 594 | |
duke@435 | 595 | //------------------------------set_alignment--------------------------- |
duke@435 | 596 | void SuperWord::set_alignment(Node* s1, Node* s2, int align) { |
duke@435 | 597 | set_alignment(s1, align); |
duke@435 | 598 | set_alignment(s2, align + data_size(s1)); |
duke@435 | 599 | } |
duke@435 | 600 | |
duke@435 | 601 | //------------------------------data_size--------------------------- |
duke@435 | 602 | int SuperWord::data_size(Node* s) { |
duke@435 | 603 | const Type* t = velt_type(s); |
duke@435 | 604 | BasicType bt = t->array_element_basic_type(); |
kvn@464 | 605 | int bsize = type2aelembytes(bt); |
duke@435 | 606 | assert(bsize != 0, "valid size"); |
duke@435 | 607 | return bsize; |
duke@435 | 608 | } |
duke@435 | 609 | |
duke@435 | 610 | //------------------------------extend_packlist--------------------------- |
duke@435 | 611 | // Extend packset by following use->def and def->use links from pack members. |
duke@435 | 612 | void SuperWord::extend_packlist() { |
duke@435 | 613 | bool changed; |
duke@435 | 614 | do { |
duke@435 | 615 | changed = false; |
duke@435 | 616 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 617 | Node_List* p = _packset.at(i); |
duke@435 | 618 | changed |= follow_use_defs(p); |
duke@435 | 619 | changed |= follow_def_uses(p); |
duke@435 | 620 | } |
duke@435 | 621 | } while (changed); |
duke@435 | 622 | |
duke@435 | 623 | #ifndef PRODUCT |
duke@435 | 624 | if (TraceSuperWord) { |
duke@435 | 625 | tty->print_cr("\nAfter extend_packlist"); |
duke@435 | 626 | print_packset(); |
duke@435 | 627 | } |
duke@435 | 628 | #endif |
duke@435 | 629 | } |
duke@435 | 630 | |
duke@435 | 631 | //------------------------------follow_use_defs--------------------------- |
duke@435 | 632 | // Extend the packset by visiting operand definitions of nodes in pack p |
duke@435 | 633 | bool SuperWord::follow_use_defs(Node_List* p) { |
duke@435 | 634 | Node* s1 = p->at(0); |
duke@435 | 635 | Node* s2 = p->at(1); |
duke@435 | 636 | assert(p->size() == 2, "just checking"); |
duke@435 | 637 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 638 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 639 | |
duke@435 | 640 | if (s1->is_Load()) return false; |
duke@435 | 641 | |
duke@435 | 642 | int align = alignment(s1); |
duke@435 | 643 | bool changed = false; |
duke@435 | 644 | int start = s1->is_Store() ? MemNode::ValueIn : 1; |
duke@435 | 645 | int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req(); |
duke@435 | 646 | for (int j = start; j < end; j++) { |
duke@435 | 647 | Node* t1 = s1->in(j); |
duke@435 | 648 | Node* t2 = s2->in(j); |
duke@435 | 649 | if (!in_bb(t1) || !in_bb(t2)) |
duke@435 | 650 | continue; |
duke@435 | 651 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 652 | if (est_savings(t1, t2) >= 0) { |
duke@435 | 653 | Node_List* pair = new Node_List(); |
duke@435 | 654 | pair->push(t1); |
duke@435 | 655 | pair->push(t2); |
duke@435 | 656 | _packset.append(pair); |
duke@435 | 657 | set_alignment(t1, t2, align); |
duke@435 | 658 | changed = true; |
duke@435 | 659 | } |
duke@435 | 660 | } |
duke@435 | 661 | } |
duke@435 | 662 | return changed; |
duke@435 | 663 | } |
duke@435 | 664 | |
duke@435 | 665 | //------------------------------follow_def_uses--------------------------- |
duke@435 | 666 | // Extend the packset by visiting uses of nodes in pack p |
duke@435 | 667 | bool SuperWord::follow_def_uses(Node_List* p) { |
duke@435 | 668 | bool changed = false; |
duke@435 | 669 | Node* s1 = p->at(0); |
duke@435 | 670 | Node* s2 = p->at(1); |
duke@435 | 671 | assert(p->size() == 2, "just checking"); |
duke@435 | 672 | assert(s1->req() == s2->req(), "just checking"); |
duke@435 | 673 | assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking"); |
duke@435 | 674 | |
duke@435 | 675 | if (s1->is_Store()) return false; |
duke@435 | 676 | |
duke@435 | 677 | int align = alignment(s1); |
duke@435 | 678 | int savings = -1; |
duke@435 | 679 | Node* u1 = NULL; |
duke@435 | 680 | Node* u2 = NULL; |
duke@435 | 681 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 682 | Node* t1 = s1->fast_out(i); |
duke@435 | 683 | if (!in_bb(t1)) continue; |
duke@435 | 684 | for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 685 | Node* t2 = s2->fast_out(j); |
duke@435 | 686 | if (!in_bb(t2)) continue; |
duke@435 | 687 | if (!opnd_positions_match(s1, t1, s2, t2)) |
duke@435 | 688 | continue; |
duke@435 | 689 | if (stmts_can_pack(t1, t2, align)) { |
duke@435 | 690 | int my_savings = est_savings(t1, t2); |
duke@435 | 691 | if (my_savings > savings) { |
duke@435 | 692 | savings = my_savings; |
duke@435 | 693 | u1 = t1; |
duke@435 | 694 | u2 = t2; |
duke@435 | 695 | } |
duke@435 | 696 | } |
duke@435 | 697 | } |
duke@435 | 698 | } |
duke@435 | 699 | if (savings >= 0) { |
duke@435 | 700 | Node_List* pair = new Node_List(); |
duke@435 | 701 | pair->push(u1); |
duke@435 | 702 | pair->push(u2); |
duke@435 | 703 | _packset.append(pair); |
duke@435 | 704 | set_alignment(u1, u2, align); |
duke@435 | 705 | changed = true; |
duke@435 | 706 | } |
duke@435 | 707 | return changed; |
duke@435 | 708 | } |
duke@435 | 709 | |
duke@435 | 710 | //---------------------------opnd_positions_match------------------------- |
duke@435 | 711 | // Is the use of d1 in u1 at the same operand position as d2 in u2? |
duke@435 | 712 | bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) { |
duke@435 | 713 | uint ct = u1->req(); |
duke@435 | 714 | if (ct != u2->req()) return false; |
duke@435 | 715 | uint i1 = 0; |
duke@435 | 716 | uint i2 = 0; |
duke@435 | 717 | do { |
duke@435 | 718 | for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break; |
duke@435 | 719 | for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break; |
duke@435 | 720 | if (i1 != i2) { |
duke@435 | 721 | return false; |
duke@435 | 722 | } |
duke@435 | 723 | } while (i1 < ct); |
duke@435 | 724 | return true; |
duke@435 | 725 | } |
duke@435 | 726 | |
duke@435 | 727 | //------------------------------est_savings--------------------------- |
duke@435 | 728 | // Estimate the savings from executing s1 and s2 as a pack |
duke@435 | 729 | int SuperWord::est_savings(Node* s1, Node* s2) { |
duke@435 | 730 | int save = 2 - 1; // 2 operations per instruction in packed form |
duke@435 | 731 | |
duke@435 | 732 | // inputs |
duke@435 | 733 | for (uint i = 1; i < s1->req(); i++) { |
duke@435 | 734 | Node* x1 = s1->in(i); |
duke@435 | 735 | Node* x2 = s2->in(i); |
duke@435 | 736 | if (x1 != x2) { |
duke@435 | 737 | if (are_adjacent_refs(x1, x2)) { |
duke@435 | 738 | save += adjacent_profit(x1, x2); |
duke@435 | 739 | } else if (!in_packset(x1, x2)) { |
duke@435 | 740 | save -= pack_cost(2); |
duke@435 | 741 | } else { |
duke@435 | 742 | save += unpack_cost(2); |
duke@435 | 743 | } |
duke@435 | 744 | } |
duke@435 | 745 | } |
duke@435 | 746 | |
duke@435 | 747 | // uses of result |
duke@435 | 748 | uint ct = 0; |
duke@435 | 749 | for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) { |
duke@435 | 750 | Node* s1_use = s1->fast_out(i); |
duke@435 | 751 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 752 | Node_List* p = _packset.at(j); |
duke@435 | 753 | if (p->at(0) == s1_use) { |
duke@435 | 754 | for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 755 | Node* s2_use = s2->fast_out(k); |
duke@435 | 756 | if (p->at(p->size()-1) == s2_use) { |
duke@435 | 757 | ct++; |
duke@435 | 758 | if (are_adjacent_refs(s1_use, s2_use)) { |
duke@435 | 759 | save += adjacent_profit(s1_use, s2_use); |
duke@435 | 760 | } |
duke@435 | 761 | } |
duke@435 | 762 | } |
duke@435 | 763 | } |
duke@435 | 764 | } |
duke@435 | 765 | } |
duke@435 | 766 | |
duke@435 | 767 | if (ct < s1->outcnt()) save += unpack_cost(1); |
duke@435 | 768 | if (ct < s2->outcnt()) save += unpack_cost(1); |
duke@435 | 769 | |
duke@435 | 770 | return save; |
duke@435 | 771 | } |
duke@435 | 772 | |
duke@435 | 773 | //------------------------------costs--------------------------- |
duke@435 | 774 | int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; } |
duke@435 | 775 | int SuperWord::pack_cost(int ct) { return ct; } |
duke@435 | 776 | int SuperWord::unpack_cost(int ct) { return ct; } |
duke@435 | 777 | |
duke@435 | 778 | //------------------------------combine_packs--------------------------- |
duke@435 | 779 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last |
duke@435 | 780 | void SuperWord::combine_packs() { |
duke@435 | 781 | bool changed; |
duke@435 | 782 | do { |
duke@435 | 783 | changed = false; |
duke@435 | 784 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 785 | Node_List* p1 = _packset.at(i); |
duke@435 | 786 | if (p1 == NULL) continue; |
duke@435 | 787 | for (int j = 0; j < _packset.length(); j++) { |
duke@435 | 788 | Node_List* p2 = _packset.at(j); |
duke@435 | 789 | if (p2 == NULL) continue; |
duke@435 | 790 | if (p1->at(p1->size()-1) == p2->at(0)) { |
duke@435 | 791 | for (uint k = 1; k < p2->size(); k++) { |
duke@435 | 792 | p1->push(p2->at(k)); |
duke@435 | 793 | } |
duke@435 | 794 | _packset.at_put(j, NULL); |
duke@435 | 795 | changed = true; |
duke@435 | 796 | } |
duke@435 | 797 | } |
duke@435 | 798 | } |
duke@435 | 799 | } while (changed); |
duke@435 | 800 | |
duke@435 | 801 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 802 | Node_List* p1 = _packset.at(i); |
duke@435 | 803 | if (p1 == NULL) { |
duke@435 | 804 | _packset.remove_at(i); |
duke@435 | 805 | } |
duke@435 | 806 | } |
duke@435 | 807 | |
duke@435 | 808 | #ifndef PRODUCT |
duke@435 | 809 | if (TraceSuperWord) { |
duke@435 | 810 | tty->print_cr("\nAfter combine_packs"); |
duke@435 | 811 | print_packset(); |
duke@435 | 812 | } |
duke@435 | 813 | #endif |
duke@435 | 814 | } |
duke@435 | 815 | |
duke@435 | 816 | //-----------------------------construct_my_pack_map-------------------------- |
duke@435 | 817 | // Construct the map from nodes to packs. Only valid after the |
duke@435 | 818 | // point where a node is only in one pack (after combine_packs). |
duke@435 | 819 | void SuperWord::construct_my_pack_map() { |
duke@435 | 820 | Node_List* rslt = NULL; |
duke@435 | 821 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 822 | Node_List* p = _packset.at(i); |
duke@435 | 823 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 824 | Node* s = p->at(j); |
duke@435 | 825 | assert(my_pack(s) == NULL, "only in one pack"); |
duke@435 | 826 | set_my_pack(s, p); |
duke@435 | 827 | } |
duke@435 | 828 | } |
duke@435 | 829 | } |
duke@435 | 830 | |
duke@435 | 831 | //------------------------------filter_packs--------------------------- |
duke@435 | 832 | // Remove packs that are not implemented or not profitable. |
duke@435 | 833 | void SuperWord::filter_packs() { |
duke@435 | 834 | |
duke@435 | 835 | // Remove packs that are not implemented |
duke@435 | 836 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 837 | Node_List* pk = _packset.at(i); |
duke@435 | 838 | bool impl = implemented(pk); |
duke@435 | 839 | if (!impl) { |
duke@435 | 840 | #ifndef PRODUCT |
duke@435 | 841 | if (TraceSuperWord && Verbose) { |
duke@435 | 842 | tty->print_cr("Unimplemented"); |
duke@435 | 843 | pk->at(0)->dump(); |
duke@435 | 844 | } |
duke@435 | 845 | #endif |
duke@435 | 846 | remove_pack_at(i); |
duke@435 | 847 | } |
duke@435 | 848 | } |
duke@435 | 849 | |
duke@435 | 850 | // Remove packs that are not profitable |
duke@435 | 851 | bool changed; |
duke@435 | 852 | do { |
duke@435 | 853 | changed = false; |
duke@435 | 854 | for (int i = _packset.length() - 1; i >= 0; i--) { |
duke@435 | 855 | Node_List* pk = _packset.at(i); |
duke@435 | 856 | bool prof = profitable(pk); |
duke@435 | 857 | if (!prof) { |
duke@435 | 858 | #ifndef PRODUCT |
duke@435 | 859 | if (TraceSuperWord && Verbose) { |
duke@435 | 860 | tty->print_cr("Unprofitable"); |
duke@435 | 861 | pk->at(0)->dump(); |
duke@435 | 862 | } |
duke@435 | 863 | #endif |
duke@435 | 864 | remove_pack_at(i); |
duke@435 | 865 | changed = true; |
duke@435 | 866 | } |
duke@435 | 867 | } |
duke@435 | 868 | } while (changed); |
duke@435 | 869 | |
duke@435 | 870 | #ifndef PRODUCT |
duke@435 | 871 | if (TraceSuperWord) { |
duke@435 | 872 | tty->print_cr("\nAfter filter_packs"); |
duke@435 | 873 | print_packset(); |
duke@435 | 874 | tty->cr(); |
duke@435 | 875 | } |
duke@435 | 876 | #endif |
duke@435 | 877 | } |
duke@435 | 878 | |
duke@435 | 879 | //------------------------------implemented--------------------------- |
duke@435 | 880 | // Can code be generated for pack p? |
duke@435 | 881 | bool SuperWord::implemented(Node_List* p) { |
duke@435 | 882 | Node* p0 = p->at(0); |
duke@435 | 883 | int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0)); |
duke@435 | 884 | return vopc > 0 && Matcher::has_match_rule(vopc); |
duke@435 | 885 | } |
duke@435 | 886 | |
duke@435 | 887 | //------------------------------profitable--------------------------- |
duke@435 | 888 | // For pack p, are all operands and all uses (with in the block) vector? |
duke@435 | 889 | bool SuperWord::profitable(Node_List* p) { |
duke@435 | 890 | Node* p0 = p->at(0); |
duke@435 | 891 | uint start, end; |
duke@435 | 892 | vector_opd_range(p0, &start, &end); |
duke@435 | 893 | |
duke@435 | 894 | // Return false if some input is not vector and inside block |
duke@435 | 895 | for (uint i = start; i < end; i++) { |
duke@435 | 896 | if (!is_vector_use(p0, i)) { |
duke@435 | 897 | // For now, return false if not scalar promotion case (inputs are the same.) |
twisti@1040 | 898 | // Later, implement PackNode and allow differing, non-vector inputs |
duke@435 | 899 | // (maybe just the ones from outside the block.) |
duke@435 | 900 | Node* p0_def = p0->in(i); |
duke@435 | 901 | for (uint j = 1; j < p->size(); j++) { |
duke@435 | 902 | Node* use = p->at(j); |
duke@435 | 903 | Node* def = use->in(i); |
duke@435 | 904 | if (p0_def != def) |
duke@435 | 905 | return false; |
duke@435 | 906 | } |
duke@435 | 907 | } |
duke@435 | 908 | } |
duke@435 | 909 | if (!p0->is_Store()) { |
duke@435 | 910 | // For now, return false if not all uses are vector. |
duke@435 | 911 | // Later, implement ExtractNode and allow non-vector uses (maybe |
duke@435 | 912 | // just the ones outside the block.) |
duke@435 | 913 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 914 | Node* def = p->at(i); |
duke@435 | 915 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 916 | Node* use = def->fast_out(j); |
duke@435 | 917 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 918 | Node* n = use->in(k); |
duke@435 | 919 | if (def == n) { |
duke@435 | 920 | if (!is_vector_use(use, k)) { |
duke@435 | 921 | return false; |
duke@435 | 922 | } |
duke@435 | 923 | } |
duke@435 | 924 | } |
duke@435 | 925 | } |
duke@435 | 926 | } |
duke@435 | 927 | } |
duke@435 | 928 | return true; |
duke@435 | 929 | } |
duke@435 | 930 | |
duke@435 | 931 | //------------------------------schedule--------------------------- |
duke@435 | 932 | // Adjust the memory graph for the packed operations |
duke@435 | 933 | void SuperWord::schedule() { |
duke@435 | 934 | |
duke@435 | 935 | // Co-locate in the memory graph the members of each memory pack |
duke@435 | 936 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 937 | co_locate_pack(_packset.at(i)); |
duke@435 | 938 | } |
duke@435 | 939 | } |
duke@435 | 940 | |
cfang@1102 | 941 | //-------------------------------remove_and_insert------------------- |
cfang@1102 | 942 | //remove "current" from its current position in the memory graph and insert |
cfang@1102 | 943 | //it after the appropriate insertion point (lip or uip) |
cfang@1102 | 944 | void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, |
cfang@1102 | 945 | Node *uip, Unique_Node_List &sched_before) { |
cfang@1102 | 946 | Node* my_mem = current->in(MemNode::Memory); |
cfang@1102 | 947 | _igvn.hash_delete(current); |
cfang@1102 | 948 | _igvn.hash_delete(my_mem); |
cfang@1102 | 949 | |
cfang@1102 | 950 | //remove current_store from its current position in the memmory graph |
cfang@1102 | 951 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 952 | Node* use = current->out(i); |
cfang@1102 | 953 | if (use->is_Mem()) { |
cfang@1102 | 954 | assert(use->in(MemNode::Memory) == current, "must be"); |
cfang@1102 | 955 | _igvn.hash_delete(use); |
cfang@1102 | 956 | if (use == prev) { // connect prev to my_mem |
cfang@1102 | 957 | use->set_req(MemNode::Memory, my_mem); |
cfang@1102 | 958 | } else if (sched_before.member(use)) { |
cfang@1102 | 959 | _igvn.hash_delete(uip); |
cfang@1102 | 960 | use->set_req(MemNode::Memory, uip); |
cfang@1102 | 961 | } else { |
cfang@1102 | 962 | _igvn.hash_delete(lip); |
cfang@1102 | 963 | use->set_req(MemNode::Memory, lip); |
cfang@1102 | 964 | } |
cfang@1102 | 965 | _igvn._worklist.push(use); |
cfang@1102 | 966 | --i; //deleted this edge; rescan position |
cfang@1102 | 967 | } |
cfang@1102 | 968 | } |
cfang@1102 | 969 | |
cfang@1102 | 970 | bool sched_up = sched_before.member(current); |
cfang@1102 | 971 | Node *insert_pt = sched_up ? uip : lip; |
cfang@1102 | 972 | _igvn.hash_delete(insert_pt); |
cfang@1102 | 973 | |
cfang@1102 | 974 | // all uses of insert_pt's memory state should use current's instead |
cfang@1102 | 975 | for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { |
cfang@1102 | 976 | Node* use = insert_pt->out(i); |
cfang@1102 | 977 | if (use->is_Mem()) { |
cfang@1102 | 978 | assert(use->in(MemNode::Memory) == insert_pt, "must be"); |
cfang@1102 | 979 | _igvn.hash_delete(use); |
cfang@1102 | 980 | use->set_req(MemNode::Memory, current); |
cfang@1102 | 981 | _igvn._worklist.push(use); |
cfang@1102 | 982 | --i; //deleted this edge; rescan position |
cfang@1102 | 983 | } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { |
cfang@1102 | 984 | uint pos; //lip (lower insert point) must be the last one in the memory slice |
cfang@1102 | 985 | _igvn.hash_delete(use); |
cfang@1102 | 986 | for (pos=1; pos < use->req(); pos++) { |
cfang@1102 | 987 | if (use->in(pos) == insert_pt) break; |
cfang@1102 | 988 | } |
cfang@1102 | 989 | use->set_req(pos, current); |
cfang@1102 | 990 | _igvn._worklist.push(use); |
cfang@1102 | 991 | --i; |
cfang@1102 | 992 | } |
cfang@1102 | 993 | } |
cfang@1102 | 994 | |
cfang@1102 | 995 | //connect current to insert_pt |
cfang@1102 | 996 | current->set_req(MemNode::Memory, insert_pt); |
cfang@1102 | 997 | _igvn._worklist.push(current); |
cfang@1102 | 998 | } |
cfang@1102 | 999 | |
cfang@1102 | 1000 | //------------------------------co_locate_pack---------------------------------- |
cfang@1102 | 1001 | // To schedule a store pack, we need to move any sandwiched memory ops either before |
cfang@1102 | 1002 | // or after the pack, based upon dependence information: |
cfang@1102 | 1003 | // (1) If any store in the pack depends on the sandwiched memory op, the |
cfang@1102 | 1004 | // sandwiched memory op must be scheduled BEFORE the pack; |
cfang@1102 | 1005 | // (2) If a sandwiched memory op depends on any store in the pack, the |
cfang@1102 | 1006 | // sandwiched memory op must be scheduled AFTER the pack; |
cfang@1102 | 1007 | // (3) If a sandwiched memory op (say, memA) depends on another sandwiched |
cfang@1102 | 1008 | // memory op (say memB), memB must be scheduled before memA. So, if memA is |
cfang@1102 | 1009 | // scheduled before the pack, memB must also be scheduled before the pack; |
cfang@1102 | 1010 | // (4) If there is no dependence restriction for a sandwiched memory op, we simply |
cfang@1102 | 1011 | // schedule this store AFTER the pack |
cfang@1102 | 1012 | // (5) We know there is no dependence cycle, so there in no other case; |
cfang@1102 | 1013 | // (6) Finally, all memory ops in another single pack should be moved in the same direction. |
cfang@1102 | 1014 | // |
cfang@1387 | 1015 | // To schedule a load pack, we use the memory state of either the first or the last load in |
cfang@1387 | 1016 | // the pack, based on the dependence constraint. |
duke@435 | 1017 | void SuperWord::co_locate_pack(Node_List* pk) { |
duke@435 | 1018 | if (pk->at(0)->is_Store()) { |
duke@435 | 1019 | MemNode* first = executed_first(pk)->as_Mem(); |
duke@435 | 1020 | MemNode* last = executed_last(pk)->as_Mem(); |
cfang@1102 | 1021 | Unique_Node_List schedule_before_pack; |
cfang@1102 | 1022 | Unique_Node_List memops; |
cfang@1102 | 1023 | |
duke@435 | 1024 | MemNode* current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1025 | MemNode* previous = last; |
duke@435 | 1026 | while (true) { |
duke@435 | 1027 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1028 | memops.push(previous); |
cfang@1102 | 1029 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
cfang@1102 | 1030 | Node* use = current->out(i); |
cfang@1102 | 1031 | if (use->is_Mem() && use != previous) |
cfang@1102 | 1032 | memops.push(use); |
cfang@1102 | 1033 | } |
cfang@1102 | 1034 | if(current == first) break; |
cfang@1102 | 1035 | previous = current; |
cfang@1102 | 1036 | current = current->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1037 | } |
cfang@1102 | 1038 | |
cfang@1102 | 1039 | // determine which memory operations should be scheduled before the pack |
cfang@1102 | 1040 | for (uint i = 1; i < memops.size(); i++) { |
cfang@1102 | 1041 | Node *s1 = memops.at(i); |
cfang@1102 | 1042 | if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { |
cfang@1102 | 1043 | for (uint j = 0; j< i; j++) { |
cfang@1102 | 1044 | Node *s2 = memops.at(j); |
cfang@1102 | 1045 | if (!independent(s1, s2)) { |
cfang@1102 | 1046 | if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { |
cfang@1102 | 1047 | schedule_before_pack.push(s1); //s1 must be scheduled before |
cfang@1102 | 1048 | Node_List* mem_pk = my_pack(s1); |
cfang@1102 | 1049 | if (mem_pk != NULL) { |
cfang@1102 | 1050 | for (uint ii = 0; ii < mem_pk->size(); ii++) { |
cfang@1102 | 1051 | Node* s = mem_pk->at(ii); // follow partner |
cfang@1102 | 1052 | if (memops.member(s) && !schedule_before_pack.member(s)) |
cfang@1102 | 1053 | schedule_before_pack.push(s); |
cfang@1102 | 1054 | } |
cfang@1102 | 1055 | } |
cfang@1102 | 1056 | } |
cfang@1102 | 1057 | } |
cfang@1102 | 1058 | } |
cfang@1102 | 1059 | } |
cfang@1102 | 1060 | } |
cfang@1102 | 1061 | |
cfang@1102 | 1062 | MemNode* lower_insert_pt = last; |
cfang@1102 | 1063 | Node* upper_insert_pt = first->in(MemNode::Memory); |
cfang@1102 | 1064 | previous = last; //previous store in pk |
cfang@1102 | 1065 | current = last->in(MemNode::Memory)->as_Mem(); |
cfang@1102 | 1066 | |
cfang@1102 | 1067 | //start scheduling from "last" to "first" |
cfang@1102 | 1068 | while (true) { |
cfang@1102 | 1069 | assert(in_bb(current), "stay in block"); |
cfang@1102 | 1070 | assert(in_pack(previous, pk), "previous stays in pack"); |
duke@435 | 1071 | Node* my_mem = current->in(MemNode::Memory); |
cfang@1102 | 1072 | |
duke@435 | 1073 | if (in_pack(current, pk)) { |
cfang@1102 | 1074 | // Forward users of my memory state (except "previous) to my input memory state |
duke@435 | 1075 | _igvn.hash_delete(current); |
duke@435 | 1076 | for (DUIterator i = current->outs(); current->has_out(i); i++) { |
duke@435 | 1077 | Node* use = current->out(i); |
cfang@1102 | 1078 | if (use->is_Mem() && use != previous) { |
duke@435 | 1079 | assert(use->in(MemNode::Memory) == current, "must be"); |
duke@435 | 1080 | _igvn.hash_delete(use); |
cfang@1102 | 1081 | if (schedule_before_pack.member(use)) { |
cfang@1102 | 1082 | _igvn.hash_delete(upper_insert_pt); |
cfang@1102 | 1083 | use->set_req(MemNode::Memory, upper_insert_pt); |
cfang@1102 | 1084 | } else { |
cfang@1102 | 1085 | _igvn.hash_delete(lower_insert_pt); |
cfang@1102 | 1086 | use->set_req(MemNode::Memory, lower_insert_pt); |
cfang@1102 | 1087 | } |
duke@435 | 1088 | _igvn._worklist.push(use); |
duke@435 | 1089 | --i; // deleted this edge; rescan position |
duke@435 | 1090 | } |
duke@435 | 1091 | } |
cfang@1102 | 1092 | previous = current; |
cfang@1102 | 1093 | } else { // !in_pack(current, pk) ==> a sandwiched store |
cfang@1102 | 1094 | remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); |
duke@435 | 1095 | } |
cfang@1102 | 1096 | |
duke@435 | 1097 | if (current == first) break; |
duke@435 | 1098 | current = my_mem->as_Mem(); |
cfang@1102 | 1099 | } // end while |
cfang@1102 | 1100 | } else if (pk->at(0)->is_Load()) { //load |
cfang@1387 | 1101 | // all loads in the pack should have the same memory state. By default, |
cfang@1387 | 1102 | // we use the memory state of the last load. However, if any load could |
cfang@1387 | 1103 | // not be moved down due to the dependence constraint, we use the memory |
cfang@1387 | 1104 | // state of the first load. |
cfang@1387 | 1105 | Node* last_mem = executed_last(pk)->in(MemNode::Memory); |
cfang@1387 | 1106 | Node* first_mem = executed_first(pk)->in(MemNode::Memory); |
cfang@1387 | 1107 | bool schedule_last = true; |
cfang@1387 | 1108 | for (uint i = 0; i < pk->size(); i++) { |
cfang@1387 | 1109 | Node* ld = pk->at(i); |
cfang@1387 | 1110 | for (Node* current = last_mem; current != ld->in(MemNode::Memory); |
cfang@1387 | 1111 | current=current->in(MemNode::Memory)) { |
cfang@1387 | 1112 | assert(current != first_mem, "corrupted memory graph"); |
cfang@1387 | 1113 | if(current->is_Mem() && !independent(current, ld)){ |
cfang@1387 | 1114 | schedule_last = false; // a later store depends on this load |
cfang@1387 | 1115 | break; |
cfang@1387 | 1116 | } |
cfang@1387 | 1117 | } |
cfang@1387 | 1118 | } |
cfang@1387 | 1119 | |
cfang@1387 | 1120 | Node* mem_input = schedule_last ? last_mem : first_mem; |
cfang@1387 | 1121 | _igvn.hash_delete(mem_input); |
cfang@1387 | 1122 | // Give each load the same memory state |
duke@435 | 1123 | for (uint i = 0; i < pk->size(); i++) { |
duke@435 | 1124 | LoadNode* ld = pk->at(i)->as_Load(); |
duke@435 | 1125 | _igvn.hash_delete(ld); |
cfang@1387 | 1126 | ld->set_req(MemNode::Memory, mem_input); |
duke@435 | 1127 | _igvn._worklist.push(ld); |
duke@435 | 1128 | } |
duke@435 | 1129 | } |
duke@435 | 1130 | } |
duke@435 | 1131 | |
duke@435 | 1132 | //------------------------------output--------------------------- |
duke@435 | 1133 | // Convert packs into vector node operations |
duke@435 | 1134 | void SuperWord::output() { |
duke@435 | 1135 | if (_packset.length() == 0) return; |
duke@435 | 1136 | |
kvn@2727 | 1137 | #ifndef PRODUCT |
kvn@2727 | 1138 | if (TraceLoopOpts) { |
kvn@2727 | 1139 | tty->print("SuperWord "); |
kvn@2727 | 1140 | lpt()->dump_head(); |
kvn@2727 | 1141 | } |
kvn@2727 | 1142 | #endif |
kvn@2727 | 1143 | |
duke@435 | 1144 | // MUST ENSURE main loop's initial value is properly aligned: |
duke@435 | 1145 | // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0 |
duke@435 | 1146 | |
duke@435 | 1147 | align_initial_loop_index(align_to_ref()); |
duke@435 | 1148 | |
duke@435 | 1149 | // Insert extract (unpack) operations for scalar uses |
duke@435 | 1150 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1151 | insert_extracts(_packset.at(i)); |
duke@435 | 1152 | } |
duke@435 | 1153 | |
duke@435 | 1154 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1155 | Node* n = _block.at(i); |
duke@435 | 1156 | Node_List* p = my_pack(n); |
duke@435 | 1157 | if (p && n == executed_last(p)) { |
duke@435 | 1158 | uint vlen = p->size(); |
duke@435 | 1159 | Node* vn = NULL; |
duke@435 | 1160 | Node* low_adr = p->at(0); |
duke@435 | 1161 | Node* first = executed_first(p); |
duke@435 | 1162 | if (n->is_Load()) { |
duke@435 | 1163 | int opc = n->Opcode(); |
duke@435 | 1164 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1165 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1166 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1167 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1168 | vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen); |
duke@435 | 1169 | |
duke@435 | 1170 | } else if (n->is_Store()) { |
duke@435 | 1171 | // Promote value to be stored to vector |
kvn@3040 | 1172 | Node* val = vector_opd(p, MemNode::ValueIn); |
duke@435 | 1173 | |
duke@435 | 1174 | int opc = n->Opcode(); |
duke@435 | 1175 | Node* ctl = n->in(MemNode::Control); |
duke@435 | 1176 | Node* mem = first->in(MemNode::Memory); |
duke@435 | 1177 | Node* adr = low_adr->in(MemNode::Address); |
duke@435 | 1178 | const TypePtr* atyp = n->adr_type(); |
duke@435 | 1179 | vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); |
duke@435 | 1180 | |
duke@435 | 1181 | } else if (n->req() == 3) { |
duke@435 | 1182 | // Promote operands to vector |
duke@435 | 1183 | Node* in1 = vector_opd(p, 1); |
duke@435 | 1184 | Node* in2 = vector_opd(p, 2); |
duke@435 | 1185 | vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n)); |
duke@435 | 1186 | |
duke@435 | 1187 | } else { |
duke@435 | 1188 | ShouldNotReachHere(); |
duke@435 | 1189 | } |
duke@435 | 1190 | |
duke@435 | 1191 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1192 | _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0))); |
duke@435 | 1193 | for (uint j = 0; j < p->size(); j++) { |
duke@435 | 1194 | Node* pm = p->at(j); |
kvn@1976 | 1195 | _igvn.replace_node(pm, vn); |
duke@435 | 1196 | } |
duke@435 | 1197 | _igvn._worklist.push(vn); |
duke@435 | 1198 | } |
duke@435 | 1199 | } |
duke@435 | 1200 | } |
duke@435 | 1201 | |
duke@435 | 1202 | //------------------------------vector_opd--------------------------- |
duke@435 | 1203 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) |
kvn@3040 | 1204 | Node* SuperWord::vector_opd(Node_List* p, int opd_idx) { |
duke@435 | 1205 | Node* p0 = p->at(0); |
duke@435 | 1206 | uint vlen = p->size(); |
duke@435 | 1207 | Node* opd = p0->in(opd_idx); |
duke@435 | 1208 | |
duke@435 | 1209 | bool same_opd = true; |
duke@435 | 1210 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1211 | Node* pi = p->at(i); |
duke@435 | 1212 | Node* in = pi->in(opd_idx); |
duke@435 | 1213 | if (opd != in) { |
duke@435 | 1214 | same_opd = false; |
duke@435 | 1215 | break; |
duke@435 | 1216 | } |
duke@435 | 1217 | } |
duke@435 | 1218 | |
duke@435 | 1219 | if (same_opd) { |
kvn@3040 | 1220 | if (opd->is_Vector() || opd->is_VectorLoad()) { |
kvn@3040 | 1221 | return opd; // input is matching vector |
duke@435 | 1222 | } |
kvn@3040 | 1223 | assert(!opd->is_VectorStore(), "such vector is not expected here"); |
duke@435 | 1224 | // Convert scalar input to vector. Use p0's type because it's container |
duke@435 | 1225 | // maybe smaller than the operand's container. |
duke@435 | 1226 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1227 | const Type* p0_t = velt_type(p0); |
duke@435 | 1228 | if (p0_t->higher_equal(opd_t)) opd_t = p0_t; |
duke@435 | 1229 | VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t); |
duke@435 | 1230 | |
duke@435 | 1231 | _phase->_igvn.register_new_node_with_optimizer(vn); |
duke@435 | 1232 | _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
duke@435 | 1233 | return vn; |
duke@435 | 1234 | } |
duke@435 | 1235 | |
duke@435 | 1236 | // Insert pack operation |
duke@435 | 1237 | const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd); |
duke@435 | 1238 | PackNode* pk = PackNode::make(_phase->C, opd, opd_t); |
duke@435 | 1239 | |
duke@435 | 1240 | for (uint i = 1; i < vlen; i++) { |
duke@435 | 1241 | Node* pi = p->at(i); |
duke@435 | 1242 | Node* in = pi->in(opd_idx); |
duke@435 | 1243 | assert(my_pack(in) == NULL, "Should already have been unpacked"); |
duke@435 | 1244 | assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type"); |
duke@435 | 1245 | pk->add_opd(in); |
duke@435 | 1246 | } |
duke@435 | 1247 | _phase->_igvn.register_new_node_with_optimizer(pk); |
duke@435 | 1248 | _phase->set_ctrl(pk, _phase->get_ctrl(opd)); |
duke@435 | 1249 | return pk; |
duke@435 | 1250 | } |
duke@435 | 1251 | |
duke@435 | 1252 | //------------------------------insert_extracts--------------------------- |
duke@435 | 1253 | // If a use of pack p is not a vector use, then replace the |
duke@435 | 1254 | // use with an extract operation. |
duke@435 | 1255 | void SuperWord::insert_extracts(Node_List* p) { |
duke@435 | 1256 | if (p->at(0)->is_Store()) return; |
duke@435 | 1257 | assert(_n_idx_list.is_empty(), "empty (node,index) list"); |
duke@435 | 1258 | |
duke@435 | 1259 | // Inspect each use of each pack member. For each use that is |
duke@435 | 1260 | // not a vector use, replace the use with an extract operation. |
duke@435 | 1261 | |
duke@435 | 1262 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1263 | Node* def = p->at(i); |
duke@435 | 1264 | for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) { |
duke@435 | 1265 | Node* use = def->fast_out(j); |
duke@435 | 1266 | for (uint k = 0; k < use->req(); k++) { |
duke@435 | 1267 | Node* n = use->in(k); |
duke@435 | 1268 | if (def == n) { |
duke@435 | 1269 | if (!is_vector_use(use, k)) { |
duke@435 | 1270 | _n_idx_list.push(use, k); |
duke@435 | 1271 | } |
duke@435 | 1272 | } |
duke@435 | 1273 | } |
duke@435 | 1274 | } |
duke@435 | 1275 | } |
duke@435 | 1276 | |
duke@435 | 1277 | while (_n_idx_list.is_nonempty()) { |
duke@435 | 1278 | Node* use = _n_idx_list.node(); |
duke@435 | 1279 | int idx = _n_idx_list.index(); |
duke@435 | 1280 | _n_idx_list.pop(); |
duke@435 | 1281 | Node* def = use->in(idx); |
duke@435 | 1282 | |
duke@435 | 1283 | // Insert extract operation |
duke@435 | 1284 | _igvn.hash_delete(def); |
duke@435 | 1285 | _igvn.hash_delete(use); |
duke@435 | 1286 | int def_pos = alignment(def) / data_size(def); |
duke@435 | 1287 | const Type* def_t = velt_type(def); |
duke@435 | 1288 | |
duke@435 | 1289 | Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t); |
duke@435 | 1290 | _phase->_igvn.register_new_node_with_optimizer(ex); |
duke@435 | 1291 | _phase->set_ctrl(ex, _phase->get_ctrl(def)); |
duke@435 | 1292 | use->set_req(idx, ex); |
duke@435 | 1293 | _igvn._worklist.push(def); |
duke@435 | 1294 | _igvn._worklist.push(use); |
duke@435 | 1295 | |
duke@435 | 1296 | bb_insert_after(ex, bb_idx(def)); |
duke@435 | 1297 | set_velt_type(ex, def_t); |
duke@435 | 1298 | } |
duke@435 | 1299 | } |
duke@435 | 1300 | |
duke@435 | 1301 | //------------------------------is_vector_use--------------------------- |
duke@435 | 1302 | // Is use->in(u_idx) a vector use? |
duke@435 | 1303 | bool SuperWord::is_vector_use(Node* use, int u_idx) { |
duke@435 | 1304 | Node_List* u_pk = my_pack(use); |
duke@435 | 1305 | if (u_pk == NULL) return false; |
duke@435 | 1306 | Node* def = use->in(u_idx); |
duke@435 | 1307 | Node_List* d_pk = my_pack(def); |
duke@435 | 1308 | if (d_pk == NULL) { |
duke@435 | 1309 | // check for scalar promotion |
duke@435 | 1310 | Node* n = u_pk->at(0)->in(u_idx); |
duke@435 | 1311 | for (uint i = 1; i < u_pk->size(); i++) { |
duke@435 | 1312 | if (u_pk->at(i)->in(u_idx) != n) return false; |
duke@435 | 1313 | } |
duke@435 | 1314 | return true; |
duke@435 | 1315 | } |
duke@435 | 1316 | if (u_pk->size() != d_pk->size()) |
duke@435 | 1317 | return false; |
duke@435 | 1318 | for (uint i = 0; i < u_pk->size(); i++) { |
duke@435 | 1319 | Node* ui = u_pk->at(i); |
duke@435 | 1320 | Node* di = d_pk->at(i); |
duke@435 | 1321 | if (ui->in(u_idx) != di || alignment(ui) != alignment(di)) |
duke@435 | 1322 | return false; |
duke@435 | 1323 | } |
duke@435 | 1324 | return true; |
duke@435 | 1325 | } |
duke@435 | 1326 | |
duke@435 | 1327 | //------------------------------construct_bb--------------------------- |
duke@435 | 1328 | // Construct reverse postorder list of block members |
duke@435 | 1329 | void SuperWord::construct_bb() { |
duke@435 | 1330 | Node* entry = bb(); |
duke@435 | 1331 | |
duke@435 | 1332 | assert(_stk.length() == 0, "stk is empty"); |
duke@435 | 1333 | assert(_block.length() == 0, "block is empty"); |
duke@435 | 1334 | assert(_data_entry.length() == 0, "data_entry is empty"); |
duke@435 | 1335 | assert(_mem_slice_head.length() == 0, "mem_slice_head is empty"); |
duke@435 | 1336 | assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty"); |
duke@435 | 1337 | |
duke@435 | 1338 | // Find non-control nodes with no inputs from within block, |
duke@435 | 1339 | // create a temporary map from node _idx to bb_idx for use |
duke@435 | 1340 | // by the visited and post_visited sets, |
duke@435 | 1341 | // and count number of nodes in block. |
duke@435 | 1342 | int bb_ct = 0; |
duke@435 | 1343 | for (uint i = 0; i < lpt()->_body.size(); i++ ) { |
duke@435 | 1344 | Node *n = lpt()->_body.at(i); |
duke@435 | 1345 | set_bb_idx(n, i); // Create a temporary map |
duke@435 | 1346 | if (in_bb(n)) { |
duke@435 | 1347 | bb_ct++; |
duke@435 | 1348 | if (!n->is_CFG()) { |
duke@435 | 1349 | bool found = false; |
duke@435 | 1350 | for (uint j = 0; j < n->req(); j++) { |
duke@435 | 1351 | Node* def = n->in(j); |
duke@435 | 1352 | if (def && in_bb(def)) { |
duke@435 | 1353 | found = true; |
duke@435 | 1354 | break; |
duke@435 | 1355 | } |
duke@435 | 1356 | } |
duke@435 | 1357 | if (!found) { |
duke@435 | 1358 | assert(n != entry, "can't be entry"); |
duke@435 | 1359 | _data_entry.push(n); |
duke@435 | 1360 | } |
duke@435 | 1361 | } |
duke@435 | 1362 | } |
duke@435 | 1363 | } |
duke@435 | 1364 | |
duke@435 | 1365 | // Find memory slices (head and tail) |
duke@435 | 1366 | for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) { |
duke@435 | 1367 | Node *n = lp()->fast_out(i); |
duke@435 | 1368 | if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) { |
duke@435 | 1369 | Node* n_tail = n->in(LoopNode::LoopBackControl); |
kvn@688 | 1370 | if (n_tail != n->in(LoopNode::EntryControl)) { |
kvn@688 | 1371 | _mem_slice_head.push(n); |
kvn@688 | 1372 | _mem_slice_tail.push(n_tail); |
kvn@688 | 1373 | } |
duke@435 | 1374 | } |
duke@435 | 1375 | } |
duke@435 | 1376 | |
duke@435 | 1377 | // Create an RPO list of nodes in block |
duke@435 | 1378 | |
duke@435 | 1379 | visited_clear(); |
duke@435 | 1380 | post_visited_clear(); |
duke@435 | 1381 | |
duke@435 | 1382 | // Push all non-control nodes with no inputs from within block, then control entry |
duke@435 | 1383 | for (int j = 0; j < _data_entry.length(); j++) { |
duke@435 | 1384 | Node* n = _data_entry.at(j); |
duke@435 | 1385 | visited_set(n); |
duke@435 | 1386 | _stk.push(n); |
duke@435 | 1387 | } |
duke@435 | 1388 | visited_set(entry); |
duke@435 | 1389 | _stk.push(entry); |
duke@435 | 1390 | |
duke@435 | 1391 | // Do a depth first walk over out edges |
duke@435 | 1392 | int rpo_idx = bb_ct - 1; |
duke@435 | 1393 | int size; |
duke@435 | 1394 | while ((size = _stk.length()) > 0) { |
duke@435 | 1395 | Node* n = _stk.top(); // Leave node on stack |
duke@435 | 1396 | if (!visited_test_set(n)) { |
duke@435 | 1397 | // forward arc in graph |
duke@435 | 1398 | } else if (!post_visited_test(n)) { |
duke@435 | 1399 | // cross or back arc |
duke@435 | 1400 | for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { |
duke@435 | 1401 | Node *use = n->fast_out(i); |
duke@435 | 1402 | if (in_bb(use) && !visited_test(use) && |
duke@435 | 1403 | // Don't go around backedge |
duke@435 | 1404 | (!use->is_Phi() || n == entry)) { |
duke@435 | 1405 | _stk.push(use); |
duke@435 | 1406 | } |
duke@435 | 1407 | } |
duke@435 | 1408 | if (_stk.length() == size) { |
duke@435 | 1409 | // There were no additional uses, post visit node now |
duke@435 | 1410 | _stk.pop(); // Remove node from stack |
duke@435 | 1411 | assert(rpo_idx >= 0, ""); |
duke@435 | 1412 | _block.at_put_grow(rpo_idx, n); |
duke@435 | 1413 | rpo_idx--; |
duke@435 | 1414 | post_visited_set(n); |
duke@435 | 1415 | assert(rpo_idx >= 0 || _stk.is_empty(), ""); |
duke@435 | 1416 | } |
duke@435 | 1417 | } else { |
duke@435 | 1418 | _stk.pop(); // Remove post-visited node from stack |
duke@435 | 1419 | } |
duke@435 | 1420 | } |
duke@435 | 1421 | |
duke@435 | 1422 | // Create real map of block indices for nodes |
duke@435 | 1423 | for (int j = 0; j < _block.length(); j++) { |
duke@435 | 1424 | Node* n = _block.at(j); |
duke@435 | 1425 | set_bb_idx(n, j); |
duke@435 | 1426 | } |
duke@435 | 1427 | |
duke@435 | 1428 | initialize_bb(); // Ensure extra info is allocated. |
duke@435 | 1429 | |
duke@435 | 1430 | #ifndef PRODUCT |
duke@435 | 1431 | if (TraceSuperWord) { |
duke@435 | 1432 | print_bb(); |
duke@435 | 1433 | tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE"); |
duke@435 | 1434 | for (int m = 0; m < _data_entry.length(); m++) { |
duke@435 | 1435 | tty->print("%3d ", m); |
duke@435 | 1436 | _data_entry.at(m)->dump(); |
duke@435 | 1437 | } |
duke@435 | 1438 | tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE"); |
duke@435 | 1439 | for (int m = 0; m < _mem_slice_head.length(); m++) { |
duke@435 | 1440 | tty->print("%3d ", m); _mem_slice_head.at(m)->dump(); |
duke@435 | 1441 | tty->print(" "); _mem_slice_tail.at(m)->dump(); |
duke@435 | 1442 | } |
duke@435 | 1443 | } |
duke@435 | 1444 | #endif |
duke@435 | 1445 | assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found"); |
duke@435 | 1446 | } |
duke@435 | 1447 | |
duke@435 | 1448 | //------------------------------initialize_bb--------------------------- |
duke@435 | 1449 | // Initialize per node info |
duke@435 | 1450 | void SuperWord::initialize_bb() { |
duke@435 | 1451 | Node* last = _block.at(_block.length() - 1); |
duke@435 | 1452 | grow_node_info(bb_idx(last)); |
duke@435 | 1453 | } |
duke@435 | 1454 | |
duke@435 | 1455 | //------------------------------bb_insert_after--------------------------- |
duke@435 | 1456 | // Insert n into block after pos |
duke@435 | 1457 | void SuperWord::bb_insert_after(Node* n, int pos) { |
duke@435 | 1458 | int n_pos = pos + 1; |
duke@435 | 1459 | // Make room |
duke@435 | 1460 | for (int i = _block.length() - 1; i >= n_pos; i--) { |
duke@435 | 1461 | _block.at_put_grow(i+1, _block.at(i)); |
duke@435 | 1462 | } |
duke@435 | 1463 | for (int j = _node_info.length() - 1; j >= n_pos; j--) { |
duke@435 | 1464 | _node_info.at_put_grow(j+1, _node_info.at(j)); |
duke@435 | 1465 | } |
duke@435 | 1466 | // Set value |
duke@435 | 1467 | _block.at_put_grow(n_pos, n); |
duke@435 | 1468 | _node_info.at_put_grow(n_pos, SWNodeInfo::initial); |
duke@435 | 1469 | // Adjust map from node->_idx to _block index |
duke@435 | 1470 | for (int i = n_pos; i < _block.length(); i++) { |
duke@435 | 1471 | set_bb_idx(_block.at(i), i); |
duke@435 | 1472 | } |
duke@435 | 1473 | } |
duke@435 | 1474 | |
duke@435 | 1475 | //------------------------------compute_max_depth--------------------------- |
duke@435 | 1476 | // Compute max depth for expressions from beginning of block |
duke@435 | 1477 | // Use to prune search paths during test for independence. |
duke@435 | 1478 | void SuperWord::compute_max_depth() { |
duke@435 | 1479 | int ct = 0; |
duke@435 | 1480 | bool again; |
duke@435 | 1481 | do { |
duke@435 | 1482 | again = false; |
duke@435 | 1483 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1484 | Node* n = _block.at(i); |
duke@435 | 1485 | if (!n->is_Phi()) { |
duke@435 | 1486 | int d_orig = depth(n); |
duke@435 | 1487 | int d_in = 0; |
duke@435 | 1488 | for (DepPreds preds(n, _dg); !preds.done(); preds.next()) { |
duke@435 | 1489 | Node* pred = preds.current(); |
duke@435 | 1490 | if (in_bb(pred)) { |
duke@435 | 1491 | d_in = MAX2(d_in, depth(pred)); |
duke@435 | 1492 | } |
duke@435 | 1493 | } |
duke@435 | 1494 | if (d_in + 1 != d_orig) { |
duke@435 | 1495 | set_depth(n, d_in + 1); |
duke@435 | 1496 | again = true; |
duke@435 | 1497 | } |
duke@435 | 1498 | } |
duke@435 | 1499 | } |
duke@435 | 1500 | ct++; |
duke@435 | 1501 | } while (again); |
duke@435 | 1502 | #ifndef PRODUCT |
duke@435 | 1503 | if (TraceSuperWord && Verbose) |
duke@435 | 1504 | tty->print_cr("compute_max_depth iterated: %d times", ct); |
duke@435 | 1505 | #endif |
duke@435 | 1506 | } |
duke@435 | 1507 | |
duke@435 | 1508 | //-------------------------compute_vector_element_type----------------------- |
duke@435 | 1509 | // Compute necessary vector element type for expressions |
duke@435 | 1510 | // This propagates backwards a narrower integer type when the |
duke@435 | 1511 | // upper bits of the value are not needed. |
duke@435 | 1512 | // Example: char a,b,c; a = b + c; |
duke@435 | 1513 | // Normally the type of the add is integer, but for packed character |
duke@435 | 1514 | // operations the type of the add needs to be char. |
duke@435 | 1515 | void SuperWord::compute_vector_element_type() { |
duke@435 | 1516 | #ifndef PRODUCT |
duke@435 | 1517 | if (TraceSuperWord && Verbose) |
duke@435 | 1518 | tty->print_cr("\ncompute_velt_type:"); |
duke@435 | 1519 | #endif |
duke@435 | 1520 | |
duke@435 | 1521 | // Initial type |
duke@435 | 1522 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1523 | Node* n = _block.at(i); |
duke@435 | 1524 | const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type()) |
duke@435 | 1525 | : _igvn.type(n); |
duke@435 | 1526 | const Type* vt = container_type(t); |
duke@435 | 1527 | set_velt_type(n, vt); |
duke@435 | 1528 | } |
duke@435 | 1529 | |
duke@435 | 1530 | // Propagate narrowed type backwards through operations |
duke@435 | 1531 | // that don't depend on higher order bits |
duke@435 | 1532 | for (int i = _block.length() - 1; i >= 0; i--) { |
duke@435 | 1533 | Node* n = _block.at(i); |
duke@435 | 1534 | // Only integer types need be examined |
duke@435 | 1535 | if (n->bottom_type()->isa_int()) { |
duke@435 | 1536 | uint start, end; |
duke@435 | 1537 | vector_opd_range(n, &start, &end); |
duke@435 | 1538 | const Type* vt = velt_type(n); |
duke@435 | 1539 | |
duke@435 | 1540 | for (uint j = start; j < end; j++) { |
duke@435 | 1541 | Node* in = n->in(j); |
duke@435 | 1542 | // Don't propagate through a type conversion |
duke@435 | 1543 | if (n->bottom_type() != in->bottom_type()) |
duke@435 | 1544 | continue; |
duke@435 | 1545 | switch(in->Opcode()) { |
duke@435 | 1546 | case Op_AddI: case Op_AddL: |
duke@435 | 1547 | case Op_SubI: case Op_SubL: |
duke@435 | 1548 | case Op_MulI: case Op_MulL: |
duke@435 | 1549 | case Op_AndI: case Op_AndL: |
duke@435 | 1550 | case Op_OrI: case Op_OrL: |
duke@435 | 1551 | case Op_XorI: case Op_XorL: |
duke@435 | 1552 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1553 | case Op_CMoveI: case Op_CMoveL: |
duke@435 | 1554 | if (in_bb(in)) { |
duke@435 | 1555 | bool same_type = true; |
duke@435 | 1556 | for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) { |
duke@435 | 1557 | Node *use = in->fast_out(k); |
duke@435 | 1558 | if (!in_bb(use) || velt_type(use) != vt) { |
duke@435 | 1559 | same_type = false; |
duke@435 | 1560 | break; |
duke@435 | 1561 | } |
duke@435 | 1562 | } |
duke@435 | 1563 | if (same_type) { |
duke@435 | 1564 | set_velt_type(in, vt); |
duke@435 | 1565 | } |
duke@435 | 1566 | } |
duke@435 | 1567 | } |
duke@435 | 1568 | } |
duke@435 | 1569 | } |
duke@435 | 1570 | } |
duke@435 | 1571 | #ifndef PRODUCT |
duke@435 | 1572 | if (TraceSuperWord && Verbose) { |
duke@435 | 1573 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1574 | Node* n = _block.at(i); |
duke@435 | 1575 | velt_type(n)->dump(); |
duke@435 | 1576 | tty->print("\t"); |
duke@435 | 1577 | n->dump(); |
duke@435 | 1578 | } |
duke@435 | 1579 | } |
duke@435 | 1580 | #endif |
duke@435 | 1581 | } |
duke@435 | 1582 | |
duke@435 | 1583 | //------------------------------memory_alignment--------------------------- |
duke@435 | 1584 | // Alignment within a vector memory reference |
duke@435 | 1585 | int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
duke@435 | 1586 | SWPointer p(s, this); |
duke@435 | 1587 | if (!p.valid()) { |
duke@435 | 1588 | return bottom_align; |
duke@435 | 1589 | } |
duke@435 | 1590 | int offset = p.offset_in_bytes(); |
duke@435 | 1591 | offset += iv_adjust_in_bytes; |
duke@435 | 1592 | int off_rem = offset % vector_width_in_bytes(); |
duke@435 | 1593 | int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes(); |
duke@435 | 1594 | return off_mod; |
duke@435 | 1595 | } |
duke@435 | 1596 | |
duke@435 | 1597 | //---------------------------container_type--------------------------- |
duke@435 | 1598 | // Smallest type containing range of values |
duke@435 | 1599 | const Type* SuperWord::container_type(const Type* t) { |
kvn@656 | 1600 | const Type* tp = t->make_ptr(); |
kvn@656 | 1601 | if (tp && tp->isa_aryptr()) { |
kvn@656 | 1602 | t = tp->is_aryptr()->elem(); |
duke@435 | 1603 | } |
duke@435 | 1604 | if (t->basic_type() == T_INT) { |
duke@435 | 1605 | if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL; |
duke@435 | 1606 | if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE; |
duke@435 | 1607 | if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR; |
duke@435 | 1608 | if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT; |
duke@435 | 1609 | return TypeInt::INT; |
duke@435 | 1610 | } |
duke@435 | 1611 | return t; |
duke@435 | 1612 | } |
duke@435 | 1613 | |
duke@435 | 1614 | //-------------------------vector_opd_range----------------------- |
duke@435 | 1615 | // (Start, end] half-open range defining which operands are vector |
duke@435 | 1616 | void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) { |
duke@435 | 1617 | switch (n->Opcode()) { |
twisti@993 | 1618 | case Op_LoadB: case Op_LoadUS: |
duke@435 | 1619 | case Op_LoadI: case Op_LoadL: |
duke@435 | 1620 | case Op_LoadF: case Op_LoadD: |
duke@435 | 1621 | case Op_LoadP: |
duke@435 | 1622 | *start = 0; |
duke@435 | 1623 | *end = 0; |
duke@435 | 1624 | return; |
duke@435 | 1625 | case Op_StoreB: case Op_StoreC: |
duke@435 | 1626 | case Op_StoreI: case Op_StoreL: |
duke@435 | 1627 | case Op_StoreF: case Op_StoreD: |
duke@435 | 1628 | case Op_StoreP: |
duke@435 | 1629 | *start = MemNode::ValueIn; |
duke@435 | 1630 | *end = *start + 1; |
duke@435 | 1631 | return; |
duke@435 | 1632 | case Op_LShiftI: case Op_LShiftL: |
duke@435 | 1633 | *start = 1; |
duke@435 | 1634 | *end = 2; |
duke@435 | 1635 | return; |
duke@435 | 1636 | case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD: |
duke@435 | 1637 | *start = 2; |
duke@435 | 1638 | *end = n->req(); |
duke@435 | 1639 | return; |
duke@435 | 1640 | } |
duke@435 | 1641 | *start = 1; |
duke@435 | 1642 | *end = n->req(); // default is all operands |
duke@435 | 1643 | } |
duke@435 | 1644 | |
duke@435 | 1645 | //------------------------------in_packset--------------------------- |
duke@435 | 1646 | // Are s1 and s2 in a pack pair and ordered as s1,s2? |
duke@435 | 1647 | bool SuperWord::in_packset(Node* s1, Node* s2) { |
duke@435 | 1648 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1649 | Node_List* p = _packset.at(i); |
duke@435 | 1650 | assert(p->size() == 2, "must be"); |
duke@435 | 1651 | if (p->at(0) == s1 && p->at(p->size()-1) == s2) { |
duke@435 | 1652 | return true; |
duke@435 | 1653 | } |
duke@435 | 1654 | } |
duke@435 | 1655 | return false; |
duke@435 | 1656 | } |
duke@435 | 1657 | |
duke@435 | 1658 | //------------------------------in_pack--------------------------- |
duke@435 | 1659 | // Is s in pack p? |
duke@435 | 1660 | Node_List* SuperWord::in_pack(Node* s, Node_List* p) { |
duke@435 | 1661 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1662 | if (p->at(i) == s) { |
duke@435 | 1663 | return p; |
duke@435 | 1664 | } |
duke@435 | 1665 | } |
duke@435 | 1666 | return NULL; |
duke@435 | 1667 | } |
duke@435 | 1668 | |
duke@435 | 1669 | //------------------------------remove_pack_at--------------------------- |
duke@435 | 1670 | // Remove the pack at position pos in the packset |
duke@435 | 1671 | void SuperWord::remove_pack_at(int pos) { |
duke@435 | 1672 | Node_List* p = _packset.at(pos); |
duke@435 | 1673 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1674 | Node* s = p->at(i); |
duke@435 | 1675 | set_my_pack(s, NULL); |
duke@435 | 1676 | } |
duke@435 | 1677 | _packset.remove_at(pos); |
duke@435 | 1678 | } |
duke@435 | 1679 | |
duke@435 | 1680 | //------------------------------executed_first--------------------------- |
duke@435 | 1681 | // Return the node executed first in pack p. Uses the RPO block list |
duke@435 | 1682 | // to determine order. |
duke@435 | 1683 | Node* SuperWord::executed_first(Node_List* p) { |
duke@435 | 1684 | Node* n = p->at(0); |
duke@435 | 1685 | int n_rpo = bb_idx(n); |
duke@435 | 1686 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1687 | Node* s = p->at(i); |
duke@435 | 1688 | int s_rpo = bb_idx(s); |
duke@435 | 1689 | if (s_rpo < n_rpo) { |
duke@435 | 1690 | n = s; |
duke@435 | 1691 | n_rpo = s_rpo; |
duke@435 | 1692 | } |
duke@435 | 1693 | } |
duke@435 | 1694 | return n; |
duke@435 | 1695 | } |
duke@435 | 1696 | |
duke@435 | 1697 | //------------------------------executed_last--------------------------- |
duke@435 | 1698 | // Return the node executed last in pack p. |
duke@435 | 1699 | Node* SuperWord::executed_last(Node_List* p) { |
duke@435 | 1700 | Node* n = p->at(0); |
duke@435 | 1701 | int n_rpo = bb_idx(n); |
duke@435 | 1702 | for (uint i = 1; i < p->size(); i++) { |
duke@435 | 1703 | Node* s = p->at(i); |
duke@435 | 1704 | int s_rpo = bb_idx(s); |
duke@435 | 1705 | if (s_rpo > n_rpo) { |
duke@435 | 1706 | n = s; |
duke@435 | 1707 | n_rpo = s_rpo; |
duke@435 | 1708 | } |
duke@435 | 1709 | } |
duke@435 | 1710 | return n; |
duke@435 | 1711 | } |
duke@435 | 1712 | |
duke@435 | 1713 | //----------------------------align_initial_loop_index--------------------------- |
duke@435 | 1714 | // Adjust pre-loop limit so that in main loop, a load/store reference |
duke@435 | 1715 | // to align_to_ref will be a position zero in the vector. |
duke@435 | 1716 | // (iv + k) mod vector_align == 0 |
duke@435 | 1717 | void SuperWord::align_initial_loop_index(MemNode* align_to_ref) { |
duke@435 | 1718 | CountedLoopNode *main_head = lp()->as_CountedLoop(); |
duke@435 | 1719 | assert(main_head->is_main_loop(), ""); |
duke@435 | 1720 | CountedLoopEndNode* pre_end = get_pre_loop_end(main_head); |
duke@435 | 1721 | assert(pre_end != NULL, ""); |
duke@435 | 1722 | Node *pre_opaq1 = pre_end->limit(); |
duke@435 | 1723 | assert(pre_opaq1->Opcode() == Op_Opaque1, ""); |
duke@435 | 1724 | Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; |
never@507 | 1725 | Node *lim0 = pre_opaq->in(1); |
duke@435 | 1726 | |
duke@435 | 1727 | // Where we put new limit calculations |
duke@435 | 1728 | Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); |
duke@435 | 1729 | |
duke@435 | 1730 | // Ensure the original loop limit is available from the |
duke@435 | 1731 | // pre-loop Opaque1 node. |
duke@435 | 1732 | Node *orig_limit = pre_opaq->original_loop_limit(); |
duke@435 | 1733 | assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, ""); |
duke@435 | 1734 | |
duke@435 | 1735 | SWPointer align_to_ref_p(align_to_ref, this); |
duke@435 | 1736 | |
never@507 | 1737 | // Given: |
never@507 | 1738 | // lim0 == original pre loop limit |
never@507 | 1739 | // V == v_align (power of 2) |
never@507 | 1740 | // invar == extra invariant piece of the address expression |
never@507 | 1741 | // e == k [ +/- invar ] |
duke@435 | 1742 | // |
never@507 | 1743 | // When reassociating expressions involving '%' the basic rules are: |
never@507 | 1744 | // (a - b) % k == 0 => a % k == b % k |
never@507 | 1745 | // and: |
never@507 | 1746 | // (a + b) % k == 0 => a % k == (k - b) % k |
never@507 | 1747 | // |
never@507 | 1748 | // For stride > 0 && scale > 0, |
never@507 | 1749 | // Derive the new pre-loop limit "lim" such that the two constraints: |
never@507 | 1750 | // (1) lim = lim0 + N (where N is some positive integer < V) |
never@507 | 1751 | // (2) (e + lim) % V == 0 |
never@507 | 1752 | // are true. |
never@507 | 1753 | // |
never@507 | 1754 | // Substituting (1) into (2), |
never@507 | 1755 | // (e + lim0 + N) % V == 0 |
never@507 | 1756 | // solve for N: |
never@507 | 1757 | // N = (V - (e + lim0)) % V |
never@507 | 1758 | // substitute back into (1), so that new limit |
never@507 | 1759 | // lim = lim0 + (V - (e + lim0)) % V |
never@507 | 1760 | // |
never@507 | 1761 | // For stride > 0 && scale < 0 |
never@507 | 1762 | // Constraints: |
never@507 | 1763 | // lim = lim0 + N |
never@507 | 1764 | // (e - lim) % V == 0 |
never@507 | 1765 | // Solving for lim: |
never@507 | 1766 | // (e - lim0 - N) % V == 0 |
never@507 | 1767 | // N = (e - lim0) % V |
never@507 | 1768 | // lim = lim0 + (e - lim0) % V |
never@507 | 1769 | // |
never@507 | 1770 | // For stride < 0 && scale > 0 |
never@507 | 1771 | // Constraints: |
never@507 | 1772 | // lim = lim0 - N |
never@507 | 1773 | // (e + lim) % V == 0 |
never@507 | 1774 | // Solving for lim: |
never@507 | 1775 | // (e + lim0 - N) % V == 0 |
never@507 | 1776 | // N = (e + lim0) % V |
never@507 | 1777 | // lim = lim0 - (e + lim0) % V |
never@507 | 1778 | // |
never@507 | 1779 | // For stride < 0 && scale < 0 |
never@507 | 1780 | // Constraints: |
never@507 | 1781 | // lim = lim0 - N |
never@507 | 1782 | // (e - lim) % V == 0 |
never@507 | 1783 | // Solving for lim: |
never@507 | 1784 | // (e - lim0 + N) % V == 0 |
never@507 | 1785 | // N = (V - (e - lim0)) % V |
never@507 | 1786 | // lim = lim0 - (V - (e - lim0)) % V |
duke@435 | 1787 | |
never@507 | 1788 | int stride = iv_stride(); |
never@507 | 1789 | int scale = align_to_ref_p.scale_in_bytes(); |
duke@435 | 1790 | int elt_size = align_to_ref_p.memory_size(); |
duke@435 | 1791 | int v_align = vector_width_in_bytes() / elt_size; |
duke@435 | 1792 | int k = align_to_ref_p.offset_in_bytes() / elt_size; |
duke@435 | 1793 | |
duke@435 | 1794 | Node *kn = _igvn.intcon(k); |
never@507 | 1795 | |
never@507 | 1796 | Node *e = kn; |
duke@435 | 1797 | if (align_to_ref_p.invar() != NULL) { |
never@507 | 1798 | // incorporate any extra invariant piece producing k +/- invar >>> log2(elt) |
duke@435 | 1799 | Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); |
duke@435 | 1800 | Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); |
duke@435 | 1801 | _phase->_igvn.register_new_node_with_optimizer(aref); |
duke@435 | 1802 | _phase->set_ctrl(aref, pre_ctrl); |
never@507 | 1803 | if (align_to_ref_p.negate_invar()) { |
never@507 | 1804 | e = new (_phase->C, 3) SubINode(e, aref); |
duke@435 | 1805 | } else { |
never@507 | 1806 | e = new (_phase->C, 3) AddINode(e, aref); |
duke@435 | 1807 | } |
never@507 | 1808 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1809 | _phase->set_ctrl(e, pre_ctrl); |
duke@435 | 1810 | } |
never@507 | 1811 | |
never@507 | 1812 | // compute e +/- lim0 |
never@507 | 1813 | if (scale < 0) { |
never@507 | 1814 | e = new (_phase->C, 3) SubINode(e, lim0); |
never@507 | 1815 | } else { |
never@507 | 1816 | e = new (_phase->C, 3) AddINode(e, lim0); |
never@507 | 1817 | } |
never@507 | 1818 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1819 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 1820 | |
never@507 | 1821 | if (stride * scale > 0) { |
never@507 | 1822 | // compute V - (e +/- lim0) |
never@507 | 1823 | Node* va = _igvn.intcon(v_align); |
never@507 | 1824 | e = new (_phase->C, 3) SubINode(va, e); |
never@507 | 1825 | _phase->_igvn.register_new_node_with_optimizer(e); |
never@507 | 1826 | _phase->set_ctrl(e, pre_ctrl); |
never@507 | 1827 | } |
never@507 | 1828 | // compute N = (exp) % V |
duke@435 | 1829 | Node* va_msk = _igvn.intcon(v_align - 1); |
never@507 | 1830 | Node* N = new (_phase->C, 3) AndINode(e, va_msk); |
never@507 | 1831 | _phase->_igvn.register_new_node_with_optimizer(N); |
never@507 | 1832 | _phase->set_ctrl(N, pre_ctrl); |
never@507 | 1833 | |
never@507 | 1834 | // substitute back into (1), so that new limit |
never@507 | 1835 | // lim = lim0 + N |
never@507 | 1836 | Node* lim; |
never@507 | 1837 | if (stride < 0) { |
never@507 | 1838 | lim = new (_phase->C, 3) SubINode(lim0, N); |
duke@435 | 1839 | } else { |
never@507 | 1840 | lim = new (_phase->C, 3) AddINode(lim0, N); |
duke@435 | 1841 | } |
never@507 | 1842 | _phase->_igvn.register_new_node_with_optimizer(lim); |
never@507 | 1843 | _phase->set_ctrl(lim, pre_ctrl); |
duke@435 | 1844 | Node* constrained = |
never@507 | 1845 | (stride > 0) ? (Node*) new (_phase->C,3) MinINode(lim, orig_limit) |
never@507 | 1846 | : (Node*) new (_phase->C,3) MaxINode(lim, orig_limit); |
duke@435 | 1847 | _phase->_igvn.register_new_node_with_optimizer(constrained); |
duke@435 | 1848 | _phase->set_ctrl(constrained, pre_ctrl); |
duke@435 | 1849 | _igvn.hash_delete(pre_opaq); |
duke@435 | 1850 | pre_opaq->set_req(1, constrained); |
duke@435 | 1851 | } |
duke@435 | 1852 | |
duke@435 | 1853 | //----------------------------get_pre_loop_end--------------------------- |
duke@435 | 1854 | // Find pre loop end from main loop. Returns null if none. |
duke@435 | 1855 | CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) { |
duke@435 | 1856 | Node *ctrl = cl->in(LoopNode::EntryControl); |
duke@435 | 1857 | if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL; |
duke@435 | 1858 | Node *iffm = ctrl->in(0); |
duke@435 | 1859 | if (!iffm->is_If()) return NULL; |
duke@435 | 1860 | Node *p_f = iffm->in(0); |
duke@435 | 1861 | if (!p_f->is_IfFalse()) return NULL; |
duke@435 | 1862 | if (!p_f->in(0)->is_CountedLoopEnd()) return NULL; |
duke@435 | 1863 | CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd(); |
duke@435 | 1864 | if (!pre_end->loopnode()->is_pre_loop()) return NULL; |
duke@435 | 1865 | return pre_end; |
duke@435 | 1866 | } |
duke@435 | 1867 | |
duke@435 | 1868 | |
duke@435 | 1869 | //------------------------------init--------------------------- |
duke@435 | 1870 | void SuperWord::init() { |
duke@435 | 1871 | _dg.init(); |
duke@435 | 1872 | _packset.clear(); |
duke@435 | 1873 | _disjoint_ptrs.clear(); |
duke@435 | 1874 | _block.clear(); |
duke@435 | 1875 | _data_entry.clear(); |
duke@435 | 1876 | _mem_slice_head.clear(); |
duke@435 | 1877 | _mem_slice_tail.clear(); |
duke@435 | 1878 | _node_info.clear(); |
duke@435 | 1879 | _align_to_ref = NULL; |
duke@435 | 1880 | _lpt = NULL; |
duke@435 | 1881 | _lp = NULL; |
duke@435 | 1882 | _bb = NULL; |
duke@435 | 1883 | _iv = NULL; |
duke@435 | 1884 | } |
duke@435 | 1885 | |
duke@435 | 1886 | //------------------------------print_packset--------------------------- |
duke@435 | 1887 | void SuperWord::print_packset() { |
duke@435 | 1888 | #ifndef PRODUCT |
duke@435 | 1889 | tty->print_cr("packset"); |
duke@435 | 1890 | for (int i = 0; i < _packset.length(); i++) { |
duke@435 | 1891 | tty->print_cr("Pack: %d", i); |
duke@435 | 1892 | Node_List* p = _packset.at(i); |
duke@435 | 1893 | print_pack(p); |
duke@435 | 1894 | } |
duke@435 | 1895 | #endif |
duke@435 | 1896 | } |
duke@435 | 1897 | |
duke@435 | 1898 | //------------------------------print_pack--------------------------- |
duke@435 | 1899 | void SuperWord::print_pack(Node_List* p) { |
duke@435 | 1900 | for (uint i = 0; i < p->size(); i++) { |
duke@435 | 1901 | print_stmt(p->at(i)); |
duke@435 | 1902 | } |
duke@435 | 1903 | } |
duke@435 | 1904 | |
duke@435 | 1905 | //------------------------------print_bb--------------------------- |
duke@435 | 1906 | void SuperWord::print_bb() { |
duke@435 | 1907 | #ifndef PRODUCT |
duke@435 | 1908 | tty->print_cr("\nBlock"); |
duke@435 | 1909 | for (int i = 0; i < _block.length(); i++) { |
duke@435 | 1910 | Node* n = _block.at(i); |
duke@435 | 1911 | tty->print("%d ", i); |
duke@435 | 1912 | if (n) { |
duke@435 | 1913 | n->dump(); |
duke@435 | 1914 | } |
duke@435 | 1915 | } |
duke@435 | 1916 | #endif |
duke@435 | 1917 | } |
duke@435 | 1918 | |
duke@435 | 1919 | //------------------------------print_stmt--------------------------- |
duke@435 | 1920 | void SuperWord::print_stmt(Node* s) { |
duke@435 | 1921 | #ifndef PRODUCT |
duke@435 | 1922 | tty->print(" align: %d \t", alignment(s)); |
duke@435 | 1923 | s->dump(); |
duke@435 | 1924 | #endif |
duke@435 | 1925 | } |
duke@435 | 1926 | |
duke@435 | 1927 | //------------------------------blank--------------------------- |
duke@435 | 1928 | char* SuperWord::blank(uint depth) { |
duke@435 | 1929 | static char blanks[101]; |
duke@435 | 1930 | assert(depth < 101, "too deep"); |
duke@435 | 1931 | for (uint i = 0; i < depth; i++) blanks[i] = ' '; |
duke@435 | 1932 | blanks[depth] = '\0'; |
duke@435 | 1933 | return blanks; |
duke@435 | 1934 | } |
duke@435 | 1935 | |
duke@435 | 1936 | |
duke@435 | 1937 | //==============================SWPointer=========================== |
duke@435 | 1938 | |
duke@435 | 1939 | //----------------------------SWPointer------------------------ |
duke@435 | 1940 | SWPointer::SWPointer(MemNode* mem, SuperWord* slp) : |
duke@435 | 1941 | _mem(mem), _slp(slp), _base(NULL), _adr(NULL), |
duke@435 | 1942 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) { |
duke@435 | 1943 | |
duke@435 | 1944 | Node* adr = mem->in(MemNode::Address); |
duke@435 | 1945 | if (!adr->is_AddP()) { |
duke@435 | 1946 | assert(!valid(), "too complex"); |
duke@435 | 1947 | return; |
duke@435 | 1948 | } |
duke@435 | 1949 | // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant) |
duke@435 | 1950 | Node* base = adr->in(AddPNode::Base); |
cfang@1493 | 1951 | //unsafe reference could not be aligned appropriately without runtime checking |
cfang@1493 | 1952 | if (base == NULL || base->bottom_type() == Type::TOP) { |
cfang@1493 | 1953 | assert(!valid(), "unsafe access"); |
cfang@1493 | 1954 | return; |
cfang@1493 | 1955 | } |
duke@435 | 1956 | for (int i = 0; i < 3; i++) { |
duke@435 | 1957 | if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) { |
duke@435 | 1958 | assert(!valid(), "too complex"); |
duke@435 | 1959 | return; |
duke@435 | 1960 | } |
duke@435 | 1961 | adr = adr->in(AddPNode::Address); |
duke@435 | 1962 | if (base == adr || !adr->is_AddP()) { |
duke@435 | 1963 | break; // stop looking at addp's |
duke@435 | 1964 | } |
duke@435 | 1965 | } |
duke@435 | 1966 | _base = base; |
duke@435 | 1967 | _adr = adr; |
duke@435 | 1968 | assert(valid(), "Usable"); |
duke@435 | 1969 | } |
duke@435 | 1970 | |
duke@435 | 1971 | // Following is used to create a temporary object during |
duke@435 | 1972 | // the pattern match of an address expression. |
duke@435 | 1973 | SWPointer::SWPointer(SWPointer* p) : |
duke@435 | 1974 | _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL), |
duke@435 | 1975 | _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {} |
duke@435 | 1976 | |
duke@435 | 1977 | //------------------------scaled_iv_plus_offset-------------------- |
duke@435 | 1978 | // Match: k*iv + offset |
duke@435 | 1979 | // where: k is a constant that maybe zero, and |
duke@435 | 1980 | // offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional |
duke@435 | 1981 | bool SWPointer::scaled_iv_plus_offset(Node* n) { |
duke@435 | 1982 | if (scaled_iv(n)) { |
duke@435 | 1983 | return true; |
duke@435 | 1984 | } |
duke@435 | 1985 | if (offset_plus_k(n)) { |
duke@435 | 1986 | return true; |
duke@435 | 1987 | } |
duke@435 | 1988 | int opc = n->Opcode(); |
duke@435 | 1989 | if (opc == Op_AddI) { |
duke@435 | 1990 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) { |
duke@435 | 1991 | return true; |
duke@435 | 1992 | } |
duke@435 | 1993 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 1994 | return true; |
duke@435 | 1995 | } |
duke@435 | 1996 | } else if (opc == Op_SubI) { |
duke@435 | 1997 | if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) { |
duke@435 | 1998 | return true; |
duke@435 | 1999 | } |
duke@435 | 2000 | if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) { |
duke@435 | 2001 | _scale *= -1; |
duke@435 | 2002 | return true; |
duke@435 | 2003 | } |
duke@435 | 2004 | } |
duke@435 | 2005 | return false; |
duke@435 | 2006 | } |
duke@435 | 2007 | |
duke@435 | 2008 | //----------------------------scaled_iv------------------------ |
duke@435 | 2009 | // Match: k*iv where k is a constant that's not zero |
duke@435 | 2010 | bool SWPointer::scaled_iv(Node* n) { |
duke@435 | 2011 | if (_scale != 0) { |
duke@435 | 2012 | return false; // already found a scale |
duke@435 | 2013 | } |
duke@435 | 2014 | if (n == iv()) { |
duke@435 | 2015 | _scale = 1; |
duke@435 | 2016 | return true; |
duke@435 | 2017 | } |
duke@435 | 2018 | int opc = n->Opcode(); |
duke@435 | 2019 | if (opc == Op_MulI) { |
duke@435 | 2020 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 2021 | _scale = n->in(2)->get_int(); |
duke@435 | 2022 | return true; |
duke@435 | 2023 | } else if (n->in(2) == iv() && n->in(1)->is_Con()) { |
duke@435 | 2024 | _scale = n->in(1)->get_int(); |
duke@435 | 2025 | return true; |
duke@435 | 2026 | } |
duke@435 | 2027 | } else if (opc == Op_LShiftI) { |
duke@435 | 2028 | if (n->in(1) == iv() && n->in(2)->is_Con()) { |
duke@435 | 2029 | _scale = 1 << n->in(2)->get_int(); |
duke@435 | 2030 | return true; |
duke@435 | 2031 | } |
duke@435 | 2032 | } else if (opc == Op_ConvI2L) { |
duke@435 | 2033 | if (scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2034 | return true; |
duke@435 | 2035 | } |
duke@435 | 2036 | } else if (opc == Op_LShiftL) { |
duke@435 | 2037 | if (!has_iv() && _invar == NULL) { |
duke@435 | 2038 | // Need to preserve the current _offset value, so |
duke@435 | 2039 | // create a temporary object for this expression subtree. |
duke@435 | 2040 | // Hacky, so should re-engineer the address pattern match. |
duke@435 | 2041 | SWPointer tmp(this); |
duke@435 | 2042 | if (tmp.scaled_iv_plus_offset(n->in(1))) { |
duke@435 | 2043 | if (tmp._invar == NULL) { |
duke@435 | 2044 | int mult = 1 << n->in(2)->get_int(); |
duke@435 | 2045 | _scale = tmp._scale * mult; |
duke@435 | 2046 | _offset += tmp._offset * mult; |
duke@435 | 2047 | return true; |
duke@435 | 2048 | } |
duke@435 | 2049 | } |
duke@435 | 2050 | } |
duke@435 | 2051 | } |
duke@435 | 2052 | return false; |
duke@435 | 2053 | } |
duke@435 | 2054 | |
duke@435 | 2055 | //----------------------------offset_plus_k------------------------ |
duke@435 | 2056 | // Match: offset is (k [+/- invariant]) |
duke@435 | 2057 | // where k maybe zero and invariant is optional, but not both. |
duke@435 | 2058 | bool SWPointer::offset_plus_k(Node* n, bool negate) { |
duke@435 | 2059 | int opc = n->Opcode(); |
duke@435 | 2060 | if (opc == Op_ConI) { |
duke@435 | 2061 | _offset += negate ? -(n->get_int()) : n->get_int(); |
duke@435 | 2062 | return true; |
duke@435 | 2063 | } else if (opc == Op_ConL) { |
duke@435 | 2064 | // Okay if value fits into an int |
duke@435 | 2065 | const TypeLong* t = n->find_long_type(); |
duke@435 | 2066 | if (t->higher_equal(TypeLong::INT)) { |
duke@435 | 2067 | jlong loff = n->get_long(); |
duke@435 | 2068 | jint off = (jint)loff; |
duke@435 | 2069 | _offset += negate ? -off : loff; |
duke@435 | 2070 | return true; |
duke@435 | 2071 | } |
duke@435 | 2072 | return false; |
duke@435 | 2073 | } |
duke@435 | 2074 | if (_invar != NULL) return false; // already have an invariant |
duke@435 | 2075 | if (opc == Op_AddI) { |
duke@435 | 2076 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2077 | _negate_invar = negate; |
duke@435 | 2078 | _invar = n->in(1); |
duke@435 | 2079 | _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2080 | return true; |
duke@435 | 2081 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2082 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2083 | _negate_invar = negate; |
duke@435 | 2084 | _invar = n->in(2); |
duke@435 | 2085 | return true; |
duke@435 | 2086 | } |
duke@435 | 2087 | } |
duke@435 | 2088 | if (opc == Op_SubI) { |
duke@435 | 2089 | if (n->in(2)->is_Con() && invariant(n->in(1))) { |
duke@435 | 2090 | _negate_invar = negate; |
duke@435 | 2091 | _invar = n->in(1); |
duke@435 | 2092 | _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int(); |
duke@435 | 2093 | return true; |
duke@435 | 2094 | } else if (n->in(1)->is_Con() && invariant(n->in(2))) { |
duke@435 | 2095 | _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int(); |
duke@435 | 2096 | _negate_invar = !negate; |
duke@435 | 2097 | _invar = n->in(2); |
duke@435 | 2098 | return true; |
duke@435 | 2099 | } |
duke@435 | 2100 | } |
duke@435 | 2101 | if (invariant(n)) { |
duke@435 | 2102 | _negate_invar = negate; |
duke@435 | 2103 | _invar = n; |
duke@435 | 2104 | return true; |
duke@435 | 2105 | } |
duke@435 | 2106 | return false; |
duke@435 | 2107 | } |
duke@435 | 2108 | |
duke@435 | 2109 | //----------------------------print------------------------ |
duke@435 | 2110 | void SWPointer::print() { |
duke@435 | 2111 | #ifndef PRODUCT |
duke@435 | 2112 | tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n", |
duke@435 | 2113 | _base != NULL ? _base->_idx : 0, |
duke@435 | 2114 | _adr != NULL ? _adr->_idx : 0, |
duke@435 | 2115 | _scale, _offset, |
duke@435 | 2116 | _negate_invar?'-':'+', |
duke@435 | 2117 | _invar != NULL ? _invar->_idx : 0); |
duke@435 | 2118 | #endif |
duke@435 | 2119 | } |
duke@435 | 2120 | |
duke@435 | 2121 | // ========================= OrderedPair ===================== |
duke@435 | 2122 | |
duke@435 | 2123 | const OrderedPair OrderedPair::initial; |
duke@435 | 2124 | |
duke@435 | 2125 | // ========================= SWNodeInfo ===================== |
duke@435 | 2126 | |
duke@435 | 2127 | const SWNodeInfo SWNodeInfo::initial; |
duke@435 | 2128 | |
duke@435 | 2129 | |
duke@435 | 2130 | // ============================ DepGraph =========================== |
duke@435 | 2131 | |
duke@435 | 2132 | //------------------------------make_node--------------------------- |
duke@435 | 2133 | // Make a new dependence graph node for an ideal node. |
duke@435 | 2134 | DepMem* DepGraph::make_node(Node* node) { |
duke@435 | 2135 | DepMem* m = new (_arena) DepMem(node); |
duke@435 | 2136 | if (node != NULL) { |
duke@435 | 2137 | assert(_map.at_grow(node->_idx) == NULL, "one init only"); |
duke@435 | 2138 | _map.at_put_grow(node->_idx, m); |
duke@435 | 2139 | } |
duke@435 | 2140 | return m; |
duke@435 | 2141 | } |
duke@435 | 2142 | |
duke@435 | 2143 | //------------------------------make_edge--------------------------- |
duke@435 | 2144 | // Make a new dependence graph edge from dpred -> dsucc |
duke@435 | 2145 | DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) { |
duke@435 | 2146 | DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head()); |
duke@435 | 2147 | dpred->set_out_head(e); |
duke@435 | 2148 | dsucc->set_in_head(e); |
duke@435 | 2149 | return e; |
duke@435 | 2150 | } |
duke@435 | 2151 | |
duke@435 | 2152 | // ========================== DepMem ======================== |
duke@435 | 2153 | |
duke@435 | 2154 | //------------------------------in_cnt--------------------------- |
duke@435 | 2155 | int DepMem::in_cnt() { |
duke@435 | 2156 | int ct = 0; |
duke@435 | 2157 | for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++; |
duke@435 | 2158 | return ct; |
duke@435 | 2159 | } |
duke@435 | 2160 | |
duke@435 | 2161 | //------------------------------out_cnt--------------------------- |
duke@435 | 2162 | int DepMem::out_cnt() { |
duke@435 | 2163 | int ct = 0; |
duke@435 | 2164 | for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++; |
duke@435 | 2165 | return ct; |
duke@435 | 2166 | } |
duke@435 | 2167 | |
duke@435 | 2168 | //------------------------------print----------------------------- |
duke@435 | 2169 | void DepMem::print() { |
duke@435 | 2170 | #ifndef PRODUCT |
duke@435 | 2171 | tty->print(" DepNode %d (", _node->_idx); |
duke@435 | 2172 | for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) { |
duke@435 | 2173 | Node* pred = p->pred()->node(); |
duke@435 | 2174 | tty->print(" %d", pred != NULL ? pred->_idx : 0); |
duke@435 | 2175 | } |
duke@435 | 2176 | tty->print(") ["); |
duke@435 | 2177 | for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) { |
duke@435 | 2178 | Node* succ = s->succ()->node(); |
duke@435 | 2179 | tty->print(" %d", succ != NULL ? succ->_idx : 0); |
duke@435 | 2180 | } |
duke@435 | 2181 | tty->print_cr(" ]"); |
duke@435 | 2182 | #endif |
duke@435 | 2183 | } |
duke@435 | 2184 | |
duke@435 | 2185 | // =========================== DepEdge ========================= |
duke@435 | 2186 | |
duke@435 | 2187 | //------------------------------DepPreds--------------------------- |
duke@435 | 2188 | void DepEdge::print() { |
duke@435 | 2189 | #ifndef PRODUCT |
duke@435 | 2190 | tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx); |
duke@435 | 2191 | #endif |
duke@435 | 2192 | } |
duke@435 | 2193 | |
duke@435 | 2194 | // =========================== DepPreds ========================= |
duke@435 | 2195 | // Iterator over predecessor edges in the dependence graph. |
duke@435 | 2196 | |
duke@435 | 2197 | //------------------------------DepPreds--------------------------- |
duke@435 | 2198 | DepPreds::DepPreds(Node* n, DepGraph& dg) { |
duke@435 | 2199 | _n = n; |
duke@435 | 2200 | _done = false; |
duke@435 | 2201 | if (_n->is_Store() || _n->is_Load()) { |
duke@435 | 2202 | _next_idx = MemNode::Address; |
duke@435 | 2203 | _end_idx = n->req(); |
duke@435 | 2204 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2205 | } else if (_n->is_Mem()) { |
duke@435 | 2206 | _next_idx = 0; |
duke@435 | 2207 | _end_idx = 0; |
duke@435 | 2208 | _dep_next = dg.dep(_n)->in_head(); |
duke@435 | 2209 | } else { |
duke@435 | 2210 | _next_idx = 1; |
duke@435 | 2211 | _end_idx = _n->req(); |
duke@435 | 2212 | _dep_next = NULL; |
duke@435 | 2213 | } |
duke@435 | 2214 | next(); |
duke@435 | 2215 | } |
duke@435 | 2216 | |
duke@435 | 2217 | //------------------------------next--------------------------- |
duke@435 | 2218 | void DepPreds::next() { |
duke@435 | 2219 | if (_dep_next != NULL) { |
duke@435 | 2220 | _current = _dep_next->pred()->node(); |
duke@435 | 2221 | _dep_next = _dep_next->next_in(); |
duke@435 | 2222 | } else if (_next_idx < _end_idx) { |
duke@435 | 2223 | _current = _n->in(_next_idx++); |
duke@435 | 2224 | } else { |
duke@435 | 2225 | _done = true; |
duke@435 | 2226 | } |
duke@435 | 2227 | } |
duke@435 | 2228 | |
duke@435 | 2229 | // =========================== DepSuccs ========================= |
duke@435 | 2230 | // Iterator over successor edges in the dependence graph. |
duke@435 | 2231 | |
duke@435 | 2232 | //------------------------------DepSuccs--------------------------- |
duke@435 | 2233 | DepSuccs::DepSuccs(Node* n, DepGraph& dg) { |
duke@435 | 2234 | _n = n; |
duke@435 | 2235 | _done = false; |
duke@435 | 2236 | if (_n->is_Load()) { |
duke@435 | 2237 | _next_idx = 0; |
duke@435 | 2238 | _end_idx = _n->outcnt(); |
duke@435 | 2239 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2240 | } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) { |
duke@435 | 2241 | _next_idx = 0; |
duke@435 | 2242 | _end_idx = 0; |
duke@435 | 2243 | _dep_next = dg.dep(_n)->out_head(); |
duke@435 | 2244 | } else { |
duke@435 | 2245 | _next_idx = 0; |
duke@435 | 2246 | _end_idx = _n->outcnt(); |
duke@435 | 2247 | _dep_next = NULL; |
duke@435 | 2248 | } |
duke@435 | 2249 | next(); |
duke@435 | 2250 | } |
duke@435 | 2251 | |
duke@435 | 2252 | //-------------------------------next--------------------------- |
duke@435 | 2253 | void DepSuccs::next() { |
duke@435 | 2254 | if (_dep_next != NULL) { |
duke@435 | 2255 | _current = _dep_next->succ()->node(); |
duke@435 | 2256 | _dep_next = _dep_next->next_out(); |
duke@435 | 2257 | } else if (_next_idx < _end_idx) { |
duke@435 | 2258 | _current = _n->raw_out(_next_idx++); |
duke@435 | 2259 | } else { |
duke@435 | 2260 | _done = true; |
duke@435 | 2261 | } |
duke@435 | 2262 | } |