jdk8-mips64-public/hotspot: src/share/vm/opto/superword.cpp@cfd05ec74089

8024342: PPC64 (part 111): Support for C calling conventions that require 64-bit ints.
Summary: Some platforms, such as ppc and s390x/zArch, require that 32-bit ints be passed as 64-bit values to C functions. This change adds support to adapt the signature and to issue proper casts to c2-compiled stubs. The functions are used in generate_native_wrapper(). Adapt signature used by the compiler as in PhaseIdealLoop::intrinsify_fill().
Reviewed-by: kvn

     1 /*

     2  * Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  */

    24 #include "precompiled.hpp"

    25 #include "compiler/compileLog.hpp"

    26 #include "libadt/vectset.hpp"

    27 #include "memory/allocation.inline.hpp"

    28 #include "opto/addnode.hpp"

    29 #include "opto/callnode.hpp"

    30 #include "opto/divnode.hpp"

    31 #include "opto/matcher.hpp"

    32 #include "opto/memnode.hpp"

    33 #include "opto/mulnode.hpp"

    34 #include "opto/opcodes.hpp"

    35 #include "opto/superword.hpp"

    36 #include "opto/vectornode.hpp"

    38 //

    39 //                  S U P E R W O R D   T R A N S F O R M

    40 //=============================================================================

    42 //------------------------------SuperWord---------------------------

    43 SuperWord::SuperWord(PhaseIdealLoop* phase) :

    44   _phase(phase),

    45   _igvn(phase->_igvn),

    46   _arena(phase->C->comp_arena()),

    47   _packset(arena(), 8,  0, NULL),         // packs for the current block

    48   _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb

    49   _block(arena(), 8,  0, NULL),           // nodes in current block

    50   _data_entry(arena(), 8,  0, NULL),      // nodes with all inputs from outside

    51   _mem_slice_head(arena(), 8,  0, NULL),  // memory slice heads

    52   _mem_slice_tail(arena(), 8,  0, NULL),  // memory slice tails

    53   _node_info(arena(), 8,  0, SWNodeInfo::initial), // info needed per node

    54   _align_to_ref(NULL),                    // memory reference to align vectors to

    55   _disjoint_ptrs(arena(), 8,  0, OrderedPair::initial), // runtime disambiguated pointer pairs

    56   _dg(_arena),                            // dependence graph

    57   _visited(arena()),                      // visited node set

    58   _post_visited(arena()),                 // post visited node set

    59   _n_idx_list(arena(), 8),                // scratch list of (node,index) pairs

    60   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes

    61   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes

    62   _lpt(NULL),                             // loop tree node

    63   _lp(NULL),                              // LoopNode

    64   _bb(NULL),                              // basic block

    65   _iv(NULL)                               // induction var

    66 {}

    68 //------------------------------transform_loop---------------------------

    69 void SuperWord::transform_loop(IdealLoopTree* lpt) {

    70   assert(UseSuperWord, "should be");

    71   // Do vectors exist on this architecture?

    72   if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;

    74   assert(lpt->_head->is_CountedLoop(), "must be");

    75   CountedLoopNode *cl = lpt->_head->as_CountedLoop();

    77   if (!cl->is_valid_counted_loop()) return; // skip malformed counted loop

    79   if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops

    81   // Check for no control flow in body (other than exit)

    82   Node *cl_exit = cl->loopexit();

    83   if (cl_exit->in(0) != lpt->_head) return;

    85   // Make sure the are no extra control users of the loop backedge

    86   if (cl->back_control()->outcnt() != 1) {

    87     return;

    88   }

    90   // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))

    91   CountedLoopEndNode* pre_end = get_pre_loop_end(cl);

    92   if (pre_end == NULL) return;

    93   Node *pre_opaq1 = pre_end->limit();

    94   if (pre_opaq1->Opcode() != Op_Opaque1) return;

    96   init(); // initialize data structures

    98   set_lpt(lpt);

    99   set_lp(cl);

   101   // For now, define one block which is the entire loop body

   102   set_bb(cl);

   104   assert(_packset.length() == 0, "packset must be empty");

   105   SLP_extract();

   106 }

   108 //------------------------------SLP_extract---------------------------

   109 // Extract the superword level parallelism

   110 //

   111 // 1) A reverse post-order of nodes in the block is constructed.  By scanning

   112 //    this list from first to last, all definitions are visited before their uses.

   113 //

   114 // 2) A point-to-point dependence graph is constructed between memory references.

   115 //    This simplifies the upcoming "independence" checker.

   116 //

   117 // 3) The maximum depth in the node graph from the beginning of the block

   118 //    to each node is computed.  This is used to prune the graph search

   119 //    in the independence checker.

   120 //

   121 // 4) For integer types, the necessary bit width is propagated backwards

   122 //    from stores to allow packed operations on byte, char, and short

   123 //    integers.  This reverses the promotion to type "int" that javac

   124 //    did for operations like: char c1,c2,c3;  c1 = c2 + c3.

   125 //

   126 // 5) One of the memory references is picked to be an aligned vector reference.

   127 //    The pre-loop trip count is adjusted to align this reference in the

   128 //    unrolled body.

   129 //

   130 // 6) The initial set of pack pairs is seeded with memory references.

   131 //

   132 // 7) The set of pack pairs is extended by following use->def and def->use links.

   133 //

   134 // 8) The pairs are combined into vector sized packs.

   135 //

   136 // 9) Reorder the memory slices to co-locate members of the memory packs.

   137 //

   138 // 10) Generate ideal vector nodes for the final set of packs and where necessary,

   139 //    inserting scalar promotion, vector creation from multiple scalars, and

   140 //    extraction of scalar values from vectors.

   141 //

   142 void SuperWord::SLP_extract() {

   144   // Ready the block

   146   if (!construct_bb())

   147     return; // Exit if no interesting nodes or complex graph.

   149   dependence_graph();

   151   compute_max_depth();

   153   compute_vector_element_type();

   155   // Attempt vectorization

   157   find_adjacent_refs();

   159   extend_packlist();

   161   combine_packs();

   163   construct_my_pack_map();

   165   filter_packs();

   167   schedule();

   169   output();

   170 }

   172 //------------------------------find_adjacent_refs---------------------------

   173 // Find the adjacent memory references and create pack pairs for them.

   174 // This is the initial set of packs that will then be extended by

   175 // following use->def and def->use links.  The align positions are

   176 // assigned relative to the reference "align_to_ref"

   177 void SuperWord::find_adjacent_refs() {

   178   // Get list of memory operations

   179   Node_List memops;

   180   for (int i = 0; i < _block.length(); i++) {

   181     Node* n = _block.at(i);

   182     if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) &&

   183         is_java_primitive(n->as_Mem()->memory_type())) {

   184       int align = memory_alignment(n->as_Mem(), 0);

   185       if (align != bottom_align) {

   186         memops.push(n);

   187       }

   188     }

   189   }

   191   Node_List align_to_refs;

   192   int best_iv_adjustment = 0;

   193   MemNode* best_align_to_mem_ref = NULL;

   195   while (memops.size() != 0) {

   196     // Find a memory reference to align to.

   197     MemNode* mem_ref = find_align_to_ref(memops);

   198     if (mem_ref == NULL) break;

   199     align_to_refs.push(mem_ref);

   200     int iv_adjustment = get_iv_adjustment(mem_ref);

   202     if (best_align_to_mem_ref == NULL) {

   203       // Set memory reference which is the best from all memory operations

   204       // to be used for alignment. The pre-loop trip count is modified to align

   205       // this reference to a vector-aligned address.

   206       best_align_to_mem_ref = mem_ref;

   207       best_iv_adjustment = iv_adjustment;

   208     }

   210     SWPointer align_to_ref_p(mem_ref, this);

   211     // Set alignment relative to "align_to_ref" for all related memory operations.

   212     for (int i = memops.size() - 1; i >= 0; i--) {

   213       MemNode* s = memops.at(i)->as_Mem();

   214       if (isomorphic(s, mem_ref)) {

   215         SWPointer p2(s, this);

   216         if (p2.comparable(align_to_ref_p)) {

   217           int align = memory_alignment(s, iv_adjustment);

   218           set_alignment(s, align);

   219         }

   220       }

   221     }

   223     // Create initial pack pairs of memory operations for which

   224     // alignment is set and vectors will be aligned.

   225     bool create_pack = true;

   226     if (memory_alignment(mem_ref, best_iv_adjustment) == 0) {

   227       if (!Matcher::misaligned_vectors_ok()) {

   228         int vw = vector_width(mem_ref);

   229         int vw_best = vector_width(best_align_to_mem_ref);

   230         if (vw > vw_best) {

   231           // Do not vectorize a memory access with more elements per vector

   232           // if unaligned memory access is not allowed because number of

   233           // iterations in pre-loop will be not enough to align it.

   234           create_pack = false;

   235         }

   236       }

   237     } else {

   238       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {

   239         // Can't allow vectorization of unaligned memory accesses with the

   240         // same type since it could be overlapped accesses to the same array.

   241         create_pack = false;

   242       } else {

   243         // Allow independent (different type) unaligned memory operations

   244         // if HW supports them.

   245         if (!Matcher::misaligned_vectors_ok()) {

   246           create_pack = false;

   247         } else {

   248           // Check if packs of the same memory type but

   249           // with a different alignment were created before.

   250           for (uint i = 0; i < align_to_refs.size(); i++) {

   251             MemNode* mr = align_to_refs.at(i)->as_Mem();

   252             if (same_velt_type(mr, mem_ref) &&

   253                 memory_alignment(mr, iv_adjustment) != 0)

   254               create_pack = false;

   255           }

   256         }

   257       }

   258     }

   259     if (create_pack) {

   260       for (uint i = 0; i < memops.size(); i++) {

   261         Node* s1 = memops.at(i);

   262         int align = alignment(s1);

   263         if (align == top_align) continue;

   264         for (uint j = 0; j < memops.size(); j++) {

   265           Node* s2 = memops.at(j);

   266           if (alignment(s2) == top_align) continue;

   267           if (s1 != s2 && are_adjacent_refs(s1, s2)) {

   268             if (stmts_can_pack(s1, s2, align)) {

   269               Node_List* pair = new Node_List();

   270               pair->push(s1);

   271               pair->push(s2);

   272               _packset.append(pair);

   273             }

   274           }

   275         }

   276       }

   277     } else { // Don't create unaligned pack

   278       // First, remove remaining memory ops of the same type from the list.

   279       for (int i = memops.size() - 1; i >= 0; i--) {

   280         MemNode* s = memops.at(i)->as_Mem();

   281         if (same_velt_type(s, mem_ref)) {

   282           memops.remove(i);

   283         }

   284       }

   286       // Second, remove already constructed packs of the same type.

   287       for (int i = _packset.length() - 1; i >= 0; i--) {

   288         Node_List* p = _packset.at(i);

   289         MemNode* s = p->at(0)->as_Mem();

   290         if (same_velt_type(s, mem_ref)) {

   291           remove_pack_at(i);

   292         }

   293       }

   295       // If needed find the best memory reference for loop alignment again.

   296       if (same_velt_type(mem_ref, best_align_to_mem_ref)) {

   297         // Put memory ops from remaining packs back on memops list for

   298         // the best alignment search.

   299         uint orig_msize = memops.size();

   300         for (int i = 0; i < _packset.length(); i++) {

   301           Node_List* p = _packset.at(i);

   302           MemNode* s = p->at(0)->as_Mem();

   303           assert(!same_velt_type(s, mem_ref), "sanity");

   304           memops.push(s);

   305         }

   306         MemNode* best_align_to_mem_ref = find_align_to_ref(memops);

   307         if (best_align_to_mem_ref == NULL) break;

   308         best_iv_adjustment = get_iv_adjustment(best_align_to_mem_ref);

   309         // Restore list.

   310         while (memops.size() > orig_msize)

   311           (void)memops.pop();

   312       }

   313     } // unaligned memory accesses

   315     // Remove used mem nodes.

   316     for (int i = memops.size() - 1; i >= 0; i--) {

   317       MemNode* m = memops.at(i)->as_Mem();

   318       if (alignment(m) != top_align) {

   319         memops.remove(i);

   320       }

   321     }

   323   } // while (memops.size() != 0

   324   set_align_to_ref(best_align_to_mem_ref);

   326 #ifndef PRODUCT

   327   if (TraceSuperWord) {

   328     tty->print_cr("\nAfter find_adjacent_refs");

   329     print_packset();

   330   }

   331 #endif

   332 }

   334 //------------------------------find_align_to_ref---------------------------

   335 // Find a memory reference to align the loop induction variable to.

   336 // Looks first at stores then at loads, looking for a memory reference

   337 // with the largest number of references similar to it.

   338 MemNode* SuperWord::find_align_to_ref(Node_List &memops) {

   339   GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);

   341   // Count number of comparable memory ops

   342   for (uint i = 0; i < memops.size(); i++) {

   343     MemNode* s1 = memops.at(i)->as_Mem();

   344     SWPointer p1(s1, this);

   345     // Discard if pre loop can't align this reference

   346     if (!ref_is_alignable(p1)) {

   347       *cmp_ct.adr_at(i) = 0;

   348       continue;

   349     }

   350     for (uint j = i+1; j < memops.size(); j++) {

   351       MemNode* s2 = memops.at(j)->as_Mem();

   352       if (isomorphic(s1, s2)) {

   353         SWPointer p2(s2, this);

   354         if (p1.comparable(p2)) {

   355           (*cmp_ct.adr_at(i))++;

   356           (*cmp_ct.adr_at(j))++;

   357         }

   358       }

   359     }

   360   }

   362   // Find Store (or Load) with the greatest number of "comparable" references,

   363   // biggest vector size, smallest data size and smallest iv offset.

   364   int max_ct        = 0;

   365   int max_vw        = 0;

   366   int max_idx       = -1;

   367   int min_size      = max_jint;

   368   int min_iv_offset = max_jint;

   369   for (uint j = 0; j < memops.size(); j++) {

   370     MemNode* s = memops.at(j)->as_Mem();

   371     if (s->is_Store()) {

   372       int vw = vector_width_in_bytes(s);

   373       assert(vw > 1, "sanity");

   374       SWPointer p(s, this);

   375       if (cmp_ct.at(j) >  max_ct ||

   376           cmp_ct.at(j) == max_ct &&

   377             (vw >  max_vw ||

   378              vw == max_vw &&

   379               (data_size(s) <  min_size ||

   380                data_size(s) == min_size &&

   381                  (p.offset_in_bytes() < min_iv_offset)))) {

   382         max_ct = cmp_ct.at(j);

   383         max_vw = vw;

   384         max_idx = j;

   385         min_size = data_size(s);

   386         min_iv_offset = p.offset_in_bytes();

   387       }

   388     }

   389   }

   390   // If no stores, look at loads

   391   if (max_ct == 0) {

   392     for (uint j = 0; j < memops.size(); j++) {

   393       MemNode* s = memops.at(j)->as_Mem();

   394       if (s->is_Load()) {

   395         int vw = vector_width_in_bytes(s);

   396         assert(vw > 1, "sanity");

   397         SWPointer p(s, this);

   398         if (cmp_ct.at(j) >  max_ct ||

   399             cmp_ct.at(j) == max_ct &&

   400               (vw >  max_vw ||

   401                vw == max_vw &&

   402                 (data_size(s) <  min_size ||

   403                  data_size(s) == min_size &&

   404                    (p.offset_in_bytes() < min_iv_offset)))) {

   405           max_ct = cmp_ct.at(j);

   406           max_vw = vw;

   407           max_idx = j;

   408           min_size = data_size(s);

   409           min_iv_offset = p.offset_in_bytes();

   410         }

   411       }

   412     }

   413   }

   415 #ifdef ASSERT

   416   if (TraceSuperWord && Verbose) {

   417     tty->print_cr("\nVector memops after find_align_to_refs");

   418     for (uint i = 0; i < memops.size(); i++) {

   419       MemNode* s = memops.at(i)->as_Mem();

   420       s->dump();

   421     }

   422   }

   423 #endif

   425   if (max_ct > 0) {

   426 #ifdef ASSERT

   427     if (TraceSuperWord) {

   428       tty->print("\nVector align to node: ");

   429       memops.at(max_idx)->as_Mem()->dump();

   430     }

   431 #endif

   432     return memops.at(max_idx)->as_Mem();

   433   }

   434   return NULL;

   435 }

   437 //------------------------------ref_is_alignable---------------------------

   438 // Can the preloop align the reference to position zero in the vector?

   439 bool SuperWord::ref_is_alignable(SWPointer& p) {

   440   if (!p.has_iv()) {

   441     return true;   // no induction variable

   442   }

   443   CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());

   444   assert(pre_end->stride_is_con(), "pre loop stride is constant");

   445   int preloop_stride = pre_end->stride_con();

   447   int span = preloop_stride * p.scale_in_bytes();

   449   // Stride one accesses are alignable.

   450   if (ABS(span) == p.memory_size())

   451     return true;

   453   // If initial offset from start of object is computable,

   454   // compute alignment within the vector.

   455   int vw = vector_width_in_bytes(p.mem());

   456   assert(vw > 1, "sanity");

   457   if (vw % span == 0) {

   458     Node* init_nd = pre_end->init_trip();

   459     if (init_nd->is_Con() && p.invar() == NULL) {

   460       int init = init_nd->bottom_type()->is_int()->get_con();

   462       int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();

   463       assert(init_offset >= 0, "positive offset from object start");

   465       if (span > 0) {

   466         return (vw - (init_offset % vw)) % span == 0;

   467       } else {

   468         assert(span < 0, "nonzero stride * scale");

   469         return (init_offset % vw) % -span == 0;

   470       }

   471     }

   472   }

   473   return false;

   474 }

   476 //---------------------------get_iv_adjustment---------------------------

   477 // Calculate loop's iv adjustment for this memory ops.

   478 int SuperWord::get_iv_adjustment(MemNode* mem_ref) {

   479   SWPointer align_to_ref_p(mem_ref, this);

   480   int offset = align_to_ref_p.offset_in_bytes();

   481   int scale  = align_to_ref_p.scale_in_bytes();

   482   int vw       = vector_width_in_bytes(mem_ref);

   483   assert(vw > 1, "sanity");

   484   int stride_sign   = (scale * iv_stride()) > 0 ? 1 : -1;

   485   // At least one iteration is executed in pre-loop by default. As result

   486   // several iterations are needed to align memory operations in main-loop even

   487   // if offset is 0.

   488   int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw));

   489   int elt_size = align_to_ref_p.memory_size();

   490   assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0),

   491          err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size));

   492   int iv_adjustment = iv_adjustment_in_bytes/elt_size;

   494 #ifndef PRODUCT

   495   if (TraceSuperWord)

   496     tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d",

   497                   offset, iv_adjustment, elt_size, scale, iv_stride(), vw);

   498 #endif

   499   return iv_adjustment;

   500 }

   502 //---------------------------dependence_graph---------------------------

   503 // Construct dependency graph.

   504 // Add dependence edges to load/store nodes for memory dependence

   505 //    A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)

   506 void SuperWord::dependence_graph() {

   507   // First, assign a dependence node to each memory node

   508   for (int i = 0; i < _block.length(); i++ ) {

   509     Node *n = _block.at(i);

   510     if (n->is_Mem() || n->is_Phi() && n->bottom_type() == Type::MEMORY) {

   511       _dg.make_node(n);

   512     }

   513   }

   515   // For each memory slice, create the dependences

   516   for (int i = 0; i < _mem_slice_head.length(); i++) {

   517     Node* n      = _mem_slice_head.at(i);

   518     Node* n_tail = _mem_slice_tail.at(i);

   520     // Get slice in predecessor order (last is first)

   521     mem_slice_preds(n_tail, n, _nlist);

   523     // Make the slice dependent on the root

   524     DepMem* slice = _dg.dep(n);

   525     _dg.make_edge(_dg.root(), slice);

   527     // Create a sink for the slice

   528     DepMem* slice_sink = _dg.make_node(NULL);

   529     _dg.make_edge(slice_sink, _dg.tail());

   531     // Now visit each pair of memory ops, creating the edges

   532     for (int j = _nlist.length() - 1; j >= 0 ; j--) {

   533       Node* s1 = _nlist.at(j);

   535       // If no dependency yet, use slice

   536       if (_dg.dep(s1)->in_cnt() == 0) {

   537         _dg.make_edge(slice, s1);

   538       }

   539       SWPointer p1(s1->as_Mem(), this);

   540       bool sink_dependent = true;

   541       for (int k = j - 1; k >= 0; k--) {

   542         Node* s2 = _nlist.at(k);

   543         if (s1->is_Load() && s2->is_Load())

   544           continue;

   545         SWPointer p2(s2->as_Mem(), this);

   547         int cmp = p1.cmp(p2);

   548         if (SuperWordRTDepCheck &&

   549             p1.base() != p2.base() && p1.valid() && p2.valid()) {

   550           // Create a runtime check to disambiguate

   551           OrderedPair pp(p1.base(), p2.base());

   552           _disjoint_ptrs.append_if_missing(pp);

   553         } else if (!SWPointer::not_equal(cmp)) {

   554           // Possibly same address

   555           _dg.make_edge(s1, s2);

   556           sink_dependent = false;

   557         }

   558       }

   559       if (sink_dependent) {

   560         _dg.make_edge(s1, slice_sink);

   561       }

   562     }

   563 #ifndef PRODUCT

   564     if (TraceSuperWord) {

   565       tty->print_cr("\nDependence graph for slice: %d", n->_idx);

   566       for (int q = 0; q < _nlist.length(); q++) {

   567         _dg.print(_nlist.at(q));

   568       }

   569       tty->cr();

   570     }

   571 #endif

   572     _nlist.clear();

   573   }

   575 #ifndef PRODUCT

   576   if (TraceSuperWord) {

   577     tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");

   578     for (int r = 0; r < _disjoint_ptrs.length(); r++) {

   579       _disjoint_ptrs.at(r).print();

   580       tty->cr();

   581     }

   582     tty->cr();

   583   }

   584 #endif

   585 }

   587 //---------------------------mem_slice_preds---------------------------

   588 // Return a memory slice (node list) in predecessor order starting at "start"

   589 void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {

   590   assert(preds.length() == 0, "start empty");

   591   Node* n = start;

   592   Node* prev = NULL;

   593   while (true) {

   594     assert(in_bb(n), "must be in block");

   595     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {

   596       Node* out = n->fast_out(i);

   597       if (out->is_Load()) {

   598         if (in_bb(out)) {

   599           preds.push(out);

   600         }

   601       } else {

   602         // FIXME

   603         if (out->is_MergeMem() && !in_bb(out)) {

   604           // Either unrolling is causing a memory edge not to disappear,

   605           // or need to run igvn.optimize() again before SLP

   606         } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {

   607           // Ditto.  Not sure what else to check further.

   608         } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {

   609           // StoreCM has an input edge used as a precedence edge.

   610           // Maybe an issue when oop stores are vectorized.

   611         } else {

   612           assert(out == prev || prev == NULL, "no branches off of store slice");

   613         }

   614       }

   615     }

   616     if (n == stop) break;

   617     preds.push(n);

   618     prev = n;

   619     assert(n->is_Mem(), err_msg_res("unexpected node %s", n->Name()));

   620     n = n->in(MemNode::Memory);

   621   }

   622 }

   624 //------------------------------stmts_can_pack---------------------------

   625 // Can s1 and s2 be in a pack with s1 immediately preceding s2 and

   626 // s1 aligned at "align"

   627 bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {

   629   // Do not use superword for non-primitives

   630   BasicType bt1 = velt_basic_type(s1);

   631   BasicType bt2 = velt_basic_type(s2);

   632   if(!is_java_primitive(bt1) || !is_java_primitive(bt2))

   633     return false;

   634   if (Matcher::max_vector_size(bt1) < 2) {

   635     return false; // No vectors for this type

   636   }

   638   if (isomorphic(s1, s2)) {

   639     if (independent(s1, s2)) {

   640       if (!exists_at(s1, 0) && !exists_at(s2, 1)) {

   641         if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {

   642           int s1_align = alignment(s1);

   643           int s2_align = alignment(s2);

   644           if (s1_align == top_align || s1_align == align) {

   645             if (s2_align == top_align || s2_align == align + data_size(s1)) {

   646               return true;

   647             }

   648           }

   649         }

   650       }

   651     }

   652   }

   653   return false;

   654 }

   656 //------------------------------exists_at---------------------------

   657 // Does s exist in a pack at position pos?

   658 bool SuperWord::exists_at(Node* s, uint pos) {

   659   for (int i = 0; i < _packset.length(); i++) {

   660     Node_List* p = _packset.at(i);

   661     if (p->at(pos) == s) {

   662       return true;

   663     }

   664   }

   665   return false;

   666 }

   668 //------------------------------are_adjacent_refs---------------------------

   669 // Is s1 immediately before s2 in memory?

   670 bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {

   671   if (!s1->is_Mem() || !s2->is_Mem()) return false;

   672   if (!in_bb(s1)    || !in_bb(s2))    return false;

   674   // Do not use superword for non-primitives

   675   if (!is_java_primitive(s1->as_Mem()->memory_type()) ||

   676       !is_java_primitive(s2->as_Mem()->memory_type())) {

   677     return false;

   678   }

   680   // FIXME - co_locate_pack fails on Stores in different mem-slices, so

   681   // only pack memops that are in the same alias set until that's fixed.

   682   if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=

   683       _phase->C->get_alias_index(s2->as_Mem()->adr_type()))

   684     return false;

   685   SWPointer p1(s1->as_Mem(), this);

   686   SWPointer p2(s2->as_Mem(), this);

   687   if (p1.base() != p2.base() || !p1.comparable(p2)) return false;

   688   int diff = p2.offset_in_bytes() - p1.offset_in_bytes();

   689   return diff == data_size(s1);

   690 }

   692 //------------------------------isomorphic---------------------------

   693 // Are s1 and s2 similar?

   694 bool SuperWord::isomorphic(Node* s1, Node* s2) {

   695   if (s1->Opcode() != s2->Opcode()) return false;

   696   if (s1->req() != s2->req()) return false;

   697   if (s1->in(0) != s2->in(0)) return false;

   698   if (!same_velt_type(s1, s2)) return false;

   699   return true;

   700 }

   702 //------------------------------independent---------------------------

   703 // Is there no data path from s1 to s2 or s2 to s1?

   704 bool SuperWord::independent(Node* s1, Node* s2) {

   705   //  assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");

   706   int d1 = depth(s1);

   707   int d2 = depth(s2);

   708   if (d1 == d2) return s1 != s2;

   709   Node* deep    = d1 > d2 ? s1 : s2;

   710   Node* shallow = d1 > d2 ? s2 : s1;

   712   visited_clear();

   714   return independent_path(shallow, deep);

   715 }

   717 //------------------------------independent_path------------------------------

   718 // Helper for independent

   719 bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {

   720   if (dp >= 1000) return false; // stop deep recursion

   721   visited_set(deep);

   722   int shal_depth = depth(shallow);

   723   assert(shal_depth <= depth(deep), "must be");

   724   for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {

   725     Node* pred = preds.current();

   726     if (in_bb(pred) && !visited_test(pred)) {

   727       if (shallow == pred) {

   728         return false;

   729       }

   730       if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {

   731         return false;

   732       }

   733     }

   734   }

   735   return true;

   736 }

   738 //------------------------------set_alignment---------------------------

   739 void SuperWord::set_alignment(Node* s1, Node* s2, int align) {

   740   set_alignment(s1, align);

   741   if (align == top_align || align == bottom_align) {

   742     set_alignment(s2, align);

   743   } else {

   744     set_alignment(s2, align + data_size(s1));

   745   }

   746 }

   748 //------------------------------data_size---------------------------

   749 int SuperWord::data_size(Node* s) {

   750   int bsize = type2aelembytes(velt_basic_type(s));

   751   assert(bsize != 0, "valid size");

   752   return bsize;

   753 }

   755 //------------------------------extend_packlist---------------------------

   756 // Extend packset by following use->def and def->use links from pack members.

   757 void SuperWord::extend_packlist() {

   758   bool changed;

   759   do {

   760     changed = false;

   761     for (int i = 0; i < _packset.length(); i++) {

   762       Node_List* p = _packset.at(i);

   763       changed |= follow_use_defs(p);

   764       changed |= follow_def_uses(p);

   765     }

   766   } while (changed);

   768 #ifndef PRODUCT

   769   if (TraceSuperWord) {

   770     tty->print_cr("\nAfter extend_packlist");

   771     print_packset();

   772   }

   773 #endif

   774 }

   776 //------------------------------follow_use_defs---------------------------

   777 // Extend the packset by visiting operand definitions of nodes in pack p

   778 bool SuperWord::follow_use_defs(Node_List* p) {

   779   assert(p->size() == 2, "just checking");

   780   Node* s1 = p->at(0);

   781   Node* s2 = p->at(1);

   782   assert(s1->req() == s2->req(), "just checking");

   783   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

   785   if (s1->is_Load()) return false;

   787   int align = alignment(s1);

   788   bool changed = false;

   789   int start = s1->is_Store() ? MemNode::ValueIn   : 1;

   790   int end   = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();

   791   for (int j = start; j < end; j++) {

   792     Node* t1 = s1->in(j);

   793     Node* t2 = s2->in(j);

   794     if (!in_bb(t1) || !in_bb(t2))

   795       continue;

   796     if (stmts_can_pack(t1, t2, align)) {

   797       if (est_savings(t1, t2) >= 0) {

   798         Node_List* pair = new Node_List();

   799         pair->push(t1);

   800         pair->push(t2);

   801         _packset.append(pair);

   802         set_alignment(t1, t2, align);

   803         changed = true;

   804       }

   805     }

   806   }

   807   return changed;

   808 }

   810 //------------------------------follow_def_uses---------------------------

   811 // Extend the packset by visiting uses of nodes in pack p

   812 bool SuperWord::follow_def_uses(Node_List* p) {

   813   bool changed = false;

   814   Node* s1 = p->at(0);

   815   Node* s2 = p->at(1);

   816   assert(p->size() == 2, "just checking");

   817   assert(s1->req() == s2->req(), "just checking");

   818   assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");

   820   if (s1->is_Store()) return false;

   822   int align = alignment(s1);

   823   int savings = -1;

   824   Node* u1 = NULL;

   825   Node* u2 = NULL;

   826   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {

   827     Node* t1 = s1->fast_out(i);

   828     if (!in_bb(t1)) continue;

   829     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {

   830       Node* t2 = s2->fast_out(j);

   831       if (!in_bb(t2)) continue;

   832       if (!opnd_positions_match(s1, t1, s2, t2))

   833         continue;

   834       if (stmts_can_pack(t1, t2, align)) {

   835         int my_savings = est_savings(t1, t2);

   836         if (my_savings > savings) {

   837           savings = my_savings;

   838           u1 = t1;

   839           u2 = t2;

   840         }

   841       }

   842     }

   843   }

   844   if (savings >= 0) {

   845     Node_List* pair = new Node_List();

   846     pair->push(u1);

   847     pair->push(u2);

   848     _packset.append(pair);

   849     set_alignment(u1, u2, align);

   850     changed = true;

   851   }

   852   return changed;

   853 }

   855 //---------------------------opnd_positions_match-------------------------

   856 // Is the use of d1 in u1 at the same operand position as d2 in u2?

   857 bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {

   858   uint ct = u1->req();

   859   if (ct != u2->req()) return false;

   860   uint i1 = 0;

   861   uint i2 = 0;

   862   do {

   863     for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;

   864     for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;

   865     if (i1 != i2) {

   866       if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {

   867         // Further analysis relies on operands position matching.

   868         u2->swap_edges(i1, i2);

   869       } else {

   870         return false;

   871       }

   872     }

   873   } while (i1 < ct);

   874   return true;

   875 }

   877 //------------------------------est_savings---------------------------

   878 // Estimate the savings from executing s1 and s2 as a pack

   879 int SuperWord::est_savings(Node* s1, Node* s2) {

   880   int save_in = 2 - 1; // 2 operations per instruction in packed form

   882   // inputs

   883   for (uint i = 1; i < s1->req(); i++) {

   884     Node* x1 = s1->in(i);

   885     Node* x2 = s2->in(i);

   886     if (x1 != x2) {

   887       if (are_adjacent_refs(x1, x2)) {

   888         save_in += adjacent_profit(x1, x2);

   889       } else if (!in_packset(x1, x2)) {

   890         save_in -= pack_cost(2);

   891       } else {

   892         save_in += unpack_cost(2);

   893       }

   894     }

   895   }

   897   // uses of result

   898   uint ct = 0;

   899   int save_use = 0;

   900   for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {

   901     Node* s1_use = s1->fast_out(i);

   902     for (int j = 0; j < _packset.length(); j++) {

   903       Node_List* p = _packset.at(j);

   904       if (p->at(0) == s1_use) {

   905         for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {

   906           Node* s2_use = s2->fast_out(k);

   907           if (p->at(p->size()-1) == s2_use) {

   908             ct++;

   909             if (are_adjacent_refs(s1_use, s2_use)) {

   910               save_use += adjacent_profit(s1_use, s2_use);

   911             }

   912           }

   913         }

   914       }

   915     }

   916   }

   918   if (ct < s1->outcnt()) save_use += unpack_cost(1);

   919   if (ct < s2->outcnt()) save_use += unpack_cost(1);

   921   return MAX2(save_in, save_use);

   922 }

   924 //------------------------------costs---------------------------

   925 int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }

   926 int SuperWord::pack_cost(int ct)   { return ct; }

   927 int SuperWord::unpack_cost(int ct) { return ct; }

   929 //------------------------------combine_packs---------------------------

   930 // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last

   931 void SuperWord::combine_packs() {

   932   bool changed = true;

   933   // Combine packs regardless max vector size.

   934   while (changed) {

   935     changed = false;

   936     for (int i = 0; i < _packset.length(); i++) {

   937       Node_List* p1 = _packset.at(i);

   938       if (p1 == NULL) continue;

   939       for (int j = 0; j < _packset.length(); j++) {

   940         Node_List* p2 = _packset.at(j);

   941         if (p2 == NULL) continue;

   942         if (i == j) continue;

   943         if (p1->at(p1->size()-1) == p2->at(0)) {

   944           for (uint k = 1; k < p2->size(); k++) {

   945             p1->push(p2->at(k));

   946           }

   947           _packset.at_put(j, NULL);

   948           changed = true;

   949         }

   950       }

   951     }

   952   }

   954   // Split packs which have size greater then max vector size.

   955   for (int i = 0; i < _packset.length(); i++) {

   956     Node_List* p1 = _packset.at(i);

   957     if (p1 != NULL) {

   958       BasicType bt = velt_basic_type(p1->at(0));

   959       uint max_vlen = Matcher::max_vector_size(bt); // Max elements in vector

   960       assert(is_power_of_2(max_vlen), "sanity");

   961       uint psize = p1->size();

   962       if (!is_power_of_2(psize)) {

   963         // Skip pack which can't be vector.

   964         // case1: for(...) { a[i] = i; }    elements values are different (i+x)

   965         // case2: for(...) { a[i] = b[i+1]; }  can't align both, load and store

   966         _packset.at_put(i, NULL);

   967         continue;

   968       }

   969       if (psize > max_vlen) {

   970         Node_List* pack = new Node_List();

   971         for (uint j = 0; j < psize; j++) {

   972           pack->push(p1->at(j));

   973           if (pack->size() >= max_vlen) {

   974             assert(is_power_of_2(pack->size()), "sanity");

   975             _packset.append(pack);

   976             pack = new Node_List();

   977           }

   978         }

   979         _packset.at_put(i, NULL);

   980       }

   981     }

   982   }

   984   // Compress list.

   985   for (int i = _packset.length() - 1; i >= 0; i--) {

   986     Node_List* p1 = _packset.at(i);

   987     if (p1 == NULL) {

   988       _packset.remove_at(i);

   989     }

   990   }

   992 #ifndef PRODUCT

   993   if (TraceSuperWord) {

   994     tty->print_cr("\nAfter combine_packs");

   995     print_packset();

   996   }

   997 #endif

   998 }

  1000 //-----------------------------construct_my_pack_map--------------------------

  1001 // Construct the map from nodes to packs.  Only valid after the

  1002 // point where a node is only in one pack (after combine_packs).

  1003 void SuperWord::construct_my_pack_map() {

  1004   Node_List* rslt = NULL;

  1005   for (int i = 0; i < _packset.length(); i++) {

  1006     Node_List* p = _packset.at(i);

  1007     for (uint j = 0; j < p->size(); j++) {

  1008       Node* s = p->at(j);

  1009       assert(my_pack(s) == NULL, "only in one pack");

  1010       set_my_pack(s, p);

  1011     }

  1012   }

  1013 }

  1015 //------------------------------filter_packs---------------------------

  1016 // Remove packs that are not implemented or not profitable.

  1017 void SuperWord::filter_packs() {

  1019   // Remove packs that are not implemented

  1020   for (int i = _packset.length() - 1; i >= 0; i--) {

  1021     Node_List* pk = _packset.at(i);

  1022     bool impl = implemented(pk);

  1023     if (!impl) {

  1024 #ifndef PRODUCT

  1025       if (TraceSuperWord && Verbose) {

  1026         tty->print_cr("Unimplemented");

  1027         pk->at(0)->dump();

  1028       }

  1029 #endif

  1030       remove_pack_at(i);

  1031     }

  1032   }

  1034   // Remove packs that are not profitable

  1035   bool changed;

  1036   do {

  1037     changed = false;

  1038     for (int i = _packset.length() - 1; i >= 0; i--) {

  1039       Node_List* pk = _packset.at(i);

  1040       bool prof = profitable(pk);

  1041       if (!prof) {

  1042 #ifndef PRODUCT

  1043         if (TraceSuperWord && Verbose) {

  1044           tty->print_cr("Unprofitable");

  1045           pk->at(0)->dump();

  1046         }

  1047 #endif

  1048         remove_pack_at(i);

  1049         changed = true;

  1050       }

  1051     }

  1052   } while (changed);

  1054 #ifndef PRODUCT

  1055   if (TraceSuperWord) {

  1056     tty->print_cr("\nAfter filter_packs");

  1057     print_packset();

  1058     tty->cr();

  1059   }

  1060 #endif

  1061 }

  1063 //------------------------------implemented---------------------------

  1064 // Can code be generated for pack p?

  1065 bool SuperWord::implemented(Node_List* p) {

  1066   Node* p0 = p->at(0);

  1067   return VectorNode::implemented(p0->Opcode(), p->size(), velt_basic_type(p0));

  1068 }

  1070 //------------------------------same_inputs--------------------------

  1071 // For pack p, are all idx operands the same?

  1072 static bool same_inputs(Node_List* p, int idx) {

  1073   Node* p0 = p->at(0);

  1074   uint vlen = p->size();

  1075   Node* p0_def = p0->in(idx);

  1076   for (uint i = 1; i < vlen; i++) {

  1077     Node* pi = p->at(i);

  1078     Node* pi_def = pi->in(idx);

  1079     if (p0_def != pi_def)

  1080       return false;

  1081   }

  1082   return true;

  1083 }

  1085 //------------------------------profitable---------------------------

  1086 // For pack p, are all operands and all uses (with in the block) vector?

  1087 bool SuperWord::profitable(Node_List* p) {

  1088   Node* p0 = p->at(0);

  1089   uint start, end;

  1090   VectorNode::vector_operands(p0, &start, &end);

  1092   // Return false if some inputs are not vectors or vectors with different

  1093   // size or alignment.

  1094   // Also, for now, return false if not scalar promotion case when inputs are

  1095   // the same. Later, implement PackNode and allow differing, non-vector inputs

  1096   // (maybe just the ones from outside the block.)

  1097   for (uint i = start; i < end; i++) {

  1098     if (!is_vector_use(p0, i))

  1099       return false;

  1100   }

  1101   if (VectorNode::is_shift(p0)) {

  1102     // For now, return false if shift count is vector or not scalar promotion

  1103     // case (different shift counts) because it is not supported yet.

  1104     Node* cnt = p0->in(2);

  1105     Node_List* cnt_pk = my_pack(cnt);

  1106     if (cnt_pk != NULL)

  1107       return false;

  1108     if (!same_inputs(p, 2))

  1109       return false;

  1110   }

  1111   if (!p0->is_Store()) {

  1112     // For now, return false if not all uses are vector.

  1113     // Later, implement ExtractNode and allow non-vector uses (maybe

  1114     // just the ones outside the block.)

  1115     for (uint i = 0; i < p->size(); i++) {

  1116       Node* def = p->at(i);

  1117       for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {

  1118         Node* use = def->fast_out(j);

  1119         for (uint k = 0; k < use->req(); k++) {

  1120           Node* n = use->in(k);

  1121           if (def == n) {

  1122             if (!is_vector_use(use, k)) {

  1123               return false;

  1124             }

  1125           }

  1126         }

  1127       }

  1128     }

  1129   }

  1130   return true;

  1131 }

  1133 //------------------------------schedule---------------------------

  1134 // Adjust the memory graph for the packed operations

  1135 void SuperWord::schedule() {

  1137   // Co-locate in the memory graph the members of each memory pack

  1138   for (int i = 0; i < _packset.length(); i++) {

  1139     co_locate_pack(_packset.at(i));

  1140   }

  1141 }

  1143 //-------------------------------remove_and_insert-------------------

  1144 // Remove "current" from its current position in the memory graph and insert

  1145 // it after the appropriate insertion point (lip or uip).

  1146 void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,

  1147                                   Node *uip, Unique_Node_List &sched_before) {

  1148   Node* my_mem = current->in(MemNode::Memory);

  1149   bool sched_up = sched_before.member(current);

  1151   // remove current_store from its current position in the memmory graph

  1152   for (DUIterator i = current->outs(); current->has_out(i); i++) {

  1153     Node* use = current->out(i);

  1154     if (use->is_Mem()) {

  1155       assert(use->in(MemNode::Memory) == current, "must be");

  1156       if (use == prev) { // connect prev to my_mem

  1157           _igvn.replace_input_of(use, MemNode::Memory, my_mem);

  1158           --i; //deleted this edge; rescan position

  1159       } else if (sched_before.member(use)) {

  1160         if (!sched_up) { // Will be moved together with current

  1161           _igvn.replace_input_of(use, MemNode::Memory, uip);

  1162           --i; //deleted this edge; rescan position

  1163         }

  1164       } else {

  1165         if (sched_up) { // Will be moved together with current

  1166           _igvn.replace_input_of(use, MemNode::Memory, lip);

  1167           --i; //deleted this edge; rescan position

  1168         }

  1169       }

  1170     }

  1171   }

  1173   Node *insert_pt =  sched_up ?  uip : lip;

  1175   // all uses of insert_pt's memory state should use current's instead

  1176   for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {

  1177     Node* use = insert_pt->out(i);

  1178     if (use->is_Mem()) {

  1179       assert(use->in(MemNode::Memory) == insert_pt, "must be");

  1180       _igvn.replace_input_of(use, MemNode::Memory, current);

  1181       --i; //deleted this edge; rescan position

  1182     } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {

  1183       uint pos; //lip (lower insert point) must be the last one in the memory slice

  1184       for (pos=1; pos < use->req(); pos++) {

  1185         if (use->in(pos) == insert_pt) break;

  1186       }

  1187       _igvn.replace_input_of(use, pos, current);

  1188       --i;

  1189     }

  1190   }

  1192   //connect current to insert_pt

  1193   _igvn.replace_input_of(current, MemNode::Memory, insert_pt);

  1194 }

  1196 //------------------------------co_locate_pack----------------------------------

  1197 // To schedule a store pack, we need to move any sandwiched memory ops either before

  1198 // or after the pack, based upon dependence information:

  1199 // (1) If any store in the pack depends on the sandwiched memory op, the

  1200 //     sandwiched memory op must be scheduled BEFORE the pack;

  1201 // (2) If a sandwiched memory op depends on any store in the pack, the

  1202 //     sandwiched memory op must be scheduled AFTER the pack;

  1203 // (3) If a sandwiched memory op (say, memA) depends on another sandwiched

  1204 //     memory op (say memB), memB must be scheduled before memA. So, if memA is

  1205 //     scheduled before the pack, memB must also be scheduled before the pack;

  1206 // (4) If there is no dependence restriction for a sandwiched memory op, we simply

  1207 //     schedule this store AFTER the pack

  1208 // (5) We know there is no dependence cycle, so there in no other case;

  1209 // (6) Finally, all memory ops in another single pack should be moved in the same direction.

  1210 //

  1211 // To schedule a load pack, we use the memory state of either the first or the last load in

  1212 // the pack, based on the dependence constraint.

  1213 void SuperWord::co_locate_pack(Node_List* pk) {

  1214   if (pk->at(0)->is_Store()) {

  1215     MemNode* first     = executed_first(pk)->as_Mem();

  1216     MemNode* last      = executed_last(pk)->as_Mem();

  1217     Unique_Node_List schedule_before_pack;

  1218     Unique_Node_List memops;

  1220     MemNode* current   = last->in(MemNode::Memory)->as_Mem();

  1221     MemNode* previous  = last;

  1222     while (true) {

  1223       assert(in_bb(current), "stay in block");

  1224       memops.push(previous);

  1225       for (DUIterator i = current->outs(); current->has_out(i); i++) {

  1226         Node* use = current->out(i);

  1227         if (use->is_Mem() && use != previous)

  1228           memops.push(use);

  1229       }

  1230       if (current == first) break;

  1231       previous = current;

  1232       current  = current->in(MemNode::Memory)->as_Mem();

  1233     }

  1235     // determine which memory operations should be scheduled before the pack

  1236     for (uint i = 1; i < memops.size(); i++) {

  1237       Node *s1 = memops.at(i);

  1238       if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {

  1239         for (uint j = 0; j< i; j++) {

  1240           Node *s2 = memops.at(j);

  1241           if (!independent(s1, s2)) {

  1242             if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {

  1243               schedule_before_pack.push(s1); // s1 must be scheduled before

  1244               Node_List* mem_pk = my_pack(s1);

  1245               if (mem_pk != NULL) {

  1246                 for (uint ii = 0; ii < mem_pk->size(); ii++) {

  1247                   Node* s = mem_pk->at(ii);  // follow partner

  1248                   if (memops.member(s) && !schedule_before_pack.member(s))

  1249                     schedule_before_pack.push(s);

  1250                 }

  1251               }

  1252               break;

  1253             }

  1254           }

  1255         }

  1256       }

  1257     }

  1259     Node*    upper_insert_pt = first->in(MemNode::Memory);

  1260     // Following code moves loads connected to upper_insert_pt below aliased stores.

  1261     // Collect such loads here and reconnect them back to upper_insert_pt later.

  1262     memops.clear();

  1263     for (DUIterator i = upper_insert_pt->outs(); upper_insert_pt->has_out(i); i++) {

  1264       Node* use = upper_insert_pt->out(i);

  1265       if (!use->is_Store())

  1266         memops.push(use);

  1267     }

  1269     MemNode* lower_insert_pt = last;

  1270     previous                 = last; //previous store in pk

  1271     current                  = last->in(MemNode::Memory)->as_Mem();

  1273     // start scheduling from "last" to "first"

  1274     while (true) {

  1275       assert(in_bb(current), "stay in block");

  1276       assert(in_pack(previous, pk), "previous stays in pack");

  1277       Node* my_mem = current->in(MemNode::Memory);

  1279       if (in_pack(current, pk)) {

  1280         // Forward users of my memory state (except "previous) to my input memory state

  1281         for (DUIterator i = current->outs(); current->has_out(i); i++) {

  1282           Node* use = current->out(i);

  1283           if (use->is_Mem() && use != previous) {

  1284             assert(use->in(MemNode::Memory) == current, "must be");

  1285             if (schedule_before_pack.member(use)) {

  1286               _igvn.replace_input_of(use, MemNode::Memory, upper_insert_pt);

  1287             } else {

  1288               _igvn.replace_input_of(use, MemNode::Memory, lower_insert_pt);

  1289             }

  1290             --i; // deleted this edge; rescan position

  1291           }

  1292         }

  1293         previous = current;

  1294       } else { // !in_pack(current, pk) ==> a sandwiched store

  1295         remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);

  1296       }

  1298       if (current == first) break;

  1299       current = my_mem->as_Mem();

  1300     } // end while

  1302     // Reconnect loads back to upper_insert_pt.

  1303     for (uint i = 0; i < memops.size(); i++) {

  1304       Node *ld = memops.at(i);

  1305       if (ld->in(MemNode::Memory) != upper_insert_pt) {

  1306         _igvn.replace_input_of(ld, MemNode::Memory, upper_insert_pt);

  1307       }

  1308     }

  1309   } else if (pk->at(0)->is_Load()) { //load

  1310     // all loads in the pack should have the same memory state. By default,

  1311     // we use the memory state of the last load. However, if any load could

  1312     // not be moved down due to the dependence constraint, we use the memory

  1313     // state of the first load.

  1314     Node* last_mem  = executed_last(pk)->in(MemNode::Memory);

  1315     Node* first_mem = executed_first(pk)->in(MemNode::Memory);

  1316     bool schedule_last = true;

  1317     for (uint i = 0; i < pk->size(); i++) {

  1318       Node* ld = pk->at(i);

  1319       for (Node* current = last_mem; current != ld->in(MemNode::Memory);

  1320            current=current->in(MemNode::Memory)) {

  1321         assert(current != first_mem, "corrupted memory graph");

  1322         if(current->is_Mem() && !independent(current, ld)){

  1323           schedule_last = false; // a later store depends on this load

  1324           break;

  1325         }

  1326       }

  1327     }

  1329     Node* mem_input = schedule_last ? last_mem : first_mem;

  1330     _igvn.hash_delete(mem_input);

  1331     // Give each load the same memory state

  1332     for (uint i = 0; i < pk->size(); i++) {

  1333       LoadNode* ld = pk->at(i)->as_Load();

  1334       _igvn.replace_input_of(ld, MemNode::Memory, mem_input);

  1335     }

  1336   }

  1337 }

  1339 //------------------------------output---------------------------

  1340 // Convert packs into vector node operations

  1341 void SuperWord::output() {

  1342   if (_packset.length() == 0) return;

  1344 #ifndef PRODUCT

  1345   if (TraceLoopOpts) {

  1346     tty->print("SuperWord    ");

  1347     lpt()->dump_head();

  1348   }

  1349 #endif

  1351   // MUST ENSURE main loop's initial value is properly aligned:

  1352   //  (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0

  1354   align_initial_loop_index(align_to_ref());

  1356   // Insert extract (unpack) operations for scalar uses

  1357   for (int i = 0; i < _packset.length(); i++) {

  1358     insert_extracts(_packset.at(i));

  1359   }

  1361   Compile* C = _phase->C;

  1362   uint max_vlen_in_bytes = 0;

  1363   for (int i = 0; i < _block.length(); i++) {

  1364     Node* n = _block.at(i);

  1365     Node_List* p = my_pack(n);

  1366     if (p && n == executed_last(p)) {

  1367       uint vlen = p->size();

  1368       uint vlen_in_bytes = 0;

  1369       Node* vn = NULL;

  1370       Node* low_adr = p->at(0);

  1371       Node* first   = executed_first(p);

  1372       int   opc = n->Opcode();

  1373       if (n->is_Load()) {

  1374         Node* ctl = n->in(MemNode::Control);

  1375         Node* mem = first->in(MemNode::Memory);

  1376         Node* adr = low_adr->in(MemNode::Address);

  1377         const TypePtr* atyp = n->adr_type();

  1378         vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));

  1379         vlen_in_bytes = vn->as_LoadVector()->memory_size();

  1380       } else if (n->is_Store()) {

  1381         // Promote value to be stored to vector

  1382         Node* val = vector_opd(p, MemNode::ValueIn);

  1383         Node* ctl = n->in(MemNode::Control);

  1384         Node* mem = first->in(MemNode::Memory);

  1385         Node* adr = low_adr->in(MemNode::Address);

  1386         const TypePtr* atyp = n->adr_type();

  1387         vn = StoreVectorNode::make(C, opc, ctl, mem, adr, atyp, val, vlen);

  1388         vlen_in_bytes = vn->as_StoreVector()->memory_size();

  1389       } else if (n->req() == 3) {

  1390         // Promote operands to vector

  1391         Node* in1 = vector_opd(p, 1);

  1392         Node* in2 = vector_opd(p, 2);

  1393         if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {

  1394           // Move invariant vector input into second position to avoid register spilling.

  1395           Node* tmp = in1;

  1396           in1 = in2;

  1397           in2 = tmp;

  1398         }

  1399         vn = VectorNode::make(C, opc, in1, in2, vlen, velt_basic_type(n));

  1400         vlen_in_bytes = vn->as_Vector()->length_in_bytes();

  1401       } else {

  1402         ShouldNotReachHere();

  1403       }

  1404       assert(vn != NULL, "sanity");

  1405       _igvn.register_new_node_with_optimizer(vn);

  1406       _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));

  1407       for (uint j = 0; j < p->size(); j++) {

  1408         Node* pm = p->at(j);

  1409         _igvn.replace_node(pm, vn);

  1410       }

  1411       _igvn._worklist.push(vn);

  1413       if (vlen_in_bytes > max_vlen_in_bytes) {

  1414         max_vlen_in_bytes = vlen_in_bytes;

  1415       }

  1416 #ifdef ASSERT

  1417       if (TraceNewVectors) {

  1418         tty->print("new Vector node: ");

  1419         vn->dump();

  1420       }

  1421 #endif

  1422     }

  1423   }

  1424   C->set_max_vector_size(max_vlen_in_bytes);

  1425 }

  1427 //------------------------------vector_opd---------------------------

  1428 // Create a vector operand for the nodes in pack p for operand: in(opd_idx)

  1429 Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {

  1430   Node* p0 = p->at(0);

  1431   uint vlen = p->size();

  1432   Node* opd = p0->in(opd_idx);

  1434   if (same_inputs(p, opd_idx)) {

  1435     if (opd->is_Vector() || opd->is_LoadVector()) {

  1436       assert(((opd_idx != 2) || !VectorNode::is_shift(p0)), "shift's count can't be vector");

  1437       return opd; // input is matching vector

  1438     }

  1439     if ((opd_idx == 2) && VectorNode::is_shift(p0)) {

  1440       Compile* C = _phase->C;

  1441       Node* cnt = opd;

  1442       // Vector instructions do not mask shift count, do it here.

  1443       juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);

  1444       const TypeInt* t = opd->find_int_type();

  1445       if (t != NULL && t->is_con()) {

  1446         juint shift = t->get_con();

  1447         if (shift > mask) { // Unsigned cmp

  1448           cnt = ConNode::make(C, TypeInt::make(shift & mask));

  1449         }

  1450       } else {

  1451         if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {

  1452           cnt = ConNode::make(C, TypeInt::make(mask));

  1453           _igvn.register_new_node_with_optimizer(cnt);

  1454           cnt = new (C) AndINode(opd, cnt);

  1455           _igvn.register_new_node_with_optimizer(cnt);

  1456           _phase->set_ctrl(cnt, _phase->get_ctrl(opd));

  1457         }

  1458         assert(opd->bottom_type()->isa_int(), "int type only");

  1459         // Move non constant shift count into vector register.

  1460         cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));

  1461       }

  1462       if (cnt != opd) {

  1463         _igvn.register_new_node_with_optimizer(cnt);

  1464         _phase->set_ctrl(cnt, _phase->get_ctrl(opd));

  1465       }

  1466       return cnt;

  1467     }

  1468     assert(!opd->is_StoreVector(), "such vector is not expected here");

  1469     // Convert scalar input to vector with the same number of elements as

  1470     // p0's vector. Use p0's type because size of operand's container in

  1471     // vector should match p0's size regardless operand's size.

  1472     const Type* p0_t = velt_type(p0);

  1473     VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);

  1475     _igvn.register_new_node_with_optimizer(vn);

  1476     _phase->set_ctrl(vn, _phase->get_ctrl(opd));

  1477 #ifdef ASSERT

  1478     if (TraceNewVectors) {

  1479       tty->print("new Vector node: ");

  1480       vn->dump();

  1481     }

  1482 #endif

  1483     return vn;

  1484   }

  1486   // Insert pack operation

  1487   BasicType bt = velt_basic_type(p0);

  1488   PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);

  1489   DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

  1491   for (uint i = 1; i < vlen; i++) {

  1492     Node* pi = p->at(i);

  1493     Node* in = pi->in(opd_idx);

  1494     assert(my_pack(in) == NULL, "Should already have been unpacked");

  1495     assert(opd_bt == in->bottom_type()->basic_type(), "all same type");

  1496     pk->add_opd(in);

  1497   }

  1498   _igvn.register_new_node_with_optimizer(pk);

  1499   _phase->set_ctrl(pk, _phase->get_ctrl(opd));

  1500 #ifdef ASSERT

  1501   if (TraceNewVectors) {

  1502     tty->print("new Vector node: ");

  1503     pk->dump();

  1504   }

  1505 #endif

  1506   return pk;

  1507 }

  1509 //------------------------------insert_extracts---------------------------

  1510 // If a use of pack p is not a vector use, then replace the

  1511 // use with an extract operation.

  1512 void SuperWord::insert_extracts(Node_List* p) {

  1513   if (p->at(0)->is_Store()) return;

  1514   assert(_n_idx_list.is_empty(), "empty (node,index) list");

  1516   // Inspect each use of each pack member.  For each use that is

  1517   // not a vector use, replace the use with an extract operation.

  1519   for (uint i = 0; i < p->size(); i++) {

  1520     Node* def = p->at(i);

  1521     for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {

  1522       Node* use = def->fast_out(j);

  1523       for (uint k = 0; k < use->req(); k++) {

  1524         Node* n = use->in(k);

  1525         if (def == n) {

  1526           if (!is_vector_use(use, k)) {

  1527             _n_idx_list.push(use, k);

  1528           }

  1529         }

  1530       }

  1531     }

  1532   }

  1534   while (_n_idx_list.is_nonempty()) {

  1535     Node* use = _n_idx_list.node();

  1536     int   idx = _n_idx_list.index();

  1537     _n_idx_list.pop();

  1538     Node* def = use->in(idx);

  1540     // Insert extract operation

  1541     _igvn.hash_delete(def);

  1542     int def_pos = alignment(def) / data_size(def);

  1544     Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));

  1545     _igvn.register_new_node_with_optimizer(ex);

  1546     _phase->set_ctrl(ex, _phase->get_ctrl(def));

  1547     _igvn.replace_input_of(use, idx, ex);

  1548     _igvn._worklist.push(def);

  1550     bb_insert_after(ex, bb_idx(def));

  1551     set_velt_type(ex, velt_type(def));

  1552   }

  1553 }

  1555 //------------------------------is_vector_use---------------------------

  1556 // Is use->in(u_idx) a vector use?

  1557 bool SuperWord::is_vector_use(Node* use, int u_idx) {

  1558   Node_List* u_pk = my_pack(use);

  1559   if (u_pk == NULL) return false;

  1560   Node* def = use->in(u_idx);

  1561   Node_List* d_pk = my_pack(def);

  1562   if (d_pk == NULL) {

  1563     // check for scalar promotion

  1564     Node* n = u_pk->at(0)->in(u_idx);

  1565     for (uint i = 1; i < u_pk->size(); i++) {

  1566       if (u_pk->at(i)->in(u_idx) != n) return false;

  1567     }

  1568     return true;

  1569   }

  1570   if (u_pk->size() != d_pk->size())

  1571     return false;

  1572   for (uint i = 0; i < u_pk->size(); i++) {

  1573     Node* ui = u_pk->at(i);

  1574     Node* di = d_pk->at(i);

  1575     if (ui->in(u_idx) != di || alignment(ui) != alignment(di))

  1576       return false;

  1577   }

  1578   return true;

  1579 }

  1581 //------------------------------construct_bb---------------------------

  1582 // Construct reverse postorder list of block members

       // Returns false as soon as the block contains a LoadStore, MergeMem or
       // data Proj node, or a memory Phi on the loop head whose backedge input
       // differs from its entry input and is not a Mem node. Otherwise the result
       // is true only if at least one memory slice or one data entry node was
       // found. When the walk completes, _block holds the members in RPO and
       // bb_idx() maps every member to its position in _block.
  1583 bool SuperWord::construct_bb() {

  1584   Node* entry = bb();

  1586   assert(_stk.length() == 0,            "stk is empty");

  1587   assert(_block.length() == 0,          "block is empty");

  1588   assert(_data_entry.length() == 0,     "data_entry is empty");

  1589   assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");

  1590   assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");

  1592   // Find non-control nodes with no inputs from within block,

  1593   // create a temporary map from node _idx to bb_idx for use

  1594   // by the visited and post_visited sets,

  1595   // and count number of nodes in block.

  1596   int bb_ct = 0;

  1597   for (uint i = 0; i < lpt()->_body.size(); i++ ) {

  1598     Node *n = lpt()->_body.at(i);

  1599     set_bb_idx(n, i); // Create a temporary map

  1600     if (in_bb(n)) {

  1601       if (n->is_LoadStore() || n->is_MergeMem() ||

  1602           (n->is_Proj() && !n->as_Proj()->is_CFG())) {

  1603         // Bailout if the loop has LoadStore, MergeMem or data Proj

  1604         // nodes. Superword optimization does not work with them.

  1605         return false;

  1606       }

  1607       bb_ct++;

  1608       if (!n->is_CFG()) {

             // A data node is a "data entry" when none of its inputs is in the block.
  1609         bool found = false;

  1610         for (uint j = 0; j < n->req(); j++) {

  1611           Node* def = n->in(j);

  1612           if (def && in_bb(def)) {

  1613             found = true;

  1614             break;

  1615           }

  1616         }

  1617         if (!found) {

  1618           assert(n != entry, "can't be entry");

  1619           _data_entry.push(n);

  1620         }

  1621       }

  1622     }

  1623   }

  1625   // Find memory slices (head and tail)

         // Head: a memory Phi hanging off the loop head. Tail: that Phi's backedge
         // input. Phis whose backedge and entry inputs are the same node are skipped.
  1626   for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {

  1627     Node *n = lp()->fast_out(i);

  1628     if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {

  1629       Node* n_tail  = n->in(LoopNode::LoopBackControl);

  1630       if (n_tail != n->in(LoopNode::EntryControl)) {

  1631         if (!n_tail->is_Mem()) {

               // Inside this branch the assert condition is always false, so it
               // fires in assert-enabled builds; other builds fall through to the bailout.
  1632           assert(n_tail->is_Mem(), err_msg_res("unexpected node for memory slice: %s", n_tail->Name()));

  1633           return false; // Bailout

  1634         }

  1635         _mem_slice_head.push(n);

  1636         _mem_slice_tail.push(n_tail);

  1637       }

  1638     }

  1639   }

  1641   // Create an RPO list of nodes in block

  1643   visited_clear();

  1644   post_visited_clear();

  1646   // Push all non-control nodes with no inputs from within block, then control entry

  1647   for (int j = 0; j < _data_entry.length(); j++) {

  1648     Node* n = _data_entry.at(j);

  1649     visited_set(n);

  1650     _stk.push(n);

  1651   }

  1652   visited_set(entry);

  1653   _stk.push(entry);

  1655   // Do a depth first walk over out edges

         // Slots of _block are handed out from bb_ct - 1 downwards as nodes finish,
         // which lays out the postorder completion sequence as a reverse postorder.
  1656   int rpo_idx = bb_ct - 1;

  1657   int size;

  1658   while ((size = _stk.length()) > 0) {

  1659     Node* n = _stk.top(); // Leave node on stack

           // NOTE(review): visited_test_set() presumably marks n as visited and
           // reports its previous state - confirm against its definition.
  1660     if (!visited_test_set(n)) {

  1661       // forward arc in graph

  1662     } else if (!post_visited_test(n)) {

  1663       // cross or back arc

  1664       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {

  1665         Node *use = n->fast_out(i);

  1666         if (in_bb(use) && !visited_test(use) &&

  1667             // Don't go around backedge

  1668             (!use->is_Phi() || n == entry)) {

  1669           _stk.push(use);

  1670         }

  1671       }

           // 'size' was sampled at the top of this iteration, so an unchanged
           // stack length means the loop above pushed nothing.
  1672       if (_stk.length() == size) {

  1673         // There were no additional uses, post visit node now

  1674         _stk.pop(); // Remove node from stack

  1675         assert(rpo_idx >= 0, "");

  1676         _block.at_put_grow(rpo_idx, n);

  1677         rpo_idx--;

  1678         post_visited_set(n);

  1679         assert(rpo_idx >= 0 || _stk.is_empty(), "");

  1680       }

  1681     } else {

  1682       _stk.pop(); // Remove post-visited node from stack

  1683     }

  1684   }

  1686   // Create real map of block indices for nodes

         // This overwrites the temporary _body-position map installed above.
  1687   for (int j = 0; j < _block.length(); j++) {

  1688     Node* n = _block.at(j);

  1689     set_bb_idx(n, j);

  1690   }

  1692   initialize_bb(); // Ensure extra info is allocated.

  1694 #ifndef PRODUCT

  1695   if (TraceSuperWord) {

  1696     print_bb();

  1697     tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");

  1698     for (int m = 0; m < _data_entry.length(); m++) {

  1699       tty->print("%3d ", m);

  1700       _data_entry.at(m)->dump();

  1701     }

  1702     tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");

  1703     for (int m = 0; m < _mem_slice_head.length(); m++) {

  1704       tty->print("%3d ", m); _mem_slice_head.at(m)->dump();

  1705       tty->print("    ");    _mem_slice_tail.at(m)->dump();

  1706     }

  1707   }

  1708 #endif

  1709   assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");

         // A block with neither a memory slice nor a data entry node is reported
         // as a failure to the caller.
  1710   return (_mem_slice_head.length() > 0) || (_data_entry.length() > 0);

  1711 }

  1713 //------------------------------initialize_bb---------------------------

  1714 // Initialize per node info

  1715 void SuperWord::initialize_bb() {

  1716   Node* last = _block.at(_block.length() - 1);

  1717   grow_node_info(bb_idx(last));

  1718 }

  1720 //------------------------------bb_insert_after---------------------------

  1721 // Insert n into block after pos

  1722 void SuperWord::bb_insert_after(Node* n, int pos) {

  1723   int n_pos = pos + 1;

  1724   // Make room

  1725   for (int i = _block.length() - 1; i >= n_pos; i--) {

  1726     _block.at_put_grow(i+1, _block.at(i));

  1727   }

  1728   for (int j = _node_info.length() - 1; j >= n_pos; j--) {

  1729     _node_info.at_put_grow(j+1, _node_info.at(j));

  1730   }

  1731   // Set value

  1732   _block.at_put_grow(n_pos, n);

  1733   _node_info.at_put_grow(n_pos, SWNodeInfo::initial);

  1734   // Adjust map from node->_idx to _block index

  1735   for (int i = n_pos; i < _block.length(); i++) {

  1736     set_bb_idx(_block.at(i), i);

  1737   }

  1738 }

  1740 //------------------------------compute_max_depth---------------------------

  1741 // Compute max depth for expressions from beginning of block

  1742 // Use to prune search paths during test for independence.

  1743 void SuperWord::compute_max_depth() {

  1744   int ct = 0;

  1745   bool again;

  1746   do {

  1747     again = false;

  1748     for (int i = 0; i < _block.length(); i++) {

  1749       Node* n = _block.at(i);

  1750       if (!n->is_Phi()) {

  1751         int d_orig = depth(n);

  1752         int d_in   = 0;

  1753         for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {

  1754           Node* pred = preds.current();

  1755           if (in_bb(pred)) {

  1756             d_in = MAX2(d_in, depth(pred));

  1757           }

  1758         }

  1759         if (d_in + 1 != d_orig) {

  1760           set_depth(n, d_in + 1);

  1761           again = true;

  1762         }

  1763       }

  1764     }

  1765     ct++;

  1766   } while (again);

  1767 #ifndef PRODUCT

  1768   if (TraceSuperWord && Verbose)

  1769     tty->print_cr("compute_max_depth iterated: %d times", ct);

  1770 #endif

  1771 }

  1773 //-------------------------compute_vector_element_type-----------------------

  1774 // Compute necessary vector element type for expressions

  1775 // This propagates backwards a narrower integer type when the

  1776 // upper bits of the value are not needed.

  1777 // Example:  char a,b,c;  a = b + c;

  1778 // Normally the type of the add is integer, but for packed character

  1779 // operations the type of the add needs to be char.

  1780 void SuperWord::compute_vector_element_type() {

  1781 #ifndef PRODUCT

  1782   if (TraceSuperWord && Verbose)

  1783     tty->print_cr("\ncompute_velt_type:");

  1784 #endif

  1786   // Initial type

  1787   for (int i = 0; i < _block.length(); i++) {

  1788     Node* n = _block.at(i);

  1789     set_velt_type(n, container_type(n));

  1790   }

  1792   // Propagate integer narrowed type backwards through operations

  1793   // that don't depend on higher order bits

  1794   for (int i = _block.length() - 1; i >= 0; i--) {

  1795     Node* n = _block.at(i);

  1796     // Only integer types need be examined

  1797     const Type* vtn = velt_type(n);

  1798     if (vtn->basic_type() == T_INT) {

  1799       uint start, end;

  1800       VectorNode::vector_operands(n, &start, &end);

  1802       for (uint j = start; j < end; j++) {

  1803         Node* in  = n->in(j);

  1804         // Don't propagate through a memory

  1805         if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&

  1806             data_size(n) < data_size(in)) {

  1807           bool same_type = true;

  1808           for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {

  1809             Node *use = in->fast_out(k);

  1810             if (!in_bb(use) || !same_velt_type(use, n)) {

  1811               same_type = false;

  1812               break;

  1813             }

  1814           }

  1815           if (same_type) {

  1816             // For right shifts of small integer types (bool, byte, char, short)

  1817             // we need precise information about sign-ness. Only Load nodes have

  1818             // this information because Store nodes are the same for signed and

  1819             // unsigned values. And any arithmetic operation after a load may

  1820             // expand a value to signed Int so such right shifts can't be used

  1821             // because vector elements do not have upper bits of Int.

  1822             const Type* vt = vtn;

  1823             if (VectorNode::is_shift(in)) {

  1824               Node* load = in->in(1);

  1825               if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {

  1826                 vt = velt_type(load);

  1827               } else if (in->Opcode() != Op_LShiftI) {

  1828                 // Widen type to Int to avoid creation of right shift vector

  1829                 // (align + data_size(s1) check in stmts_can_pack() will fail).

  1830                 // Note, left shifts work regardless type.

  1831                 vt = TypeInt::INT;

  1832               }

  1833             }

  1834             set_velt_type(in, vt);

  1835           }

  1836         }

  1837       }

  1838     }

  1839   }

  1840 #ifndef PRODUCT

  1841   if (TraceSuperWord && Verbose) {

  1842     for (int i = 0; i < _block.length(); i++) {

  1843       Node* n = _block.at(i);

  1844       velt_type(n)->dump();

  1845       tty->print("\t");

  1846       n->dump();

  1847     }

  1848   }

  1849 #endif

  1850 }

  1852 //------------------------------memory_alignment---------------------------

  1853 // Alignment within a vector memory reference

  1854 int SuperWord::memory_alignment(MemNode* s, int iv_adjust) {

  1855   SWPointer p(s, this);

  1856   if (!p.valid()) {

  1857     return bottom_align;

  1858   }

  1859   int vw = vector_width_in_bytes(s);

  1860   if (vw < 2) {

  1861     return bottom_align; // No vectors for this type

  1862   }

  1863   int offset  = p.offset_in_bytes();

  1864   offset     += iv_adjust*p.memory_size();

  1865   int off_rem = offset % vw;

  1866   int off_mod = off_rem >= 0 ? off_rem : off_rem + vw;

  1867   return off_mod;

  1868 }

  1870 //---------------------------container_type---------------------------

  1871 // Smallest type containing range of values

  1872 const Type* SuperWord::container_type(Node* n) {

  1873   if (n->is_Mem()) {

  1874     BasicType bt = n->as_Mem()->memory_type();

  1875     if (n->is_Store() && (bt == T_CHAR)) {

  1876       // Use T_SHORT type instead of T_CHAR for stored values because any

  1877       // preceding arithmetic operation extends values to signed Int.

  1878       bt = T_SHORT;

  1879     }

  1880     if (n->Opcode() == Op_LoadUB) {

  1881       // Adjust type for unsigned byte loads, it is important for right shifts.

  1882       // T_BOOLEAN is used because there is no basic type representing type

  1883       // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only

  1884       // size (one byte) and sign is important.

  1885       bt = T_BOOLEAN;

  1886     }

  1887     return Type::get_const_basic_type(bt);

  1888   }

  1889   const Type* t = _igvn.type(n);

  1890   if (t->basic_type() == T_INT) {

  1891     // A narrow type of arithmetic operations will be determined by

  1892     // propagating the type of memory operations.

  1893     return TypeInt::INT;

  1894   }

  1895   return t;

  1896 }

  1898 bool SuperWord::same_velt_type(Node* n1, Node* n2) {

  1899   const Type* vt1 = velt_type(n1);

  1900   const Type* vt2 = velt_type(n2);

  1901   if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) {

  1902     // Compare vectors element sizes for integer types.

  1903     return data_size(n1) == data_size(n2);

  1904   }

  1905   return vt1 == vt2;

  1906 }

  1908 //------------------------------in_packset---------------------------

  1909 // Are s1 and s2 in a pack pair and ordered as s1,s2?

  1910 bool SuperWord::in_packset(Node* s1, Node* s2) {

  1911   for (int i = 0; i < _packset.length(); i++) {

  1912     Node_List* p = _packset.at(i);

  1913     assert(p->size() == 2, "must be");

  1914     if (p->at(0) == s1 && p->at(p->size()-1) == s2) {

  1915       return true;

  1916     }

  1917   }

  1918   return false;

  1919 }

  1921 //------------------------------in_pack---------------------------

  1922 // Is s in pack p?

  1923 Node_List* SuperWord::in_pack(Node* s, Node_List* p) {

  1924   for (uint i = 0; i < p->size(); i++) {

  1925     if (p->at(i) == s) {

  1926       return p;

  1927     }

  1928   }

  1929   return NULL;

  1930 }

  1932 //------------------------------remove_pack_at---------------------------

  1933 // Remove the pack at position pos in the packset

  1934 void SuperWord::remove_pack_at(int pos) {

  1935   Node_List* p = _packset.at(pos);

  1936   for (uint i = 0; i < p->size(); i++) {

  1937     Node* s = p->at(i);

  1938     set_my_pack(s, NULL);

  1939   }

  1940   _packset.remove_at(pos);

  1941 }

  1943 //------------------------------executed_first---------------------------

  1944 // Return the node executed first in pack p.  Uses the RPO block list

  1945 // to determine order.

  1946 Node* SuperWord::executed_first(Node_List* p) {

  1947   Node* n = p->at(0);

  1948   int n_rpo = bb_idx(n);

  1949   for (uint i = 1; i < p->size(); i++) {

  1950     Node* s = p->at(i);

  1951     int s_rpo = bb_idx(s);

  1952     if (s_rpo < n_rpo) {

  1953       n = s;

  1954       n_rpo = s_rpo;

  1955     }

  1956   }

  1957   return n;

  1958 }

  1960 //------------------------------executed_last---------------------------

  1961 // Return the node executed last in pack p.

  1962 Node* SuperWord::executed_last(Node_List* p) {

  1963   Node* n = p->at(0);

  1964   int n_rpo = bb_idx(n);

  1965   for (uint i = 1; i < p->size(); i++) {

  1966     Node* s = p->at(i);

  1967     int s_rpo = bb_idx(s);

  1968     if (s_rpo > n_rpo) {

  1969       n = s;

  1970       n_rpo = s_rpo;

  1971     }

  1972   }

  1973   return n;

  1974 }

  1976 //----------------------------align_initial_loop_index---------------------------

  1977 // Adjust pre-loop limit so that in main loop, a load/store reference

  1978 // to align_to_ref will be at position zero in the vector.

  1979 //   (iv + k) mod vector_align == 0

       //
       // The new limit is installed as input 1 of the pre-loop's Opaque1 limit
       // node, clamped against the original loop limit. Every node that gets a
       // control assigned in this function is pinned to the pre-loop's entry control.
  1980 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {

  1981   CountedLoopNode *main_head = lp()->as_CountedLoop();

  1982   assert(main_head->is_main_loop(), "");

  1983   CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);

  1984   assert(pre_end != NULL, "");

  1985   Node *pre_opaq1 = pre_end->limit();

  1986   assert(pre_opaq1->Opcode() == Op_Opaque1, "");

  1987   Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;

  1988   Node *lim0 = pre_opaq->in(1);

  1990   // Where we put new limit calculations

  1991   Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);

  1993   // Ensure the original loop limit is available from the

  1994   // pre-loop Opaque1 node.

  1995   Node *orig_limit = pre_opaq->original_loop_limit();

  1996   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");

  1998   SWPointer align_to_ref_p(align_to_ref, this);

  1999   assert(align_to_ref_p.valid(), "sanity");

  2001   // Given:

  2002   //     lim0 == original pre loop limit

  2003   //     V == v_align (power of 2)

  2004   //     invar == extra invariant piece of the address expression

  2005   //     e == offset [ +/- invar ]

  2006   //

  2007   // When reassociating expressions involving '%' the basic rules are:

  2008   //     (a - b) % k == 0   =>  a % k == b % k

  2009   // and:

  2010   //     (a + b) % k == 0   =>  a % k == (k - b) % k

  2011   //

  2012   // For stride > 0 && scale > 0,

  2013   //   Derive the new pre-loop limit "lim" such that the two constraints:

  2014   //     (1) lim = lim0 + N           (where N is some positive integer < V)

  2015   //     (2) (e + lim) % V == 0

  2016   //   are true.

  2017   //

  2018   //   Substituting (1) into (2),

  2019   //     (e + lim0 + N) % V == 0

  2020   //   solve for N:

  2021   //     N = (V - (e + lim0)) % V

  2022   //   substitute back into (1), so that new limit

  2023   //     lim = lim0 + (V - (e + lim0)) % V

  2024   //

  2025   // For stride > 0 && scale < 0

  2026   //   Constraints:

  2027   //     lim = lim0 + N

  2028   //     (e - lim) % V == 0

  2029   //   Solving for lim:

  2030   //     (e - lim0 - N) % V == 0

  2031   //     N = (e - lim0) % V

  2032   //     lim = lim0 + (e - lim0) % V

  2033   //

  2034   // For stride < 0 && scale > 0

  2035   //   Constraints:

  2036   //     lim = lim0 - N

  2037   //     (e + lim) % V == 0

  2038   //   Solving for lim:

  2039   //     (e + lim0 - N) % V == 0

  2040   //     N = (e + lim0) % V

  2041   //     lim = lim0 - (e + lim0) % V

  2042   //

  2043   // For stride < 0 && scale < 0

  2044   //   Constraints:

  2045   //     lim = lim0 - N

  2046   //     (e - lim) % V == 0

  2047   //   Solving for lim:

  2048   //     (e - lim0 + N) % V == 0

  2049   //     N = (V - (e - lim0)) % V

  2050   //     lim = lim0 - (V - (e - lim0)) % V

  2052   int vw = vector_width_in_bytes(align_to_ref);

  2053   int stride   = iv_stride();

  2054   int scale    = align_to_ref_p.scale_in_bytes();

  2055   int elt_size = align_to_ref_p.memory_size();

         // From here on v_align and offset are counted in elements, not bytes.
  2056   int v_align  = vw / elt_size;

  2057   assert(v_align > 1, "sanity");

  2058   int offset   = align_to_ref_p.offset_in_bytes() / elt_size;

  2059   Node *offsn  = _igvn.intcon(offset);

  2061   Node *e = offsn;

  2062   if (align_to_ref_p.invar() != NULL) {

  2063     // incorporate any extra invariant piece producing offset +/- (invar >>> log2(elt))

  2064     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));

  2065     Node* aref     = new (_phase->C) URShiftINode(align_to_ref_p.invar(), log2_elt);

  2066     _igvn.register_new_node_with_optimizer(aref);

  2067     _phase->set_ctrl(aref, pre_ctrl);

  2068     if (align_to_ref_p.negate_invar()) {

  2069       e = new (_phase->C) SubINode(e, aref);

  2070     } else {

  2071       e = new (_phase->C) AddINode(e, aref);

  2072     }

  2073     _igvn.register_new_node_with_optimizer(e);

  2074     _phase->set_ctrl(e, pre_ctrl);

  2075   }

         // The base address is folded in only when the vector is wider than
         // ObjectAlignmentInBytes.
  2076   if (vw > ObjectAlignmentInBytes) {

  2077     // incorporate base: e + ((base & Mask) >>> log2(elt)), with Mask == vw - 1

  2078     Node* xbase = new(_phase->C) CastP2XNode(NULL, align_to_ref_p.base());

  2079     _igvn.register_new_node_with_optimizer(xbase);

  2080 #ifdef _LP64

  2081     xbase  = new (_phase->C) ConvL2INode(xbase);

  2082     _igvn.register_new_node_with_optimizer(xbase);

  2083 #endif

  2084     Node* mask = _igvn.intcon(vw-1);

  2085     Node* masked_xbase  = new (_phase->C) AndINode(xbase, mask);

  2086     _igvn.register_new_node_with_optimizer(masked_xbase);

  2087     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));

  2088     Node* bref     = new (_phase->C) URShiftINode(masked_xbase, log2_elt);

  2089     _igvn.register_new_node_with_optimizer(bref);

  2090     _phase->set_ctrl(bref, pre_ctrl);

  2091     e = new (_phase->C) AddINode(e, bref);

  2092     _igvn.register_new_node_with_optimizer(e);

  2093     _phase->set_ctrl(e, pre_ctrl);

  2094   }

  2096   // compute e +/- lim0

  2097   if (scale < 0) {

  2098     e = new (_phase->C) SubINode(e, lim0);

  2099   } else {

  2100     e = new (_phase->C) AddINode(e, lim0);

  2101   }

  2102   _igvn.register_new_node_with_optimizer(e);

  2103   _phase->set_ctrl(e, pre_ctrl);

         // Stride and scale of equal sign are the two cases of the derivation
         // above in which N has the form (V - ...) % V.
  2105   if (stride * scale > 0) {

  2106     // compute V - (e +/- lim0)

  2107     Node* va  = _igvn.intcon(v_align);

  2108     e = new (_phase->C) SubINode(va, e);

  2109     _igvn.register_new_node_with_optimizer(e);

  2110     _phase->set_ctrl(e, pre_ctrl);

  2111   }

  2112   // compute N = (exp) % V

         // Implemented as (exp) & (V - 1); this relies on V being a power of two,
         // as stated in the derivation above.
  2113   Node* va_msk = _igvn.intcon(v_align - 1);

  2114   Node* N = new (_phase->C) AndINode(e, va_msk);

  2115   _igvn.register_new_node_with_optimizer(N);

  2116   _phase->set_ctrl(N, pre_ctrl);

  2118   //   substitute back into (1), so that new limit

  2119   //     lim = lim0 + N     (lim0 - N when stride < 0)

  2120   Node* lim;

  2121   if (stride < 0) {

  2122     lim = new (_phase->C) SubINode(lim0, N);

  2123   } else {

  2124     lim = new (_phase->C) AddINode(lim0, N);

  2125   }

  2126   _igvn.register_new_node_with_optimizer(lim);

  2127   _phase->set_ctrl(lim, pre_ctrl);

         // Clamp against the original loop limit: Min for a positive stride,
         // Max otherwise.
  2128   Node* constrained =

  2129     (stride > 0) ? (Node*) new (_phase->C) MinINode(lim, orig_limit)

  2130                  : (Node*) new (_phase->C) MaxINode(lim, orig_limit);

  2131   _igvn.register_new_node_with_optimizer(constrained);

  2132   _phase->set_ctrl(constrained, pre_ctrl);

         // Install the new limit on the pre-loop's Opaque1 node.
  2133   _igvn.hash_delete(pre_opaq);

  2134   pre_opaq->set_req(1, constrained);

  2135 }

  2137 //----------------------------get_pre_loop_end---------------------------

  2138 // Find pre loop end from main loop.  Returns null if none.

  2139 CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) {

  2140   Node *ctrl = cl->in(LoopNode::EntryControl);

  2141   if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL;

  2142   Node *iffm = ctrl->in(0);

  2143   if (!iffm->is_If()) return NULL;

  2144   Node *p_f = iffm->in(0);

  2145   if (!p_f->is_IfFalse()) return NULL;

  2146   if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;

  2147   CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();

  2148   if (!pre_end->loopnode()->is_pre_loop()) return NULL;

  2149   return pre_end;

  2150 }

  2153 //------------------------------init---------------------------

  2154 void SuperWord::init() {

  2155   _dg.init();

  2156   _packset.clear();

  2157   _disjoint_ptrs.clear();

  2158   _block.clear();

  2159   _data_entry.clear();

  2160   _mem_slice_head.clear();

  2161   _mem_slice_tail.clear();

  2162   _node_info.clear();

  2163   _align_to_ref = NULL;

  2164   _lpt = NULL;

  2165   _lp = NULL;

  2166   _bb = NULL;

  2167   _iv = NULL;

  2168 }

  2170 //------------------------------print_packset---------------------------

  2171 void SuperWord::print_packset() {

  2172 #ifndef PRODUCT

  2173   tty->print_cr("packset");

  2174   for (int i = 0; i < _packset.length(); i++) {

  2175     tty->print_cr("Pack: %d", i);

  2176     Node_List* p = _packset.at(i);

  2177     print_pack(p);

  2178   }

  2179 #endif

  2180 }

  2182 //------------------------------print_pack---------------------------

  2183 void SuperWord::print_pack(Node_List* p) {

  2184   for (uint i = 0; i < p->size(); i++) {

  2185     print_stmt(p->at(i));

  2186   }

  2187 }

  2189 //------------------------------print_bb---------------------------

  2190 void SuperWord::print_bb() {

  2191 #ifndef PRODUCT

  2192   tty->print_cr("\nBlock");

  2193   for (int i = 0; i < _block.length(); i++) {

  2194     Node* n = _block.at(i);

  2195     tty->print("%d ", i);

  2196     if (n) {

  2197       n->dump();

  2198     }

  2199   }

  2200 #endif

  2201 }

  2203 //------------------------------print_stmt---------------------------

  2204 void SuperWord::print_stmt(Node* s) {

  2205 #ifndef PRODUCT

  2206   tty->print(" align: %d \t", alignment(s));

  2207   s->dump();

  2208 #endif

  2209 }

  2211 //------------------------------blank---------------------------

  2212 char* SuperWord::blank(uint depth) {

  2213   static char blanks[101];

  2214   assert(depth < 101, "too deep");

  2215   for (uint i = 0; i < depth; i++) blanks[i] = ' ';

  2216   blanks[depth] = '\0';

  2217   return blanks;

  2218 }

  2221 //==============================SWPointer===========================

  2223 //----------------------------SWPointer------------------------

  2224 SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :

  2225   _mem(mem), _slp(slp),  _base(NULL),  _adr(NULL),

  2226   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {

  2228   Node* adr = mem->in(MemNode::Address);

  2229   if (!adr->is_AddP()) {

  2230     assert(!valid(), "too complex");

  2231     return;

  2232   }

  2233   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)

  2234   Node* base = adr->in(AddPNode::Base);

  2235   //unsafe reference could not be aligned appropriately without runtime checking

  2236   if (base == NULL || base->bottom_type() == Type::TOP) {

  2237     assert(!valid(), "unsafe access");

  2238     return;

  2239   }

  2240   for (int i = 0; i < 3; i++) {

  2241     if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {

  2242       assert(!valid(), "too complex");

  2243       return;

  2244     }

  2245     adr = adr->in(AddPNode::Address);

  2246     if (base == adr || !adr->is_AddP()) {

  2247       break; // stop looking at addp's

  2248     }

  2249   }

  2250   _base = base;

  2251   _adr  = adr;

  2252   assert(valid(), "Usable");

  2253 }

  2255 // Following is used to create a temporary object during

  2256 // the pattern match of an address expression.

  2257 SWPointer::SWPointer(SWPointer* p) :

  2258   _mem(p->_mem), _slp(p->_slp),  _base(NULL),  _adr(NULL),

  2259   _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}

  2261 //------------------------scaled_iv_plus_offset--------------------

  2262 // Match: k*iv + offset

  2263 // where: k is a constant that maybe zero, and

  2264 //        offset is (k2 [+/- invariant]) where k2 maybe zero and invariant is optional

  2265 bool SWPointer::scaled_iv_plus_offset(Node* n) {

  2266   if (scaled_iv(n)) {

  2267     return true;

  2268   }

  2269   if (offset_plus_k(n)) {

  2270     return true;

  2271   }

  2272   int opc = n->Opcode();

  2273   if (opc == Op_AddI) {

  2274     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {

  2275       return true;

  2276     }

  2277     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {

  2278       return true;

  2279     }

  2280   } else if (opc == Op_SubI) {

  2281     if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {

  2282       return true;

  2283     }

  2284     if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {

  2285       _scale *= -1;

  2286       return true;

  2287     }

  2288   }

  2289   return false;

  2290 }

  2292 //----------------------------scaled_iv------------------------

  2293 // Match: k*iv where k is a constant that's not zero

  2294 bool SWPointer::scaled_iv(Node* n) {

  2295   if (_scale != 0) {

  2296     return false;  // already found a scale

  2297   }

  2298   if (n == iv()) {

  2299     _scale = 1;

  2300     return true;

  2301   }

  2302   int opc = n->Opcode();

  2303   if (opc == Op_MulI) {

  2304     if (n->in(1) == iv() && n->in(2)->is_Con()) {

  2305       _scale = n->in(2)->get_int();

  2306       return true;

  2307     } else if (n->in(2) == iv() && n->in(1)->is_Con()) {

  2308       _scale = n->in(1)->get_int();

  2309       return true;

  2310     }

  2311   } else if (opc == Op_LShiftI) {

  2312     if (n->in(1) == iv() && n->in(2)->is_Con()) {

  2313       _scale = 1 << n->in(2)->get_int();

  2314       return true;

  2315     }

  2316   } else if (opc == Op_ConvI2L) {

  2317     if (scaled_iv_plus_offset(n->in(1))) {

  2318       return true;

  2319     }

  2320   } else if (opc == Op_LShiftL) {

  2321     if (!has_iv() && _invar == NULL) {

  2322       // Need to preserve the current _offset value, so

  2323       // create a temporary object for this expression subtree.

  2324       // Hacky, so should re-engineer the address pattern match.

  2325       SWPointer tmp(this);

  2326       if (tmp.scaled_iv_plus_offset(n->in(1))) {

  2327         if (tmp._invar == NULL) {

  2328           int mult = 1 << n->in(2)->get_int();

  2329           _scale   = tmp._scale  * mult;

  2330           _offset += tmp._offset * mult;

  2331           return true;

  2332         }

  2333       }

  2334     }

  2335   }

  2336   return false;

  2337 }

  2339 //----------------------------offset_plus_k------------------------

  2340 // Match: offset is (k [+/- invariant])

  2341 // where k maybe zero and invariant is optional, but not both.

  2342 bool SWPointer::offset_plus_k(Node* n, bool negate) {

  2343   int opc = n->Opcode();

  2344   if (opc == Op_ConI) {

  2345     _offset += negate ? -(n->get_int()) : n->get_int();

  2346     return true;

  2347   } else if (opc == Op_ConL) {

  2348     // Okay if value fits into an int

  2349     const TypeLong* t = n->find_long_type();

  2350     if (t->higher_equal(TypeLong::INT)) {

  2351       jlong loff = n->get_long();

  2352       jint  off  = (jint)loff;

  2353       _offset += negate ? -off : loff;

  2354       return true;

  2355     }

  2356     return false;

  2357   }

  2358   if (_invar != NULL) return false; // already have an invariant

  2359   if (opc == Op_AddI) {

  2360     if (n->in(2)->is_Con() && invariant(n->in(1))) {

  2361       _negate_invar = negate;

  2362       _invar = n->in(1);

  2363       _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();

  2364       return true;

  2365     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {

  2366       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();

  2367       _negate_invar = negate;

  2368       _invar = n->in(2);

  2369       return true;

  2370     }

  2371   }

  2372   if (opc == Op_SubI) {

  2373     if (n->in(2)->is_Con() && invariant(n->in(1))) {

  2374       _negate_invar = negate;

  2375       _invar = n->in(1);

  2376       _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();

  2377       return true;

  2378     } else if (n->in(1)->is_Con() && invariant(n->in(2))) {

  2379       _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();

  2380       _negate_invar = !negate;

  2381       _invar = n->in(2);

  2382       return true;

  2383     }

  2384   }

  2385   if (invariant(n)) {

  2386     _negate_invar = negate;

  2387     _invar = n;

  2388     return true;

  2389   }

  2390   return false;

  2391 }

  2393 //----------------------------print------------------------

  2394 void SWPointer::print() {

  2395 #ifndef PRODUCT

  2396   tty->print("base: %d  adr: %d  scale: %d  offset: %d  invar: %c%d\n",

  2397              _base != NULL ? _base->_idx : 0,

  2398              _adr  != NULL ? _adr->_idx  : 0,

  2399              _scale, _offset,

  2400              _negate_invar?'-':'+',

  2401              _invar != NULL ? _invar->_idx : 0);

  2402 #endif

  2403 }

  2405 // ========================= OrderedPair =====================

  // Out-of-line definition of the static const member OrderedPair::initial;
  // no initializer is given, so it is default-initialized.
  2407 const OrderedPair OrderedPair::initial;

  2409 // ========================= SWNodeInfo =====================

  // Out-of-line definition of the static const member SWNodeInfo::initial;
  // no initializer is given, so it is default-initialized.
  2411 const SWNodeInfo SWNodeInfo::initial;

  2414 // ============================ DepGraph ===========================

  2416 //------------------------------make_node---------------------------

  2417 // Make a new dependence graph node for an ideal node.

  2418 DepMem* DepGraph::make_node(Node* node) {

  2419   DepMem* m = new (_arena) DepMem(node);

  2420   if (node != NULL) {

  2421     assert(_map.at_grow(node->_idx) == NULL, "one init only");

  2422     _map.at_put_grow(node->_idx, m);

  2423   }

  2424   return m;

  2425 }

  2427 //------------------------------make_edge---------------------------

  2428 // Make a new dependence graph edge from dpred -> dsucc

  2429 DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {

  2430   DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());

  2431   dpred->set_out_head(e);

  2432   dsucc->set_in_head(e);

  2433   return e;

  2434 }

  2436 // ========================== DepMem ========================

  2438 //------------------------------in_cnt---------------------------

  2439 int DepMem::in_cnt() {

  2440   int ct = 0;

  2441   for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++;

  2442   return ct;

  2443 }

  2445 //------------------------------out_cnt---------------------------

  2446 int DepMem::out_cnt() {

  2447   int ct = 0;

  2448   for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++;

  2449   return ct;

  2450 }

  2452 //------------------------------print-----------------------------

  2453 void DepMem::print() {

  2454 #ifndef PRODUCT

  2455   tty->print("  DepNode %d (", _node->_idx);

  2456   for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) {

  2457     Node* pred = p->pred()->node();

  2458     tty->print(" %d", pred != NULL ? pred->_idx : 0);

  2459   }

  2460   tty->print(") [");

  2461   for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) {

  2462     Node* succ = s->succ()->node();

  2463     tty->print(" %d", succ != NULL ? succ->_idx : 0);

  2464   }

  2465   tty->print_cr(" ]");

  2466 #endif

  2467 }

  2469 // =========================== DepEdge =========================

  2471 //------------------------------DepPreds---------------------------

  2472 void DepEdge::print() {

  2473 #ifndef PRODUCT

  2474   tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);

  2475 #endif

  2476 }

  2478 // =========================== DepPreds =========================

  2479 // Iterator over predecessor edges in the dependence graph.

  2481 //------------------------------DepPreds---------------------------

  2482 DepPreds::DepPreds(Node* n, DepGraph& dg) {

  2483   _n = n;

  2484   _done = false;

  2485   if (_n->is_Store() || _n->is_Load()) {

  2486     _next_idx = MemNode::Address;

  2487     _end_idx  = n->req();

  2488     _dep_next = dg.dep(_n)->in_head();

  2489   } else if (_n->is_Mem()) {

  2490     _next_idx = 0;

  2491     _end_idx  = 0;

  2492     _dep_next = dg.dep(_n)->in_head();

  2493   } else {

  2494     _next_idx = 1;

  2495     _end_idx  = _n->req();

  2496     _dep_next = NULL;

  2497   }

  2498   next();

  2499 }

  2501 //------------------------------next---------------------------

  2502 void DepPreds::next() {

  2503   if (_dep_next != NULL) {

  2504     _current  = _dep_next->pred()->node();

  2505     _dep_next = _dep_next->next_in();

  2506   } else if (_next_idx < _end_idx) {

  2507     _current  = _n->in(_next_idx++);

  2508   } else {

  2509     _done = true;

  2510   }

  2511 }

  2513 // =========================== DepSuccs =========================

  2514 // Iterator over successor edges in the dependence graph.

  2516 //------------------------------DepSuccs---------------------------

  2517 DepSuccs::DepSuccs(Node* n, DepGraph& dg) {

  2518   _n = n;

  2519   _done = false;

  2520   if (_n->is_Load()) {

  2521     _next_idx = 0;

  2522     _end_idx  = _n->outcnt();

  2523     _dep_next = dg.dep(_n)->out_head();

  2524   } else if (_n->is_Mem() || _n->is_Phi() && _n->bottom_type() == Type::MEMORY) {

  2525     _next_idx = 0;

  2526     _end_idx  = 0;

  2527     _dep_next = dg.dep(_n)->out_head();

  2528   } else {

  2529     _next_idx = 0;

  2530     _end_idx  = _n->outcnt();

  2531     _dep_next = NULL;

  2532   }

  2533   next();

  2534 }

  2536 //-------------------------------next---------------------------

  2537 void DepSuccs::next() {

  2538   if (_dep_next != NULL) {

  2539     _current  = _dep_next->succ()->node();

  2540     _dep_next = _dep_next->next_out();

  2541   } else if (_next_idx < _end_idx) {

  2542     _current  = _n->raw_out(_next_idx++);

  2543   } else {

  2544     _done = true;

  2545   }

  2546 }

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/opto/superword.cpp@cfd05ec74089

src/share/vm/opto/superword.cpp