6636138: UseSuperWord enabled failure

Tue, 24 Mar 2009 12:19:47 -0700

author
cfang
date
Tue, 24 Mar 2009 12:19:47 -0700
changeset 1102
78af5ae8e731
parent 1101
ebebd376f657
child 1103
90a66aa50514
child 1104
eca19a8425b5

6636138: UseSuperWord enabled failure
Summary: Fixed SuperWord scheduling of memory operations.
Reviewed-by: kvn, never

src/share/vm/opto/superword.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/superword.hpp file | annotate | diff | comparison | revisions
test/compiler/6636138/Test1.java file | annotate | diff | comparison | revisions
test/compiler/6636138/Test2.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/opto/superword.cpp	Mon Mar 23 13:58:58 2009 -0700
     1.2 +++ b/src/share/vm/opto/superword.cpp	Tue Mar 24 12:19:47 2009 -0700
     1.3 @@ -454,9 +454,13 @@
     1.4            // or need to run igvn.optimize() again before SLP
     1.5          } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
     1.6            // Ditto.  Not sure what else to check further.
     1.7 -        } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) {
     1.8 +        } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) {
     1.9            // StoreCM has an input edge used as a precedence edge.
    1.10            // Maybe an issue when oop stores are vectorized.
    1.11 +        } else if( out->is_MergeMem() && prev &&
    1.12 +                   prev->Opcode() == Op_StoreCM && out == prev->in(MemNode::OopStore)) {
    1.13 +          // Oop store is a MergeMem! This should not happen. Temporarily remove the assertion
    1.14 +          // for this case because it could not be superwordized anyway.
    1.15          } else {
    1.16            assert(out == prev || prev == NULL, "no branches off of store slice");
    1.17          }
    1.18 @@ -912,54 +916,175 @@
    1.19    }
    1.20  }
    1.21  
    1.22 -//------------------------------co_locate_pack---------------------------
    1.23 -// Within a pack, move stores down to the last executed store,
    1.24 -// and move loads up to the first executed load.
    1.25 +//-------------------------------remove_and_insert-------------------
    1.26 +//remove "current" from its current position in the memory graph and insert
    1.27 +//it after the appropriate insertion point (lip or uip)
    1.28 +void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip,
    1.29 +                                  Node *uip, Unique_Node_List &sched_before) {
    1.30 +  Node* my_mem = current->in(MemNode::Memory);
    1.31 +  _igvn.hash_delete(current);
    1.32 +  _igvn.hash_delete(my_mem);
    1.33 +
    1.34 +  //remove current_store from its current position in the memmory graph
    1.35 +  for (DUIterator i = current->outs(); current->has_out(i); i++) {
    1.36 +    Node* use = current->out(i);
    1.37 +    if (use->is_Mem()) {
    1.38 +      assert(use->in(MemNode::Memory) == current, "must be");
    1.39 +      _igvn.hash_delete(use);
    1.40 +      if (use == prev) { // connect prev to my_mem
    1.41 +        use->set_req(MemNode::Memory, my_mem);
    1.42 +      } else if (sched_before.member(use)) {
    1.43 +        _igvn.hash_delete(uip);
    1.44 +        use->set_req(MemNode::Memory, uip);
    1.45 +      } else {
    1.46 +        _igvn.hash_delete(lip);
    1.47 +        use->set_req(MemNode::Memory, lip);
    1.48 +      }
    1.49 +      _igvn._worklist.push(use);
    1.50 +      --i; //deleted this edge; rescan position
    1.51 +    }
    1.52 +  }
    1.53 +
    1.54 +  bool sched_up = sched_before.member(current);
    1.55 +  Node *insert_pt =  sched_up ?  uip : lip;
    1.56 +  _igvn.hash_delete(insert_pt);
    1.57 +
    1.58 +  // all uses of insert_pt's memory state should use current's instead
    1.59 +  for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) {
    1.60 +    Node* use = insert_pt->out(i);
    1.61 +    if (use->is_Mem()) {
    1.62 +      assert(use->in(MemNode::Memory) == insert_pt, "must be");
    1.63 +      _igvn.hash_delete(use);
    1.64 +      use->set_req(MemNode::Memory, current);
    1.65 +      _igvn._worklist.push(use);
    1.66 +      --i; //deleted this edge; rescan position
    1.67 +    } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) {
    1.68 +      uint pos; //lip (lower insert point) must be the last one in the memory slice
    1.69 +      _igvn.hash_delete(use);
    1.70 +      for (pos=1; pos < use->req(); pos++) {
    1.71 +        if (use->in(pos) == insert_pt) break;
    1.72 +      }
    1.73 +      use->set_req(pos, current);
    1.74 +      _igvn._worklist.push(use);
    1.75 +      --i;
    1.76 +    }
    1.77 +  }
    1.78 +
    1.79 +  //connect current to insert_pt
    1.80 +  current->set_req(MemNode::Memory, insert_pt);
    1.81 +  _igvn._worklist.push(current);
    1.82 +}
    1.83 +
    1.84 +//------------------------------co_locate_pack----------------------------------
    1.85 +// To schedule a store pack, we need to move any sandwiched memory ops either before
    1.86 +// or after the pack, based upon dependence information:
    1.87 +// (1) If any store in the pack depends on the sandwiched memory op, the
    1.88 +//     sandwiched memory op must be scheduled BEFORE the pack;
    1.89 +// (2) If a sandwiched memory op depends on any store in the pack, the
    1.90 +//     sandwiched memory op must be scheduled AFTER the pack;
    1.91 +// (3) If a sandwiched memory op (say, memA) depends on another sandwiched
    1.92 +//     memory op (say memB), memB must be scheduled before memA. So, if memA is
    1.93 +//     scheduled before the pack, memB must also be scheduled before the pack;
    1.94 +// (4) If there is no dependence restriction for a sandwiched memory op, we simply
    1.95 +//     schedule this store AFTER the pack
    1.96 +// (5) We know there is no dependence cycle, so there in no other case;
    1.97 +// (6) Finally, all memory ops in another single pack should be moved in the same direction.
    1.98 +//
    1.99 +// To schedule a load pack: the memory edge of every loads in the pack must be
   1.100 +// the same as the memory edge of the last executed load in the pack
   1.101  void SuperWord::co_locate_pack(Node_List* pk) {
   1.102    if (pk->at(0)->is_Store()) {
   1.103 -    // Push Stores down towards last executed pack member
   1.104      MemNode* first     = executed_first(pk)->as_Mem();
   1.105      MemNode* last      = executed_last(pk)->as_Mem();
   1.106 -    MemNode* insert_pt = last;
   1.107 +    Unique_Node_List schedule_before_pack;
   1.108 +    Unique_Node_List memops;
   1.109 +
   1.110      MemNode* current   = last->in(MemNode::Memory)->as_Mem();
   1.111 +    MemNode* previous  = last;
   1.112      while (true) {
   1.113        assert(in_bb(current), "stay in block");
   1.114 +      memops.push(previous);
   1.115 +      for (DUIterator i = current->outs(); current->has_out(i); i++) {
   1.116 +        Node* use = current->out(i);
   1.117 +        if (use->is_Mem() && use != previous)
   1.118 +          memops.push(use);
   1.119 +      }
   1.120 +      if(current == first) break;
   1.121 +      previous = current;
   1.122 +      current  = current->in(MemNode::Memory)->as_Mem();
   1.123 +    }
   1.124 +
   1.125 +    // determine which memory operations should be scheduled before the pack
   1.126 +    for (uint i = 1; i < memops.size(); i++) {
   1.127 +      Node *s1 = memops.at(i);
   1.128 +      if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) {
   1.129 +        for (uint j = 0; j< i; j++) {
   1.130 +          Node *s2 = memops.at(j);
   1.131 +          if (!independent(s1, s2)) {
   1.132 +            if (in_pack(s2, pk) || schedule_before_pack.member(s2)) {
   1.133 +              schedule_before_pack.push(s1); //s1 must be scheduled before
   1.134 +              Node_List* mem_pk = my_pack(s1);
   1.135 +              if (mem_pk != NULL) {
   1.136 +                for (uint ii = 0; ii < mem_pk->size(); ii++) {
   1.137 +                  Node* s = mem_pk->at(ii); // follow partner
   1.138 +                  if (memops.member(s) && !schedule_before_pack.member(s))
   1.139 +                    schedule_before_pack.push(s);
   1.140 +                }
   1.141 +              }
   1.142 +            }
   1.143 +          }
   1.144 +        }
   1.145 +      }
   1.146 +    }
   1.147 +
   1.148 +    MemNode* lower_insert_pt = last;
   1.149 +    Node*    upper_insert_pt = first->in(MemNode::Memory);
   1.150 +    previous                 = last; //previous store in pk
   1.151 +    current                  = last->in(MemNode::Memory)->as_Mem();
   1.152 +
   1.153 +    //start scheduling from "last" to "first"
   1.154 +    while (true) {
   1.155 +      assert(in_bb(current), "stay in block");
   1.156 +      assert(in_pack(previous, pk), "previous stays in pack");
   1.157        Node* my_mem = current->in(MemNode::Memory);
   1.158 +
   1.159        if (in_pack(current, pk)) {
   1.160 -        // Forward users of my memory state to my input memory state
   1.161 +        // Forward users of my memory state (except "previous) to my input memory state
   1.162          _igvn.hash_delete(current);
   1.163 -        _igvn.hash_delete(my_mem);
   1.164          for (DUIterator i = current->outs(); current->has_out(i); i++) {
   1.165            Node* use = current->out(i);
   1.166 -          if (use->is_Mem()) {
   1.167 +          if (use->is_Mem() && use != previous) {
   1.168              assert(use->in(MemNode::Memory) == current, "must be");
   1.169              _igvn.hash_delete(use);
   1.170 -            use->set_req(MemNode::Memory, my_mem);
   1.171 +            if (schedule_before_pack.member(use)) {
   1.172 +              _igvn.hash_delete(upper_insert_pt);
   1.173 +              use->set_req(MemNode::Memory, upper_insert_pt);
   1.174 +            } else {
   1.175 +              _igvn.hash_delete(lower_insert_pt);
   1.176 +              use->set_req(MemNode::Memory, lower_insert_pt);
   1.177 +            }
   1.178              _igvn._worklist.push(use);
   1.179              --i; // deleted this edge; rescan position
   1.180            }
   1.181          }
   1.182 -        // put current immediately before insert_pt
   1.183 -        current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory));
   1.184 -        _igvn.hash_delete(insert_pt);
   1.185 -        insert_pt->set_req(MemNode::Memory, current);
   1.186 -        _igvn._worklist.push(insert_pt);
   1.187 -        _igvn._worklist.push(current);
   1.188 -        insert_pt = current;
   1.189 +        previous = current;
   1.190 +      } else { // !in_pack(current, pk) ==> a sandwiched store
   1.191 +        remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack);
   1.192        }
   1.193 +
   1.194        if (current == first) break;
   1.195        current = my_mem->as_Mem();
   1.196 -    }
   1.197 -  } else if (pk->at(0)->is_Load()) {
   1.198 -    // Pull Loads up towards first executed pack member
   1.199 -    LoadNode* first = executed_first(pk)->as_Load();
   1.200 -    Node* first_mem = first->in(MemNode::Memory);
   1.201 -    _igvn.hash_delete(first_mem);
   1.202 -    // Give each load same memory state as first
   1.203 +    } // end while
   1.204 +  } else if (pk->at(0)->is_Load()) { //load
   1.205 +    // all use the memory state that the last executed load uses
   1.206 +    LoadNode* last_load  = executed_last(pk)->as_Load();
   1.207 +    Node* last_mem       = last_load->in(MemNode::Memory);
   1.208 +    _igvn.hash_delete(last_mem);
   1.209 +    // Give each load same memory state as last
   1.210      for (uint i = 0; i < pk->size(); i++) {
   1.211        LoadNode* ld = pk->at(i)->as_Load();
   1.212        _igvn.hash_delete(ld);
   1.213 -      ld->set_req(MemNode::Memory, first_mem);
   1.214 +      ld->set_req(MemNode::Memory, last_mem);
   1.215        _igvn._worklist.push(ld);
   1.216      }
   1.217    }
     2.1 --- a/src/share/vm/opto/superword.hpp	Mon Mar 23 13:58:58 2009 -0700
     2.2 +++ b/src/share/vm/opto/superword.hpp	Tue Mar 24 12:19:47 2009 -0700
     2.3 @@ -341,8 +341,11 @@
     2.4    void filter_packs();
     2.5    // Adjust the memory graph for the packed operations
     2.6    void schedule();
     2.7 -  // Within a pack, move stores down to the last executed store,
     2.8 -  // and move loads up to the first executed load.
     2.9 +  // Remove "current" from its current position in the memory graph and insert
    2.10 +  // it after the appropriate insert points (lip or uip);
    2.11 +  void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before);
    2.12 +  // Within a store pack, schedule stores together by moving out the sandwiched memory ops according
    2.13 +  // to dependence info; and within a load pack, move loads down to the last executed load.
    2.14    void co_locate_pack(Node_List* p);
    2.15    // Convert packs into vector node operations
    2.16    void output();
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/compiler/6636138/Test1.java	Tue Mar 24 12:19:47 2009 -0700
     3.3 @@ -0,0 +1,67 @@
     3.4 +/*
     3.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
     3.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3.7 + *
     3.8 + * This code is free software; you can redistribute it and/or modify it
     3.9 + * under the terms of the GNU General Public License version 2 only, as
    3.10 + * published by the Free Software Foundation.
    3.11 + *
    3.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    3.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    3.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    3.15 + * version 2 for more details (a copy is included in the LICENSE file that
    3.16 + * accompanied this code).
    3.17 + *
    3.18 + * You should have received a copy of the GNU General Public License version
    3.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    3.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    3.21 + *
    3.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    3.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    3.24 + * have any questions.
    3.25 + */
    3.26 +
    3.27 +/**
    3.28 + * @test
    3.29 + * @bug 6636138
    3.30 + * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation.
    3.31 + *
    3.32 + * @run main/othervm -server -Xbatch -XX:CompileOnly=Test1.init -XX:+UseSuperword Test1
    3.33 + */
    3.34 +
    3.35 +class Test1 {
    3.36 +
    3.37 +    public static void init(int src[], int [] dst, int[] ref) {
    3.38 +        // initialize the arrays
    3.39 +        for (int i =0; i<src.length; i++) {
    3.40 +            src[i] =  i;
    3.41 +            dst[i] = 2;      // yes, dst[i] needed(otherwise src[i] will be replaced with i)
    3.42 +            ref[i] = src[i]; // src[i] depends on the store src[i]
    3.43 +        }
    3.44 +    }
    3.45 +
    3.46 +    public static void verify(int src[], int[] ref) {
    3.47 +        // check whether src and ref are equal
    3.48 +        for (int i = 0; i < src.length; i++) {
    3.49 +            if (src[i] != ref[i]) {
    3.50 +                System.out.println("Error: src and ref don't match at " + i);
    3.51 +                System.exit(-1);
    3.52 +            }
    3.53 +        }
    3.54 +    }
    3.55 +
    3.56 +    public static void test() {
    3.57 +        int[] src = new int[34];
    3.58 +        int[] dst = new int[34];
    3.59 +        int[] ref = new int[34];
    3.60 +
    3.61 +        init(src, dst, ref);
    3.62 +        verify(src, ref);
    3.63 +    }
    3.64 +
    3.65 +    public static void main(String[] args) {
    3.66 +        for (int i=0; i< 2000; i++) {
    3.67 +            test();
    3.68 +        }
    3.69 +    }
    3.70 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/test/compiler/6636138/Test2.java	Tue Mar 24 12:19:47 2009 -0700
     4.3 @@ -0,0 +1,70 @@
     4.4 +/*
     4.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
     4.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4.7 + *
     4.8 + * This code is free software; you can redistribute it and/or modify it
     4.9 + * under the terms of the GNU General Public License version 2 only, as
    4.10 + * published by the Free Software Foundation.
    4.11 + *
    4.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    4.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    4.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    4.15 + * version 2 for more details (a copy is included in the LICENSE file that
    4.16 + * accompanied this code).
    4.17 + *
    4.18 + * You should have received a copy of the GNU General Public License version
    4.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    4.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    4.21 + *
    4.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    4.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    4.24 + * have any questions.
    4.25 + */
    4.26 +
    4.27 +/**
    4.28 + * @test
    4.29 + * @bug 6636138
    4.30 + * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation.
    4.31 + *
    4.32 + * @run main/othervm -server -Xbatch -XX:CompileOnly=Test2.shift -XX:+UseSuperword Test2
    4.33 + */
    4.34 +
    4.35 +class Test2 {
    4.36 +
    4.37 +    public static void init(int src[]) {
    4.38 +        // Initialize the array
    4.39 +        for (int i = 0; i < src.length; i++)
    4.40 +            src[i] = i;
    4.41 +    }
    4.42 +
    4.43 +   public static void shift(int src[]) {
    4.44 +       //left-shift the array
    4.45 +       for (int i = src.length-1; i > 0; i--){
    4.46 +           int tmp  = src[i];
    4.47 +           src[i]   = src[i-1];
    4.48 +           src[i-1] = tmp;
    4.49 +       }
    4.50 +    }
    4.51 +
    4.52 +    public static void verify(int src[]) {
    4.53 +        for (int i = 0; i < src.length; i++){
    4.54 +            int value = (i-1 + src.length)%src.length; // correct value after shifting
    4.55 +                if (src[i] != value) {
    4.56 +                    System.out.println("Error: src["+i+"] should be "+ value + " instead of " + src[i]);
    4.57 +                    System.exit(-1);
    4.58 +                }
    4.59 +        }
    4.60 +    }
    4.61 +
    4.62 +    public static void test() {
    4.63 +        int[] src = new int[10];
    4.64 +        init(src);
    4.65 +        shift(src);
    4.66 +        verify(src);
    4.67 +    }
    4.68 +
    4.69 +    public static void main(String[] args) {
    4.70 +        for (int i=0; i< 2000; i++)
    4.71 +            test();
    4.72 +    }
    4.73 +}

mercurial