Tue, 24 Mar 2009 12:19:47 -0700
6636138: UseSuperWord enabled failure
Summary: Fixed SuperWord scheduling of memory operations.
Reviewed-by: kvn, never
1.1 --- a/src/share/vm/opto/superword.cpp Mon Mar 23 13:58:58 2009 -0700 1.2 +++ b/src/share/vm/opto/superword.cpp Tue Mar 24 12:19:47 2009 -0700 1.3 @@ -454,9 +454,13 @@ 1.4 // or need to run igvn.optimize() again before SLP 1.5 } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { 1.6 // Ditto. Not sure what else to check further. 1.7 - } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) { 1.8 + } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { 1.9 // StoreCM has an input edge used as a precedence edge. 1.10 // Maybe an issue when oop stores are vectorized. 1.11 + } else if( out->is_MergeMem() && prev && 1.12 + prev->Opcode() == Op_StoreCM && out == prev->in(MemNode::OopStore)) { 1.13 + // Oop store is a MergeMem! This should not happen. Temporarily remove the assertion 1.14 + // for this case because it could not be superwordized anyway. 1.15 } else { 1.16 assert(out == prev || prev == NULL, "no branches off of store slice"); 1.17 } 1.18 @@ -912,54 +916,175 @@ 1.19 } 1.20 } 1.21 1.22 -//------------------------------co_locate_pack--------------------------- 1.23 -// Within a pack, move stores down to the last executed store, 1.24 -// and move loads up to the first executed load. 1.25 +//-------------------------------remove_and_insert------------------- 1.26 +//remove "current" from its current position in the memory graph and insert 1.27 +//it after the appropriate insertion point (lip or uip) 1.28 +void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, 1.29 + Node *uip, Unique_Node_List &sched_before) { 1.30 + Node* my_mem = current->in(MemNode::Memory); 1.31 + _igvn.hash_delete(current); 1.32 + _igvn.hash_delete(my_mem); 1.33 + 1.34 + //remove current_store from its current position in the memmory graph 1.35 + for (DUIterator i = current->outs(); current->has_out(i); i++) { 1.36 + Node* use = current->out(i); 1.37 + if (use->is_Mem()) { 1.38 + assert(use->in(MemNode::Memory) == current, "must be"); 1.39 + _igvn.hash_delete(use); 1.40 + if (use == prev) { // connect prev to my_mem 1.41 + use->set_req(MemNode::Memory, my_mem); 1.42 + } else if (sched_before.member(use)) { 1.43 + _igvn.hash_delete(uip); 1.44 + use->set_req(MemNode::Memory, uip); 1.45 + } else { 1.46 + _igvn.hash_delete(lip); 1.47 + use->set_req(MemNode::Memory, lip); 1.48 + } 1.49 + _igvn._worklist.push(use); 1.50 + --i; //deleted this edge; rescan position 1.51 + } 1.52 + } 1.53 + 1.54 + bool sched_up = sched_before.member(current); 1.55 + Node *insert_pt = sched_up ? uip : lip; 1.56 + _igvn.hash_delete(insert_pt); 1.57 + 1.58 + // all uses of insert_pt's memory state should use current's instead 1.59 + for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { 1.60 + Node* use = insert_pt->out(i); 1.61 + if (use->is_Mem()) { 1.62 + assert(use->in(MemNode::Memory) == insert_pt, "must be"); 1.63 + _igvn.hash_delete(use); 1.64 + use->set_req(MemNode::Memory, current); 1.65 + _igvn._worklist.push(use); 1.66 + --i; //deleted this edge; rescan position 1.67 + } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { 1.68 + uint pos; //lip (lower insert point) must be the last one in the memory slice 1.69 + _igvn.hash_delete(use); 1.70 + for (pos=1; pos < use->req(); pos++) { 1.71 + if (use->in(pos) == insert_pt) break; 1.72 + } 1.73 + use->set_req(pos, current); 1.74 + _igvn._worklist.push(use); 1.75 + --i; 1.76 + } 1.77 + } 1.78 + 1.79 + //connect current to insert_pt 1.80 + current->set_req(MemNode::Memory, insert_pt); 1.81 + _igvn._worklist.push(current); 1.82 +} 1.83 + 1.84 +//------------------------------co_locate_pack---------------------------------- 1.85 +// To schedule a store pack, we need to move any sandwiched memory ops either before 1.86 +// or after the pack, based upon dependence information: 1.87 +// (1) If any store in the pack depends on the sandwiched memory op, the 1.88 +// sandwiched memory op must be scheduled BEFORE the pack; 1.89 +// (2) If a sandwiched memory op depends on any store in the pack, the 1.90 +// sandwiched memory op must be scheduled AFTER the pack; 1.91 +// (3) If a sandwiched memory op (say, memA) depends on another sandwiched 1.92 +// memory op (say memB), memB must be scheduled before memA. So, if memA is 1.93 +// scheduled before the pack, memB must also be scheduled before the pack; 1.94 +// (4) If there is no dependence restriction for a sandwiched memory op, we simply 1.95 +// schedule this store AFTER the pack 1.96 +// (5) We know there is no dependence cycle, so there in no other case; 1.97 +// (6) Finally, all memory ops in another single pack should be moved in the same direction. 1.98 +// 1.99 +// To schedule a load pack: the memory edge of every loads in the pack must be 1.100 +// the same as the memory edge of the last executed load in the pack 1.101 void SuperWord::co_locate_pack(Node_List* pk) { 1.102 if (pk->at(0)->is_Store()) { 1.103 - // Push Stores down towards last executed pack member 1.104 MemNode* first = executed_first(pk)->as_Mem(); 1.105 MemNode* last = executed_last(pk)->as_Mem(); 1.106 - MemNode* insert_pt = last; 1.107 + Unique_Node_List schedule_before_pack; 1.108 + Unique_Node_List memops; 1.109 + 1.110 MemNode* current = last->in(MemNode::Memory)->as_Mem(); 1.111 + MemNode* previous = last; 1.112 while (true) { 1.113 assert(in_bb(current), "stay in block"); 1.114 + memops.push(previous); 1.115 + for (DUIterator i = current->outs(); current->has_out(i); i++) { 1.116 + Node* use = current->out(i); 1.117 + if (use->is_Mem() && use != previous) 1.118 + memops.push(use); 1.119 + } 1.120 + if(current == first) break; 1.121 + previous = current; 1.122 + current = current->in(MemNode::Memory)->as_Mem(); 1.123 + } 1.124 + 1.125 + // determine which memory operations should be scheduled before the pack 1.126 + for (uint i = 1; i < memops.size(); i++) { 1.127 + Node *s1 = memops.at(i); 1.128 + if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { 1.129 + for (uint j = 0; j< i; j++) { 1.130 + Node *s2 = memops.at(j); 1.131 + if (!independent(s1, s2)) { 1.132 + if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { 1.133 + schedule_before_pack.push(s1); //s1 must be scheduled before 1.134 + Node_List* mem_pk = my_pack(s1); 1.135 + if (mem_pk != NULL) { 1.136 + for (uint ii = 0; ii < mem_pk->size(); ii++) { 1.137 + Node* s = mem_pk->at(ii); // follow partner 1.138 + if (memops.member(s) && !schedule_before_pack.member(s)) 1.139 + schedule_before_pack.push(s); 1.140 + } 1.141 + } 1.142 + } 1.143 + } 1.144 + } 1.145 + } 1.146 + } 1.147 + 1.148 + MemNode* lower_insert_pt = last; 1.149 + Node* upper_insert_pt = first->in(MemNode::Memory); 1.150 + previous = last; //previous store in pk 1.151 + current = last->in(MemNode::Memory)->as_Mem(); 1.152 + 1.153 + //start scheduling from "last" to "first" 1.154 + while (true) { 1.155 + assert(in_bb(current), "stay in block"); 1.156 + assert(in_pack(previous, pk), "previous stays in pack"); 1.157 Node* my_mem = current->in(MemNode::Memory); 1.158 + 1.159 if (in_pack(current, pk)) { 1.160 - // Forward users of my memory state to my input memory state 1.161 + // Forward users of my memory state (except "previous) to my input memory state 1.162 _igvn.hash_delete(current); 1.163 - _igvn.hash_delete(my_mem); 1.164 for (DUIterator i = current->outs(); current->has_out(i); i++) { 1.165 Node* use = current->out(i); 1.166 - if (use->is_Mem()) { 1.167 + if (use->is_Mem() && use != previous) { 1.168 assert(use->in(MemNode::Memory) == current, "must be"); 1.169 _igvn.hash_delete(use); 1.170 - use->set_req(MemNode::Memory, my_mem); 1.171 + if (schedule_before_pack.member(use)) { 1.172 + _igvn.hash_delete(upper_insert_pt); 1.173 + use->set_req(MemNode::Memory, upper_insert_pt); 1.174 + } else { 1.175 + _igvn.hash_delete(lower_insert_pt); 1.176 + use->set_req(MemNode::Memory, lower_insert_pt); 1.177 + } 1.178 _igvn._worklist.push(use); 1.179 --i; // deleted this edge; rescan position 1.180 } 1.181 } 1.182 - // put current immediately before insert_pt 1.183 - current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory)); 1.184 - _igvn.hash_delete(insert_pt); 1.185 - insert_pt->set_req(MemNode::Memory, current); 1.186 - _igvn._worklist.push(insert_pt); 1.187 - _igvn._worklist.push(current); 1.188 - insert_pt = current; 1.189 + previous = current; 1.190 + } else { // !in_pack(current, pk) ==> a sandwiched store 1.191 + remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); 1.192 } 1.193 + 1.194 if (current == first) break; 1.195 current = my_mem->as_Mem(); 1.196 - } 1.197 - } else if (pk->at(0)->is_Load()) { 1.198 - // Pull Loads up towards first executed pack member 1.199 - LoadNode* first = executed_first(pk)->as_Load(); 1.200 - Node* first_mem = first->in(MemNode::Memory); 1.201 - _igvn.hash_delete(first_mem); 1.202 - // Give each load same memory state as first 1.203 + } // end while 1.204 + } else if (pk->at(0)->is_Load()) { //load 1.205 + // all use the memory state that the last executed load uses 1.206 + LoadNode* last_load = executed_last(pk)->as_Load(); 1.207 + Node* last_mem = last_load->in(MemNode::Memory); 1.208 + _igvn.hash_delete(last_mem); 1.209 + // Give each load same memory state as last 1.210 for (uint i = 0; i < pk->size(); i++) { 1.211 LoadNode* ld = pk->at(i)->as_Load(); 1.212 _igvn.hash_delete(ld); 1.213 - ld->set_req(MemNode::Memory, first_mem); 1.214 + ld->set_req(MemNode::Memory, last_mem); 1.215 _igvn._worklist.push(ld); 1.216 } 1.217 }
2.1 --- a/src/share/vm/opto/superword.hpp Mon Mar 23 13:58:58 2009 -0700 2.2 +++ b/src/share/vm/opto/superword.hpp Tue Mar 24 12:19:47 2009 -0700 2.3 @@ -341,8 +341,11 @@ 2.4 void filter_packs(); 2.5 // Adjust the memory graph for the packed operations 2.6 void schedule(); 2.7 - // Within a pack, move stores down to the last executed store, 2.8 - // and move loads up to the first executed load. 2.9 + // Remove "current" from its current position in the memory graph and insert 2.10 + // it after the appropriate insert points (lip or uip); 2.11 + void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before); 2.12 + // Within a store pack, schedule stores together by moving out the sandwiched memory ops according 2.13 + // to dependence info; and within a load pack, move loads down to the last executed load. 2.14 void co_locate_pack(Node_List* p); 2.15 // Convert packs into vector node operations 2.16 void output();
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/test/compiler/6636138/Test1.java Tue Mar 24 12:19:47 2009 -0700 3.3 @@ -0,0 +1,67 @@ 3.4 +/* 3.5 + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. 3.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.7 + * 3.8 + * This code is free software; you can redistribute it and/or modify it 3.9 + * under the terms of the GNU General Public License version 2 only, as 3.10 + * published by the Free Software Foundation. 3.11 + * 3.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 3.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 3.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 3.15 + * version 2 for more details (a copy is included in the LICENSE file that 3.16 + * accompanied this code). 3.17 + * 3.18 + * You should have received a copy of the GNU General Public License version 3.19 + * 2 along with this work; if not, write to the Free Software Foundation, 3.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 3.21 + * 3.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 3.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 3.24 + * have any questions. 3.25 + */ 3.26 + 3.27 +/** 3.28 + * @test 3.29 + * @bug 6636138 3.30 + * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation. 3.31 + * 3.32 + * @run main/othervm -server -Xbatch -XX:CompileOnly=Test1.init -XX:+UseSuperword Test1 3.33 + */ 3.34 + 3.35 +class Test1 { 3.36 + 3.37 + public static void init(int src[], int [] dst, int[] ref) { 3.38 + // initialize the arrays 3.39 + for (int i =0; i<src.length; i++) { 3.40 + src[i] = i; 3.41 + dst[i] = 2; // yes, dst[i] needed(otherwise src[i] will be replaced with i) 3.42 + ref[i] = src[i]; // src[i] depends on the store src[i] 3.43 + } 3.44 + } 3.45 + 3.46 + public static void verify(int src[], int[] ref) { 3.47 + // check whether src and ref are equal 3.48 + for (int i = 0; i < src.length; i++) { 3.49 + if (src[i] != ref[i]) { 3.50 + System.out.println("Error: src and ref don't match at " + i); 3.51 + System.exit(-1); 3.52 + } 3.53 + } 3.54 + } 3.55 + 3.56 + public static void test() { 3.57 + int[] src = new int[34]; 3.58 + int[] dst = new int[34]; 3.59 + int[] ref = new int[34]; 3.60 + 3.61 + init(src, dst, ref); 3.62 + verify(src, ref); 3.63 + } 3.64 + 3.65 + public static void main(String[] args) { 3.66 + for (int i=0; i< 2000; i++) { 3.67 + test(); 3.68 + } 3.69 + } 3.70 +}
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/test/compiler/6636138/Test2.java Tue Mar 24 12:19:47 2009 -0700 4.3 @@ -0,0 +1,70 @@ 4.4 +/* 4.5 + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. 4.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.7 + * 4.8 + * This code is free software; you can redistribute it and/or modify it 4.9 + * under the terms of the GNU General Public License version 2 only, as 4.10 + * published by the Free Software Foundation. 4.11 + * 4.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 4.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 4.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 4.15 + * version 2 for more details (a copy is included in the LICENSE file that 4.16 + * accompanied this code). 4.17 + * 4.18 + * You should have received a copy of the GNU General Public License version 4.19 + * 2 along with this work; if not, write to the Free Software Foundation, 4.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 4.21 + * 4.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 4.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 4.24 + * have any questions. 4.25 + */ 4.26 + 4.27 +/** 4.28 + * @test 4.29 + * @bug 6636138 4.30 + * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation. 4.31 + * 4.32 + * @run main/othervm -server -Xbatch -XX:CompileOnly=Test2.shift -XX:+UseSuperword Test2 4.33 + */ 4.34 + 4.35 +class Test2 { 4.36 + 4.37 + public static void init(int src[]) { 4.38 + // Initialize the array 4.39 + for (int i = 0; i < src.length; i++) 4.40 + src[i] = i; 4.41 + } 4.42 + 4.43 + public static void shift(int src[]) { 4.44 + //left-shift the array 4.45 + for (int i = src.length-1; i > 0; i--){ 4.46 + int tmp = src[i]; 4.47 + src[i] = src[i-1]; 4.48 + src[i-1] = tmp; 4.49 + } 4.50 + } 4.51 + 4.52 + public static void verify(int src[]) { 4.53 + for (int i = 0; i < src.length; i++){ 4.54 + int value = (i-1 + src.length)%src.length; // correct value after shifting 4.55 + if (src[i] != value) { 4.56 + System.out.println("Error: src["+i+"] should be "+ value + " instead of " + src[i]); 4.57 + System.exit(-1); 4.58 + } 4.59 + } 4.60 + } 4.61 + 4.62 + public static void test() { 4.63 + int[] src = new int[10]; 4.64 + init(src); 4.65 + shift(src); 4.66 + verify(src); 4.67 + } 4.68 + 4.69 + public static void main(String[] args) { 4.70 + for (int i=0; i< 2000; i++) 4.71 + test(); 4.72 + } 4.73 +}