1.1 --- a/src/share/vm/opto/superword.cpp Mon Jun 18 15:17:30 2012 -0700 1.2 +++ b/src/share/vm/opto/superword.cpp Tue Jun 19 15:12:56 2012 -0700 1.3 @@ -222,7 +222,18 @@ 1.4 // Create initial pack pairs of memory operations for which 1.5 // alignment is set and vectors will be aligned. 1.6 bool create_pack = true; 1.7 - if (memory_alignment(mem_ref, best_iv_adjustment) != 0) { 1.8 + if (memory_alignment(mem_ref, best_iv_adjustment) == 0) { 1.9 + if (!Matcher::misaligned_vectors_ok()) { 1.10 + int vw = vector_width(mem_ref); 1.11 + int vw_best = vector_width(best_align_to_mem_ref); 1.12 + if (vw > vw_best) { 1.13 + // Do not vectorize a memory access with more elements per vector 1.14 + // if unaligned memory access is not allowed because number of 1.15 + // iterations in pre-loop will be not enough to align it. 1.16 + create_pack = false; 1.17 + } 1.18 + } 1.19 + } else { 1.20 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { 1.21 // Can't allow vectorization of unaligned memory accesses with the 1.22 // same type since it could be overlapped accesses to the same array. 1.23 @@ -357,7 +368,7 @@ 1.24 for (uint j = 0; j < memops.size(); j++) { 1.25 MemNode* s = memops.at(j)->as_Mem(); 1.26 if (s->is_Store()) { 1.27 - int vw = vector_width_in_bytes(velt_basic_type(s)); 1.28 + int vw = vector_width_in_bytes(s); 1.29 assert(vw > 1, "sanity"); 1.30 SWPointer p(s, this); 1.31 if (cmp_ct.at(j) > max_ct || 1.32 @@ -380,7 +391,7 @@ 1.33 for (uint j = 0; j < memops.size(); j++) { 1.34 MemNode* s = memops.at(j)->as_Mem(); 1.35 if (s->is_Load()) { 1.36 - int vw = vector_width_in_bytes(velt_basic_type(s)); 1.37 + int vw = vector_width_in_bytes(s); 1.38 assert(vw > 1, "sanity"); 1.39 SWPointer p(s, this); 1.40 if (cmp_ct.at(j) > max_ct || 1.41 @@ -440,8 +451,7 @@ 1.42 1.43 // If initial offset from start of object is computable, 1.44 // compute alignment within the vector. 1.45 - BasicType bt = velt_basic_type(p.mem()); 1.46 - int vw = vector_width_in_bytes(bt); 1.47 + int vw = vector_width_in_bytes(p.mem()); 1.48 assert(vw > 1, "sanity"); 1.49 if (vw % span == 0) { 1.50 Node* init_nd = pre_end->init_trip(); 1.51 @@ -468,8 +478,7 @@ 1.52 SWPointer align_to_ref_p(mem_ref, this); 1.53 int offset = align_to_ref_p.offset_in_bytes(); 1.54 int scale = align_to_ref_p.scale_in_bytes(); 1.55 - BasicType bt = velt_basic_type(mem_ref); 1.56 - int vw = vector_width_in_bytes(bt); 1.57 + int vw = vector_width_in_bytes(mem_ref); 1.58 assert(vw > 1, "sanity"); 1.59 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; 1.60 int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; 1.61 @@ -1361,7 +1370,7 @@ 1.62 } 1.63 _igvn._worklist.push(vn); 1.64 #ifdef ASSERT 1.65 - if (TraceSuperWord) { 1.66 + if (TraceNewVectors) { 1.67 tty->print("new Vector node: "); 1.68 vn->dump(); 1.69 } 1.70 @@ -1401,7 +1410,7 @@ 1.71 _phase->_igvn.register_new_node_with_optimizer(vn); 1.72 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); 1.73 #ifdef ASSERT 1.74 - if (TraceSuperWord) { 1.75 + if (TraceNewVectors) { 1.76 tty->print("new Vector node: "); 1.77 vn->dump(); 1.78 } 1.79 @@ -1424,8 +1433,8 @@ 1.80 _phase->_igvn.register_new_node_with_optimizer(pk); 1.81 _phase->set_ctrl(pk, _phase->get_ctrl(opd)); 1.82 #ifdef ASSERT 1.83 - if (TraceSuperWord) { 1.84 - tty->print("new Pack node: "); 1.85 + if (TraceNewVectors) { 1.86 + tty->print("new Vector node: "); 1.87 pk->dump(); 1.88 } 1.89 #endif 1.90 @@ -1764,7 +1773,7 @@ 1.91 if (!p.valid()) { 1.92 return bottom_align; 1.93 } 1.94 - int vw = vector_width_in_bytes(velt_basic_type(s)); 1.95 + int vw = vector_width_in_bytes(s); 1.96 if (vw < 2) { 1.97 return bottom_align; // No vectors for this type 1.98 } 1.99 @@ -1978,12 +1987,12 @@ 1.100 // N = (V - (e - lim0)) % V 1.101 // lim = lim0 - (V - (e - lim0)) % V 1.102 1.103 - int vw = vector_width_in_bytes(velt_basic_type(align_to_ref)); 1.104 - assert(vw > 1, "sanity"); 1.105 + int vw = vector_width_in_bytes(align_to_ref); 1.106 int stride = iv_stride(); 1.107 int scale = align_to_ref_p.scale_in_bytes(); 1.108 int elt_size = align_to_ref_p.memory_size(); 1.109 int v_align = vw / elt_size; 1.110 + assert(v_align > 1, "sanity"); 1.111 int k = align_to_ref_p.offset_in_bytes() / elt_size; 1.112 1.113 Node *kn = _igvn.intcon(k);