220 } |
220 } |
221 |
221 |
222 // Create initial pack pairs of memory operations for which |
222 // Create initial pack pairs of memory operations for which |
223 // alignment is set and vectors will be aligned. |
223 // alignment is set and vectors will be aligned. |
224 bool create_pack = true; |
224 bool create_pack = true; |
225 if (memory_alignment(mem_ref, best_iv_adjustment) != 0) { |
225 if (memory_alignment(mem_ref, best_iv_adjustment) == 0) { |
|
226 if (!Matcher::misaligned_vectors_ok()) { |
|
227 int vw = vector_width(mem_ref); |
|
228 int vw_best = vector_width(best_align_to_mem_ref); |
|
229 if (vw > vw_best) { |
|
230 // Do not vectorize a memory access with more elements per vector |
|
231 // if unaligned memory access is not allowed because number of |
|
232 // iterations in pre-loop will be not enough to align it. |
|
233 create_pack = false; |
|
234 } |
|
235 } |
|
236 } else { |
226 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { |
237 if (same_velt_type(mem_ref, best_align_to_mem_ref)) { |
227 // Can't allow vectorization of unaligned memory accesses with the |
238 // Can't allow vectorization of unaligned memory accesses with the |
228 // same type since it could be overlapped accesses to the same array. |
239 // same type since it could be overlapped accesses to the same array. |
229 create_pack = false; |
240 create_pack = false; |
230 } else { |
241 } else { |
355 int min_size = max_jint; |
366 int min_size = max_jint; |
356 int min_iv_offset = max_jint; |
367 int min_iv_offset = max_jint; |
357 for (uint j = 0; j < memops.size(); j++) { |
368 for (uint j = 0; j < memops.size(); j++) { |
358 MemNode* s = memops.at(j)->as_Mem(); |
369 MemNode* s = memops.at(j)->as_Mem(); |
359 if (s->is_Store()) { |
370 if (s->is_Store()) { |
360 int vw = vector_width_in_bytes(velt_basic_type(s)); |
371 int vw = vector_width_in_bytes(s); |
361 assert(vw > 1, "sanity"); |
372 assert(vw > 1, "sanity"); |
362 SWPointer p(s, this); |
373 SWPointer p(s, this); |
363 if (cmp_ct.at(j) > max_ct || |
374 if (cmp_ct.at(j) > max_ct || |
364 cmp_ct.at(j) == max_ct && |
375 cmp_ct.at(j) == max_ct && |
365 (vw > max_vw || |
376 (vw > max_vw || |
378 // If no stores, look at loads |
389 // If no stores, look at loads |
379 if (max_ct == 0) { |
390 if (max_ct == 0) { |
380 for (uint j = 0; j < memops.size(); j++) { |
391 for (uint j = 0; j < memops.size(); j++) { |
381 MemNode* s = memops.at(j)->as_Mem(); |
392 MemNode* s = memops.at(j)->as_Mem(); |
382 if (s->is_Load()) { |
393 if (s->is_Load()) { |
383 int vw = vector_width_in_bytes(velt_basic_type(s)); |
394 int vw = vector_width_in_bytes(s); |
384 assert(vw > 1, "sanity"); |
395 assert(vw > 1, "sanity"); |
385 SWPointer p(s, this); |
396 SWPointer p(s, this); |
386 if (cmp_ct.at(j) > max_ct || |
397 if (cmp_ct.at(j) > max_ct || |
387 cmp_ct.at(j) == max_ct && |
398 cmp_ct.at(j) == max_ct && |
388 (vw > max_vw || |
399 (vw > max_vw || |
438 if (ABS(span) == p.memory_size()) |
449 if (ABS(span) == p.memory_size()) |
439 return true; |
450 return true; |
440 |
451 |
441 // If initial offset from start of object is computable, |
452 // If initial offset from start of object is computable, |
442 // compute alignment within the vector. |
453 // compute alignment within the vector. |
443 BasicType bt = velt_basic_type(p.mem()); |
454 int vw = vector_width_in_bytes(p.mem()); |
444 int vw = vector_width_in_bytes(bt); |
|
445 assert(vw > 1, "sanity"); |
455 assert(vw > 1, "sanity"); |
446 if (vw % span == 0) { |
456 if (vw % span == 0) { |
447 Node* init_nd = pre_end->init_trip(); |
457 Node* init_nd = pre_end->init_trip(); |
448 if (init_nd->is_Con() && p.invar() == NULL) { |
458 if (init_nd->is_Con() && p.invar() == NULL) { |
449 int init = init_nd->bottom_type()->is_int()->get_con(); |
459 int init = init_nd->bottom_type()->is_int()->get_con(); |
466 // Calculate loop's iv adjustment for this memory ops. |
476 // Calculate loop's iv adjustment for this memory ops. |
467 int SuperWord::get_iv_adjustment(MemNode* mem_ref) { |
477 int SuperWord::get_iv_adjustment(MemNode* mem_ref) { |
468 SWPointer align_to_ref_p(mem_ref, this); |
478 SWPointer align_to_ref_p(mem_ref, this); |
469 int offset = align_to_ref_p.offset_in_bytes(); |
479 int offset = align_to_ref_p.offset_in_bytes(); |
470 int scale = align_to_ref_p.scale_in_bytes(); |
480 int scale = align_to_ref_p.scale_in_bytes(); |
471 BasicType bt = velt_basic_type(mem_ref); |
481 int vw = vector_width_in_bytes(mem_ref); |
472 int vw = vector_width_in_bytes(bt); |
|
473 assert(vw > 1, "sanity"); |
482 assert(vw > 1, "sanity"); |
474 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
483 int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; |
475 int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
484 int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; |
476 |
485 |
477 #ifndef PRODUCT |
486 #ifndef PRODUCT |
1359 Node* pm = p->at(j); |
1368 Node* pm = p->at(j); |
1360 _igvn.replace_node(pm, vn); |
1369 _igvn.replace_node(pm, vn); |
1361 } |
1370 } |
1362 _igvn._worklist.push(vn); |
1371 _igvn._worklist.push(vn); |
1363 #ifdef ASSERT |
1372 #ifdef ASSERT |
1364 if (TraceSuperWord) { |
1373 if (TraceNewVectors) { |
1365 tty->print("new Vector node: "); |
1374 tty->print("new Vector node: "); |
1366 vn->dump(); |
1375 vn->dump(); |
1367 } |
1376 } |
1368 #endif |
1377 #endif |
1369 } |
1378 } |
1399 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t); |
1408 VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t); |
1400 |
1409 |
1401 _phase->_igvn.register_new_node_with_optimizer(vn); |
1410 _phase->_igvn.register_new_node_with_optimizer(vn); |
1402 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
1411 _phase->set_ctrl(vn, _phase->get_ctrl(opd)); |
1403 #ifdef ASSERT |
1412 #ifdef ASSERT |
1404 if (TraceSuperWord) { |
1413 if (TraceNewVectors) { |
1405 tty->print("new Vector node: "); |
1414 tty->print("new Vector node: "); |
1406 vn->dump(); |
1415 vn->dump(); |
1407 } |
1416 } |
1408 #endif |
1417 #endif |
1409 return vn; |
1418 return vn; |
1762 int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
1771 int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { |
1763 SWPointer p(s, this); |
1772 SWPointer p(s, this); |
1764 if (!p.valid()) { |
1773 if (!p.valid()) { |
1765 return bottom_align; |
1774 return bottom_align; |
1766 } |
1775 } |
1767 int vw = vector_width_in_bytes(velt_basic_type(s)); |
1776 int vw = vector_width_in_bytes(s); |
1768 if (vw < 2) { |
1777 if (vw < 2) { |
1769 return bottom_align; // No vectors for this type |
1778 return bottom_align; // No vectors for this type |
1770 } |
1779 } |
1771 int offset = p.offset_in_bytes(); |
1780 int offset = p.offset_in_bytes(); |
1772 offset += iv_adjust_in_bytes; |
1781 offset += iv_adjust_in_bytes; |
1976 // Solving for lim: |
1985 // Solving for lim: |
1977 // (e - lim0 + N) % V == 0 |
1986 // (e - lim0 + N) % V == 0 |
1978 // N = (V - (e - lim0)) % V |
1987 // N = (V - (e - lim0)) % V |
1979 // lim = lim0 - (V - (e - lim0)) % V |
1988 // lim = lim0 - (V - (e - lim0)) % V |
1980 |
1989 |
1981 int vw = vector_width_in_bytes(velt_basic_type(align_to_ref)); |
1990 int vw = vector_width_in_bytes(align_to_ref); |
1982 assert(vw > 1, "sanity"); |
|
1983 int stride = iv_stride(); |
1991 int stride = iv_stride(); |
1984 int scale = align_to_ref_p.scale_in_bytes(); |
1992 int scale = align_to_ref_p.scale_in_bytes(); |
1985 int elt_size = align_to_ref_p.memory_size(); |
1993 int elt_size = align_to_ref_p.memory_size(); |
1986 int v_align = vw / elt_size; |
1994 int v_align = vw / elt_size; |
|
1995 assert(v_align > 1, "sanity"); |
1987 int k = align_to_ref_p.offset_in_bytes() / elt_size; |
1996 int k = align_to_ref_p.offset_in_bytes() / elt_size; |
1988 |
1997 |
1989 Node *kn = _igvn.intcon(k); |
1998 Node *kn = _igvn.intcon(k); |
1990 |
1999 |
1991 Node *e = kn; |
2000 Node *e = kn; |