         vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
       } else if (n->req() == 3) {
         // Promote operands to vector
         Node* in1 = vector_opd(p, 1);
         Node* in2 = vector_opd(p, 2);
+        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+          // Move invariant vector input into second position to avoid register spilling.
+          Node* tmp = in1;
+          in1 = in2;
+          in2 = tmp;
+        }
         vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
       } else {
         ShouldNotReachHere();
       }
       assert(vn != NULL, "sanity");
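For context on the swap above: on two-address SIMD ISAs the instruction's first input register is overwritten by the result, so if the loop-invariant vector sits in that slot it has to be copied (or spilled and reloaded) on every iteration. A minimal standalone sketch of the canonicalization, with illustrative names (Node and canonicalize_inputs are stand-ins, not the HotSpot API):

    #include <utility>

    struct Node { bool loop_invariant; };

    // For a commutative op (Add/Mul), move a loop-invariant input into the
    // second position so the loop-varying input occupies the destructive
    // first slot of a two-address vector instruction. Mirroring the patch,
    // only the first input is tested.
    static void canonicalize_inputs(Node*& in1, Node*& in2) {
      if (in1->loop_invariant) {
        std::swap(in1, in2);
      }
    }
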
   }
 
   if (same_opd) {
     if (opd->is_Vector() || opd->is_LoadVector()) {
       return opd; // input is matching vector
+    }
+    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+      // No vector is needed for the shift count.
+      // Vector instructions do not mask the shift count, so do it here.
+      Compile* C = _phase->C;
+      Node* cnt = opd;
+      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+      const TypeInt* t = opd->find_int_type();
+      if (t != NULL && t->is_con()) {
+        juint shift = t->get_con();
+        if (shift > mask) { // Unsigned cmp
+          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+        }
+      } else {
+        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+          cnt = ConNode::make(C, TypeInt::make(mask));
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          cnt = new (C, 3) AndINode(opd, cnt);
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+        }
+        assert(opd->bottom_type()->isa_int(), "int type only");
+        // Move the non-constant shift count into an XMM register.
+        cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+      }
+      if (cnt != opd) {
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      return cnt;
     }
     assert(!opd->is_StoreVector(), "such vector is not expected here");
     // Convert the scalar input to a vector with the same number of elements as
     // p0's vector. Use p0's type because the size of the operand's container in
     // the vector should match p0's size regardless of the operand's size.
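The rule being enforced here is Java's shift semantics: scalar int shifts use only the low 5 bits of the count and long shifts the low 6, while typical SIMD shift instructions shift the whole lane out once the count reaches the lane width. So a constant count is masked at compile time, and an unknown count gets an explicit AndI before being moved (via MoveI2F) into an XMM register, where x86 vector shifts expect it. A minimal self-contained sketch of the masking (plain C++, names are illustrative):

    #include <cassert>
    typedef unsigned int juint;

    // Java takes the shift count modulo the operand width (JLS 15.19).
    static juint java_shift_count(juint cnt, bool long_shift) {
      juint mask = long_shift ? 63 : 31;  // BitsPerLong-1 : BitsPerInt-1
      return cnt & mask;
    }

    int main() {
      assert(java_shift_count(33, false) == 1);  // (int)x << 33 == x << 1
      assert(java_shift_count(65, true)  == 1);  // (long)x << 65 == x << 1
      return 0;
    }
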
   // Propagate narrowed type backwards through operations
   // that don't depend on higher-order bits
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vt = velt_type(n);
+    if (vt->basic_type() == T_INT) {
       uint start, end;
       vector_opd_range(n, &start, &end);
       const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI: case Op_AddL:
-        case Op_SubI: case Op_SubL:
-        case Op_MulI: case Op_MulL:
-        case Op_AndI: case Op_AndL:
-        case Op_OrI:  case Op_OrL:
-        case Op_XorI: case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI: case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || !same_velt_type(use, n)) {
-                same_type = false;
-                break;
-              }
-            }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
-          }
-        }
+        // Don't propagate through a memory node
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
+            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
+          }
+        }
       }
     }
   }
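The narrowing test above is sound because for the operations being vectorized the low bits of the result depend only on the low bits of the inputs, so once every use of a value is, say, 16 bits wide, the producing op can run in 16-bit lanes. A worked example of that bit-level fact (plain C++, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For  sa[i] = (short)(sb[i] + sc[i])  only 16 result bits survive the
      // store, and those bits are the same whether the add is done in 32 bits
      // or in a 16-bit vector lane.
      uint16_t b = 0xFFF0, c = 0x0123;
      uint32_t wide = (uint32_t)b + (uint32_t)c;  // scalar int add
      uint16_t lane = (uint16_t)(b + c);          // 16-bit lane add (wraps)
      assert((uint16_t)wide == lane);
      return 0;
    }
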
   if (n->is_Mem()) {
     return Type::get_const_basic_type(n->as_Mem()->memory_type());
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL))  return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE))  return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR))  return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // The narrow type of an arithmetic operation is determined by
+    // propagating the types of memory operations.
     return TypeInt::INT;
   }
   return t;
 }
 
   // Given:
   //     lim0 == original pre-loop limit
   //     V == v_align (power of 2)
   //     invar == extra invariant piece of the address expression
-  //     e == k [ +/- invar ]
+  //     e == offset [ +/- invar ]
   //
   // When reassociating expressions involving '%' the basic rules are:
   //     (a - b) % k == 0   =>  a % k == b % k
   // and:
   //     (a + b) % k == 0   =>  a % k == (k - b) % k
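Both rules are easy to check exhaustively for a small power-of-two k; note the right-hand side of the second rule is written below with b % k so the subtraction stays in [0, k) (a quick verification sketch, not part of the patch):

    #include <cassert>

    int main() {
      const int k = 4;  // any power of 2 behaves the same way
      for (int a = 0; a < 64; a++) {
        for (int b = 0; b < 64; b++) {
          if ((a - b) % k == 0) assert(a % k == b % k);
          if ((a + b) % k == 0) assert(a % k == (k - b % k) % k);
        }
      }
      return 0;
    }
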
   int stride   = iv_stride();
   int scale    = align_to_ref_p.scale_in_bytes();
   int elt_size = align_to_ref_p.memory_size();
   int v_align  = vw / elt_size;
   assert(v_align > 1, "sanity");
-  int k = align_to_ref_p.offset_in_bytes() / elt_size;
-
-  Node *kn = _igvn.intcon(k);
-
-  Node *e = kn;
+  int offset   = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn  = _igvn.intcon(offset);
+
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref     = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
     _phase->set_ctrl(aref, pre_ctrl);
     if (align_to_ref_p.negate_invar()) {
     _phase->_igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
-    Node* mask  = _igvn.MakeConX(~(-1 << exact_log2(vw)));
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
     _phase->_igvn.register_new_node_with_optimizer(xbase);
-    Node* masked_xbase = new (_phase->C, 3) AndXNode(xbase, mask);
+#ifdef _LP64
+    xbase = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase = new (_phase->C, 3) AndINode(xbase, mask);
     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#ifdef _LP64
-    masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#endif
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(bref);
     _phase->set_ctrl(bref, pre_ctrl);
     e = new (_phase->C, 3) AddINode(e, bref);
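Taken together, e accumulates the element-granular misalignment of the address: the constant offset, the invariant piece scaled down by the element size, and, when vw exceeds the guaranteed object alignment, the low bits of the base address itself. A hedged plain-C++ sketch of the quantity being built, assuming scale == elt_size, a power-of-two vw, and the '+' invariant case (misalignment_in_elements is an illustrative name, not HotSpot code):

    #include <cstdint>

    // Misalignment, in elements, of  base + offset_bytes + invar_bytes.
    // The pre-loop trip count is then chosen so that adding it to e yields
    // a multiple of v_align, i.e. a vw-byte aligned main-loop access.
    static int misalignment_in_elements(uintptr_t base, int offset_bytes,
                                        int invar_bytes, int elt_size,
                                        int vw, int obj_align) {
      int e = offset_bytes / elt_size;                        // offset
      e += invar_bytes / elt_size;                            // invar >>> log2(elt)
      if (vw > obj_align) {                                   // base not known aligned
        e += (int)((base & (uintptr_t)(vw - 1)) / elt_size);  // (base & Mask) >>> log2(elt)
      }
      int v_align = vw / elt_size;                            // elements per vector
      return e % v_align;                                     // 0 == already aligned
    }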