@@ -1086,27 +1086,26 @@
 bool SuperWord::profitable(Node_List* p) {
   Node* p0 = p->at(0);
   uint start, end;
   VectorNode::vector_operands(p0, &start, &end);
 
-  // Return false if some input is not vector and inside block
+  // Return false if some inputs are not vectors or vectors with different
+  // size or alignment.
+  // Also, for now, return false if not scalar promotion case when inputs are
+  // the same. Later, implement PackNode and allow differing, non-vector inputs
+  // (maybe just the ones from outside the block.)
   for (uint i = start; i < end; i++) {
-    if (!is_vector_use(p0, i)) {
-      // For now, return false if not scalar promotion case (inputs are the same.)
-      // Later, implement PackNode and allow differing, non-vector inputs
-      // (maybe just the ones from outside the block.)
-      if (!same_inputs(p, i)) {
-        return false;
-      }
-    }
+    if (!is_vector_use(p0, i))
+      return false;
   }
   if (VectorNode::is_shift(p0)) {
-    // For now, return false if shift count is vector because
-    // hw does not support it.
-    if (is_vector_use(p0, 2))
+    // For now, return false if shift count is vector or not scalar promotion
+    // case (different shift counts) because it is not supported yet.
+    Node* cnt = p0->in(2);
+    Node_List* cnt_pk = my_pack(cnt);
+    if (cnt_pk != NULL)
       return false;
-    // For the same reason return false if different shift counts.
     if (!same_inputs(p, 2))
       return false;
   }
   if (!p0->is_Store()) {
     // For now, return false if not all uses are vector.
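
(Review note: a minimal sketch of the semantics same_inputs(p, idx) is assumed to have, judging from its uses above: a pack passes the check only if every member sees the identical def at operand position idx, i.e. one scalar that can be broadcast to all lanes. This is a hypothetical restatement for the reader, not the patch's code.)

// Sketch (assumed semantics): for pack p, are all idx operands the same node?
static bool pack_same_inputs(Node_List* p, int idx) {
  Node* p0_def = p->at(0)->in(idx);
  for (uint i = 1; i < p->size(); i++) {
    if (p->at(i)->in(idx) != p0_def)
      return false;  // differing defs: no single scalar to broadcast
  }
  return true;       // one scalar def shared by all pack members
}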
@@ -1449,21 +1448,21 @@
           cnt = ConNode::make(C, TypeInt::make(shift & mask));
         }
       } else {
         if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
           cnt = ConNode::make(C, TypeInt::make(mask));
-          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _igvn.register_new_node_with_optimizer(cnt);
           cnt = new (C, 3) AndINode(opd, cnt);
-          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _igvn.register_new_node_with_optimizer(cnt);
           _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
         }
         assert(opd->bottom_type()->isa_int(), "int type only");
         // Move non constant shift count into XMM register.
         cnt = new (C, 2) MoveI2FNode(cnt);
       }
       if (cnt != opd) {
-        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _igvn.register_new_node_with_optimizer(cnt);
         _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
       }
       return cnt;
     }
     assert(!opd->is_StoreVector(), "such vector is not expected here");
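
(Review note: the masking in this hunk mirrors Java's shift semantics, where only the low five bits of an int shift count and the low six bits of a long shift count are used; vector shift instructions do not perform this masking in hardware, hence the explicit AndINode before the count is moved to an XMM register via MoveI2F. A standalone illustration of that semantics, not HotSpot code:)

#include <cstdint>
#include <cassert>

// Java masks shift counts: (x << n) uses n & 31 for int and n & 63 for long,
// which is what AndINode(opd, mask) above reproduces for vector shifts.
static int32_t java_ishl(int32_t x, int32_t n) { return x << (n & 31); }
static int64_t java_lshl(int64_t x, int32_t n) { return x << (n & 63); }

int main() {
  assert(java_ishl(1, 33) == 2);  // 33 & 31 == 1
  assert(java_lshl(1, 65) == 2);  // 65 & 63 == 1
  return 0;
}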
@@ -2021,35 +2020,35 @@
   Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
     // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
-    _phase->_igvn.register_new_node_with_optimizer(aref);
+    _igvn.register_new_node_with_optimizer(aref);
     _phase->set_ctrl(aref, pre_ctrl);
     if (align_to_ref_p.negate_invar()) {
       e = new (_phase->C, 3) SubINode(e, aref);
     } else {
       e = new (_phase->C, 3) AddINode(e, aref);
     }
-    _phase->_igvn.register_new_node_with_optimizer(e);
+    _igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
-    _phase->_igvn.register_new_node_with_optimizer(xbase);
+    _igvn.register_new_node_with_optimizer(xbase);
 #ifdef _LP64
     xbase = new (_phase->C, 2) ConvL2INode(xbase);
-    _phase->_igvn.register_new_node_with_optimizer(xbase);
+    _igvn.register_new_node_with_optimizer(xbase);
 #endif
     Node* mask = _igvn.intcon(vw-1);
     Node* masked_xbase = new (_phase->C, 3) AndINode(xbase, mask);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
+    _igvn.register_new_node_with_optimizer(masked_xbase);
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
-    _phase->_igvn.register_new_node_with_optimizer(bref);
+    _igvn.register_new_node_with_optimizer(bref);
     _phase->set_ctrl(bref, pre_ctrl);
     e = new (_phase->C, 3) AddINode(e, bref);
-    _phase->_igvn.register_new_node_with_optimizer(e);
+    _igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
 
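(Review note: the vw > ObjectAlignmentInBytes branch exists because the object base is then only guaranteed ObjectAlignmentInBytes alignment, so the base's own misalignment, measured in elements, has to be folded into e. A hedged sketch of what the CastP2X/AndI/URShiftI chain above computes, with illustrative values:)

#include <cstdint>

// Assumed reading of the xbase/bref nodes above: the base address's
// misalignment relative to the vector width, expressed in elements.
static int base_misalign_elems(uintptr_t base, int vw, int log2_elt) {
  return (int)((base & (uintptr_t)(vw - 1)) >> log2_elt); // (base & (vw-1)) >>> log2(elt)
}
// e.g. base = 0x1008, vw = 16, log2_elt = 2: (0x1008 & 15) >> 2 == 2 elements.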
@@ -2056,41 +2055,41 @@
   // compute e +/- lim0
   if (scale < 0) {
     e = new (_phase->C, 3) SubINode(e, lim0);
   } else {
     e = new (_phase->C, 3) AddINode(e, lim0);
   }
-  _phase->_igvn.register_new_node_with_optimizer(e);
+  _igvn.register_new_node_with_optimizer(e);
   _phase->set_ctrl(e, pre_ctrl);
 
   if (stride * scale > 0) {
     // compute V - (e +/- lim0)
     Node* va = _igvn.intcon(v_align);
     e = new (_phase->C, 3) SubINode(va, e);
-    _phase->_igvn.register_new_node_with_optimizer(e);
+    _igvn.register_new_node_with_optimizer(e);
     _phase->set_ctrl(e, pre_ctrl);
   }
   // compute N = (exp) % V
   Node* va_msk = _igvn.intcon(v_align - 1);
   Node* N = new (_phase->C, 3) AndINode(e, va_msk);
-  _phase->_igvn.register_new_node_with_optimizer(N);
+  _igvn.register_new_node_with_optimizer(N);
   _phase->set_ctrl(N, pre_ctrl);
 
   // substitute back into (1), so that new limit
   //   lim = lim0 + N
   Node* lim;
   if (stride < 0) {
     lim = new (_phase->C, 3) SubINode(lim0, N);
   } else {
     lim = new (_phase->C, 3) AddINode(lim0, N);
   }
-  _phase->_igvn.register_new_node_with_optimizer(lim);
+  _igvn.register_new_node_with_optimizer(lim);
   _phase->set_ctrl(lim, pre_ctrl);
   Node* constrained =
     (stride > 0) ? (Node*) new (_phase->C,3) MinINode(lim, orig_limit)
                  : (Node*) new (_phase->C,3) MaxINode(lim, orig_limit);
-  _phase->_igvn.register_new_node_with_optimizer(constrained);
+  _igvn.register_new_node_with_optimizer(constrained);
   _phase->set_ctrl(constrained, pre_ctrl);
   _igvn.hash_delete(pre_opaq);
   pre_opaq->set_req(1, constrained);
 }
 
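(Review note: a worked instance of the limit adjustment above, with assumed values for int elements and positive stride and scale, so the stride * scale > 0 and stride > 0 paths are taken; values are illustrative only, not from the patch:)

#include <cstdio>

int main() {
  int lim0 = 3;     // original pre-loop limit
  int e = 2;        // ref offset in elements, after the adjustments above
  int v_align = 4;  // vector width in elements, e.g. 16 bytes of ints
  int eplus = e + lim0;                       // e +/- lim0 (AddINode path)
  int N = (v_align - eplus) & (v_align - 1);  // N = (exp) % V, V a power of two
  int lim = lim0 + N;                         // stride > 0: lim = lim0 + N
  printf("lim = %d, (e + lim) %% V = %d\n", lim, (e + lim) % v_align);
  // prints "lim = 6, (e + lim) % V = 0": the main loop starts on an
  // alignment boundary after the pre-loop runs to the adjusted limit.
  return 0;
}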