--- a/src/share/vm/opto/superword.cpp	Wed Aug 15 16:49:38 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Mon Aug 20 09:07:21 2012 -0700
@@ -1357,6 +1357,12 @@
       // Promote operands to vector
       Node* in1 = vector_opd(p, 1);
       Node* in2 = vector_opd(p, 2);
+      if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+        // Move invariant vector input into second position to avoid register spilling.
+        Node* tmp = in1;
+        in1 = in2;
+        in2 = tmp;
+      }
       vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
     } else {
       ShouldNotReachHere();
@@ -1400,6 +1406,36 @@
   if (opd->is_Vector() || opd->is_LoadVector()) {
     return opd; // input is matching vector
   }
+  if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+    // No vector is needed for shift count.
+    // Vector instructions do not mask shift count, do it here.
+    Compile* C = _phase->C;
+    Node* cnt = opd;
+    juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+    const TypeInt* t = opd->find_int_type();
+    if (t != NULL && t->is_con()) {
+      juint shift = t->get_con();
+      if (shift > mask) { // Unsigned cmp
+        cnt = ConNode::make(C, TypeInt::make(shift & mask));
+      }
+    } else {
+      if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+        cnt = ConNode::make(C, TypeInt::make(mask));
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        cnt = new (C, 3) AndINode(opd, cnt);
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      assert(opd->bottom_type()->isa_int(), "int type only");
+      // Move non constant shift count into XMM register.
+      cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+    }
+    if (cnt != opd) {
+      _phase->_igvn.register_new_node_with_optimizer(cnt);
+      _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+    }
+    return cnt;
+  }
   assert(!opd->is_StoreVector(), "such vector is not expected here");
   // Convert scalar input to vector with the same number of elements as
   // p0's vector. Use p0's type because size of operand's container in
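
The first hunk above leans on commutativity: Add and Mul vectors can have their inputs exchanged freely, and keeping the loop-invariant vector in the second position matters on destructive two-operand SSE encodings, where the destination register overwrites the first source; that is presumably the spilling the comment refers to. A minimal sketch (plain C++, not HotSpot code) of the legality side of the swap:

#include <cassert>

int main() {
  int invar = 7;                   // loop-invariant operand
  int x     = 42;                  // per-iteration operand
  assert(invar + x == x + invar);  // AddI commutes, so the swap is safe
  assert(invar * x == x * invar);  // MulI commutes as well
  return 0;
}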
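The second hunk special-cases the shift-count operand. Java masks an int shift count to its low five bits (six for long), and scalar x86 shifts do the same, but SIMD shift instructions such as PSLLD consume the full count and zero the lane once the count reaches the element width, so an unmasked count would change program behavior. The new code therefore masks a constant count at compile time, AND-masks a non-constant one, and moves it into an XMM register via MoveI2F, since vector shifts take a non-constant count from a vector register. A minimal sketch (plain C++ with hypothetical helper names, not HotSpot code) of the semantic gap being closed:

#include <cassert>
#include <cstdint>

// Scalar semantics (Java, x86 scalar shifts): only the low 5 bits count.
int32_t scalar_shl(int32_t x, uint32_t n) { return x << (n & 31u); }

// Vector-lane semantics (e.g. PSLLD): a count >= 32 shifts everything out.
int32_t lane_shl(int32_t x, uint32_t n) { return n >= 32u ? 0 : x << n; }

int main() {
  assert(scalar_shl(1, 35) == 8);                        // 35 & 31 == 3
  assert(lane_shl(1, 35) == 0);                          // lane would be zeroed
  assert(lane_shl(1, 35u & 31u) == scalar_shl(1, 35));   // masking restores parity
  return 0;
}
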
@@ -1718,37 +1754,27 @@
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vt = velt_type(n);
+    if (vt->basic_type() == T_INT) {
       uint start, end;
       vector_opd_range(n, &start, &end);
-      const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI: case Op_AddL:
-        case Op_SubI: case Op_SubL:
-        case Op_MulI: case Op_MulL:
-        case Op_AndI: case Op_AndL:
-        case Op_OrI:  case Op_OrL:
-        case Op_XorI: case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI: case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || !same_velt_type(use, n)) {
-                same_type = false;
-                break;
-              }
+        // Don't propagate through a memory
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
             }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
           }
         }
       }
@@ -1792,10 +1818,8 @@
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL))  return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE))  return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR))  return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // A narrow type of arithmetic operations will be determined by
+    // propagating the type of memory operations.
     return TypeInt::INT;
   }
   return t;
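
These two hunks change how narrow vector-element types are discovered: instead of refining each int-typed node from its own value range, memory operations now seed the element type, which is propagated backwards through non-memory nodes whose users all agree on it, and only when that actually narrows the operand (data_size(n) < data_size(in)). A minimal sketch (plain C++, not HotSpot code) of the loop shape this targets:

#include <cstdint>

// The multiply is performed in 32-bit int by the language rules, but its
// only use flows into a 16-bit store, so the whole chain can be vectorized
// at short width, packing twice as many lanes per vector register.
void scale(int16_t* a, int n, int16_t c) {
  for (int i = 0; i < n; i++) {
    a[i] = (int16_t)(a[i] * c);
  }
}
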
@@ -1940,7 +1964,7 @@
   //   lim0 == original pre loop limit
   //   V == v_align (power of 2)
   //   invar == extra invariant piece of the address expression
-  //   e == k [ +/- invar ]
+  //   e == offset [ +/- invar ]
   //
   // When reassociating expressions involving '%' the basic rules are:
   //   (a - b) % k == 0  =>  a % k == b % k
@@ -1993,13 +2017,12 @@
   int elt_size = align_to_ref_p.memory_size();
   int v_align = vw / elt_size;
   assert(v_align > 1, "sanity");
-  int k = align_to_ref_p.offset_in_bytes() / elt_size;
+  int offset = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn = _igvn.intcon(offset);
 
-  Node *kn   = _igvn.intcon(k);
-
-  Node *e = kn;
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref     = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
@@ -2014,15 +2037,15 @@
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
-    Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
     _phase->_igvn.register_new_node_with_optimizer(xbase);
-    Node* masked_xbase  = new (_phase->C, 3) AndXNode(xbase, mask);
+#ifdef _LP64
+    xbase  = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase  = new (_phase->C, 3) AndINode(xbase, mask);
     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#ifdef _LP64
-    masked_xbase = new (_phase->C, 2) ConvL2INode(masked_xbase);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#endif
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(bref);
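
The last three hunks are the pre-loop limit computation that aligns the chosen memory reference: e accumulates the constant offset (in elements), any invariant part, and, when the vector width exceeds ObjectAlignmentInBytes, the low bits of the base address. The rewrite renames k to offset and performs the base masking in 32-bit arithmetic (ConvL2I before an AndI with the small constant vw-1) instead of pointer-width arithmetic; both orders keep the same low bits. A minimal numeric sketch (plain C++, example values assumed) showing the two maskings agree:

#include <cassert>
#include <cstdint>

int main() {
  const int       vw   = 16;           // vector width in bytes (power of 2)
  const uintptr_t base = 0x7f001008u;  // example object base address

  // Old shape: AndX(base, ~(-1 << log2(vw))) on the full pointer, then ConvL2I.
  int old_bits = (int)(base & ~(~uintptr_t(0) << 4));
  // New shape: ConvL2I(base) first, then AndI with vw-1.
  int new_bits = (int)base & (vw - 1);
  assert(old_bits == new_bits && new_bits == 0x8);

  // Misalignment in elements, as in "base && Mask >>> log2(elt)":
  const int elt_size = 4;           // e.g. T_INT elements
  assert((new_bits >> 2) == 2);     // 8 bytes == 2 elements off alignment

  // Reassociation rule quoted in the patch: (a - b) % k == 0  =>  a % k == b % k.
  assert((14 - 6) % 4 == 0 && 14 % 4 == 6 % 4);
  return 0;
}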