src/share/vm/opto/superword.cpp

changeset 4001: 006050192a5a
parent    3886: 6f8f439e247d
child     4004: 4b0d6fd74911
--- a/src/share/vm/opto/superword.cpp	Wed Aug 15 16:49:38 2012 -0700
+++ b/src/share/vm/opto/superword.cpp	Mon Aug 20 09:07:21 2012 -0700
@@ -1357,6 +1357,12 @@
         // Promote operands to vector
         Node* in1 = vector_opd(p, 1);
         Node* in2 = vector_opd(p, 2);
+        if (VectorNode::is_invariant_vector(in1) && (n->is_Add() || n->is_Mul())) {
+          // Move invariant vector input into second position to avoid register spilling.
+          Node* tmp = in1;
+          in1 = in2;
+          in2 = tmp;
+        }
         vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n));
       } else {
         ShouldNotReachHere();
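
The hunk above canonicalizes commutative vector operations so that a
loop-invariant operand ends up in the second input position. A minimal
standalone sketch of the idea (an illustration, not HotSpot code; Vec4 and
the invariance flag are assumed names): on two-operand SIMD encodings such
as x86 "paddd xmm0, xmm1" the first input is overwritten in place, so
keeping the loop-varying operand first lets the invariant value stay pinned
in its own register across iterations instead of being spilled and reloaded.

    #include <utility>

    struct Vec4 { int lane[4]; };

    // Mirror of the tmp-swap in the patch: put the invariant operand second.
    inline void canonicalize_commutative(Vec4*& in1, Vec4*& in2,
                                         bool in1_is_invariant) {
      if (in1_is_invariant) {
        std::swap(in1, in2);
      }
    }
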
@@ -1400,6 +1406,36 @@
     if (opd->is_Vector() || opd->is_LoadVector()) {
       return opd; // input is matching vector
     }
+    if ((opd_idx == 2) && VectorNode::is_shift(p0)) {
+      // No vector is needed for the shift count.
+      // Vector instructions do not mask the shift count, so do it here.
+      Compile* C = _phase->C;
+      Node* cnt = opd;
+      juint mask = (p0->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
+      const TypeInt* t = opd->find_int_type();
+      if (t != NULL && t->is_con()) {
+        juint shift = t->get_con();
+        if (shift > mask) { // Unsigned cmp
+          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+        }
+      } else {
+        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
+          cnt = ConNode::make(C, TypeInt::make(mask));
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          cnt = new (C, 3) AndINode(opd, cnt);
+          _phase->_igvn.register_new_node_with_optimizer(cnt);
+          _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+        }
+        assert(opd->bottom_type()->isa_int(), "int type only");
+        // Move the non-constant shift count into an XMM register.
+        cnt = new (_phase->C, 2) MoveI2FNode(cnt);
+      }
+      if (cnt != opd) {
+        _phase->_igvn.register_new_node_with_optimizer(cnt);
+        _phase->set_ctrl(cnt, _phase->get_ctrl(opd));
+      }
+      return cnt;
+    }
     assert(!opd->is_StoreVector(), "such vector is not expected here");
     // Convert scalar input to vector with the same number of elements as
     // p0's vector. Use p0's type because size of operand's container in
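
The shift-count hunk above is needed because Java shift semantics mask the
count (JLS 15.19: only the low five bits are used for int shifts, the low
six for long), while vector shift instructions take the count unmasked.
Constant counts are folded with shift & mask; variable counts get an
explicit AndI and are then moved into an XMM register via MoveI2F so the
vector unit can read the count from there. A small sketch of the scalar
semantics being preserved (an illustration, not HotSpot code):

    #include <cstdint>

    // Java int shift: only the low 5 bits of the count matter
    // (matches mask = BitsPerInt - 1 above).
    inline uint32_t java_ishl(uint32_t x, int32_t count) {
      return x << (count & 31);
    }

    // Java long shift: only the low 6 bits of the count matter
    // (matches mask = BitsPerLong - 1 above).
    inline uint64_t java_lshl(uint64_t x, int32_t count) {
      return x << (count & 63);
    }
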
@@ -1718,37 +1754,27 @@
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    if (n->bottom_type()->isa_int()) {
+    const Type* vt = velt_type(n);
+    if (vt->basic_type() == T_INT) {
       uint start, end;
       vector_opd_range(n, &start, &end);
       const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in  = n->in(j);
-        // Don't propagate through a type conversion
-        if (n->bottom_type() != in->bottom_type())
-          continue;
-        switch(in->Opcode()) {
-        case Op_AddI:    case Op_AddL:
-        case Op_SubI:    case Op_SubL:
-        case Op_MulI:    case Op_MulL:
-        case Op_AndI:    case Op_AndL:
-        case Op_OrI:     case Op_OrL:
-        case Op_XorI:    case Op_XorL:
-        case Op_LShiftI: case Op_LShiftL:
-        case Op_CMoveI:  case Op_CMoveL:
-          if (in_bb(in)) {
-            bool same_type = true;
-            for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
-              Node *use = in->fast_out(k);
-              if (!in_bb(use) || !same_velt_type(use, n)) {
-                same_type = false;
-                break;
-              }
+        // Don't propagate the narrowed type through a memory node
+        if (!in->is_Mem() && in_bb(in) && velt_type(in)->basic_type() == T_INT &&
+            data_size(n) < data_size(in)) {
+          bool same_type = true;
+          for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+            Node *use = in->fast_out(k);
+            if (!in_bb(use) || !same_velt_type(use, n)) {
+              same_type = false;
+              break;
             }
-            if (same_type) {
-              set_velt_type(in, vt);
-            }
+          }
+          if (same_type) {
+            set_velt_type(in, vt);
+          }
           }
         }
       }
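
The rewritten loop above replaces the old per-opcode whitelist with a
general rule: walking the block backwards, a producer takes the narrower
element type of its consumer only when the producer is not a memory node,
its current type is strictly wider (data_size(n) < data_size(in)), and
every use inside the block agrees on that type. A toy single-block model
of the rule (an illustration with assumed names, not HotSpot code):

    #include <vector>

    struct ToyNode {
      int  elem_size;               // bytes per vector lane
      bool is_mem;
      std::vector<ToyNode*> uses;   // in-block uses only, for the sketch
    };

    // Narrow producer 'in' to consumer 'n's element size when safe.
    inline void narrow_input(ToyNode* in, const ToyNode* n) {
      if (in->is_mem || n->elem_size >= in->elem_size) return;
      for (ToyNode* use : in->uses) {
        if (use->elem_size != n->elem_size) return;  // a use disagrees
      }
      in->elem_size = n->elem_size;  // no use needs the high bits
    }
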
@@ -1792,10 +1818,8 @@
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
-    if (t->higher_equal(TypeInt::BOOL))  return TypeInt::BOOL;
-    if (t->higher_equal(TypeInt::BYTE))  return TypeInt::BYTE;
-    if (t->higher_equal(TypeInt::CHAR))  return TypeInt::CHAR;
-    if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+    // A narrowed type for arithmetic operations will be determined by
+    // propagating the type of memory operations.
     return TypeInt::INT;
   }
   return t;
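
The removed higher_equal checks derived a narrow element type from a node's
value range; the replacement keeps arithmetic at T_INT and lets the narrow
type flow back from loads and stores through the propagation loop above.
The difference matters in loops like the following assumed example, where
the value range of the intermediate add (roughly [-256, 254], which would
range-narrow to SHORT) disagrees with the byte element type of the
surrounding memory operations:

    // Assumed example, written as the equivalent C loop: the add is done
    // in int and only the low 8 bits are stored, so the byte element type
    // must come from the a[i]/b[i] memory operations, not the value range.
    void add_bytes(signed char* a, signed char* b, int n) {
      for (int i = 0; i < n; i++) {
        b[i] = (signed char)(a[i] + b[i]);
      }
    }
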
@@ -1940,7 +1964,7 @@
   //     lim0 == original pre loop limit
   //     V == v_align (power of 2)
   //     invar == extra invariant piece of the address expression
-  //     e == k [ +/- invar ]
+  //     e == offset [ +/- invar ]
   //
   // When reassociating expressions involving '%' the basic rules are:
   //     (a - b) % k == 0   =>  a % k == b % k
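
Renaming k to offset in the comment above only clarifies that the constant
term e starts from the memory offset scaled to elements. As a quick worked
instance of the quoted reassociation rule (my numbers, not from the
source), take a = 14, b = 6, k = 4:

    // (a - b) % k == 0  =>  a % k == b % k
    static_assert((14 - 6) % 4 == 0, "premise holds");
    static_assert(14 % 4 == 6 % 4,   "conclusion: both sides equal 2");
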
@@ -1993,13 +2017,12 @@
   int elt_size = align_to_ref_p.memory_size();
   int v_align  = vw / elt_size;
   assert(v_align > 1, "sanity");
-  int k        = align_to_ref_p.offset_in_bytes() / elt_size;
+  int offset   = align_to_ref_p.offset_in_bytes() / elt_size;
+  Node *offsn  = _igvn.intcon(offset);
 
-  Node *kn   = _igvn.intcon(k);
-
-  Node *e = kn;
+  Node *e = offsn;
   if (align_to_ref_p.invar() != NULL) {
-    // incorporate any extra invariant piece producing k +/- invar >>> log2(elt)
+    // incorporate any extra invariant piece producing (offset +/- invar) >>> log2(elt)
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* aref     = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(aref);
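
In the hunk above, the alignment count e starts from the constant offset in
elements (offsn) and, per the "e == offset [ +/- invar ]" comment earlier,
folds in the loop-invariant part of the address scaled from bytes to
elements by the unsigned shift. A scalar restatement of that math (a sketch
with assumed parameter names; the +/- choice mirrors the comment, and its
selection logic lies outside this hunk):

    // offset_elems corresponds to offsn, invar_bytes to the input of the
    // URShiftINode, and log2_elt to exact_log2(elt_size).
    inline int initial_align_count(int offset_elems, unsigned invar_bytes,
                                   int log2_elt, bool negate_invar) {
      int invar_elems = (int)(invar_bytes >> log2_elt);  // bytes -> elements
      return negate_invar ? offset_elems - invar_elems
                          : offset_elems + invar_elems;
    }
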
@@ -2014,15 +2037,15 @@
   }
   if (vw > ObjectAlignmentInBytes) {
     // incorporate base e +/- base && Mask >>> log2(elt)
-    Node* mask = _igvn.MakeConX(~(-1 << exact_log2(vw)));
     Node* xbase = new(_phase->C, 2) CastP2XNode(NULL, align_to_ref_p.base());
     _phase->_igvn.register_new_node_with_optimizer(xbase);
-    Node* masked_xbase  = new (_phase->C, 3) AndXNode(xbase, mask);
+#ifdef _LP64
+    xbase  = new (_phase->C, 2) ConvL2INode(xbase);
+    _phase->_igvn.register_new_node_with_optimizer(xbase);
+#endif
+    Node* mask = _igvn.intcon(vw-1);
+    Node* masked_xbase  = new (_phase->C, 3) AndINode(xbase, mask);
     _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#ifdef _LP64
-    masked_xbase  = new (_phase->C, 2) ConvL2INode(masked_xbase);
-    _phase->_igvn.register_new_node_with_optimizer(masked_xbase);
-#endif
     Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
     Node* bref     = new (_phase->C, 3) URShiftINode(masked_xbase, log2_elt);
     _phase->_igvn.register_new_node_with_optimizer(bref);
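
The final hunk computes the base address's contribution to alignment.
Moving the ConvL2I ahead of the AND lets the mask be the plain 32-bit
constant vw-1 instead of a pointer-width MakeConX constant: vw is a power
of two, so only the low bits of the address matter and truncating first is
safe. A standalone sketch of the resulting arithmetic (an illustration, not
HotSpot code):

    #include <cstdint>

    // CastP2X -> (ConvL2I on LP64) -> AndI with vw-1 -> URShiftI by log2(elt).
    inline int base_align_elems(const void* base, int vw, int log2_elt) {
      uint32_t xbase  = (uint32_t)(uintptr_t)base;   // truncate: low bits only
      uint32_t masked = xbase & (uint32_t)(vw - 1);  // vw is a power of two
      return (int)(masked >> log2_elt);              // bytes -> elements
    }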
