src/cpu/sparc/vm/stubGenerator_sparc.cpp

changeset 2118
d6f45b55c972
parent 2010
e7ec8cd4dd8a
child 2137
f353275af40e
     1.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 20 09:55:50 2010 -0700
     1.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 27 17:33:49 2010 -0700
     1.3 @@ -1588,6 +1588,185 @@
     1.4    }
     1.5  
     1.6    //
     1.7 +  //  Generate stub for array fill (byte, short or int elements).  If
     1.8 +  //  "aligned" is true, the "to" address is assumed to be heapword aligned.
     1.9 +  //
    1.10 +  // Arguments for generated stub:
    1.11 +  //      to:    O0
    1.12 +  //      value: O1
    1.13 +  //      count: O2 treated as signed
    1.14 +  //
    1.15 +  address generate_fill(BasicType t, bool aligned, const char* name) {
    1.16 +    // Emits a stub that stores 'value' into 'count' consecutive elements of
    1.17 +    // type t (T_BYTE, T_SHORT or T_INT) starting at address 'to'.  The fill
    1.18 +    // pattern is widened to 64 bits so the bulk of the work is done with stx;
    1.19 +    // leading and trailing pieces use narrower stores.
    1.20 +    __ align(CodeEntryAlignment);
    1.21 +    StubCodeMark mark(this, "StubRoutines", name);
    1.22 +    address start = __ pc();
    1.23 +
    1.24 +    const Register to        = O0;   // destination array address
    1.25 +    const Register value     = O1;   // fill value
    1.26 +    const Register count     = O2;   // elements count
    1.27 +    // O3 is used as a temp register
    1.28 +
    1.29 +    assert_clean_int(count, O3);     // Make sure 'count' is clean int.
    1.30 +
    1.31 +    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    1.32 +    Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
    1.33 +
    1.34 +    // (1 << shift) elements make up one 4-byte word.
    1.35 +    int shift = -1;
    1.36 +    switch (t) {
    1.37 +      case T_BYTE:
    1.38 +        shift = 2;
    1.39 +        break;
    1.40 +      case T_SHORT:
    1.41 +        shift = 1;
    1.42 +        break;
    1.43 +      case T_INT:
    1.44 +        shift = 0;
    1.45 +        break;
    1.46 +      default: ShouldNotReachHere();
    1.47 +    }
    1.48 +
    1.49 +    BLOCK_COMMENT("Entry:");
    1.50 +
    1.51 +    if (t == T_BYTE) {
    1.52 +      // Zero extend value, then duplicate the byte into bits 8..15
    1.53 +      __ and3(value, 0xff, value);
    1.54 +      __ sllx(value, 8, O3);
    1.55 +      __ or3(value, O3, value);
    1.56 +    }
    1.57 +    if (t == T_SHORT) {
    1.58 +      // Zero extend value
    1.59 +      __ sethi(0xffff0000, O3);
    1.60 +      __ andn(value, O3, value);
    1.61 +    }
    1.62 +    if (t == T_BYTE || t == T_SHORT) {
    1.63 +      // Duplicate the low 16 bits into bits 16..31
    1.64 +      __ sllx(value, 16, O3);
    1.65 +      __ or3(value, O3, value);
    1.66 +    }
    1.67 +
    1.68 +    // Short arrays (< 8 bytes) are filled element by element in dedicated code
    1.69 +    // after L_exit.  They must not enter the tail code below: that code uses
    1.70 +    // stw/sth and relies on 'to' having been aligned first, which the short
    1.71 +    // path skips.  The delay slot tests bit 0 of count for L_fill_elements.
    1.72 +    __ cmp(count, 2<<shift);
    1.73 +    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
    1.74 +    __ delayed()->andcc(count, 1, G0);
    1.75 +
    1.76 +    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
    1.77 +      // align destination address at 4 bytes address boundary
    1.78 +      if (t == T_BYTE) {
    1.79 +        // One byte misalignment happens only for byte arrays
    1.80 +        __ andcc(to, 1, G0);
    1.81 +        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
    1.82 +        __ delayed()->nop();
    1.83 +        __ stb(value, to, 0);
    1.84 +        __ inc(to, 1);
    1.85 +        __ dec(count, 1);
    1.86 +        __ BIND(L_skip_align1);
    1.87 +      }
    1.88 +      // Two bytes misalignment happens only for byte and short (char) arrays
    1.89 +      __ andcc(to, 2, G0);
    1.90 +      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
    1.91 +      __ delayed()->nop();
    1.92 +      __ sth(value, to, 0);
    1.93 +      __ inc(to, 2);
    1.94 +      __ dec(count, 1 << (shift - 1));
    1.95 +      __ BIND(L_skip_align2);
    1.96 +    }
    1.97 +#ifdef _LP64
    1.98 +    if (!aligned) {
    1.99 +#endif
   1.100 +    // align to 8 bytes, we know we are 4 byte aligned to start
   1.101 +    __ andcc(to, 7, G0);
   1.102 +    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
   1.103 +    __ delayed()->nop();
   1.104 +    __ stw(value, to, 0);
   1.105 +    __ inc(to, 4);
   1.106 +    __ dec(count, 1 << shift);
   1.107 +    __ BIND(L_fill_32_bytes);
   1.108 +#ifdef _LP64
   1.109 +    }
   1.110 +#endif
   1.111 +
   1.112 +    // Widen the pattern to 64 bits here, ahead of the 32-byte check, because
   1.113 +    // the 8-byte loop below also stores it with stx and is reached directly
   1.114 +    // when fewer than 32 bytes remain.
   1.115 +    if (t == T_INT) {
   1.116 +      // Zero extend value
   1.117 +      __ srl(value, 0, value);
   1.118 +    }
   1.119 +    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
   1.120 +      __ sllx(value, 32, O3);
   1.121 +      __ or3(value, O3, value);
   1.122 +    }
   1.123 +
   1.124 +    Label L_check_fill_8_bytes;
   1.125 +    // Fill 32-byte chunks
   1.126 +    __ subcc(count, 8 << shift, count);
   1.127 +    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
   1.128 +    __ delayed()->nop();
   1.129 +
   1.130 +    Label L_fill_32_bytes_loop;
   1.131 +    __ align(16);
   1.132 +    __ BIND(L_fill_32_bytes_loop);
   1.133 +
   1.134 +    __ stx(value, to, 0);
   1.135 +    __ stx(value, to, 8);
   1.136 +    __ stx(value, to, 16);
   1.137 +    __ stx(value, to, 24);
   1.138 +
   1.139 +    __ subcc(count, 8 << shift, count);
   1.140 +    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
   1.141 +    __ delayed()->add(to, 32, to);
   1.142 +
   1.143 +    __ BIND(L_check_fill_8_bytes);
   1.144 +    __ addcc(count, 8 << shift, count);
   1.145 +    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
   1.146 +    __ delayed()->subcc(count, 1 << (shift + 1), count);
   1.147 +    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
   1.148 +    __ delayed()->andcc(count, 1<<shift, G0);
   1.149 +
   1.150 +    //
   1.151 +    // length is too short, just fill 8 bytes at a time
   1.152 +    //
   1.153 +    Label L_fill_8_bytes_loop;
   1.154 +    __ BIND(L_fill_8_bytes_loop);
   1.155 +    __ stx(value, to, 0);
   1.156 +    __ subcc(count, 1 << (shift + 1), count);
   1.157 +    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
   1.158 +    __ delayed()->add(to, 8, to);
   1.159 +
   1.160 +    // fill trailing 4 bytes
   1.161 +    __ andcc(count, 1<<shift, G0);  // in delay slot of branches
   1.162 +    if (t == T_INT) {
   1.163 +      // Short int fills (0 or 1 element) share the 4-byte tail: for T_INT the
   1.164 +      // entry delay slot tested the same bit (1 << shift == 1), and this code
   1.165 +      // already assumes 'to' is 4-byte aligned for int fills.
   1.166 +      __ BIND(L_fill_elements);
   1.167 +    }
   1.168 +    __ BIND(L_fill_4_bytes);
   1.169 +    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
   1.170 +    if (t == T_BYTE || t == T_SHORT) {
   1.171 +      __ delayed()->andcc(count, 1<<(shift-1), G0);
   1.172 +    } else {
   1.173 +      __ delayed()->nop();
   1.174 +    }
   1.175 +    __ stw(value, to, 0);
   1.176 +    if (t == T_BYTE || t == T_SHORT) {
   1.177 +      __ inc(to, 4);
   1.178 +      // fill trailing 2 bytes
   1.179 +      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
   1.180 +      __ BIND(L_fill_2_bytes);
   1.181 +      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
   1.182 +      __ delayed()->andcc(count, 1, count);
   1.183 +      __ sth(value, to, 0);
   1.184 +      if (t == T_BYTE) {
   1.185 +        __ inc(to, 2);
   1.186 +        // fill trailing byte
   1.187 +        __ andcc(count, 1, count);  // in delay slot of branches
   1.188 +        __ BIND(L_fill_byte);
   1.189 +        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
   1.190 +        __ delayed()->nop();
   1.191 +        __ stb(value, to, 0);
   1.192 +      } else {
   1.193 +        __ BIND(L_fill_byte);
   1.194 +      }
   1.195 +    } else {
   1.196 +      __ BIND(L_fill_2_bytes);
   1.197 +    }
   1.198 +    __ BIND(L_exit);
   1.199 +    __ retl();
   1.200 +    __ delayed()->mov(G0, O0); // return 0
   1.201 +
   1.202 +    // Short fills (< 8 bytes) for byte and short elements.  'to' has not been
   1.203 +    // aligned on this path, so only element-sized stores are used.  Each
   1.204 +    // branch consumes the condition codes set in the preceding delay slot.
   1.205 +    if (t == T_BYTE) {
   1.206 +      Label L_fill_2, L_fill_4;
   1.207 +      __ BIND(L_fill_elements);
   1.208 +      // in delay slot of the entry branch: andcc(count, 1, G0)
   1.209 +      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
   1.210 +      __ delayed()->andcc(count, 2, G0);
   1.211 +      __ stb(value, to, 0);
   1.212 +      __ inc(to, 1);
   1.213 +      __ BIND(L_fill_2);
   1.214 +      __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
   1.215 +      __ delayed()->andcc(count, 4, G0);
   1.216 +      __ stb(value, to, 0);
   1.217 +      __ stb(value, to, 1);
   1.218 +      __ inc(to, 2);
   1.219 +      __ BIND(L_fill_4);
   1.220 +      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
   1.221 +      __ delayed()->nop();
   1.222 +      __ stb(value, to, 0);
   1.223 +      __ stb(value, to, 1);
   1.224 +      __ stb(value, to, 2);
   1.225 +      __ stb(value, to, 3);
   1.226 +      __ retl();
   1.227 +      __ delayed()->mov(G0, O0); // return 0
   1.228 +    }
   1.229 +    if (t == T_SHORT) {
   1.230 +      Label L_fill_2;
   1.231 +      __ BIND(L_fill_elements);
   1.232 +      // in delay slot of the entry branch: andcc(count, 1, G0)
   1.233 +      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
   1.234 +      __ delayed()->andcc(count, 2, G0);
   1.235 +      __ sth(value, to, 0);
   1.236 +      __ inc(to, 2);
   1.237 +      __ BIND(L_fill_2);
   1.238 +      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
   1.239 +      __ delayed()->nop();
   1.240 +      __ sth(value, to, 0);
   1.241 +      __ sth(value, to, 2);
   1.242 +      __ retl();
   1.243 +      __ delayed()->mov(G0, O0); // return 0
   1.244 +    }
   1.245 +    return start;
   1.246 +  }
   1.184 +
   1.185 +  //
   1.186    //  Generate stub for conjoint short copy.  If "aligned" is true, the
   1.187    //  "from" and "to" addresses are assumed to be heapword aligned.
   1.188    //
   1.189 @@ -2855,6 +3034,13 @@
   1.190      StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
   1.191      StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
   1.192      StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");
   1.193 +
   1.194 +    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
   1.195 +    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
   1.196 +    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
   1.197 +    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
   1.198 +    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
   1.199 +    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
   1.200    }
   1.201  
   1.202    void generate_initial() {

mercurial