--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 20 09:55:50 2010 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 27 17:33:49 2010 -0700
@@ -1588,6 +1588,244 @@
   }
 
   //
+  // Generate stub for byte, short or int array fill. If "aligned" is true,
+  // the "to" address is assumed to be heapword aligned.
+  //
+  // The fill value is replicated across a 64-bit register before any 8-byte
+  // store is issued, so every store width (stb/sth/stw/stx) writes the pattern.
+  // Byte and short arrays shorter than 8 bytes are filled with element-sized
+  // stores only, because "to" has not been aligned to 4 bytes on that path.
+  //
+  // Arguments for generated stub:
+  //      to:    O0
+  //      value: O1
+  //      count: O2 treated as signed
+  //
+  // The stub returns 0 in O0.
+  //
+  address generate_fill(BasicType t, bool aligned, const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    const Register to = O0; // destination array address
+    const Register value = O1; // fill value
+    const Register count = O2; // elements count
+    // O3 is used as a temp register
+
+    assert_clean_int(count, O3); // Make sure 'count' is clean int.
+
+    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
+    Label L_fill_2_bytes, L_fill_4_bytes, L_fill_elements, L_fill_32_bytes;
+
+    // log2 of the number of elements that fit in 4 bytes
+    int shift = -1;
+    switch (t) {
+      case T_BYTE:
+        shift = 2;
+        break;
+      case T_SHORT:
+        shift = 1;
+        break;
+      case T_INT:
+        shift = 0;
+        break;
+      default: ShouldNotReachHere();
+    }
+
+    BLOCK_COMMENT("Entry:");
+
+    if (t == T_BYTE) {
+      // Zero extend value and replicate the byte into the low 16 bits
+      __ and3(value, 0xff, value);
+      __ sllx(value, 8, O3);
+      __ or3(value, O3, value);
+    }
+    if (t == T_SHORT) {
+      // Zero extend value. Clear all 48 upper bits: masking with a sethi
+      // constant would only clear bits 16..31 and leave a sign extension in
+      // bits 32..63, which would then end up in the 8-byte stores below.
+      __ sllx(value, 48, value);
+      __ srlx(value, 48, value);
+    }
+    if (t == T_BYTE || t == T_SHORT) {
+      // Replicate the low 16 bits into the low 32 bits
+      __ sllx(value, 16, O3);
+      __ or3(value, O3, value);
+    }
+
+    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
+    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
+    __ delayed()->andcc(count, 1, G0);
+
+    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
+      // align destination address at 4 bytes address boundary
+      if (t == T_BYTE) {
+        // One byte misalignment happens only for byte arrays
+        __ andcc(to, 1, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
+        __ delayed()->nop();
+        __ stb(value, to, 0);
+        __ inc(to, 1);
+        __ dec(count, 1);
+        __ BIND(L_skip_align1);
+      }
+      // Two bytes misalignment happens only for byte and short (char) arrays
+      __ andcc(to, 2, G0);
+      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
+      __ delayed()->nop();
+      __ sth(value, to, 0);
+      __ inc(to, 2);
+      __ dec(count, 1 << (shift - 1));
+      __ BIND(L_skip_align2);
+    }
+#ifdef _LP64
+    if (!aligned) {
+#endif
+    // align to 8 bytes, we know we are 4 byte aligned to start
+    __ andcc(to, 7, G0);
+    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
+    __ delayed()->nop();
+    __ stw(value, to, 0);
+    __ inc(to, 4);
+    __ dec(count, 1 << shift);
+    __ BIND(L_fill_32_bytes);
+#ifdef _LP64
+    }
+#endif
+
+    if (t == T_INT) {
+      // Zero extend value
+      __ srl(value, 0, value);
+    }
+    // Replicate the low 32 bits into the high 32 bits. This must be done before
+    // the branch below: the 8-byte loop can be reached without passing through
+    // the 32-byte loop, and it stores the whole 64-bit register as well.
+    __ sllx(value, 32, O3);
+    __ or3(value, O3, value);
+
+    Label L_check_fill_8_bytes;
+    // Fill 32-byte chunks
+    __ subcc(count, 8 << shift, count);
+    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
+    __ delayed()->nop();
+
+    Label L_fill_32_bytes_loop;
+    __ align(16);
+    __ BIND(L_fill_32_bytes_loop);
+
+    __ stx(value, to, 0);
+    __ stx(value, to, 8);
+    __ stx(value, to, 16);
+    __ stx(value, to, 24);
+
+    __ subcc(count, 8 << shift, count);
+    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
+    __ delayed()->add(to, 32, to);
+
+    __ BIND(L_check_fill_8_bytes);
+    __ addcc(count, 8 << shift, count);
+    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
+    __ delayed()->subcc(count, 1 << (shift + 1), count);
+    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
+    __ delayed()->andcc(count, 1<<shift, G0);
+
+    //
+    // length is too short, just fill 8 bytes at a time
+    //
+    Label L_fill_8_bytes_loop;
+    __ BIND(L_fill_8_bytes_loop);
+    __ stx(value, to, 0);
+    __ subcc(count, 1 << (shift + 1), count);
+    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
+    __ delayed()->add(to, 8, to);
+
+    // fill trailing 4 bytes
+    __ andcc(count, 1<<shift, G0); // in delay slot of branches
+    if (t == T_INT) {
+      // An int array is at least 4-byte aligned, so short ones share this path
+      __ BIND(L_fill_elements);
+    }
+    __ BIND(L_fill_4_bytes);
+    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
+    if (t == T_BYTE || t == T_SHORT) {
+      __ delayed()->andcc(count, 1<<(shift-1), G0);
+    } else {
+      __ delayed()->nop();
+    }
+    __ stw(value, to, 0);
+    if (t == T_BYTE || t == T_SHORT) {
+      __ inc(to, 4);
+      // fill trailing 2 bytes
+      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
+      __ BIND(L_fill_2_bytes);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
+      __ delayed()->andcc(count, 1, count);
+      __ sth(value, to, 0);
+      if (t == T_BYTE) {
+        __ inc(to, 2);
+        // fill trailing byte
+        __ andcc(count, 1, count); // in delay slot of branches
+        __ BIND(L_fill_byte);
+        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+        __ delayed()->nop();
+        __ stb(value, to, 0);
+      } else {
+        __ BIND(L_fill_byte);
+      }
+    } else {
+      __ BIND(L_fill_2_bytes);
+    }
+    __ BIND(L_exit);
+    __ retl();
+    __ delayed()->mov(G0, O0); // return 0
+
+    // Handle fills of less than 8 bytes. Int is handled above.
+    if (t == T_BYTE) {
+      __ BIND(L_fill_elements);
+      Label L_fill_2, L_fill_4;
+      // in delay slot __ andcc(count, 1, G0);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
+      __ delayed()->andcc(count, 2, G0);
+      __ stb(value, to, 0);
+      __ inc(to, 1);
+      __ BIND(L_fill_2);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
+      __ delayed()->andcc(count, 4, G0);
+      __ stb(value, to, 0);
+      __ stb(value, to, 1);
+      __ inc(to, 2);
+      __ BIND(L_fill_4);
+      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+      __ delayed()->nop();
+      __ stb(value, to, 0);
+      __ stb(value, to, 1);
+      __ stb(value, to, 2);
+      __ stb(value, to, 3);
+      __ retl();
+      __ delayed()->mov(G0, O0); // return 0
+    }
+
+    if (t == T_SHORT) {
+      Label L_fill_2;
+      __ BIND(L_fill_elements);
+      // in delay slot __ andcc(count, 1, G0);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
+      __ delayed()->andcc(count, 2, G0);
+      __ sth(value, to, 0);
+      __ inc(to, 2);
+      __ BIND(L_fill_2);
+      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+      __ delayed()->nop();
+      __ sth(value, to, 0);
+      __ sth(value, to, 2);
+      __ retl();
+      __ delayed()->mov(G0, O0); // return 0
+    }
+    return start;
+  }
+
+  //
   // Generate stub for conjoint short copy. If "aligned" is true, the
   // "from" and "to" addresses are assumed to be heapword aligned.
   //
@@ -2855,6 +3093,13 @@
     StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
     StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
     StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
+
+    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
   }
 
   void generate_initial() {