1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Aug 20 09:55:50 2010 -0700 1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Aug 27 17:33:49 2010 -0700 1.3 @@ -8767,6 +8767,186 @@ 1.4 bind(DONE); 1.5 } 1.6 1.7 +#ifdef PRODUCT 1.8 +#define BLOCK_COMMENT(str) /* nothing */ 1.9 +#else 1.10 +#define BLOCK_COMMENT(str) block_comment(str) 1.11 +#endif 1.12 + 1.13 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 1.14 +void MacroAssembler::generate_fill(BasicType t, bool aligned, 1.15 + Register to, Register value, Register count, 1.16 + Register rtmp, XMMRegister xtmp) { 1.17 + assert_different_registers(to, value, count, rtmp); 1.18 + Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 1.19 + Label L_fill_2_bytes, L_fill_4_bytes; 1.20 + 1.21 + int shift = -1; 1.22 + switch (t) { 1.23 + case T_BYTE: 1.24 + shift = 2; 1.25 + break; 1.26 + case T_SHORT: 1.27 + shift = 1; 1.28 + break; 1.29 + case T_INT: 1.30 + shift = 0; 1.31 + break; 1.32 + default: ShouldNotReachHere(); 1.33 + } 1.34 + 1.35 + if (t == T_BYTE) { 1.36 + andl(value, 0xff); 1.37 + movl(rtmp, value); 1.38 + shll(rtmp, 8); 1.39 + orl(value, rtmp); 1.40 + } 1.41 + if (t == T_SHORT) { 1.42 + andl(value, 0xffff); 1.43 + } 1.44 + if (t == T_BYTE || t == T_SHORT) { 1.45 + movl(rtmp, value); 1.46 + shll(rtmp, 16); 1.47 + orl(value, rtmp); 1.48 + } 1.49 + 1.50 + cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 1.51 + jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp 1.52 + if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { 1.53 + // align source address at 4 bytes address boundary 1.54 + if (t == T_BYTE) { 1.55 + // One byte misalignment happens only for byte arrays 1.56 + testptr(to, 1); 1.57 + jccb(Assembler::zero, L_skip_align1); 1.58 + movb(Address(to, 0), value); 1.59 + increment(to); 1.60 + decrement(count); 1.61 + BIND(L_skip_align1); 1.62 + } 1.63 + // Two bytes misalignment happens only for byte and short (char) arrays 1.64 + testptr(to, 2); 1.65 + jccb(Assembler::zero, L_skip_align2); 1.66 + movw(Address(to, 0), value); 1.67 + addptr(to, 2); 1.68 + subl(count, 1<<(shift-1)); 1.69 + BIND(L_skip_align2); 1.70 + } 1.71 + if (UseSSE < 2) { 1.72 + Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 1.73 + // Fill 32-byte chunks 1.74 + subl(count, 8 << shift); 1.75 + jcc(Assembler::less, L_check_fill_8_bytes); 1.76 + align(16); 1.77 + 1.78 + BIND(L_fill_32_bytes_loop); 1.79 + 1.80 + for (int i = 0; i < 32; i += 4) { 1.81 + movl(Address(to, i), value); 1.82 + } 1.83 + 1.84 + addptr(to, 32); 1.85 + subl(count, 8 << shift); 1.86 + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 1.87 + BIND(L_check_fill_8_bytes); 1.88 + addl(count, 8 << shift); 1.89 + jccb(Assembler::zero, L_exit); 1.90 + jmpb(L_fill_8_bytes); 1.91 + 1.92 + // 1.93 + // length is too short, just fill qwords 1.94 + // 1.95 + BIND(L_fill_8_bytes_loop); 1.96 + movl(Address(to, 0), value); 1.97 + movl(Address(to, 4), value); 1.98 + addptr(to, 8); 1.99 + BIND(L_fill_8_bytes); 1.100 + subl(count, 1 << (shift + 1)); 1.101 + jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 1.102 + // fall through to fill 4 bytes 1.103 + } else { 1.104 + Label L_fill_32_bytes; 1.105 + if (!UseUnalignedLoadStores) { 1.106 + // align to 8 bytes, we know we are 4 byte aligned to start 1.107 + testptr(to, 4); 1.108 + jccb(Assembler::zero, L_fill_32_bytes); 1.109 + movl(Address(to, 0), value); 1.110 + addptr(to, 4); 1.111 + subl(count, 1<<shift); 1.112 + } 1.113 + BIND(L_fill_32_bytes); 1.114 + { 1.115 + assert( UseSSE >= 2, "supported cpu only" ); 1.116 + Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 1.117 + // Fill 32-byte chunks 1.118 + movdl(xtmp, value); 1.119 + pshufd(xtmp, xtmp, 0); 1.120 + 1.121 + subl(count, 8 << shift); 1.122 + jcc(Assembler::less, L_check_fill_8_bytes); 1.123 + align(16); 1.124 + 1.125 + BIND(L_fill_32_bytes_loop); 1.126 + 1.127 + if (UseUnalignedLoadStores) { 1.128 + movdqu(Address(to, 0), xtmp); 1.129 + movdqu(Address(to, 16), xtmp); 1.130 + } else { 1.131 + movq(Address(to, 0), xtmp); 1.132 + movq(Address(to, 8), xtmp); 1.133 + movq(Address(to, 16), xtmp); 1.134 + movq(Address(to, 24), xtmp); 1.135 + } 1.136 + 1.137 + addptr(to, 32); 1.138 + subl(count, 8 << shift); 1.139 + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 1.140 + BIND(L_check_fill_8_bytes); 1.141 + addl(count, 8 << shift); 1.142 + jccb(Assembler::zero, L_exit); 1.143 + jmpb(L_fill_8_bytes); 1.144 + 1.145 + // 1.146 + // length is too short, just fill qwords 1.147 + // 1.148 + BIND(L_fill_8_bytes_loop); 1.149 + movq(Address(to, 0), xtmp); 1.150 + addptr(to, 8); 1.151 + BIND(L_fill_8_bytes); 1.152 + subl(count, 1 << (shift + 1)); 1.153 + jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 1.154 + } 1.155 + } 1.156 + // fill trailing 4 bytes 1.157 + BIND(L_fill_4_bytes); 1.158 + testl(count, 1<<shift); 1.159 + jccb(Assembler::zero, L_fill_2_bytes); 1.160 + movl(Address(to, 0), value); 1.161 + if (t == T_BYTE || t == T_SHORT) { 1.162 + addptr(to, 4); 1.163 + BIND(L_fill_2_bytes); 1.164 + // fill trailing 2 bytes 1.165 + testl(count, 1<<(shift-1)); 1.166 + jccb(Assembler::zero, L_fill_byte); 1.167 + movw(Address(to, 0), value); 1.168 + if (t == T_BYTE) { 1.169 + addptr(to, 2); 1.170 + BIND(L_fill_byte); 1.171 + // fill trailing byte 1.172 + testl(count, 1); 1.173 + jccb(Assembler::zero, L_exit); 1.174 + movb(Address(to, 0), value); 1.175 + } else { 1.176 + BIND(L_fill_byte); 1.177 + } 1.178 + } else { 1.179 + BIND(L_fill_2_bytes); 1.180 + } 1.181 + BIND(L_exit); 1.182 +} 1.183 +#undef BIND 1.184 +#undef BLOCK_COMMENT 1.185 + 1.186 + 1.187 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 1.188 switch (cond) { 1.189 // Note some conditions are synonyms for others