1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Mon Sep 14 09:49:54 2009 -0700 1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Mon Sep 14 12:14:20 2009 -0700 1.3 @@ -8404,6 +8404,319 @@ 1.4 } 1.5 #endif // _LP64 1.6 1.7 +// IndexOf substring. 1.8 +void MacroAssembler::string_indexof(Register str1, Register str2, 1.9 + Register cnt1, Register cnt2, Register result, 1.10 + XMMRegister vec, Register tmp) { 1.11 + assert(UseSSE42Intrinsics, "SSE4.2 is required"); 1.12 + 1.13 + Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, 1.14 + SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP; 1.15 + 1.16 + push(str1); // string addr 1.17 + push(str2); // substr addr 1.18 + push(cnt2); // substr count 1.19 + jmpb(PREP_FOR_SCAN); 1.20 + 1.21 + // Substr count saved at sp 1.22 + // Substr saved at sp+1*wordSize 1.23 + // String saved at sp+2*wordSize 1.24 + 1.25 + // Reload substr for rescan 1.26 + bind(RELOAD_SUBSTR); 1.27 + movl(cnt2, Address(rsp, 0)); 1.28 + movptr(str2, Address(rsp, wordSize)); 1.29 + // We came here after the beginninig of the substring was 1.30 + // matched but the rest of it was not so we need to search 1.31 + // again. Start from the next element after the previous match. 1.32 + subptr(str1, result); // Restore counter 1.33 + shrl(str1, 1); 1.34 + addl(cnt1, str1); 1.35 + lea(str1, Address(result, 2)); // Reload string 1.36 + 1.37 + // Load substr 1.38 + bind(PREP_FOR_SCAN); 1.39 + movdqu(vec, Address(str2, 0)); 1.40 + addl(cnt1, 8); // prime the loop 1.41 + subptr(str1, 16); 1.42 + 1.43 + // Scan string for substr in 16-byte vectors 1.44 + bind(SCAN_TO_SUBSTR); 1.45 + subl(cnt1, 8); 1.46 + addptr(str1, 16); 1.47 + 1.48 + // pcmpestri 1.49 + // inputs: 1.50 + // xmm - substring 1.51 + // rax - substring length (elements count) 1.52 + // mem - scaned string 1.53 + // rdx - string length (elements count) 1.54 + // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 1.55 + // outputs: 1.56 + // rcx - matched index in string 1.57 + assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 1.58 + 1.59 + pcmpestri(vec, Address(str1, 0), 0x0d); 1.60 + jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 1.61 + jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 1.62 + 1.63 + // Fallthrough: found a potential substr 1.64 + 1.65 + // Make sure string is still long enough 1.66 + subl(cnt1, tmp); 1.67 + cmpl(cnt1, cnt2); 1.68 + jccb(Assembler::negative, RET_NOT_FOUND); 1.69 + // Compute start addr of substr 1.70 + lea(str1, Address(str1, tmp, Address::times_2)); 1.71 + movptr(result, str1); // save 1.72 + 1.73 + // Compare potential substr 1.74 + addl(cnt1, 8); // prime the loop 1.75 + addl(cnt2, 8); 1.76 + subptr(str1, 16); 1.77 + subptr(str2, 16); 1.78 + 1.79 + // Scan 16-byte vectors of string and substr 1.80 + bind(SCAN_SUBSTR); 1.81 + subl(cnt1, 8); 1.82 + subl(cnt2, 8); 1.83 + addptr(str1, 16); 1.84 + addptr(str2, 16); 1.85 + movdqu(vec, Address(str2, 0)); 1.86 + pcmpestri(vec, Address(str1, 0), 0x0d); 1.87 + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 1.88 + jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 1.89 + 1.90 + // Compute substr offset 1.91 + subptr(result, Address(rsp, 2*wordSize)); 1.92 + shrl(result, 1); // index 1.93 + jmpb(CLEANUP); 1.94 + 1.95 + bind(RET_NOT_FOUND); 1.96 + movl(result, -1); 1.97 + 1.98 + bind(CLEANUP); 1.99 + addptr(rsp, 3*wordSize); 1.100 +} 1.101 + 1.102 +// Compare strings. 1.103 +void MacroAssembler::string_compare(Register str1, Register str2, 1.104 + Register cnt1, Register cnt2, Register result, 1.105 + XMMRegister vec1, XMMRegister vec2) { 1.106 + Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 1.107 + 1.108 + // Compute the minimum of the string lengths and the 1.109 + // difference of the string lengths (stack). 1.110 + // Do the conditional move stuff 1.111 + movl(result, cnt1); 1.112 + subl(cnt1, cnt2); 1.113 + push(cnt1); 1.114 + if (VM_Version::supports_cmov()) { 1.115 + cmovl(Assembler::lessEqual, cnt2, result); 1.116 + } else { 1.117 + Label GT_LABEL; 1.118 + jccb(Assembler::greater, GT_LABEL); 1.119 + movl(cnt2, result); 1.120 + bind(GT_LABEL); 1.121 + } 1.122 + 1.123 + // Is the minimum length zero? 1.124 + testl(cnt2, cnt2); 1.125 + jcc(Assembler::zero, LENGTH_DIFF_LABEL); 1.126 + 1.127 + // Load first characters 1.128 + load_unsigned_short(result, Address(str1, 0)); 1.129 + load_unsigned_short(cnt1, Address(str2, 0)); 1.130 + 1.131 + // Compare first characters 1.132 + subl(result, cnt1); 1.133 + jcc(Assembler::notZero, POP_LABEL); 1.134 + decrementl(cnt2); 1.135 + jcc(Assembler::zero, LENGTH_DIFF_LABEL); 1.136 + 1.137 + { 1.138 + // Check after comparing first character to see if strings are equivalent 1.139 + Label LSkip2; 1.140 + // Check if the strings start at same location 1.141 + cmpptr(str1, str2); 1.142 + jccb(Assembler::notEqual, LSkip2); 1.143 + 1.144 + // Check if the length difference is zero (from stack) 1.145 + cmpl(Address(rsp, 0), 0x0); 1.146 + jcc(Assembler::equal, LENGTH_DIFF_LABEL); 1.147 + 1.148 + // Strings might not be equivalent 1.149 + bind(LSkip2); 1.150 + } 1.151 + 1.152 + // Advance to next character 1.153 + addptr(str1, 2); 1.154 + addptr(str2, 2); 1.155 + 1.156 + if (UseSSE42Intrinsics) { 1.157 + // With SSE4.2, use double quad vector compare 1.158 + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 1.159 + // Setup to compare 16-byte vectors 1.160 + movl(cnt1, cnt2); 1.161 + andl(cnt2, 0xfffffff8); // cnt2 holds the vector count 1.162 + andl(cnt1, 0x00000007); // cnt1 holds the tail count 1.163 + testl(cnt2, cnt2); 1.164 + jccb(Assembler::zero, COMPARE_TAIL); 1.165 + 1.166 + lea(str2, Address(str2, cnt2, Address::times_2)); 1.167 + lea(str1, Address(str1, cnt2, Address::times_2)); 1.168 + negptr(cnt2); 1.169 + 1.170 + bind(COMPARE_VECTORS); 1.171 + movdqu(vec1, Address(str1, cnt2, Address::times_2)); 1.172 + movdqu(vec2, Address(str2, cnt2, Address::times_2)); 1.173 + pxor(vec1, vec2); 1.174 + ptest(vec1, vec1); 1.175 + jccb(Assembler::notZero, VECTOR_NOT_EQUAL); 1.176 + addptr(cnt2, 8); 1.177 + jcc(Assembler::notZero, COMPARE_VECTORS); 1.178 + jmpb(COMPARE_TAIL); 1.179 + 1.180 + // Mismatched characters in the vectors 1.181 + bind(VECTOR_NOT_EQUAL); 1.182 + lea(str1, Address(str1, cnt2, Address::times_2)); 1.183 + lea(str2, Address(str2, cnt2, Address::times_2)); 1.184 + movl(cnt1, 8); 1.185 + 1.186 + // Compare tail (< 8 chars), or rescan last vectors to 1.187 + // find 1st mismatched characters 1.188 + bind(COMPARE_TAIL); 1.189 + testl(cnt1, cnt1); 1.190 + jccb(Assembler::zero, LENGTH_DIFF_LABEL); 1.191 + movl(cnt2, cnt1); 1.192 + // Fallthru to tail compare 1.193 + } 1.194 + 1.195 + // Shift str2 and str1 to the end of the arrays, negate min 1.196 + lea(str1, Address(str1, cnt2, Address::times_2, 0)); 1.197 + lea(str2, Address(str2, cnt2, Address::times_2, 0)); 1.198 + negptr(cnt2); 1.199 + 1.200 + // Compare the rest of the characters 1.201 + bind(WHILE_HEAD_LABEL); 1.202 + load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0)); 1.203 + load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0)); 1.204 + subl(result, cnt1); 1.205 + jccb(Assembler::notZero, POP_LABEL); 1.206 + increment(cnt2); 1.207 + jcc(Assembler::notZero, WHILE_HEAD_LABEL); 1.208 + 1.209 + // Strings are equal up to min length. Return the length difference. 1.210 + bind(LENGTH_DIFF_LABEL); 1.211 + pop(result); 1.212 + jmpb(DONE_LABEL); 1.213 + 1.214 + // Discard the stored length difference 1.215 + bind(POP_LABEL); 1.216 + addptr(rsp, wordSize); 1.217 + 1.218 + // That's it 1.219 + bind(DONE_LABEL); 1.220 +} 1.221 + 1.222 +// Compare char[] arrays aligned to 4 bytes or substrings. 1.223 +void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 1.224 + Register limit, Register result, Register chr, 1.225 + XMMRegister vec1, XMMRegister vec2) { 1.226 + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 1.227 + 1.228 + int length_offset = arrayOopDesc::length_offset_in_bytes(); 1.229 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.230 + 1.231 + // Check the input args 1.232 + cmpptr(ary1, ary2); 1.233 + jcc(Assembler::equal, TRUE_LABEL); 1.234 + 1.235 + if (is_array_equ) { 1.236 + // Need additional checks for arrays_equals. 1.237 + andptr(ary1, ary2); 1.238 + jcc(Assembler::zero, FALSE_LABEL); // One pointer is NULL 1.239 + 1.240 + // Check the lengths 1.241 + movl(limit, Address(ary1, length_offset)); 1.242 + cmpl(limit, Address(ary2, length_offset)); 1.243 + jcc(Assembler::notEqual, FALSE_LABEL); 1.244 + } 1.245 + 1.246 + // count == 0 1.247 + testl(limit, limit); 1.248 + jcc(Assembler::zero, TRUE_LABEL); 1.249 + 1.250 + if (is_array_equ) { 1.251 + // Load array address 1.252 + lea(ary1, Address(ary1, base_offset)); 1.253 + lea(ary2, Address(ary2, base_offset)); 1.254 + } 1.255 + 1.256 + shll(limit, 1); // byte count != 0 1.257 + movl(result, limit); // copy 1.258 + 1.259 + if (UseSSE42Intrinsics) { 1.260 + // With SSE4.2, use double quad vector compare 1.261 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 1.262 + // Compare 16-byte vectors 1.263 + andl(result, 0x0000000e); // tail count (in bytes) 1.264 + andl(limit, 0xfffffff0); // vector count (in bytes) 1.265 + jccb(Assembler::zero, COMPARE_TAIL); 1.266 + 1.267 + lea(ary1, Address(ary1, limit, Address::times_1)); 1.268 + lea(ary2, Address(ary2, limit, Address::times_1)); 1.269 + negptr(limit); 1.270 + 1.271 + bind(COMPARE_WIDE_VECTORS); 1.272 + movdqu(vec1, Address(ary1, limit, Address::times_1)); 1.273 + movdqu(vec2, Address(ary2, limit, Address::times_1)); 1.274 + pxor(vec1, vec2); 1.275 + ptest(vec1, vec1); 1.276 + jccb(Assembler::notZero, FALSE_LABEL); 1.277 + addptr(limit, 16); 1.278 + jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 1.279 + 1.280 + bind(COMPARE_TAIL); // limit is zero 1.281 + movl(limit, result); 1.282 + // Fallthru to tail compare 1.283 + } 1.284 + 1.285 + // Compare 4-byte vectors 1.286 + andl(limit, 0xfffffffc); // vector count (in bytes) 1.287 + jccb(Assembler::zero, COMPARE_CHAR); 1.288 + 1.289 + lea(ary1, Address(ary1, limit, Address::times_1)); 1.290 + lea(ary2, Address(ary2, limit, Address::times_1)); 1.291 + negptr(limit); 1.292 + 1.293 + bind(COMPARE_VECTORS); 1.294 + movl(chr, Address(ary1, limit, Address::times_1)); 1.295 + cmpl(chr, Address(ary2, limit, Address::times_1)); 1.296 + jccb(Assembler::notEqual, FALSE_LABEL); 1.297 + addptr(limit, 4); 1.298 + jcc(Assembler::notZero, COMPARE_VECTORS); 1.299 + 1.300 + // Compare trailing char (final 2 bytes), if any 1.301 + bind(COMPARE_CHAR); 1.302 + testl(result, 0x2); // tail char 1.303 + jccb(Assembler::zero, TRUE_LABEL); 1.304 + load_unsigned_short(chr, Address(ary1, 0)); 1.305 + load_unsigned_short(limit, Address(ary2, 0)); 1.306 + cmpl(chr, limit); 1.307 + jccb(Assembler::notEqual, FALSE_LABEL); 1.308 + 1.309 + bind(TRUE_LABEL); 1.310 + movl(result, 1); // return true 1.311 + jmpb(DONE); 1.312 + 1.313 + bind(FALSE_LABEL); 1.314 + xorl(result, result); // return false 1.315 + 1.316 + // That's it 1.317 + bind(DONE); 1.318 +} 1.319 + 1.320 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 1.321 switch (cond) { 1.322 // Note some conditions are synonyms for others