1.1 --- a/src/cpu/x86/vm/x86_32.ad Tue Mar 31 10:02:01 2009 -0700 1.2 +++ b/src/cpu/x86/vm/x86_32.ad Tue Mar 31 14:07:08 2009 -0700 1.3 @@ -3694,12 +3694,16 @@ 1.4 } 1.5 %} 1.6 1.7 - enc_class enc_String_Compare() %{ 1.8 + enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 1.9 + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{ 1.10 Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, 1.11 POP_LABEL, DONE_LABEL, CONT_LABEL, 1.12 WHILE_HEAD_LABEL; 1.13 MacroAssembler masm(&cbuf); 1.14 1.15 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 1.16 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 1.17 + 1.18 // Get the first character position in both strings 1.19 // [8] char array, [12] offset, [16] count 1.20 int value_offset = java_lang_String::value_offset_in_bytes(); 1.21 @@ -3717,7 +3721,6 @@ 1.22 // Compute the minimum of the string lengths(rsi) and the 1.23 // difference of the string lengths (stack) 1.24 1.25 - 1.26 if (VM_Version::supports_cmov()) { 1.27 masm.movl(rdi, Address(rdi, count_offset)); 1.28 masm.movl(rsi, Address(rsi, count_offset)); 1.29 @@ -3731,7 +3734,7 @@ 1.30 masm.movl(rsi, rdi); 1.31 masm.subl(rdi, rcx); 1.32 masm.push(rdi); 1.33 - masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); 1.34 + masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL); 1.35 masm.movl(rsi, rcx); 1.36 // rsi holds min, rcx is unused 1.37 } 1.38 @@ -3756,7 +3759,7 @@ 1.39 Label LSkip2; 1.40 // Check if the strings start at same location 1.41 masm.cmpptr(rbx,rax); 1.42 - masm.jcc(Assembler::notEqual, LSkip2); 1.43 + masm.jccb(Assembler::notEqual, LSkip2); 1.44 1.45 // Check if the length difference is zero (from stack) 1.46 masm.cmpl(Address(rsp, 0), 0x0); 1.47 @@ -3766,9 +3769,52 @@ 1.48 masm.bind(LSkip2); 1.49 } 1.50 1.51 - // Shift rax, and rbx, to the end of the arrays, negate min 1.52 - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); 1.53 - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); 1.54 + // Advance to next character 1.55 + 
masm.addptr(rax, 2); 1.56 + masm.addptr(rbx, 2); 1.57 + 1.58 + if (UseSSE42Intrinsics) { 1.59 + // With SSE4.2, use double quad vector compare 1.60 + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 1.61 + // Setup to compare 16-byte vectors 1.62 + masm.movl(rdi, rsi); 1.63 + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count 1.64 + masm.andl(rdi, 0x00000007); // rdi holds the tail count 1.65 + masm.testl(rsi, rsi); 1.66 + masm.jccb(Assembler::zero, COMPARE_TAIL); 1.67 + 1.68 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 1.69 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 1.70 + masm.negl(rsi); 1.71 + 1.72 + masm.bind(COMPARE_VECTORS); 1.73 + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); 1.74 + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); 1.75 + masm.pxor(tmp1Reg, tmp2Reg); 1.76 + masm.ptest(tmp1Reg, tmp1Reg); 1.77 + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); 1.78 + masm.addl(rsi, 8); 1.79 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 1.80 + masm.jmpb(COMPARE_TAIL); 1.81 + 1.82 + // Mismatched characters in the vectors 1.83 + masm.bind(VECTOR_NOT_EQUAL); 1.84 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 1.85 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 1.86 + masm.movl(rdi, 8); 1.87 + 1.88 + // Compare tail (< 8 chars), or rescan last vectors to 1.89 + // find 1st mismatched characters 1.90 + masm.bind(COMPARE_TAIL); 1.91 + masm.testl(rdi, rdi); 1.92 + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); 1.93 + masm.movl(rsi, rdi); 1.94 + // Fallthru to tail compare 1.95 + } 1.96 + 1.97 + //Shift rax, and rbx, to the end of the arrays, negate min 1.98 + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); 1.99 + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); 1.100 masm.negl(rsi); 1.101 1.102 // Compare the rest of the characters 1.103 @@ -3776,93 +3822,329 @@ 1.104 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); 1.105 masm.load_unsigned_short(rdi, 
Address(rax, rsi, Address::times_2, 0)); 1.106 masm.subl(rcx, rdi); 1.107 - masm.jcc(Assembler::notZero, POP_LABEL); 1.108 + masm.jccb(Assembler::notZero, POP_LABEL); 1.109 masm.incrementl(rsi); 1.110 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 1.111 1.112 // Strings are equal up to min length. Return the length difference. 1.113 masm.bind(LENGTH_DIFF_LABEL); 1.114 masm.pop(rcx); 1.115 - masm.jmp(DONE_LABEL); 1.116 + masm.jmpb(DONE_LABEL); 1.117 1.118 // Discard the stored length difference 1.119 masm.bind(POP_LABEL); 1.120 masm.addptr(rsp, 4); 1.121 - 1.122 + 1.123 // That's it 1.124 masm.bind(DONE_LABEL); 1.125 %} 1.126 1.127 - enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{ 1.128 - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; 1.129 + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 1.130 + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{ 1.131 + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; 1.132 MacroAssembler masm(&cbuf); 1.133 1.134 - Register ary1Reg = as_Register($ary1$$reg); 1.135 - Register ary2Reg = as_Register($ary2$$reg); 1.136 - Register tmp1Reg = as_Register($tmp1$$reg); 1.137 - Register tmp2Reg = as_Register($tmp2$$reg); 1.138 - Register resultReg = as_Register($result$$reg); 1.139 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 1.140 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 1.141 + 1.142 + int value_offset = java_lang_String::value_offset_in_bytes(); 1.143 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 1.144 + int count_offset = java_lang_String::count_offset_in_bytes(); 1.145 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.146 + 1.147 + // does source == target string? 
1.148 + masm.cmpptr(rdi, rsi); 1.149 + masm.jcc(Assembler::equal, RET_TRUE); 1.150 + 1.151 + // get and compare counts 1.152 + masm.movl(rcx, Address(rdi, count_offset)); 1.153 + masm.movl(rax, Address(rsi, count_offset)); 1.154 + masm.cmpl(rcx, rax); 1.155 + masm.jcc(Assembler::notEqual, RET_FALSE); 1.156 + masm.testl(rax, rax); 1.157 + masm.jcc(Assembler::zero, RET_TRUE); 1.158 + 1.159 + // get source string offset and value 1.160 + masm.movptr(rbx, Address(rsi, value_offset)); 1.161 + masm.movl(rax, Address(rsi, offset_offset)); 1.162 + masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset)); 1.163 + 1.164 + // get compare string offset and value 1.165 + masm.movptr(rbx, Address(rdi, value_offset)); 1.166 + masm.movl(rax, Address(rdi, offset_offset)); 1.167 + masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset)); 1.168 + 1.169 + // Set byte count 1.170 + masm.shll(rcx, 1); 1.171 + masm.movl(rax, rcx); 1.172 + 1.173 + if (UseSSE42Intrinsics) { 1.174 + // With SSE4.2, use double quad vector compare 1.175 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 1.176 + // Compare 16-byte vectors 1.177 + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) 1.178 + masm.andl(rax, 0x0000000e); // tail count (in bytes) 1.179 + masm.testl(rcx, rcx); 1.180 + masm.jccb(Assembler::zero, COMPARE_TAIL); 1.181 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 1.182 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 1.183 + masm.negl(rcx); 1.184 + 1.185 + masm.bind(COMPARE_WIDE_VECTORS); 1.186 + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); 1.187 + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); 1.188 + masm.pxor(tmp1Reg, tmp2Reg); 1.189 + masm.ptest(tmp1Reg, tmp1Reg); 1.190 + masm.jccb(Assembler::notZero, RET_FALSE); 1.191 + masm.addl(rcx, 16); 1.192 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 1.193 + masm.bind(COMPARE_TAIL); 1.194 + masm.movl(rcx, rax); 1.195 + // Fallthru to tail compare 1.196 + } 1.197 + 1.198 + // Compare 
4-byte vectors 1.199 + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) 1.200 + masm.andl(rax, 0x00000002); // tail char (in bytes) 1.201 + masm.testl(rcx, rcx); 1.202 + masm.jccb(Assembler::zero, COMPARE_CHAR); 1.203 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 1.204 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 1.205 + masm.negl(rcx); 1.206 + 1.207 + masm.bind(COMPARE_VECTORS); 1.208 + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); 1.209 + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); 1.210 + masm.jccb(Assembler::notEqual, RET_FALSE); 1.211 + masm.addl(rcx, 4); 1.212 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 1.213 + 1.214 + // Compare trailing char (final 2 bytes), if any 1.215 + masm.bind(COMPARE_CHAR); 1.216 + masm.testl(rax, rax); 1.217 + masm.jccb(Assembler::zero, RET_TRUE); 1.218 + masm.load_unsigned_short(rbx, Address(rdi, 0)); 1.219 + masm.load_unsigned_short(rcx, Address(rsi, 0)); 1.220 + masm.cmpl(rbx, rcx); 1.221 + masm.jccb(Assembler::notEqual, RET_FALSE); 1.222 + 1.223 + masm.bind(RET_TRUE); 1.224 + masm.movl(rax, 1); // return true 1.225 + masm.jmpb(DONE); 1.226 + 1.227 + masm.bind(RET_FALSE); 1.228 + masm.xorl(rax, rax); // return false 1.229 + 1.230 + masm.bind(DONE); 1.231 + %} 1.232 + 1.233 + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, 1.234 + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{ 1.235 + // SSE4.2 version 1.236 + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, 1.237 + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; 1.238 + MacroAssembler masm(&cbuf); 1.239 + 1.240 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 1.241 + 1.242 + // Get the first character position in both strings 1.243 + // [8] char array, [12] offset, [16] count 1.244 + int value_offset = java_lang_String::value_offset_in_bytes(); 1.245 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 1.246 + int count_offset = java_lang_String::count_offset_in_bytes(); 1.247 
+ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.248 + 1.249 + // Get counts for string and substr 1.250 + masm.movl(rdx, Address(rsi, count_offset)); 1.251 + masm.movl(rax, Address(rdi, count_offset)); 1.252 + // Check for substr count > string count 1.253 + masm.cmpl(rax, rdx); 1.254 + masm.jcc(Assembler::greater, RET_NEG_ONE); 1.255 + 1.256 + // Start the indexOf operation 1.257 + // Get start addr of string 1.258 + masm.movptr(rbx, Address(rsi, value_offset)); 1.259 + masm.movl(rcx, Address(rsi, offset_offset)); 1.260 + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); 1.261 + masm.push(rsi); 1.262 + 1.263 + // Get start addr of substr 1.264 + masm.movptr(rbx, Address(rdi, value_offset)); 1.265 + masm.movl(rcx, Address(rdi, offset_offset)); 1.266 + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); 1.267 + masm.push(rdi); 1.268 + masm.push(rax); 1.269 + masm.jmpb(PREP_FOR_SCAN); 1.270 + 1.271 + // Substr count saved at sp 1.272 + // Substr saved at sp+4 1.273 + // String saved at sp+8 1.274 + 1.275 + // Prep to load substr for scan 1.276 + masm.bind(LOAD_SUBSTR); 1.277 + masm.movptr(rdi, Address(rsp, 4)); 1.278 + masm.movl(rax, Address(rsp, 0)); 1.279 + 1.280 + // Load substr 1.281 + masm.bind(PREP_FOR_SCAN); 1.282 + masm.movdqu(tmp1Reg, Address(rdi, 0)); 1.283 + masm.addl(rdx, 8); // prime the loop 1.284 + masm.subptr(rsi, 16); 1.285 + 1.286 + // Scan string for substr in 16-byte vectors 1.287 + masm.bind(SCAN_TO_SUBSTR); 1.288 + masm.subl(rdx, 8); 1.289 + masm.addptr(rsi, 16); 1.290 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 1.291 + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 1.292 + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 1.293 + 1.294 + // Fallthru: found a potential substr 1.295 + 1.296 + // Make sure string is still long enough 1.297 + masm.subl(rdx, rcx); 1.298 + masm.cmpl(rdx, rax); 1.299 + masm.jccb(Assembler::negative, RET_NOT_FOUND); 1.300 + // Compute 
start addr of substr 1.301 + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); 1.302 + masm.movptr(rbx, rsi); 1.303 + 1.304 + // Compare potential substr 1.305 + masm.addl(rdx, 8); // prime the loop 1.306 + masm.addl(rax, 8); 1.307 + masm.subptr(rsi, 16); 1.308 + masm.subptr(rdi, 16); 1.309 + 1.310 + // Scan 16-byte vectors of string and substr 1.311 + masm.bind(SCAN_SUBSTR); 1.312 + masm.subl(rax, 8); 1.313 + masm.subl(rdx, 8); 1.314 + masm.addptr(rsi, 16); 1.315 + masm.addptr(rdi, 16); 1.316 + masm.movdqu(tmp1Reg, Address(rdi, 0)); 1.317 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 1.318 + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 1.319 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 1.320 + 1.321 + // Compute substr offset 1.322 + masm.movptr(rsi, Address(rsp, 8)); 1.323 + masm.subptr(rbx, rsi); 1.324 + masm.shrl(rbx, 1); 1.325 + masm.jmpb(CLEANUP); 1.326 + 1.327 + masm.bind(RET_NEG_ONE); 1.328 + masm.movl(rbx, -1); 1.329 + masm.jmpb(DONE); 1.330 + 1.331 + masm.bind(RET_NOT_FOUND); 1.332 + masm.movl(rbx, -1); 1.333 + 1.334 + masm.bind(CLEANUP); 1.335 + masm.addptr(rsp, 12); 1.336 + 1.337 + masm.bind(DONE); 1.338 + %} 1.339 + 1.340 + enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, 1.341 + eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{ 1.342 + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 1.343 + MacroAssembler masm(&cbuf); 1.344 + 1.345 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 1.346 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 1.347 + Register ary1Reg = as_Register($ary1$$reg); 1.348 + Register ary2Reg = as_Register($ary2$$reg); 1.349 + Register tmp3Reg = as_Register($tmp3$$reg); 1.350 + Register tmp4Reg = as_Register($tmp4$$reg); 1.351 + Register resultReg = as_Register($result$$reg); 1.352 1.353 int length_offset = arrayOopDesc::length_offset_in_bytes(); 1.354 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.355 1.356 // Check the input args 
1.357 - masm.cmpl(ary1Reg, ary2Reg); 1.358 + masm.cmpptr(ary1Reg, ary2Reg); 1.359 masm.jcc(Assembler::equal, TRUE_LABEL); 1.360 - masm.testl(ary1Reg, ary1Reg); 1.361 + masm.testptr(ary1Reg, ary1Reg); 1.362 masm.jcc(Assembler::zero, FALSE_LABEL); 1.363 - masm.testl(ary2Reg, ary2Reg); 1.364 + masm.testptr(ary2Reg, ary2Reg); 1.365 masm.jcc(Assembler::zero, FALSE_LABEL); 1.366 1.367 // Check the lengths 1.368 - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); 1.369 + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); 1.370 masm.movl(resultReg, Address(ary2Reg, length_offset)); 1.371 - masm.cmpl(tmp2Reg, resultReg); 1.372 + masm.cmpl(tmp4Reg, resultReg); 1.373 masm.jcc(Assembler::notEqual, FALSE_LABEL); 1.374 masm.testl(resultReg, resultReg); 1.375 masm.jcc(Assembler::zero, TRUE_LABEL); 1.376 1.377 - // Get the number of 4 byte vectors to compare 1.378 - masm.shrl(resultReg, 1); 1.379 - 1.380 - // Check for odd-length arrays 1.381 - masm.andl(tmp2Reg, 1); 1.382 - masm.testl(tmp2Reg, tmp2Reg); 1.383 - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); 1.384 - 1.385 - // Compare 2-byte "tail" at end of arrays 1.386 - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 1.387 - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 1.388 - masm.cmpl(tmp1Reg, tmp2Reg); 1.389 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 1.390 + // Load array addrs 1.391 + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); 1.392 + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); 1.393 + 1.394 + // Set byte count 1.395 + masm.shll(tmp4Reg, 1); 1.396 + masm.movl(resultReg, tmp4Reg); 1.397 + 1.398 + if (UseSSE42Intrinsics) { 1.399 + // With SSE4.2, use double quad vector compare 1.400 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 1.401 + // Compare 16-byte vectors 1.402 + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) 1.403 + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) 1.404 + 
masm.testl(tmp4Reg, tmp4Reg); 1.405 + masm.jccb(Assembler::zero, COMPARE_TAIL); 1.406 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 1.407 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 1.408 + masm.negl(tmp4Reg); 1.409 + 1.410 + masm.bind(COMPARE_WIDE_VECTORS); 1.411 + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 1.412 + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 1.413 + masm.pxor(tmp1Reg, tmp2Reg); 1.414 + masm.ptest(tmp1Reg, tmp1Reg); 1.415 + 1.416 + masm.jccb(Assembler::notZero, FALSE_LABEL); 1.417 + masm.addl(tmp4Reg, 16); 1.418 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 1.419 + masm.bind(COMPARE_TAIL); 1.420 + masm.movl(tmp4Reg, resultReg); 1.421 + // Fallthru to tail compare 1.422 + } 1.423 + 1.424 + // Compare 4-byte vectors 1.425 + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) 1.426 + masm.andl(resultReg, 0x00000002); // tail char (in bytes) 1.427 + masm.testl(tmp4Reg, tmp4Reg); 1.428 + masm.jccb(Assembler::zero, COMPARE_CHAR); 1.429 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 1.430 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 1.431 + masm.negl(tmp4Reg); 1.432 + 1.433 + masm.bind(COMPARE_VECTORS); 1.434 + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 1.435 + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 1.436 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 1.437 + masm.addl(tmp4Reg, 4); 1.438 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 1.439 + 1.440 + // Compare trailing char (final 2 bytes), if any 1.441 + masm.bind(COMPARE_CHAR); 1.442 masm.testl(resultReg, resultReg); 1.443 - masm.jcc(Assembler::zero, TRUE_LABEL); 1.444 - 1.445 - // Setup compare loop 1.446 - masm.bind(COMPARE_LOOP_HDR); 1.447 - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays 1.448 - masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 1.449 - 
masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 1.450 - masm.negl(resultReg); 1.451 - 1.452 - // 4-byte-wide compare loop 1.453 - masm.bind(COMPARE_LOOP); 1.454 - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); 1.455 - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); 1.456 - masm.cmpl(ary1Reg, ary2Reg); 1.457 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 1.458 - masm.increment(resultReg); 1.459 - masm.jcc(Assembler::notZero, COMPARE_LOOP); 1.460 + masm.jccb(Assembler::zero, TRUE_LABEL); 1.461 + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); 1.462 + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); 1.463 + masm.cmpl(tmp3Reg, tmp4Reg); 1.464 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 1.465 1.466 masm.bind(TRUE_LABEL); 1.467 masm.movl(resultReg, 1); // return true 1.468 - masm.jmp(DONE_LABEL); 1.469 + masm.jmpb(DONE); 1.470 1.471 masm.bind(FALSE_LABEL); 1.472 masm.xorl(resultReg, resultReg); // return false 1.473 1.474 // That's it 1.475 - masm.bind(DONE_LABEL); 1.476 + masm.bind(DONE); 1.477 %} 1.478 1.479 enc_class enc_pop_rdx() %{ 1.480 @@ -12074,11 +12356,8 @@ 1.481 ins_pipe( fpu_reg_reg ); 1.482 %} 1.483 1.484 - 1.485 - 1.486 // ======================================================================= 1.487 // fast clearing of an array 1.488 - 1.489 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 1.490 match(Set dummy (ClearArray cnt base)); 1.491 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 1.492 @@ -12092,24 +12371,48 @@ 1.493 ins_pipe( pipe_slow ); 1.494 %} 1.495 1.496 -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 1.497 +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 1.498 + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{ 1.499 match(Set result (StrComp str1 str2)); 1.500 - effect(USE_KILL str1, USE_KILL 
str2, KILL tmp1, KILL tmp2, KILL cr);
1.501 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
1.502    //ins_cost(300);
1.503
1.504    format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %}
1.505 -  ins_encode( enc_String_Compare() );
1.506 +  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
1.507 +  ins_pipe( pipe_slow );
1.508 +%}
1.509 +
1.510 +// fast string equals
1.511 +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
1.512 +                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
1.513 +  match(Set result (StrEquals str1 str2));
1.514 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
1.515 +
1.516 +  format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX" %}
1.517 +  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); // operands bind positionally: order must match the enc_class signature so $tmp1/$tmp2 name the XMM temps
1.518 +  ins_pipe( pipe_slow );
1.519 +%}
1.520 +
1.521 +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
1.522 +                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{
1.523 +  predicate(UseSSE42Intrinsics);
1.524 +  match(Set result (StrIndexOf str1 str2));
1.525 +  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
1.526 +
1.527 +  format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX" %}
1.528 +  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
1.529    ins_pipe( pipe_slow );
1.530  %}
1.531
1.532  // fast array equals
1.533 -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
1.534 +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3,
1.535 +                      eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
1.536    match(Set result (AryEq ary1 ary2));
1.537 -  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
1.538 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1,
USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 1.539 //ins_cost(300); 1.540 1.541 - format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %} 1.542 - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); 1.543 + format %{ "Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX" %} 1.544 + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); 1.545 ins_pipe( pipe_slow ); 1.546 %} 1.547