src/cpu/x86/vm/x86_32.ad

changeset 1116
fbde8ec322d0
parent 1106
d0994e5bebce
child 1210
93c14e5562c4
     1.1 --- a/src/cpu/x86/vm/x86_32.ad	Tue Mar 31 10:02:01 2009 -0700
     1.2 +++ b/src/cpu/x86/vm/x86_32.ad	Tue Mar 31 14:07:08 2009 -0700
     1.3 @@ -3694,12 +3694,16 @@
     1.4      }
     1.5    %}
     1.6  
     1.7 -  enc_class enc_String_Compare() %{
     1.8 +  enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
     1.9 +                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{
    1.10      Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL,
    1.11            POP_LABEL, DONE_LABEL, CONT_LABEL,
    1.12            WHILE_HEAD_LABEL;
    1.13      MacroAssembler masm(&cbuf);
    1.14  
    1.15 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
    1.16 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
    1.17 +
    1.18      // Get the first character position in both strings
    1.19      //         [8] char array, [12] offset, [16] count
    1.20      int value_offset  = java_lang_String::value_offset_in_bytes();
    1.21 @@ -3717,7 +3721,6 @@
    1.22      // Compute the minimum of the string lengths(rsi) and the
    1.23      // difference of the string lengths (stack)
    1.24  
    1.25 -
    1.26      if (VM_Version::supports_cmov()) {
    1.27        masm.movl(rdi, Address(rdi, count_offset));
    1.28        masm.movl(rsi, Address(rsi, count_offset));
    1.29 @@ -3731,7 +3734,7 @@
    1.30        masm.movl(rsi, rdi);
    1.31        masm.subl(rdi, rcx);
    1.32        masm.push(rdi);
    1.33 -      masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL);
    1.34 +      masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL);
    1.35        masm.movl(rsi, rcx);
    1.36        // rsi holds min, rcx is unused
    1.37      }
    1.38 @@ -3756,7 +3759,7 @@
    1.39        Label LSkip2;
    1.40        // Check if the strings start at same location
    1.41        masm.cmpptr(rbx,rax);
    1.42 -      masm.jcc(Assembler::notEqual, LSkip2);
    1.43 +      masm.jccb(Assembler::notEqual, LSkip2);
    1.44  
    1.45        // Check if the length difference is zero (from stack)
    1.46        masm.cmpl(Address(rsp, 0), 0x0);
    1.47 @@ -3766,9 +3769,52 @@
    1.48        masm.bind(LSkip2);
    1.49      }
    1.50  
    1.51 -    // Shift rax, and rbx, to the end of the arrays, negate min
    1.52 -    masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
    1.53 -    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
    1.54 +   // Advance to next character
    1.55 +    masm.addptr(rax, 2);
    1.56 +    masm.addptr(rbx, 2);
    1.57 +
    1.58 +    if (UseSSE42Intrinsics) {
    1.59 +      // With SSE4.2, use double quad vector compare
    1.60 +      Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    1.61 +      // Setup to compare 16-byte vectors
    1.62 +      masm.movl(rdi, rsi);
    1.63 +      masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
    1.64 +      masm.andl(rdi, 0x00000007); // rdi holds the tail count
    1.65 +      masm.testl(rsi, rsi);
    1.66 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
    1.67 +
    1.68 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    1.69 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    1.70 +      masm.negl(rsi);
    1.71 +
    1.72 +      masm.bind(COMPARE_VECTORS);
    1.73 +      masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
    1.74 +      masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
    1.75 +      masm.pxor(tmp1Reg, tmp2Reg);
    1.76 +      masm.ptest(tmp1Reg, tmp1Reg);
    1.77 +      masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    1.78 +      masm.addl(rsi, 8);
    1.79 +      masm.jcc(Assembler::notZero, COMPARE_VECTORS);
    1.80 +      masm.jmpb(COMPARE_TAIL);
    1.81 +
    1.82 +      // Mismatched characters in the vectors
    1.83 +      masm.bind(VECTOR_NOT_EQUAL);
    1.84 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    1.85 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    1.86 +      masm.movl(rdi, 8);
    1.87 +
    1.88 +      // Compare tail (< 8 chars), or rescan last vectors to
    1.89 +      // find 1st mismatched characters
    1.90 +      masm.bind(COMPARE_TAIL);
    1.91 +      masm.testl(rdi, rdi);
    1.92 +      masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    1.93 +      masm.movl(rsi, rdi);
    1.94 +      // Fallthru to tail compare
    1.95 +    }
    1.96 +
    1.97 +    //Shift rax, and rbx, to the end of the arrays, negate min
    1.98 +    masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
    1.99 +    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
   1.100      masm.negl(rsi);
   1.101  
   1.102      // Compare the rest of the characters
   1.103 @@ -3776,93 +3822,329 @@
   1.104      masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
   1.105      masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
   1.106      masm.subl(rcx, rdi);
   1.107 -    masm.jcc(Assembler::notZero, POP_LABEL);
   1.108 +    masm.jccb(Assembler::notZero, POP_LABEL);
   1.109      masm.incrementl(rsi);
   1.110      masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
   1.111  
   1.112      // Strings are equal up to min length.  Return the length difference.
   1.113      masm.bind(LENGTH_DIFF_LABEL);
   1.114      masm.pop(rcx);
   1.115 -    masm.jmp(DONE_LABEL);
   1.116 +    masm.jmpb(DONE_LABEL);
   1.117  
   1.118      // Discard the stored length difference
   1.119      masm.bind(POP_LABEL);
   1.120      masm.addptr(rsp, 4);
   1.121 -       
   1.122 +
   1.123      // That's it
   1.124      masm.bind(DONE_LABEL);
   1.125    %}
   1.126  
   1.127 -  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
   1.128 -    Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
   1.129 + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
   1.130 +                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{
   1.131 +    Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;  // emitted code leaves 1 (equal) or 0 in rax
   1.132      MacroAssembler masm(&cbuf);
   1.133  
   1.134 -    Register ary1Reg   = as_Register($ary1$$reg);
   1.135 -    Register ary2Reg   = as_Register($ary2$$reg);
   1.136 -    Register tmp1Reg   = as_Register($tmp1$$reg);
   1.137 -    Register tmp2Reg   = as_Register($tmp2$$reg);
   1.138 -    Register resultReg = as_Register($result$$reg);
   1.139 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);  // only the XMM temps are taken from operands;
   1.140 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);  // rdi, rsi, rax, rbx, rcx are used by name below
   1.141 +
   1.142 +    int value_offset  = java_lang_String::value_offset_in_bytes();
   1.143 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
   1.144 +    int count_offset  = java_lang_String::count_offset_in_bytes();
   1.145 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   1.146 +
   1.147 +    // does source == target string? (identical reference -> true)
   1.148 +    masm.cmpptr(rdi, rsi);
   1.149 +    masm.jcc(Assembler::equal, RET_TRUE);
   1.150 +
   1.151 +    // get and compare counts: unequal -> false, both zero -> true
   1.152 +    masm.movl(rcx, Address(rdi, count_offset));
   1.153 +    masm.movl(rax, Address(rsi, count_offset));
   1.154 +    masm.cmpl(rcx, rax);
   1.155 +    masm.jcc(Assembler::notEqual, RET_FALSE);
   1.156 +    masm.testl(rax, rax);
   1.157 +    masm.jcc(Assembler::zero, RET_TRUE);
   1.158 +
   1.159 +    // get source string offset and value; rsi = address of value[offset]
   1.160 +    masm.movptr(rbx, Address(rsi, value_offset));
   1.161 +    masm.movl(rax, Address(rsi, offset_offset));
   1.162 +    masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset));
   1.163 +
   1.164 +    // get compare string offset and value; rdi = address of value[offset]
   1.165 +    masm.movptr(rbx, Address(rdi, value_offset));
   1.166 +    masm.movl(rax, Address(rdi, offset_offset));
   1.167 +    masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset));
   1.168 +
   1.169 +    // Set byte count: rcx = count * 2, with a copy in rax
   1.170 +    masm.shll(rcx, 1);
   1.171 +    masm.movl(rax, rcx);
   1.172 +
   1.173 +    if (UseSSE42Intrinsics) {
   1.174 +      // With SSE4.2, use double quad vector compare
   1.175 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   1.176 +      // Compare 16-byte vectors
   1.177 +      masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
   1.178 +      masm.andl(rax, 0x0000000e);  // tail count (in bytes)
   1.179 +      masm.testl(rcx, rcx);
   1.180 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
   1.181 +      masm.lea(rdi, Address(rdi, rcx, Address::times_1));
   1.182 +      masm.lea(rsi, Address(rsi, rcx, Address::times_1));
   1.183 +      masm.negl(rcx);  // index runs from -bytes up to 0
   1.184 +
   1.185 +      masm.bind(COMPARE_WIDE_VECTORS);
   1.186 +      masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
   1.187 +      masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
   1.188 +      masm.pxor(tmp1Reg, tmp2Reg);
   1.189 +      masm.ptest(tmp1Reg, tmp1Reg);  // ZF is set only if the XOR is all zero
   1.190 +      masm.jccb(Assembler::notZero, RET_FALSE);
   1.191 +      masm.addl(rcx, 16);
   1.192 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   1.193 +      masm.bind(COMPARE_TAIL);
   1.194 +      masm.movl(rcx, rax);  // continue with the tail bytes
   1.195 +      // Fallthru to tail compare
   1.196 +    }
   1.197 +
   1.198 +    // Compare 4-byte vectors
   1.199 +    masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
   1.200 +    masm.andl(rax, 0x00000002);  // tail char (in bytes)
   1.201 +    masm.testl(rcx, rcx);
   1.202 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
   1.203 +    masm.lea(rdi, Address(rdi, rcx, Address::times_1));
   1.204 +    masm.lea(rsi, Address(rsi, rcx, Address::times_1));
   1.205 +    masm.negl(rcx);  // index runs from -bytes up to 0
   1.206 +
   1.207 +    masm.bind(COMPARE_VECTORS);
   1.208 +    masm.movl(rbx, Address(rdi, rcx, Address::times_1));
   1.209 +    masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
   1.210 +    masm.jccb(Assembler::notEqual, RET_FALSE);
   1.211 +    masm.addl(rcx, 4);
   1.212 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
   1.213 +
   1.214 +    // Compare trailing char (final 2 bytes), if any
   1.215 +    masm.bind(COMPARE_CHAR);
   1.216 +    masm.testl(rax, rax);
   1.217 +    masm.jccb(Assembler::zero, RET_TRUE);
   1.218 +    masm.load_unsigned_short(rbx, Address(rdi, 0));  // rdi/rsi were advanced past the bytes already compared
   1.219 +    masm.load_unsigned_short(rcx, Address(rsi, 0));
   1.220 +    masm.cmpl(rbx, rcx);
   1.221 +    masm.jccb(Assembler::notEqual, RET_FALSE);
   1.222 +
   1.223 +    masm.bind(RET_TRUE);
   1.224 +    masm.movl(rax, 1);   // return true
   1.225 +    masm.jmpb(DONE);
   1.226 +
   1.227 +    masm.bind(RET_FALSE);
   1.228 +    masm.xorl(rax, rax); // return false
   1.229 +
   1.230 +    masm.bind(DONE);
   1.231 +    %}
   1.232 +
   1.233 + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
   1.234 +                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{
   1.235 +    // SSE4.2 version: pcmpestri is emitted unconditionally; the index (or -1) is left in rbx
   1.236 +    Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
   1.237 +          SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
   1.238 +    MacroAssembler masm(&cbuf);
   1.239 +
   1.240 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   1.241 +
   1.242 +    // Get the first character position in both strings
   1.243 +    //         [8] char array, [12] offset, [16] count
   1.244 +    int value_offset  = java_lang_String::value_offset_in_bytes();
   1.245 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
   1.246 +    int count_offset  = java_lang_String::count_offset_in_bytes();
   1.247 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   1.248 +
   1.249 +    // Get counts for string and substr (rdx = string count, rax = substr count)
   1.250 +    masm.movl(rdx, Address(rsi, count_offset));
   1.251 +    masm.movl(rax, Address(rdi, count_offset));
   1.252 +    // Check for substr count > string count
   1.253 +    masm.cmpl(rax, rdx);
   1.254 +    masm.jcc(Assembler::greater, RET_NEG_ONE);
   1.255 +
   1.256 +    // Start the indexOf operation
   1.257 +    // Get start addr of string
   1.258 +    masm.movptr(rbx, Address(rsi, value_offset));
   1.259 +    masm.movl(rcx, Address(rsi, offset_offset));
   1.260 +    masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
   1.261 +    masm.push(rsi);
   1.262 +
   1.263 +    // Get start addr of substr
   1.264 +    masm.movptr(rbx, Address(rdi, value_offset));
   1.265 +    masm.movl(rcx, Address(rdi, offset_offset));
   1.266 +    masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
   1.267 +    masm.push(rdi);
   1.268 +    masm.push(rax);
   1.269 +    masm.jmpb(PREP_FOR_SCAN);  // rdi and rax already hold the substr address and count
   1.270 +
   1.271 +    // Substr count saved at sp
   1.272 +    // Substr saved at sp+4
   1.273 +    // String saved at sp+8
   1.274 +
   1.275 +    // Prep to load substr for scan (reload the saved substr address and count)
   1.276 +    masm.bind(LOAD_SUBSTR);
   1.277 +    masm.movptr(rdi, Address(rsp, 4));
   1.278 +    masm.movl(rax, Address(rsp, 0));
   1.279 +
   1.280 +    // Load substr
   1.281 +    masm.bind(PREP_FOR_SCAN);
   1.282 +    masm.movdqu(tmp1Reg, Address(rdi, 0));
   1.283 +    masm.addl(rdx, 8);        // prime the loop
   1.284 +    masm.subptr(rsi, 16);     // undone by the first SCAN_TO_SUBSTR iteration
   1.285 +
   1.286 +    // Scan string for substr in 16-byte vectors
   1.287 +    masm.bind(SCAN_TO_SUBSTR);
   1.288 +    masm.subl(rdx, 8);
   1.289 +    masm.addptr(rsi, 16);
   1.290 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);  // imm8 0x0d: unsigned words, equal-ordered; lengths in rax/rdx, index in rcx
   1.291 +    masm.jcc(Assembler::above, SCAN_TO_SUBSTR);     // CF == 0 && ZF == 0
   1.292 +    masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
   1.293 +
   1.294 +    // Fallthru: found a potential substr
   1.295 +
   1.296 +    // Make sure string is still long enough
   1.297 +    masm.subl(rdx, rcx);      // rcx = index reported by pcmpestri
   1.298 +    masm.cmpl(rdx, rax);
   1.299 +    masm.jccb(Assembler::negative, RET_NOT_FOUND);
   1.300 +    // Compute start addr of substr
   1.301 +    masm.lea(rsi, Address(rsi, rcx, Address::times_2));
   1.302 +    masm.movptr(rbx, rsi);    // keep the candidate start; it becomes the result below
   1.303 +
   1.304 +    // Compare potential substr
   1.305 +    masm.addl(rdx, 8);        // prime the loop
   1.306 +    masm.addl(rax, 8);
   1.307 +    masm.subptr(rsi, 16);
   1.308 +    masm.subptr(rdi, 16);
   1.309 +
   1.310 +    // Scan 16-byte vectors of string and substr
   1.311 +    masm.bind(SCAN_SUBSTR);
   1.312 +    masm.subl(rax, 8);
   1.313 +    masm.subl(rdx, 8);
   1.314 +    masm.addptr(rsi, 16);
   1.315 +    masm.addptr(rdi, 16);
   1.316 +    masm.movdqu(tmp1Reg, Address(rdi, 0));
   1.317 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
   1.318 +    masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
   1.319 +    masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
   1.320 +
   1.321 +    // Compute substr offset: (candidate start - string start) / 2 bytes per char
   1.322 +    masm.movptr(rsi, Address(rsp, 8));
   1.323 +    masm.subptr(rbx, rsi);
   1.324 +    masm.shrl(rbx, 1);
   1.325 +    masm.jmpb(CLEANUP);
   1.326 +
   1.327 +    masm.bind(RET_NEG_ONE);
   1.328 +    masm.movl(rbx, -1);
   1.329 +    masm.jmpb(DONE);          // reached before any push, so there is no stack to clean up
   1.330 +
   1.331 +    masm.bind(RET_NOT_FOUND);
   1.332 +    masm.movl(rbx, -1);
   1.333 +
   1.334 +    masm.bind(CLEANUP);
   1.335 +    masm.addptr(rsp, 12);     // drop the three words pushed above
   1.336 +
   1.337 +    masm.bind(DONE);
   1.338 +  %}
   1.339 +
   1.340 +  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2,
   1.341 +                             eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{
   1.342 +    Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;  // emitted code leaves 1 (equal) or 0 in resultReg
   1.343 +    MacroAssembler masm(&cbuf);
   1.344 +
   1.345 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   1.346 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
   1.347 +    Register ary1Reg      = as_Register($ary1$$reg);
   1.348 +    Register ary2Reg      = as_Register($ary2$$reg);
   1.349 +    Register tmp3Reg      = as_Register($tmp3$$reg);
   1.350 +    Register tmp4Reg      = as_Register($tmp4$$reg);
   1.351 +    Register resultReg    = as_Register($result$$reg);
   1.352  
   1.353      int length_offset  = arrayOopDesc::length_offset_in_bytes();
   1.354      int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   1.355  
   1.356      // Check the input args
   1.357 -    masm.cmpl(ary1Reg, ary2Reg);
   1.358 +    masm.cmpptr(ary1Reg, ary2Reg);  // same reference -> true
   1.359      masm.jcc(Assembler::equal, TRUE_LABEL);
   1.360 -    masm.testl(ary1Reg, ary1Reg);
   1.361 +    masm.testptr(ary1Reg, ary1Reg);  // null -> false
   1.362      masm.jcc(Assembler::zero, FALSE_LABEL);
   1.363 -    masm.testl(ary2Reg, ary2Reg);
   1.364 +    masm.testptr(ary2Reg, ary2Reg);  // null -> false
   1.365      masm.jcc(Assembler::zero, FALSE_LABEL);
   1.366  
   1.367      // Check the lengths
   1.368 -    masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
   1.369 +    masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
   1.370      masm.movl(resultReg, Address(ary2Reg, length_offset));
   1.371 -    masm.cmpl(tmp2Reg, resultReg);
   1.372 +    masm.cmpl(tmp4Reg, resultReg);
   1.373      masm.jcc(Assembler::notEqual, FALSE_LABEL);
   1.374      masm.testl(resultReg, resultReg);
   1.375      masm.jcc(Assembler::zero, TRUE_LABEL);
   1.376  
   1.377 -    // Get the number of 4 byte vectors to compare
   1.378 -    masm.shrl(resultReg, 1);
   1.379 -
   1.380 -    // Check for odd-length arrays
   1.381 -    masm.andl(tmp2Reg, 1);
   1.382 -    masm.testl(tmp2Reg, tmp2Reg);
   1.383 -    masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
   1.384 -
   1.385 -    // Compare 2-byte "tail" at end of arrays
   1.386 -    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   1.387 -    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   1.388 -    masm.cmpl(tmp1Reg, tmp2Reg);
   1.389 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   1.390 +    // Load array addrs (advance both registers by base_offset to the first element)
   1.391 +    masm.lea(ary1Reg, Address(ary1Reg, base_offset));
   1.392 +    masm.lea(ary2Reg, Address(ary2Reg, base_offset));
   1.393 +
   1.394 +    // Set byte count: tmp4Reg = length * 2, with a copy in resultReg
   1.395 +    masm.shll(tmp4Reg, 1);
   1.396 +    masm.movl(resultReg, tmp4Reg);
   1.397 +
   1.398 +    if (UseSSE42Intrinsics) {
   1.399 +      // With SSE4.2, use double quad vector compare
   1.400 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   1.401 +      // Compare 16-byte vectors
   1.402 +      masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
   1.403 +      masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
   1.404 +      masm.testl(tmp4Reg, tmp4Reg);
   1.405 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
   1.406 +      masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   1.407 +      masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   1.408 +      masm.negl(tmp4Reg);  // index runs from -bytes up to 0
   1.409 +
   1.410 +      masm.bind(COMPARE_WIDE_VECTORS);
   1.411 +      masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   1.412 +      masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   1.413 +      masm.pxor(tmp1Reg, tmp2Reg);
   1.414 +      masm.ptest(tmp1Reg, tmp1Reg);  // ZF is set only if the XOR is all zero
   1.415 +
   1.416 +      masm.jccb(Assembler::notZero, FALSE_LABEL);
   1.417 +      masm.addl(tmp4Reg, 16);
   1.418 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   1.419 +      masm.bind(COMPARE_TAIL);
   1.420 +      masm.movl(tmp4Reg, resultReg);  // continue with the tail bytes
   1.421 +      // Fallthru to tail compare
   1.422 +    }
   1.423 +
   1.424 +    // Compare 4-byte vectors
   1.425 +    masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
   1.426 +    masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
   1.427 +    masm.testl(tmp4Reg, tmp4Reg);
   1.428 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
   1.429 +    masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   1.430 +    masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   1.431 +    masm.negl(tmp4Reg);  // index runs from -bytes up to 0
   1.432 +
   1.433 +    masm.bind(COMPARE_VECTORS);
   1.434 +    masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   1.435 +    masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   1.436 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   1.437 +    masm.addl(tmp4Reg, 4);
   1.438 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
   1.439 +
   1.440 +    // Compare trailing char (final 2 bytes), if any
   1.441 +    masm.bind(COMPARE_CHAR);
   1.442      masm.testl(resultReg, resultReg);
   1.443 -    masm.jcc(Assembler::zero, TRUE_LABEL);
   1.444 -
   1.445 -    // Setup compare loop
   1.446 -    masm.bind(COMPARE_LOOP_HDR);
   1.447 -    // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
   1.448 -    masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   1.449 -    masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   1.450 -    masm.negl(resultReg);
   1.451 -
   1.452 -    // 4-byte-wide compare loop
   1.453 -    masm.bind(COMPARE_LOOP);
   1.454 -    masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
   1.455 -    masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
   1.456 -    masm.cmpl(ary1Reg, ary2Reg);
   1.457 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   1.458 -    masm.increment(resultReg);
   1.459 -    masm.jcc(Assembler::notZero, COMPARE_LOOP);
   1.460 +    masm.jccb(Assembler::zero, TRUE_LABEL);
   1.461 +    masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));  // array registers were advanced past the bytes already compared
   1.462 +    masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
   1.463 +    masm.cmpl(tmp3Reg, tmp4Reg);
   1.464 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   1.465  
   1.466      masm.bind(TRUE_LABEL);
   1.467      masm.movl(resultReg, 1);   // return true
   1.468 -    masm.jmp(DONE_LABEL);
   1.469 +    masm.jmpb(DONE);
   1.470  
   1.471      masm.bind(FALSE_LABEL);
   1.472      masm.xorl(resultReg, resultReg); // return false
   1.473  
   1.474      // That's it
   1.475 -    masm.bind(DONE_LABEL);
   1.476 +    masm.bind(DONE);
   1.477    %}
   1.478  
   1.479    enc_class enc_pop_rdx() %{
   1.480 @@ -12074,11 +12356,8 @@
   1.481    ins_pipe( fpu_reg_reg );
   1.482  %}
   1.483  
   1.484 -
   1.485 -
   1.486  // =======================================================================
   1.487  // fast clearing of an array
   1.488 -
   1.489  instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
   1.490    match(Set dummy (ClearArray cnt base));
   1.491    effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
   1.492 @@ -12092,24 +12371,48 @@
   1.493    ins_pipe( pipe_slow );
   1.494  %}
   1.495  
   1.496 -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
   1.497 +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
   1.498 +                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{
   1.499    match(Set result (StrComp str1 str2));
   1.500 -  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
   1.501 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);  // tmp1/tmp2: XMM scratch used on the SSE4.2 path of the encoding
   1.502    //ins_cost(300);
   1.503  
   1.504    format %{ "String Compare $str1,$str2 -> $result    // KILL EAX, EBX" %}
   1.505 -  ins_encode( enc_String_Compare() );
   1.506 +  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );  // operands listed in the enc_class parameter order
   1.507 +  ins_pipe( pipe_slow );
   1.508 +%}
   1.509 +
   1.510 +// fast string equals; result is 1 (equal) or 0, produced by enc_String_Equals
   1.511 +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
   1.512 +                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
   1.513 +  match(Set result (StrEquals str1 str2));
   1.514 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);  // tmp1/tmp2: XMM scratch used on the SSE4.2 path of the encoding
   1.515 +
   1.516 +  format %{ "String Equals $str1,$str2 -> $result    // KILL EBX, ECX" %}
   1.517 +  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );  // positional: must follow the enc_class parameter order so the XMM temps bind to tmp1/tmp2
   1.518 +  ins_pipe( pipe_slow );
   1.519 +%}
   1.520 +
   1.521 +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
   1.522 +                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{
   1.523 +  predicate(UseSSE42Intrinsics);  // the encoding emits pcmpestri unconditionally, so it has no non-SSE4.2 fallback
   1.524 +  match(Set result (StrIndexOf str1 str2));
   1.525 +  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
   1.526 +
   1.527 +  format %{ "String IndexOf $str1,$str2 -> $result    // KILL EAX, ECX, EDX" %}
   1.528 +  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );  // operands listed in the enc_class parameter order
   1.529    ins_pipe( pipe_slow );
   1.530  %}
   1.531  
   1.532  // fast array equals
   1.533 -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
   1.534 +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3,
   1.535 +                      eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
   1.536    match(Set result (AryEq ary1 ary2));
   1.537 -  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
   1.538 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);  // tmp1/tmp2: XMM scratch used on the SSE4.2 path of the encoding
   1.539    //ins_cost(300);
   1.540  
   1.541 -  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EAX, EBX" %}
   1.542 -  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
   1.543 +  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EBX, EDX" %}
   1.544 +  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );  // operands listed in the enc_class parameter order
   1.545    ins_pipe( pipe_slow );
   1.546  %}
   1.547  

mercurial