src/cpu/x86/vm/assembler_x86.cpp

changeset 2602
41d4973cf100
parent 2569
6bbaedb03534
child 2639
8033953d67ff
     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Feb 24 14:49:34 2011 -0800
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Sat Feb 26 12:10:54 2011 -0800
     1.3 @@ -1601,6 +1601,17 @@
     1.4    emit_byte(0xC0 | encode);
     1.5  }
     1.6  
     1.7 +void Assembler::movdl(XMMRegister dst, Address src) {
     1.8 +  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
     1.9 +  InstructionMark im(this);
    1.10 +  emit_byte(0x66);
    1.11 +  prefix(src, dst);
    1.12 +  emit_byte(0x0F);
    1.13 +  emit_byte(0x6E);
    1.14 +  emit_operand(dst, src);
    1.15 +}
    1.16 +
    1.17 +
    1.18  void Assembler::movdqa(XMMRegister dst, Address src) {
    1.19    NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    1.20    InstructionMark im(this);
    1.21 @@ -2412,7 +2423,10 @@
    1.22  }
    1.23  
    1.24  void Assembler::psrlq(XMMRegister dst, int shift) {
    1.25 -  // HMM Table D-1 says sse2 or mmx
    1.26 +  // Shift 64 bit value logically right by specified number of bits.
    1.27 +  // HMM Table D-1 says sse2 or mmx.
    1.28 +  // Do not confuse it with psrldq SSE2 instruction which
    1.29 +  // shifts 128 bit value in xmm register by number of bytes.
    1.30    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    1.31  
    1.32    int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding());
    1.33 @@ -2423,6 +2437,18 @@
    1.34    emit_byte(shift);
    1.35  }
    1.36  
    1.37 +void Assembler::psrldq(XMMRegister dst, int shift) {
    1.38 +  // Shift 128 bit value in xmm register logically right by number of bytes.
    1.39 +  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    1.40 +
    1.41 +  int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding());
    1.42 +  emit_byte(0x66);
    1.43 +  emit_byte(0x0F);
    1.44 +  emit_byte(0x73);
    1.45 +  emit_byte(0xC0 | encode);
    1.46 +  emit_byte(shift);
    1.47 +}
    1.48 +
    1.49  void Assembler::ptest(XMMRegister dst, Address src) {
    1.50    assert(VM_Version::supports_sse4_1(), "");
    1.51  
    1.52 @@ -8567,101 +8593,418 @@
    1.53  }
    1.54  #endif // _LP64
    1.55  
    1.56 -// IndexOf substring.
    1.57 -void MacroAssembler::string_indexof(Register str1, Register str2,
    1.58 -                                    Register cnt1, Register cnt2, Register result,
    1.59 -                                    XMMRegister vec, Register tmp) {
    1.60 +// IndexOf for constant substrings with size >= 8 chars
    1.61 +// which don't need to be loaded through stack.
    1.62 +void MacroAssembler::string_indexofC8(Register str1, Register str2,
    1.63 +                                      Register cnt1, Register cnt2,
    1.64 +                                      int int_cnt2,  Register result,
    1.65 +                                      XMMRegister vec, Register tmp) {
    1.66    assert(UseSSE42Intrinsics, "SSE4.2 is required");
    1.67  
    1.68 -  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
    1.69 -        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
    1.70 -
    1.71 -  push(str1); // string addr
    1.72 -  push(str2); // substr addr
    1.73 -  push(cnt2); // substr count
    1.74 -  jmpb(PREP_FOR_SCAN);
    1.75 -
    1.76 -  // Substr count saved at sp
    1.77 -  // Substr saved at sp+1*wordSize
    1.78 -  // String saved at sp+2*wordSize
    1.79 -
    1.80 -  // Reload substr for rescan
    1.81 -  bind(RELOAD_SUBSTR);
    1.82 -  movl(cnt2, Address(rsp, 0));
    1.83 -  movptr(str2, Address(rsp, wordSize));
    1.84 -  // We came here after the beginninig of the substring was
    1.85 -  // matched but the rest of it was not so we need to search
    1.86 -  // again. Start from the next element after the previous match.
    1.87 -  subptr(str1, result); // Restore counter
    1.88 -  shrl(str1, 1);
    1.89 -  addl(cnt1, str1);
    1.90 -  decrementl(cnt1);
    1.91 -  lea(str1, Address(result, 2)); // Reload string
    1.92 -
    1.93 -  // Load substr
    1.94 -  bind(PREP_FOR_SCAN);
    1.95 -  movdqu(vec, Address(str2, 0));
    1.96 -  addl(cnt1, 8);  // prime the loop
    1.97 -  subptr(str1, 16);
    1.98 -
    1.99 -  // Scan string for substr in 16-byte vectors
   1.100 -  bind(SCAN_TO_SUBSTR);
   1.101 -  subl(cnt1, 8);
   1.102 -  addptr(str1, 16);
   1.103 -
   1.104 -  // pcmpestri
   1.105 +  // This method uses pcmpestri instruction with bound registers
   1.106    //   inputs:
   1.107    //     xmm - substring
   1.108    //     rax - substring length (elements count)
   1.109 -  //     mem - scaned string
   1.110 +  //     mem - scanned string
   1.111    //     rdx - string length (elements count)
   1.112    //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
   1.113    //   outputs:
   1.114    //     rcx - matched index in string
   1.115    assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   1.116  
   1.117 -  pcmpestri(vec, Address(str1, 0), 0x0d);
   1.118 -  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
   1.119 -  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
   1.120 -
   1.121 -  // Fallthrough: found a potential substr
   1.122 +  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
   1.123 +        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
   1.124 +        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
   1.125 +
   1.126 +  // Note, inline_string_indexOf() generates checks:
   1.127 +  // if (substr.count > string.count) return -1;
   1.128 +  // if (substr.count == 0) return 0;
   1.129 +  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
   1.130 +
   1.131 +  // Load substring.
   1.132 +  movdqu(vec, Address(str2, 0));
   1.133 +  movl(cnt2, int_cnt2);
   1.134 +  movptr(result, str1); // string addr
   1.135 +
   1.136 +  if (int_cnt2 > 8) {
   1.137 +    jmpb(SCAN_TO_SUBSTR);
   1.138 +
   1.139 +    // Reload substr for rescan, this code
   1.140 +    // is executed only for large substrings (> 8 chars)
   1.141 +    bind(RELOAD_SUBSTR);
   1.142 +    movdqu(vec, Address(str2, 0));
   1.143 +    negptr(cnt2); // Jumped here with negative cnt2, convert to positive
   1.144 +
   1.145 +    bind(RELOAD_STR);
   1.146 +    // We came here after the beginning of the substring was
   1.147 +    // matched but the rest of it was not so we need to search
   1.148 +    // again. Start from the next element after the previous match.
   1.149 +
   1.150 +    // cnt2 is number of substring remaining elements and
   1.151 +    // cnt1 is number of string remaining elements when cmp failed.
   1.152 +    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
   1.153 +    subl(cnt1, cnt2);
   1.154 +    addl(cnt1, int_cnt2);
   1.155 +    movl(cnt2, int_cnt2); // Now restore cnt2
   1.156 +
   1.157 +    decrementl(cnt1);     // Shift to next element
   1.158 +    cmpl(cnt1, cnt2);
   1.159 +    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   1.160 +
   1.161 +    addptr(result, 2);
   1.162 +
   1.163 +  } // (int_cnt2 > 8)
   1.164 +
   1.165 +  // Scan string for start of substr in 16-byte vectors
   1.166 +  bind(SCAN_TO_SUBSTR);
   1.167 +  pcmpestri(vec, Address(result, 0), 0x0d);
   1.168 +  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
   1.169 +  subl(cnt1, 8);
   1.170 +  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   1.171 +  cmpl(cnt1, cnt2);
   1.172 +  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   1.173 +  addptr(result, 16);
   1.174 +  jmpb(SCAN_TO_SUBSTR);
   1.175 +
   1.176 +  // Found a potential substr
   1.177 +  bind(FOUND_CANDIDATE);
   1.178 +  // Matched whole vector if first element matched (tmp(rcx) == 0).
   1.179 +  if (int_cnt2 == 8) {
   1.180 +    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
   1.181 +  } else { // int_cnt2 > 8
   1.182 +    jccb(Assembler::overflow, FOUND_SUBSTR);
   1.183 +  }
   1.184 +  // After pcmpestri tmp(rcx) contains matched element index
   1.185 +  // Compute start addr of substr
   1.186 +  lea(result, Address(result, tmp, Address::times_2));
   1.187  
   1.188    // Make sure string is still long enough
   1.189    subl(cnt1, tmp);
   1.190    cmpl(cnt1, cnt2);
   1.191 -  jccb(Assembler::negative, RET_NOT_FOUND);
   1.192 -  // Compute start addr of substr
   1.193 -  lea(str1, Address(str1, tmp, Address::times_2));
   1.194 -  movptr(result, str1); // save
   1.195 -
   1.196 -  // Compare potential substr
   1.197 -  addl(cnt1, 8);     // prime the loop
   1.198 -  addl(cnt2, 8);
   1.199 -  subptr(str1, 16);
   1.200 -  subptr(str2, 16);
   1.201 -
   1.202 -  // Scan 16-byte vectors of string and substr
   1.203 -  bind(SCAN_SUBSTR);
   1.204 -  subl(cnt1, 8);
   1.205 -  subl(cnt2, 8);
   1.206 -  addptr(str1, 16);
   1.207 -  addptr(str2, 16);
   1.208 -  movdqu(vec, Address(str2, 0));
   1.209 -  pcmpestri(vec, Address(str1, 0), 0x0d);
   1.210 -  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   1.211 -  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
   1.212 -
   1.213 -  // Compute substr offset
   1.214 -  subptr(result, Address(rsp, 2*wordSize));
   1.215 -  shrl(result, 1); // index
   1.216 -  jmpb(CLEANUP);
   1.217 +  if (int_cnt2 == 8) {
   1.218 +    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   1.219 +  } else { // int_cnt2 > 8
   1.220 +    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
   1.221 +  }
   1.222 +  // Left less than substring.
   1.223  
   1.224    bind(RET_NOT_FOUND);
   1.225    movl(result, -1);
   1.226 +  jmpb(EXIT);
   1.227 +
   1.228 +  if (int_cnt2 > 8) {
   1.229 +    // This code is optimized for the case when whole substring
   1.230 +    // is matched if its head is matched.
   1.231 +    bind(MATCH_SUBSTR_HEAD);
   1.232 +    pcmpestri(vec, Address(result, 0), 0x0d);
   1.233 +    // Reload only string if does not match
   1.234 +    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
   1.235 +
   1.236 +    Label CONT_SCAN_SUBSTR;
   1.237 +    // Compare the rest of substring (> 8 chars).
   1.238 +    bind(FOUND_SUBSTR);
   1.239 +    // First 8 chars are already matched.
   1.240 +    negptr(cnt2);
   1.241 +    addptr(cnt2, 8);
   1.242 +
   1.243 +    bind(SCAN_SUBSTR);
   1.244 +    subl(cnt1, 8);
   1.245 +    cmpl(cnt2, -8); // Do not read beyond substring
   1.246 +    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
   1.247 +    // Back-up strings to avoid reading beyond substring:
   1.248 +    // cnt1 = cnt1 - cnt2 + 8
   1.249 +    addl(cnt1, cnt2); // cnt2 is negative
   1.250 +    addl(cnt1, 8);
   1.251 +    movl(cnt2, 8); negptr(cnt2);
   1.252 +    bind(CONT_SCAN_SUBSTR);
   1.253 +    if (int_cnt2 < (int)G) {
   1.254 +      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
   1.255 +      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
   1.256 +    } else {
   1.257 +      // calculate index in register to avoid integer overflow (int_cnt2*2)
   1.258 +      movl(tmp, int_cnt2);
   1.259 +      addptr(tmp, cnt2);
   1.260 +      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
   1.261 +      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
   1.262 +    }
   1.263 +    // Need to reload strings pointers if not matched whole vector
   1.264 +    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   1.265 +    addptr(cnt2, 8);
   1.266 +    jccb(Assembler::negative, SCAN_SUBSTR);
   1.267 +    // Fall through if found full substring
   1.268 +
   1.269 +  } // (int_cnt2 > 8)
   1.270 +
   1.271 +  bind(RET_FOUND);
   1.272 +  // Found result if we matched full small substring.
   1.273 +  // Compute substr offset
   1.274 +  subptr(result, str1);
   1.275 +  shrl(result, 1); // index
   1.276 +  bind(EXIT);
   1.277 +
   1.278 +} // string_indexofC8
   1.279 +
   1.280 +// Small strings are loaded through stack if they cross page boundary.
   1.281 +void MacroAssembler::string_indexof(Register str1, Register str2,
   1.282 +                                    Register cnt1, Register cnt2,
   1.283 +                                    int int_cnt2,  Register result,
   1.284 +                                    XMMRegister vec, Register tmp) {
   1.285 +  assert(UseSSE42Intrinsics, "SSE4.2 is required");
   1.286 +  //
   1.287 +  // int_cnt2 is length of small (< 8 chars) constant substring
   1.288 +  // or (-1) for non constant substring in which case its length
   1.289 +  // is in cnt2 register.
   1.290 +  //
   1.291 +  // Note, inline_string_indexOf() generates checks:
   1.292 +  // if (substr.count > string.count) return -1;
   1.293 +  // if (substr.count == 0) return 0;
   1.294 +  //
   1.295 +  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
   1.296 +
   1.297 +  // This method uses pcmpestri instruction with bound registers
   1.298 +  //   inputs:
   1.299 +  //     xmm - substring
   1.300 +  //     rax - substring length (elements count)
   1.301 +  //     mem - scanned string
   1.302 +  //     rdx - string length (elements count)
   1.303 +  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
   1.304 +  //   outputs:
   1.305 +  //     rcx - matched index in string
   1.306 +  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   1.307 +
   1.308 +  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
   1.309 +        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
   1.310 +        FOUND_CANDIDATE;
   1.311 +
   1.312 +  { //========================================================
   1.313 +    // We don't know where these strings are located
   1.314 +    // and we can't read beyond them. Load them through stack.
   1.315 +    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
   1.316 +
   1.317 +    movptr(tmp, rsp); // save old SP
   1.318 +
   1.319 +    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
   1.320 +      if (int_cnt2 == 1) {  // One char
   1.321 +        load_unsigned_short(result, Address(str2, 0));
   1.322 +        movdl(vec, result); // move 32 bits
   1.323 +      } else if (int_cnt2 == 2) { // Two chars
   1.324 +        movdl(vec, Address(str2, 0)); // move 32 bits
   1.325 +      } else if (int_cnt2 == 4) { // Four chars
   1.326 +        movq(vec, Address(str2, 0));  // move 64 bits
   1.327 +      } else { // cnt2 = { 3, 5, 6, 7 }
   1.328 +        // Array header size is 12 bytes in 32-bit VM
   1.329 +        // + 6 bytes for 3 chars == 18 bytes,
   1.330 +        // enough space to load vec and shift.
   1.331 +        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
   1.332 +        movdqu(vec, Address(str2, (int_cnt2*2)-16));
   1.333 +        psrldq(vec, 16-(int_cnt2*2));
   1.334 +      }
   1.335 +    } else { // not constant substring
   1.336 +      cmpl(cnt2, 8);
   1.337 +      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
   1.338 +
   1.339 +      // We can read beyond string if str+16 does not cross page boundary
   1.340 +      // since heaps are aligned and mapped by pages.
   1.341 +      assert(os::vm_page_size() < (int)G, "default page should be small");
   1.342 +      movl(result, str2); // We need only low 32 bits
   1.343 +      andl(result, (os::vm_page_size()-1));
   1.344 +      cmpl(result, (os::vm_page_size()-16));
   1.345 +      jccb(Assembler::belowEqual, CHECK_STR);
   1.346 +
   1.347 +      // Move small strings to stack to allow load 16 bytes into vec.
   1.348 +      subptr(rsp, 16);
   1.349 +      int stk_offset = wordSize-2;
   1.350 +      push(cnt2);
   1.351 +
   1.352 +      bind(COPY_SUBSTR);
   1.353 +      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
   1.354 +      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
   1.355 +      decrement(cnt2);
   1.356 +      jccb(Assembler::notZero, COPY_SUBSTR);
   1.357 +
   1.358 +      pop(cnt2);
   1.359 +      movptr(str2, rsp);  // New substring address
   1.360 +    } // non constant
   1.361 +
   1.362 +    bind(CHECK_STR);
   1.363 +    cmpl(cnt1, 8);
   1.364 +    jccb(Assembler::aboveEqual, BIG_STRINGS);
   1.365 +
   1.366 +    // Check cross page boundary.
   1.367 +    movl(result, str1); // We need only low 32 bits
   1.368 +    andl(result, (os::vm_page_size()-1));
   1.369 +    cmpl(result, (os::vm_page_size()-16));
   1.370 +    jccb(Assembler::belowEqual, BIG_STRINGS);
   1.371 +
   1.372 +    subptr(rsp, 16);
   1.373 +    int stk_offset = -2;
   1.374 +    if (int_cnt2 < 0) { // not constant
   1.375 +      push(cnt2);
   1.376 +      stk_offset += wordSize;
   1.377 +    }
   1.378 +    movl(cnt2, cnt1);
   1.379 +
   1.380 +    bind(COPY_STR);
   1.381 +    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
   1.382 +    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
   1.383 +    decrement(cnt2);
   1.384 +    jccb(Assembler::notZero, COPY_STR);
   1.385 +
   1.386 +    if (int_cnt2 < 0) { // not constant
   1.387 +      pop(cnt2);
   1.388 +    }
   1.389 +    movptr(str1, rsp);  // New string address
   1.390 +
   1.391 +    bind(BIG_STRINGS);
   1.392 +    // Load substring.
   1.393 +    if (int_cnt2 < 0) { // -1
   1.394 +      movdqu(vec, Address(str2, 0));
   1.395 +      push(cnt2);       // substr count
   1.396 +      push(str2);       // substr addr
   1.397 +      push(str1);       // string addr
   1.398 +    } else {
   1.399 +      // Small (< 8 chars) constant substrings are loaded already.
   1.400 +      movl(cnt2, int_cnt2);
   1.401 +    }
   1.402 +    push(tmp);  // original SP
   1.403 +
   1.404 +  } // Finished loading
   1.405 +
   1.406 +  //========================================================
   1.407 +  // Start search
   1.408 +  //
   1.409 +
   1.410 +  movptr(result, str1); // string addr
   1.411 +
   1.412 +  if (int_cnt2  < 0) {  // Only for non constant substring
   1.413 +    jmpb(SCAN_TO_SUBSTR);
   1.414 +
   1.415 +    // SP saved at sp+0
   1.416 +    // String saved at sp+1*wordSize
   1.417 +    // Substr saved at sp+2*wordSize
   1.418 +    // Substr count saved at sp+3*wordSize
   1.419 +
   1.420 +    // Reload substr for rescan, this code
   1.421 +    // is executed only for large substrings (> 8 chars)
   1.422 +    bind(RELOAD_SUBSTR);
   1.423 +    movptr(str2, Address(rsp, 2*wordSize));
   1.424 +    movl(cnt2, Address(rsp, 3*wordSize));
   1.425 +    movdqu(vec, Address(str2, 0));
   1.426 +    // We came here after the beginning of the substring was
   1.427 +    // matched but the rest of it was not so we need to search
   1.428 +    // again. Start from the next element after the previous match.
   1.429 +    subptr(str1, result); // Restore counter
   1.430 +    shrl(str1, 1);
   1.431 +    addl(cnt1, str1);
   1.432 +    decrementl(cnt1);   // Shift to next element
   1.433 +    cmpl(cnt1, cnt2);
   1.434 +    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   1.435 +
   1.436 +    addptr(result, 2);
   1.437 +  } // non constant
   1.438 +
   1.439 +  // Scan string for start of substr in 16-byte vectors
   1.440 +  bind(SCAN_TO_SUBSTR);
   1.441 +  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
   1.442 +  pcmpestri(vec, Address(result, 0), 0x0d);
   1.443 +  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
   1.444 +  subl(cnt1, 8);
   1.445 +  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   1.446 +  cmpl(cnt1, cnt2);
   1.447 +  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   1.448 +  addptr(result, 16);
   1.449 +
   1.450 +  bind(ADJUST_STR);
   1.451 +  cmpl(cnt1, 8); // Do not read beyond string
   1.452 +  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   1.453 +  // Back-up string to avoid reading beyond string.
   1.454 +  lea(result, Address(result, cnt1, Address::times_2, -16));
   1.455 +  movl(cnt1, 8);
   1.456 +  jmpb(SCAN_TO_SUBSTR);
   1.457 +
   1.458 +  // Found a potential substr
   1.459 +  bind(FOUND_CANDIDATE);
   1.460 +  // After pcmpestri tmp(rcx) contains matched element index
   1.461 +
   1.462 +  // Make sure string is still long enough
   1.463 +  subl(cnt1, tmp);
   1.464 +  cmpl(cnt1, cnt2);
   1.465 +  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
   1.466 +  // Left less than substring.
   1.467 +
   1.468 +  bind(RET_NOT_FOUND);
   1.469 +  movl(result, -1);
   1.470 +  jmpb(CLEANUP);
   1.471 +
   1.472 +  bind(FOUND_SUBSTR);
   1.473 +  // Compute start addr of substr
   1.474 +  lea(result, Address(result, tmp, Address::times_2));
   1.475 +
   1.476 +  if (int_cnt2 > 0) { // Constant substring
   1.477 +    // Repeat search for small substring (< 8 chars)
   1.478 +    // from new point without reloading substring.
   1.479 +    // Have to check that we don't read beyond string.
   1.480 +    cmpl(tmp, 8-int_cnt2);
   1.481 +    jccb(Assembler::greater, ADJUST_STR);
   1.482 +    // Fall through if matched whole substring.
   1.483 +  } else { // non constant
   1.484 +    assert(int_cnt2 == -1, "should be != 0");
   1.485 +
   1.486 +    addl(tmp, cnt2);
   1.487 +    // Found result if we matched whole substring.
   1.488 +    cmpl(tmp, 8);
   1.489 +    jccb(Assembler::lessEqual, RET_FOUND);
   1.490 +
   1.491 +    // Repeat search for small substring (<= 8 chars)
   1.492 +    // from new point 'str1' without reloading substring.
   1.493 +    cmpl(cnt2, 8);
   1.494 +    // Have to check that we don't read beyond string.
   1.495 +    jccb(Assembler::lessEqual, ADJUST_STR);
   1.496 +
   1.497 +    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
   1.498 +    // Compare the rest of substring (> 8 chars).
   1.499 +    movptr(str1, result);
   1.500 +
   1.501 +    cmpl(tmp, cnt2);
   1.502 +    // First 8 chars are already matched.
   1.503 +    jccb(Assembler::equal, CHECK_NEXT);
   1.504 +
   1.505 +    bind(SCAN_SUBSTR);
   1.506 +    pcmpestri(vec, Address(str1, 0), 0x0d);
   1.507 +    // Need to reload strings pointers if not matched whole vector
   1.508 +    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
   1.509 +
   1.510 +    bind(CHECK_NEXT);
   1.511 +    subl(cnt2, 8);
   1.512 +    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
   1.513 +    addptr(str1, 16);
   1.514 +    addptr(str2, 16);
   1.515 +    subl(cnt1, 8);
   1.516 +    cmpl(cnt2, 8); // Do not read beyond substring
   1.517 +    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
   1.518 +    // Back-up strings to avoid reading beyond substring.
   1.519 +    lea(str2, Address(str2, cnt2, Address::times_2, -16));
   1.520 +    lea(str1, Address(str1, cnt2, Address::times_2, -16));
   1.521 +    subl(cnt1, cnt2);
   1.522 +    movl(cnt2, 8);
   1.523 +    addl(cnt1, 8);
   1.524 +    bind(CONT_SCAN_SUBSTR);
   1.525 +    movdqu(vec, Address(str2, 0));
   1.526 +    jmpb(SCAN_SUBSTR);
   1.527 +
   1.528 +    bind(RET_FOUND_LONG);
   1.529 +    movptr(str1, Address(rsp, wordSize));
   1.530 +  } // non constant
   1.531 +
   1.532 +  bind(RET_FOUND);
   1.533 +  // Compute substr offset
   1.534 +  subptr(result, str1);
   1.535 +  shrl(result, 1); // index
   1.536  
   1.537    bind(CLEANUP);
   1.538 -  addptr(rsp, 3*wordSize);
   1.539 -}
   1.540 +  pop(rsp); // restore SP
   1.541 +
   1.542 +} // string_indexof
   1.543  
   1.544  // Compare strings.
   1.545  void MacroAssembler::string_compare(Register str1, Register str2,

mercurial