src/cpu/x86/vm/assembler_x86.cpp

changeset 1421
62001a362ce9
parent 1333
60fea60a6db5
child 1451
d40f03b57795
     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 09:49:54 2009 -0700
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Sep 14 12:14:20 2009 -0700
     1.3 @@ -8404,6 +8404,319 @@
     1.4  }
     1.5  #endif // _LP64
     1.6  
     1.7 +// IndexOf substring: emits SSE4.2 code leaving in result the element index of str2 within str1, or -1 if not found.
     1.8 +void MacroAssembler::string_indexof(Register str1, Register str2,
     1.9 +                                    Register cnt1, Register cnt2, Register result,
    1.10 +                                    XMMRegister vec, Register tmp) {
    1.11 +  assert(UseSSE42Intrinsics, "SSE4.2 is required");
    1.12 +
    1.13 +  Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
    1.14 +        SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP;
    1.15 +
    1.16 +  push(str1); // string addr
    1.17 +  push(str2); // substr addr
    1.18 +  push(cnt2); // substr count
    1.19 +  jmpb(PREP_FOR_SCAN);
    1.20 +
    1.21 +  // Substr count saved at sp
    1.22 +  // Substr saved at sp+1*wordSize
    1.23 +  // String saved at sp+2*wordSize
    1.24 +
    1.25 +  // Reload substr for rescan
    1.26 +  bind(RELOAD_SUBSTR);
    1.27 +  movl(cnt2, Address(rsp, 0));
    1.28 +  movptr(str2, Address(rsp, wordSize));
    1.29 +  // The beginning of the substring matched but the rest of it did not,
    1.30 +  // so search again from the element right after the previous match.
    1.31 +  subptr(str1, result); // Restore counter: bytes advanced by SCAN_SUBSTR,
    1.32 +  shrl(str1, 1);        // converted to 2-byte elements,
    1.33 +  addl(cnt1, str1);     // gives the count remaining at the previous match.
    1.34 +  decrementl(cnt1);     // The rescan starts one element later, so one less remains
    1.35 +  lea(str1, Address(result, 2)); // Reload string
    1.36 +
    1.37 +  // Load substr
    1.38 +  bind(PREP_FOR_SCAN);
    1.39 +  movdqu(vec, Address(str2, 0));
    1.40 +  addl(cnt1, 8);  // prime the loop
    1.41 +  subptr(str1, 16);
    1.42 +
    1.43 +  // Scan string for substr in 16-byte vectors
    1.44 +  bind(SCAN_TO_SUBSTR);
    1.45 +  subl(cnt1, 8);
    1.46 +  addptr(str1, 16);
    1.47 +
    1.48 +  // pcmpestri
    1.49 +  //   inputs:
    1.50 +  //     xmm - substring
    1.51 +  //     rax - substring length (elements count)
    1.52 +  //     mem - scanned string
    1.53 +  //     rdx - string length (elements count)
    1.54 +  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
    1.55 +  //   outputs:
    1.56 +  //     rcx - matched index in string
    1.57 +  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
    1.58 +
    1.59 +  pcmpestri(vec, Address(str1, 0), 0x0d);
    1.60 +  jcc(Assembler::above, SCAN_TO_SUBSTR);      // CF == 0 && ZF == 0
    1.61 +  jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
    1.62 +
    1.63 +  // Fallthrough: found a potential substr
    1.64 +
    1.65 +  // Make sure string is still long enough
    1.66 +  subl(cnt1, tmp); // cnt1 = elements remaining from the match position
    1.67 +  cmpl(cnt1, cnt2);
    1.68 +  jccb(Assembler::negative, RET_NOT_FOUND);
    1.69 +  // Compute start addr of substr
    1.70 +  lea(str1, Address(str1, tmp, Address::times_2));
    1.71 +  movptr(result, str1); // save
    1.72 +
    1.73 +  // Compare potential substr
    1.74 +  addl(cnt1, 8);     // prime the loop
    1.75 +  addl(cnt2, 8);
    1.76 +  subptr(str1, 16);
    1.77 +  subptr(str2, 16);
    1.78 +
    1.79 +  // Scan 16-byte vectors of string and substr
    1.80 +  bind(SCAN_SUBSTR);
    1.81 +  subl(cnt1, 8);
    1.82 +  subl(cnt2, 8);
    1.83 +  addptr(str1, 16);
    1.84 +  addptr(str2, 16);
    1.85 +  movdqu(vec, Address(str2, 0));
    1.86 +  pcmpestri(vec, Address(str1, 0), 0x0d);
    1.87 +  jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    1.88 +  jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
    1.89 +
    1.90 +  // Compute substr offset
    1.91 +  subptr(result, Address(rsp, 2*wordSize)); // byte offset from the saved string addr
    1.92 +  shrl(result, 1); // index
    1.93 +  jmpb(CLEANUP);
    1.94 +
    1.95 +  bind(RET_NOT_FOUND);
    1.96 +  movl(result, -1);
    1.97 +
    1.98 +  bind(CLEANUP);
    1.99 +  addptr(rsp, 3*wordSize); // drop the three words pushed on entry
   1.100 +}
   1.101 +
   1.102 +// Compare strings: result gets the difference of the first mismatching chars, else the length difference cnt1 - cnt2.
   1.103 +void MacroAssembler::string_compare(Register str1, Register str2,
   1.104 +                                    Register cnt1, Register cnt2, Register result,
   1.105 +                                    XMMRegister vec1, XMMRegister vec2) {
   1.106 +  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
   1.107 +
   1.108 +  // Compute the minimum of the string lengths and the
   1.109 +  // difference of the string lengths (stack).
   1.110 +  // The flags set by subl select the minimum below.
   1.111 +  movl(result, cnt1); // keep the original cnt1
   1.112 +  subl(cnt1, cnt2);   // cnt1 = cnt1 - cnt2 (length difference)
   1.113 +  push(cnt1);         // popped at LENGTH_DIFF_LABEL, discarded at POP_LABEL
   1.114 +  if (VM_Version::supports_cmov()) {
   1.115 +    cmovl(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
   1.116 +  } else {
   1.117 +    Label GT_LABEL;
   1.118 +    jccb(Assembler::greater, GT_LABEL);
   1.119 +    movl(cnt2, result); // same selection without cmov
   1.120 +    bind(GT_LABEL);
   1.121 +  }
   1.122 +
   1.123 +  // Is the minimum length zero?
   1.124 +  testl(cnt2, cnt2);
   1.125 +  jcc(Assembler::zero, LENGTH_DIFF_LABEL);
   1.126 +
   1.127 +  // Load first characters (cnt1 is reused as a scratch register from here on)
   1.128 +  load_unsigned_short(result, Address(str1, 0));
   1.129 +  load_unsigned_short(cnt1, Address(str2, 0));
   1.130 +
   1.131 +  // Compare first characters
   1.132 +  subl(result, cnt1); // a non-zero difference is the return value
   1.133 +  jcc(Assembler::notZero,  POP_LABEL);
   1.134 +  decrementl(cnt2);
   1.135 +  jcc(Assembler::zero, LENGTH_DIFF_LABEL); // only one char to compare
   1.136 +
   1.137 +  {
   1.138 +    // Check after comparing first character to see if strings are equivalent
   1.139 +    Label LSkip2;
   1.140 +    // Check if the strings start at same location
   1.141 +    cmpptr(str1, str2);
   1.142 +    jccb(Assembler::notEqual, LSkip2);
   1.143 +
   1.144 +    // Check if the length difference is zero (from stack)
   1.145 +    cmpl(Address(rsp, 0), 0x0);
   1.146 +    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
   1.147 +
   1.148 +    // Strings might not be equivalent
   1.149 +    bind(LSkip2);
   1.150 +  }
   1.151 +
   1.152 +  // Advance to next character
   1.153 +  addptr(str1, 2);
   1.154 +  addptr(str2, 2);
   1.155 +
   1.156 +  if (UseSSE42Intrinsics) {
   1.157 +    // With SSE4.2, use double quad vector compare
   1.158 +    Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
   1.159 +    // Setup to compare 16-byte vectors
   1.160 +    movl(cnt1, cnt2);
   1.161 +    andl(cnt2, 0xfffffff8); // cnt2 holds the vector count
   1.162 +    andl(cnt1, 0x00000007); // cnt1 holds the tail count
   1.163 +    testl(cnt2, cnt2);
   1.164 +    jccb(Assembler::zero, COMPARE_TAIL);
   1.165 +
   1.166 +    lea(str2, Address(str2, cnt2, Address::times_2)); // point past the vector part
   1.167 +    lea(str1, Address(str1, cnt2, Address::times_2));
   1.168 +    negptr(cnt2); // negative index counting up to zero
   1.169 +
   1.170 +    bind(COMPARE_VECTORS);
   1.171 +    movdqu(vec1, Address(str1, cnt2, Address::times_2));
   1.172 +    movdqu(vec2, Address(str2, cnt2, Address::times_2));
   1.173 +    pxor(vec1, vec2);  // all zero iff the two vectors are equal
   1.174 +    ptest(vec1, vec1);
   1.175 +    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
   1.176 +    addptr(cnt2, 8);   // 8 chars per 16-byte vector
   1.177 +    jcc(Assembler::notZero, COMPARE_VECTORS);
   1.178 +    jmpb(COMPARE_TAIL);
   1.179 +
   1.180 +    // Mismatched characters in the vectors
   1.181 +    bind(VECTOR_NOT_EQUAL);
   1.182 +    lea(str1, Address(str1, cnt2, Address::times_2)); // start of the mismatching vector
   1.183 +    lea(str2, Address(str2, cnt2, Address::times_2));
   1.184 +    movl(cnt1, 8); // rescan these 8 chars one by one
   1.185 +
   1.186 +    // Compare tail (< 8 chars), or rescan last vectors to
   1.187 +    // find 1st mismatched characters
   1.188 +    bind(COMPARE_TAIL);
   1.189 +    testl(cnt1, cnt1);
   1.190 +    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
   1.191 +    movl(cnt2, cnt1); // the char loop below counts with cnt2
   1.192 +    // Fallthru to tail compare
   1.193 +  }
   1.194 +
   1.195 +  // Shift str2 and str1 to the end of the arrays, negate min
   1.196 +  lea(str1, Address(str1, cnt2, Address::times_2, 0));
   1.197 +  lea(str2, Address(str2, cnt2, Address::times_2, 0));
   1.198 +  negptr(cnt2);
   1.199 +
   1.200 +    // Compare the rest of the characters, one char per iteration
   1.201 +  bind(WHILE_HEAD_LABEL);
   1.202 +  load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0));
   1.203 +  load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0));
   1.204 +  subl(result, cnt1); // a non-zero difference is the return value
   1.205 +  jccb(Assembler::notZero, POP_LABEL);
   1.206 +  increment(cnt2);
   1.207 +  jcc(Assembler::notZero, WHILE_HEAD_LABEL);
   1.208 +
   1.209 +  // Strings are equal up to min length.  Return the length difference.
   1.210 +  bind(LENGTH_DIFF_LABEL);
   1.211 +  pop(result);
   1.212 +  jmpb(DONE_LABEL);
   1.213 +
   1.214 +  // Discard the stored length difference
   1.215 +  bind(POP_LABEL);
   1.216 +  addptr(rsp, wordSize);
   1.217 +
   1.218 +  // That's it
   1.219 +  bind(DONE_LABEL);
   1.220 +}
   1.221 +
   1.222 +// Compare char[] arrays aligned to 4 bytes or substrings: result = 1 if equal, 0 otherwise. With is_array_equ, ary1/ary2 are array oops (may be NULL) and limit is loaded from them.
   1.223 +void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
   1.224 +                                        Register limit, Register result, Register chr,
   1.225 +                                        XMMRegister vec1, XMMRegister vec2) {
   1.226 +  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
   1.227 +
   1.228 +  int length_offset  = arrayOopDesc::length_offset_in_bytes();
   1.229 +  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   1.230 +
   1.231 +  // Check the input args
   1.232 +  cmpptr(ary1, ary2);
   1.233 +  jcc(Assembler::equal, TRUE_LABEL); // same address: trivially equal
   1.234 +
   1.235 +  if (is_array_equ) {
   1.236 +    // Need additional checks for arrays_equals: neither array may be NULL.
   1.237 +    testptr(ary1, ary1); // Test each pointer separately, leaving ary1 intact
   1.238 +    jcc(Assembler::zero, FALSE_LABEL); // ary1 is NULL
   1.239 +    testptr(ary2, ary2);
   1.240 +    jcc(Assembler::zero, FALSE_LABEL); // ary2 is NULL
   1.241 +    movl(limit, Address(ary1, length_offset)); // Check the lengths
   1.242 +    cmpl(limit, Address(ary2, length_offset));
   1.243 +    jcc(Assembler::notEqual, FALSE_LABEL);
   1.244 +  }
   1.245 +
   1.246 +  // count == 0
   1.247 +  testl(limit, limit);
   1.248 +  jcc(Assembler::zero, TRUE_LABEL);
   1.249 +
   1.250 +  if (is_array_equ) {
   1.251 +    // Load array address
   1.252 +    lea(ary1, Address(ary1, base_offset));
   1.253 +    lea(ary2, Address(ary2, base_offset));
   1.254 +  }
   1.255 +
   1.256 +  shll(limit, 1);      // byte count != 0
   1.257 +  movl(result, limit); // copy
   1.258 +
   1.259 +  if (UseSSE42Intrinsics) {
   1.260 +    // With SSE4.2, use double quad vector compare
   1.261 +    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   1.262 +    // Compare 16-byte vectors
   1.263 +    andl(result, 0x0000000e);  //   tail count (in bytes)
   1.264 +    andl(limit, 0xfffffff0);   // vector count (in bytes)
   1.265 +    jccb(Assembler::zero, COMPARE_TAIL);
   1.266 +
   1.267 +    lea(ary1, Address(ary1, limit, Address::times_1)); // point past the vector part
   1.268 +    lea(ary2, Address(ary2, limit, Address::times_1));
   1.269 +    negptr(limit); // negative byte index counting up to zero
   1.270 +
   1.271 +    bind(COMPARE_WIDE_VECTORS);
   1.272 +    movdqu(vec1, Address(ary1, limit, Address::times_1));
   1.273 +    movdqu(vec2, Address(ary2, limit, Address::times_1));
   1.274 +    pxor(vec1, vec2);  // all zero iff the two vectors are equal
   1.275 +    ptest(vec1, vec1);
   1.276 +    jccb(Assembler::notZero, FALSE_LABEL);
   1.277 +    addptr(limit, 16);
   1.278 +    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   1.279 +
   1.280 +    bind(COMPARE_TAIL); // limit is zero
   1.281 +    movl(limit, result);
   1.282 +    // Fallthru to tail compare
   1.283 +  }
   1.284 +
   1.285 +  // Compare 4-byte vectors
   1.286 +  andl(limit, 0xfffffffc); // vector count (in bytes)
   1.287 +  jccb(Assembler::zero, COMPARE_CHAR);
   1.288 +
   1.289 +  lea(ary1, Address(ary1, limit, Address::times_1));
   1.290 +  lea(ary2, Address(ary2, limit, Address::times_1));
   1.291 +  negptr(limit);
   1.292 +
   1.293 +  bind(COMPARE_VECTORS);
   1.294 +  movl(chr, Address(ary1, limit, Address::times_1)); // two chars at a time
   1.295 +  cmpl(chr, Address(ary2, limit, Address::times_1));
   1.296 +  jccb(Assembler::notEqual, FALSE_LABEL);
   1.297 +  addptr(limit, 4);
   1.298 +  jcc(Assembler::notZero, COMPARE_VECTORS);
   1.299 +
   1.300 +  // Compare trailing char (final 2 bytes), if any
   1.301 +  bind(COMPARE_CHAR);
   1.302 +  testl(result, 0x2);   // tail  char
   1.303 +  jccb(Assembler::zero, TRUE_LABEL);
   1.304 +  load_unsigned_short(chr, Address(ary1, 0));
   1.305 +  load_unsigned_short(limit, Address(ary2, 0)); // limit is free to use as scratch here
   1.306 +  cmpl(chr, limit);
   1.307 +  jccb(Assembler::notEqual, FALSE_LABEL);
   1.308 +
   1.309 +  bind(TRUE_LABEL);
   1.310 +  movl(result, 1);   // return true
   1.311 +  jmpb(DONE);
   1.312 +
   1.313 +  bind(FALSE_LABEL);
   1.314 +  xorl(result, result); // return false
   1.315 +
   1.316 +  // That's it
   1.317 +  bind(DONE);
   1.318 +}
   1.319 +
   1.320  Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   1.321    switch (cond) {
   1.322      // Note some conditions are synonyms for others

mercurial