5673 |
5673 |
5674 // Is the minimum length zero? |
5674 // Is the minimum length zero? |
5675 testl(cnt2, cnt2); |
5675 testl(cnt2, cnt2); |
5676 jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
5676 jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
5677 |
5677 |
5678 // Load first characters |
5678 // Compare first characters |
5679 load_unsigned_short(result, Address(str1, 0)); |
5679 load_unsigned_short(result, Address(str1, 0)); |
5680 load_unsigned_short(cnt1, Address(str2, 0)); |
5680 load_unsigned_short(cnt1, Address(str2, 0)); |
5681 |
|
5682 // Compare first characters |
|
5683 subl(result, cnt1); |
5681 subl(result, cnt1); |
5684 jcc(Assembler::notZero, POP_LABEL); |
5682 jcc(Assembler::notZero, POP_LABEL); |
5685 decrementl(cnt2); |
5683 cmpl(cnt2, 1); |
5686 jcc(Assembler::zero, LENGTH_DIFF_LABEL); |
5684 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
5687 |
5685 |
5688 { |
5686 // Check if the strings start at the same location. |
5689 // Check after comparing first character to see if strings are equivalent |
5687 cmpptr(str1, str2); |
5690 Label LSkip2; |
5688 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
5691 // Check if the strings start at same location |
|
5692 cmpptr(str1, str2); |
|
5693 jccb(Assembler::notEqual, LSkip2); |
|
5694 |
|
5695 // Check if the length difference is zero (from stack) |
|
5696 cmpl(Address(rsp, 0), 0x0); |
|
5697 jcc(Assembler::equal, LENGTH_DIFF_LABEL); |
|
5698 |
|
5699 // Strings might not be equivalent |
|
5700 bind(LSkip2); |
|
5701 } |
|
5702 |
5689 |
5703 Address::ScaleFactor scale = Address::times_2; |
5690 Address::ScaleFactor scale = Address::times_2; |
5704 int stride = 8; |
5691 int stride = 8; |
5705 |
5692 |
5706 // Advance to next element |
5693 if (UseAVX >= 2) { |
5707 addptr(str1, 16/stride); |
5694 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR; |
5708 addptr(str2, 16/stride); |
5695 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR; |
5709 |
5696 Label COMPARE_TAIL_LONG; |
5710 if (UseSSE42Intrinsics) { |
5697 int pcmpmask = 0x19; |
|
5698 |
|
5699 // Setup to compare 16-chars (32-bytes) vectors, |
|
5700 // start from first character again because it has aligned address. |
|
5701 int stride2 = 16; |
|
5702 int adr_stride = stride << scale; |
|
5703 int adr_stride2 = stride2 << scale; |
|
5704 |
|
5705 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); |
|
5706 // rax and rdx are used by pcmpestri as element counters |
|
5707 movl(result, cnt2); |
|
5708 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count |
|
5709 jcc(Assembler::zero, COMPARE_TAIL_LONG); |
|
5710 |
|
5711 // Fast path: compare the first two 8-char vectors. |
|
5712 bind(COMPARE_16_CHARS); |
|
5713 movdqu(vec1, Address(str1, 0)); |
|
5714 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
|
5715 jccb(Assembler::below, COMPARE_INDEX_CHAR); |
|
5716 |
|
5717 movdqu(vec1, Address(str1, adr_stride)); |
|
5718 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask); |
|
5719 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS); |
|
5720 addl(cnt1, stride); |
|
5721 |
|
5722 // Compare the characters at index in cnt1 |
|
5723 bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character |
|
5724 load_unsigned_short(result, Address(str1, cnt1, scale)); |
|
5725 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
|
5726 subl(result, cnt2); |
|
5727 jmp(POP_LABEL); |
|
5728 |
|
5729 // Setup the registers to start vector comparison loop |
|
5730 bind(COMPARE_WIDE_VECTORS); |
|
5731 lea(str1, Address(str1, result, scale)); |
|
5732 lea(str2, Address(str2, result, scale)); |
|
5733 subl(result, stride2); |
|
5734 subl(cnt2, stride2); |
|
5735 jccb(Assembler::zero, COMPARE_WIDE_TAIL); |
|
5736 negptr(result); |
|
5737 |
|
5738 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest) |
|
5739 bind(COMPARE_WIDE_VECTORS_LOOP); |
|
5740 vmovdqu(vec1, Address(str1, result, scale)); |
|
5741 vpxor(vec1, Address(str2, result, scale)); |
|
5742 vptest(vec1, vec1); |
|
5743 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); |
|
5744 addptr(result, stride2); |
|
5745 subl(cnt2, stride2); |
|
5746 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); |
|
5747 |
|
5748 // compare wide vectors tail |
|
5749 bind(COMPARE_WIDE_TAIL); |
|
5750 testptr(result, result); |
|
5751 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
|
5752 |
|
5753 movl(result, stride2); |
|
5754 movl(cnt2, result); |
|
5755 negptr(result); |
|
5756 jmpb(COMPARE_WIDE_VECTORS_LOOP); |
|
5757 |
|
5758 // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors. |
|
5759 bind(VECTOR_NOT_EQUAL); |
|
5760 lea(str1, Address(str1, result, scale)); |
|
5761 lea(str2, Address(str2, result, scale)); |
|
5762 jmp(COMPARE_16_CHARS); |
|
5763 |
|
5764 // Compare tail chars, length between 1 and 15 chars |
|
5765 bind(COMPARE_TAIL_LONG); |
|
5766 movl(cnt2, result); |
|
5767 cmpl(cnt2, stride); |
|
5768 jccb(Assembler::less, COMPARE_SMALL_STR); |
|
5769 |
|
5770 movdqu(vec1, Address(str1, 0)); |
|
5771 pcmpestri(vec1, Address(str2, 0), pcmpmask); |
|
5772 jcc(Assembler::below, COMPARE_INDEX_CHAR); |
|
5773 subptr(cnt2, stride); |
|
5774 jccb(Assembler::zero, LENGTH_DIFF_LABEL); |
|
5775 lea(str1, Address(str1, result, scale)); |
|
5776 lea(str2, Address(str2, result, scale)); |
|
5777 negptr(cnt2); |
|
5778 jmpb(WHILE_HEAD_LABEL); |
|
5779 |
|
5780 bind(COMPARE_SMALL_STR); |
|
5781 } else if (UseSSE42Intrinsics) { |
5711 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; |
5782 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; |
5712 int pcmpmask = 0x19; |
5783 int pcmpmask = 0x19; |
5713 // Setup to compare 16-byte vectors |
5784 // Setup to compare 8-char (16-byte) vectors, |
|
5785 // start from first character again because it has aligned address. |
5714 movl(result, cnt2); |
5786 movl(result, cnt2); |
5715 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count |
5787 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count |
5716 jccb(Assembler::zero, COMPARE_TAIL); |
5788 jccb(Assembler::zero, COMPARE_TAIL); |
5717 |
5789 |
5718 lea(str1, Address(str1, result, scale)); |
5790 lea(str1, Address(str1, result, scale)); |
5752 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
5824 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); |
5753 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); |
5825 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); |
5754 |
5826 |
5755 // Mismatched characters in the vectors |
5827 // Mismatched characters in the vectors |
5756 bind(VECTOR_NOT_EQUAL); |
5828 bind(VECTOR_NOT_EQUAL); |
5757 addptr(result, cnt1); |
5829 addptr(cnt1, result); |
5758 movptr(cnt2, result); |
5830 load_unsigned_short(result, Address(str1, cnt1, scale)); |
5759 load_unsigned_short(result, Address(str1, cnt2, scale)); |
5831 load_unsigned_short(cnt2, Address(str2, cnt1, scale)); |
5760 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); |
5832 subl(result, cnt2); |
5761 subl(result, cnt1); |
|
5762 jmpb(POP_LABEL); |
5833 jmpb(POP_LABEL); |
5763 |
5834 |
5764 bind(COMPARE_TAIL); // limit is zero |
5835 bind(COMPARE_TAIL); // limit is zero |
5765 movl(cnt2, result); |
5836 movl(cnt2, result); |
5766 // Fallthru to tail compare |
5837 // Fallthru to tail compare |
5767 } |
5838 } |
5768 |
|
5769 // Shift str2 and str1 to the end of the arrays, negate min |
5839 // Shift str2 and str1 to the end of the arrays, negate min |
5770 lea(str1, Address(str1, cnt2, scale, 0)); |
5840 lea(str1, Address(str1, cnt2, scale)); |
5771 lea(str2, Address(str2, cnt2, scale, 0)); |
5841 lea(str2, Address(str2, cnt2, scale)); |
|
5842 decrementl(cnt2); // first character was compared already |
5772 negptr(cnt2); |
5843 negptr(cnt2); |
5773 |
5844 |
5774 // Compare the rest of the elements |
5845 // Compare the rest of the elements |
5775 bind(WHILE_HEAD_LABEL); |
5846 bind(WHILE_HEAD_LABEL); |
5776 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); |
5847 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); |
5831 } |
5902 } |
5832 |
5903 |
5833 shll(limit, 1); // byte count != 0 |
5904 shll(limit, 1); // byte count != 0 |
5834 movl(result, limit); // copy |
5905 movl(result, limit); // copy |
5835 |
5906 |
5836 if (UseSSE42Intrinsics) { |
5907 if (UseAVX >= 2) { |
|
5908 // With AVX2, use 32-byte vector compare |
|
5909 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
|
5910 |
|
5911 // Compare 32-byte vectors |
|
5912 andl(result, 0x0000001e); // tail count (in bytes) |
|
5913 andl(limit, 0xffffffe0); // vector count (in bytes) |
|
5914 jccb(Assembler::zero, COMPARE_TAIL); |
|
5915 |
|
5916 lea(ary1, Address(ary1, limit, Address::times_1)); |
|
5917 lea(ary2, Address(ary2, limit, Address::times_1)); |
|
5918 negptr(limit); |
|
5919 |
|
5920 bind(COMPARE_WIDE_VECTORS); |
|
5921 vmovdqu(vec1, Address(ary1, limit, Address::times_1)); |
|
5922 vmovdqu(vec2, Address(ary2, limit, Address::times_1)); |
|
5923 vpxor(vec1, vec2); |
|
5924 |
|
5925 vptest(vec1, vec1); |
|
5926 jccb(Assembler::notZero, FALSE_LABEL); |
|
5927 addptr(limit, 32); |
|
5928 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); |
|
5929 |
|
5930 testl(result, result); |
|
5931 jccb(Assembler::zero, TRUE_LABEL); |
|
5932 |
|
5933 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32)); |
|
5934 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32)); |
|
5935 vpxor(vec1, vec2); |
|
5936 |
|
5937 vptest(vec1, vec1); |
|
5938 jccb(Assembler::notZero, FALSE_LABEL); |
|
5939 jmpb(TRUE_LABEL); |
|
5940 |
|
5941 bind(COMPARE_TAIL); // limit is zero |
|
5942 movl(limit, result); |
|
5943 // Fallthru to tail compare |
|
5944 } else if (UseSSE42Intrinsics) { |
5837 // With SSE4.2, use double quad vector compare |
5945 // With SSE4.2, use double quad vector compare |
5838 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
5946 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; |
5839 |
5947 |
5840 // Compare 16-byte vectors |
5948 // Compare 16-byte vectors |
5841 andl(result, 0x0000000e); // tail count (in bytes) |
5949 andl(result, 0x0000000e); // tail count (in bytes) |