Thu, 02 Apr 2009 10:49:41 -0700
Merge
src/share/vm/runtime/globals.hpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cpu/sparc/vm/sparc.ad Thu Apr 02 05:22:02 2009 -0700 1.2 +++ b/src/cpu/sparc/vm/sparc.ad Thu Apr 02 10:49:41 2009 -0700 1.3 @@ -3003,6 +3003,202 @@ 1.4 __ bind(Ldone); 1.5 %} 1.6 1.7 +enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ 1.8 + Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone; 1.9 + MacroAssembler _masm(&cbuf); 1.10 + 1.11 + Register str1_reg = reg_to_register_object($str1$$reg); 1.12 + Register str2_reg = reg_to_register_object($str2$$reg); 1.13 + Register tmp1_reg = reg_to_register_object($tmp1$$reg); 1.14 + Register tmp2_reg = reg_to_register_object($tmp2$$reg); 1.15 + Register result_reg = reg_to_register_object($result$$reg); 1.16 + 1.17 + // Get the first character position in both strings 1.18 + // [8] char array, [12] offset, [16] count 1.19 + int value_offset = java_lang_String:: value_offset_in_bytes(); 1.20 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 1.21 + int count_offset = java_lang_String:: count_offset_in_bytes(); 1.22 + 1.23 + // load str1 (jchar*) base address into tmp1_reg 1.24 + __ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg); 1.25 + __ ld(Address(str1_reg, 0, offset_offset), result_reg); 1.26 + __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg); 1.27 + __ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted 1.28 + __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); 1.29 + __ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted 1.30 + __ add(result_reg, tmp1_reg, tmp1_reg); 1.31 + 1.32 + // load str2 (jchar*) base address into tmp2_reg 1.33 + // __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted 1.34 + __ ld(Address(str2_reg, 0, offset_offset), result_reg); 1.35 + __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg); 1.36 + __ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted 1.37 + __ sll(result_reg, 
exact_log2(sizeof(jchar)), result_reg); 1.38 + __ cmp(str1_reg, str2_reg); // hoisted 1.39 + __ add(result_reg, tmp2_reg, tmp2_reg); 1.40 + 1.41 + __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); 1.42 + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); 1.43 + __ delayed()->mov(G0, result_reg); // not equal 1.44 + 1.45 + __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone); 1.46 + __ delayed()->add(G0, 1, result_reg); //equals 1.47 + 1.48 + __ cmp(tmp1_reg, tmp2_reg); //same string ? 1.49 + __ brx(Assembler::equal, true, Assembler::pn, Ldone); 1.50 + __ delayed()->add(G0, 1, result_reg); 1.51 + 1.52 + //rename registers 1.53 + Register limit_reg = str1_reg; 1.54 + Register chr2_reg = str2_reg; 1.55 + Register chr1_reg = result_reg; 1.56 + // tmp{12} are the base pointers 1.57 + 1.58 + //check for alignment and position the pointers to the ends 1.59 + __ or3(tmp1_reg, tmp2_reg, chr1_reg); 1.60 + __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned 1.61 + __ br(Assembler::notZero, false, Assembler::pn, Lchar); 1.62 + __ delayed()->nop(); 1.63 + 1.64 + __ bind(Lword); 1.65 + __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2) 1.66 + __ andn(limit_reg, 0x3, limit_reg); 1.67 + __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word); 1.68 + __ delayed()->nop(); 1.69 + 1.70 + __ add(tmp1_reg, limit_reg, tmp1_reg); 1.71 + __ add(tmp2_reg, limit_reg, tmp2_reg); 1.72 + __ neg(limit_reg); 1.73 + 1.74 + __ lduw(tmp1_reg, limit_reg, chr1_reg); 1.75 + __ bind(Lword_loop); 1.76 + __ lduw(tmp2_reg, limit_reg, chr2_reg); 1.77 + __ cmp(chr1_reg, chr2_reg); 1.78 + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); 1.79 + __ delayed()->mov(G0, result_reg); 1.80 + __ inccc(limit_reg, 2*sizeof(jchar)); 1.81 + // annul LDUW if branch is not taken to prevent access past end of string 1.82 + __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); //annul if not taken 1.83 + __ 
delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted 1.84 + 1.85 + __ bind(Lpost_word); 1.86 + __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone); 1.87 + __ delayed()->add(G0, 1, result_reg); 1.88 + 1.89 + __ lduh(tmp1_reg, 0, chr1_reg); 1.90 + __ lduh(tmp2_reg, 0, chr2_reg); 1.91 + __ cmp (chr1_reg, chr2_reg); 1.92 + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); 1.93 + __ delayed()->mov(G0, result_reg); 1.94 + __ ba(false,Ldone); 1.95 + __ delayed()->add(G0, 1, result_reg); 1.96 + 1.97 + __ bind(Lchar); 1.98 + __ add(tmp1_reg, limit_reg, tmp1_reg); 1.99 + __ add(tmp2_reg, limit_reg, tmp2_reg); 1.100 + __ neg(limit_reg); //negate count 1.101 + 1.102 + __ lduh(tmp1_reg, limit_reg, chr1_reg); 1.103 + __ bind(Lchar_loop); 1.104 + __ lduh(tmp2_reg, limit_reg, chr2_reg); 1.105 + __ cmp(chr1_reg, chr2_reg); 1.106 + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); 1.107 + __ delayed()->mov(G0, result_reg); //not equal 1.108 + __ inccc(limit_reg, sizeof(jchar)); 1.109 + // annul LDUH if branch is not taken to prevent access past end of string 1.110 + __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); //annul if not taken 1.111 + __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted 1.112 + 1.113 + __ add(G0, 1, result_reg); //equal 1.114 + 1.115 + __ bind(Ldone); 1.116 + %} 1.117 + 1.118 +enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ 1.119 + Label Lvector, Ldone, Lloop; 1.120 + MacroAssembler _masm(&cbuf); 1.121 + 1.122 + Register ary1_reg = reg_to_register_object($ary1$$reg); 1.123 + Register ary2_reg = reg_to_register_object($ary2$$reg); 1.124 + Register tmp1_reg = reg_to_register_object($tmp1$$reg); 1.125 + Register tmp2_reg = reg_to_register_object($tmp2$$reg); 1.126 + Register result_reg = reg_to_register_object($result$$reg); 1.127 + 1.128 + int length_offset = arrayOopDesc::length_offset_in_bytes(); 1.129 + int base_offset = 
arrayOopDesc::base_offset_in_bytes(T_CHAR); 1.130 + 1.131 + // return true if the same array (pointer compare, so branch on the pointer-width condition codes with brx) 1.132 + __ cmp(ary1_reg, ary2_reg); 1.133 + __ brx(Assembler::equal, true, Assembler::pn, Ldone); 1.134 + __ delayed()->add(G0, 1, result_reg); // equal 1.135 + 1.136 + __ br_null(ary1_reg, true, Assembler::pn, Ldone); 1.137 + __ delayed()->mov(G0, result_reg); // not equal 1.138 + 1.139 + __ br_null(ary2_reg, true, Assembler::pn, Ldone); 1.140 + __ delayed()->mov(G0, result_reg); // not equal 1.141 + 1.142 + //load the lengths of arrays 1.143 + __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg); 1.144 + __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg); 1.145 + 1.146 + // return false if the two arrays are not equal length 1.147 + __ cmp(tmp1_reg, tmp2_reg); 1.148 + __ br(Assembler::notEqual, true, Assembler::pn, Ldone); 1.149 + __ delayed()->mov(G0, result_reg); // not equal 1.150 + 1.151 + __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone); 1.152 + __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal 1.153 + 1.154 + // load array addresses 1.155 + __ add(ary1_reg, base_offset, ary1_reg); 1.156 + __ add(ary2_reg, base_offset, ary2_reg); 1.157 + 1.158 + // renaming registers 1.159 + Register chr1_reg = tmp2_reg; // for characters in ary1 1.160 + Register chr2_reg = result_reg; // for characters in ary2 1.161 + Register limit_reg = tmp1_reg; // length 1.162 + 1.163 + // set byte count 1.164 + __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); 1.165 + __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ? 
1.166 + __ br(Assembler::zero, false, Assembler::pt, Lvector); 1.167 + __ delayed()->nop(); 1.168 + 1.169 + //compare the trailing char 1.170 + __ sub(limit_reg, sizeof(jchar), limit_reg); 1.171 + __ lduh(ary1_reg, limit_reg, chr1_reg); 1.172 + __ lduh(ary2_reg, limit_reg, chr2_reg); 1.173 + __ cmp(chr1_reg, chr2_reg); 1.174 + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); 1.175 + __ delayed()->mov(G0, result_reg); // not equal 1.176 + 1.177 + // only one char ? 1.178 + __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone); 1.179 + __ delayed()->add(G0, 1, result_reg); // the single (trailing) char matched: equal 1.180 + 1.181 + __ bind(Lvector); 1.182 + // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit 1.183 + __ add(ary1_reg, limit_reg, ary1_reg); 1.184 + __ add(ary2_reg, limit_reg, ary2_reg); 1.185 + __ neg(limit_reg, limit_reg); 1.186 + 1.187 + __ lduw(ary1_reg, limit_reg, chr1_reg); 1.188 + __ bind(Lloop); 1.189 + __ lduw(ary2_reg, limit_reg, chr2_reg); 1.190 + __ cmp(chr1_reg, chr2_reg); 1.191 + __ br(Assembler::notEqual, false, Assembler::pt, Ldone); 1.192 + __ delayed()->mov(G0, result_reg); // not equal 1.193 + __ inccc(limit_reg, 2*sizeof(jchar)); 1.194 + // annul LDUW if branch is not taken to prevent access past end of string 1.195 + __ br(Assembler::notZero, true, Assembler::pt, Lloop); //annul if not taken 1.196 + __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted 1.197 + 1.198 + __ add(G0, 1, result_reg); // equals 1.199 + 1.200 + __ bind(Ldone); 1.201 + %} 1.202 + 1.203 enc_class enc_rethrow() %{ 1.204 cbuf.set_inst_mark(); 1.205 Register temp_reg = G3; 1.206 @@ -9015,6 +9211,25 @@ 1.207 ins_pipe(long_memory_op); 1.208 %} 1.209 1.210 +instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, 1.211 + o7RegI tmp3, flagsReg ccr) %{ 1.212 + match(Set result (StrEquals str1 str2)); 1.213 + effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); 1.214 + 
ins_cost(300); 1.215 + format %{ "String Equals $str1,$str2 -> $result" %} 1.216 + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) ); 1.217 + ins_pipe(long_memory_op); 1.218 +%} 1.219 + 1.220 +instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, 1.221 + flagsReg ccr) %{ 1.222 + match(Set result (AryEq ary1 ary2)); 1.223 + effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); 1.224 + ins_cost(300); 1.225 + format %{ "Array Equals $ary1,$ary2 -> $result" %} 1.226 + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result)); 1.227 + ins_pipe(long_memory_op); 1.228 +%} 1.229 1.230 //---------- Population Count Instructions ------------------------------------- 1.231
2.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Thu Apr 02 05:22:02 2009 -0700 2.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Apr 02 10:49:41 2009 -0700 2.3 @@ -2173,6 +2173,31 @@ 2.4 emit_arith(0x0B, 0xC0, dst, src); 2.5 } 2.6 2.7 +void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { // memory-operand form: emits 66, prefix, 0F 3A 61, operand, imm8 2.8 + assert(VM_Version::supports_sse4_2(), ""); 2.9 + 2.10 + InstructionMark im(this); 2.11 + emit_byte(0x66); 2.12 + prefix(src, dst); 2.13 + emit_byte(0x0F); 2.14 + emit_byte(0x3A); 2.15 + emit_byte(0x61); 2.16 + emit_operand(dst, src); 2.17 + emit_byte(imm8); 2.18 +} 2.19 + 2.20 +void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { // register-register form: same opcode bytes, ModRM built from 0xC0 | encode 2.21 + assert(VM_Version::supports_sse4_2(), ""); 2.22 + 2.23 + emit_byte(0x66); 2.24 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); // NOTE(review): the Address overload uses prefix(); presumably prefixq_* forces REX.W - confirm that is intended here 2.25 + emit_byte(0x0F); 2.26 + emit_byte(0x3A); 2.27 + emit_byte(0x61); 2.28 + emit_byte(0xC0 | encode); 2.29 + emit_byte(imm8); 2.30 +} 2.31 + 2.32 // generic 2.33 void Assembler::pop(Register dst) { 2.34 int encode = prefix_and_encode(dst->encoding()); 2.35 @@ -2330,6 +2355,29 @@ 2.36 emit_byte(shift); 2.37 } 2.38 2.39 +void Assembler::ptest(XMMRegister dst, Address src) { // memory-operand form: emits 66, prefix, 0F 38 17, operand 2.40 + assert(VM_Version::supports_sse4_1(), ""); 2.41 + 2.42 + InstructionMark im(this); 2.43 + emit_byte(0x66); 2.44 + prefix(src, dst); 2.45 + emit_byte(0x0F); 2.46 + emit_byte(0x38); 2.47 + emit_byte(0x17); 2.48 + emit_operand(dst, src); 2.49 +} 2.50 + 2.51 +void Assembler::ptest(XMMRegister dst, XMMRegister src) { // register-register form: same opcode bytes, ModRM built from 0xC0 | encode 2.52 + assert(VM_Version::supports_sse4_1(), ""); 2.53 + 2.54 + emit_byte(0x66); 2.55 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); // NOTE(review): same prefixq_* vs prefix() asymmetry as pcmpestri - confirm 2.56 + emit_byte(0x0F); 2.57 + emit_byte(0x38); 2.58 + emit_byte(0x17); 2.59 + emit_byte(0xC0 | encode); 2.60 +} 2.61 + 2.62 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2.63 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2.64 emit_byte(0x66);
3.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Thu Apr 02 05:22:02 2009 -0700 3.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Apr 02 10:49:41 2009 -0700 3.3 @@ -1226,6 +1226,10 @@ 3.4 void orq(Register dst, Address src); 3.5 void orq(Register dst, Register src); 3.6 3.7 + // SSE4.2 string instructions: pcmpestri with an immediate control byte (imm8); the definitions name these parameters dst/src 3.8 + void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); 3.9 + void pcmpestri(XMMRegister xmm1, Address src, int imm8); 3.10 + 3.11 void popl(Address dst); 3.12 3.13 #ifdef _LP64 3.14 @@ -1260,6 +1264,10 @@ 3.15 // Shift Right Logical Quadword Immediate 3.16 void psrlq(XMMRegister dst, int shift); 3.17 3.18 + // Logical Compare Double Quadword (ptest; the definitions assert SSE4.1 support) 3.19 + void ptest(XMMRegister dst, XMMRegister src); 3.20 + void ptest(XMMRegister dst, Address src); 3.21 + 3.22 // Interleave Low Bytes 3.23 void punpcklbw(XMMRegister dst, XMMRegister src); 3.24
4.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Apr 02 05:22:02 2009 -0700 4.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Apr 02 10:49:41 2009 -0700 4.3 @@ -2691,7 +2691,7 @@ 4.4 __ mov(rdi, rax); 4.5 4.6 Label noException; 4.7 - __ cmpl(r12, Deoptimization::Unpack_exception); // Was exception pending? 4.8 + __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? 4.9 __ jcc(Assembler::notEqual, noException); 4.10 __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); 4.11 // QQQ this is useless it was NULL above
5.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Apr 02 05:22:02 2009 -0700 5.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Apr 02 10:49:41 2009 -0700 5.3 @@ -408,6 +408,11 @@ 5.4 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 5.5 } 5.6 } 5.7 + if( supports_sse4_2() && UseSSE >= 4 ) { 5.8 + if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 5.9 + UseSSE42Intrinsics = true; 5.10 + } 5.11 + } 5.12 } 5.13 } 5.14
6.1 --- a/src/cpu/x86/vm/x86_32.ad Thu Apr 02 05:22:02 2009 -0700 6.2 +++ b/src/cpu/x86/vm/x86_32.ad Thu Apr 02 10:49:41 2009 -0700 6.3 @@ -3694,12 +3694,16 @@ 6.4 } 6.5 %} 6.6 6.7 - enc_class enc_String_Compare() %{ 6.8 + enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 6.9 + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{ 6.10 Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, 6.11 POP_LABEL, DONE_LABEL, CONT_LABEL, 6.12 WHILE_HEAD_LABEL; 6.13 MacroAssembler masm(&cbuf); 6.14 6.15 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 6.16 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 6.17 + 6.18 // Get the first character position in both strings 6.19 // [8] char array, [12] offset, [16] count 6.20 int value_offset = java_lang_String::value_offset_in_bytes(); 6.21 @@ -3717,7 +3721,6 @@ 6.22 // Compute the minimum of the string lengths(rsi) and the 6.23 // difference of the string lengths (stack) 6.24 6.25 - 6.26 if (VM_Version::supports_cmov()) { 6.27 masm.movl(rdi, Address(rdi, count_offset)); 6.28 masm.movl(rsi, Address(rsi, count_offset)); 6.29 @@ -3731,7 +3734,7 @@ 6.30 masm.movl(rsi, rdi); 6.31 masm.subl(rdi, rcx); 6.32 masm.push(rdi); 6.33 - masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); 6.34 + masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL); 6.35 masm.movl(rsi, rcx); 6.36 // rsi holds min, rcx is unused 6.37 } 6.38 @@ -3756,7 +3759,7 @@ 6.39 Label LSkip2; 6.40 // Check if the strings start at same location 6.41 masm.cmpptr(rbx,rax); 6.42 - masm.jcc(Assembler::notEqual, LSkip2); 6.43 + masm.jccb(Assembler::notEqual, LSkip2); 6.44 6.45 // Check if the length difference is zero (from stack) 6.46 masm.cmpl(Address(rsp, 0), 0x0); 6.47 @@ -3766,9 +3769,52 @@ 6.48 masm.bind(LSkip2); 6.49 } 6.50 6.51 - // Shift rax, and rbx, to the end of the arrays, negate min 6.52 - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); 6.53 - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); 6.54 + // Advance to next character 6.55 + 
masm.addptr(rax, 2); 6.56 + masm.addptr(rbx, 2); 6.57 + 6.58 + if (UseSSE42Intrinsics) { 6.59 + // With SSE4.2, use double quad vector compare 6.60 + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 6.61 + // Setup to compare 16-byte vectors 6.62 + masm.movl(rdi, rsi); 6.63 + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count 6.64 + masm.andl(rdi, 0x00000007); // rdi holds the tail count 6.65 + masm.testl(rsi, rsi); 6.66 + masm.jccb(Assembler::zero, COMPARE_TAIL); 6.67 + 6.68 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 6.69 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 6.70 + masm.negl(rsi); 6.71 + 6.72 + masm.bind(COMPARE_VECTORS); 6.73 + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); 6.74 + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); 6.75 + masm.pxor(tmp1Reg, tmp2Reg); 6.76 + masm.ptest(tmp1Reg, tmp1Reg); 6.77 + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); 6.78 + masm.addl(rsi, 8); 6.79 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 6.80 + masm.jmpb(COMPARE_TAIL); 6.81 + 6.82 + // Mismatched characters in the vectors 6.83 + masm.bind(VECTOR_NOT_EQUAL); 6.84 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 6.85 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 6.86 + masm.movl(rdi, 8); 6.87 + 6.88 + // Compare tail (< 8 chars), or rescan last vectors to 6.89 + // find 1st mismatched characters 6.90 + masm.bind(COMPARE_TAIL); 6.91 + masm.testl(rdi, rdi); 6.92 + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); 6.93 + masm.movl(rsi, rdi); 6.94 + // Fallthru to tail compare 6.95 + } 6.96 + 6.97 + //Shift rax, and rbx, to the end of the arrays, negate min 6.98 + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); 6.99 + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); 6.100 masm.negl(rsi); 6.101 6.102 // Compare the rest of the characters 6.103 @@ -3776,93 +3822,329 @@ 6.104 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); 6.105 masm.load_unsigned_short(rdi, 
Address(rax, rsi, Address::times_2, 0)); 6.106 masm.subl(rcx, rdi); 6.107 - masm.jcc(Assembler::notZero, POP_LABEL); 6.108 + masm.jccb(Assembler::notZero, POP_LABEL); 6.109 masm.incrementl(rsi); 6.110 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 6.111 6.112 // Strings are equal up to min length. Return the length difference. 6.113 masm.bind(LENGTH_DIFF_LABEL); 6.114 masm.pop(rcx); 6.115 - masm.jmp(DONE_LABEL); 6.116 + masm.jmpb(DONE_LABEL); 6.117 6.118 // Discard the stored length difference 6.119 masm.bind(POP_LABEL); 6.120 masm.addptr(rsp, 4); 6.121 - 6.122 + 6.123 // That's it 6.124 masm.bind(DONE_LABEL); 6.125 %} 6.126 6.127 - enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{ 6.128 - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; 6.129 + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 6.130 + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{ 6.131 + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; 6.132 MacroAssembler masm(&cbuf); 6.133 6.134 - Register ary1Reg = as_Register($ary1$$reg); 6.135 - Register ary2Reg = as_Register($ary2$$reg); 6.136 - Register tmp1Reg = as_Register($tmp1$$reg); 6.137 - Register tmp2Reg = as_Register($tmp2$$reg); 6.138 - Register resultReg = as_Register($result$$reg); 6.139 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 6.140 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 6.141 + 6.142 + int value_offset = java_lang_String::value_offset_in_bytes(); 6.143 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 6.144 + int count_offset = java_lang_String::count_offset_in_bytes(); 6.145 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 6.146 + 6.147 + // does source == target string? 
6.148 + masm.cmpptr(rdi, rsi); 6.149 + masm.jcc(Assembler::equal, RET_TRUE); 6.150 + 6.151 + // get and compare counts 6.152 + masm.movl(rcx, Address(rdi, count_offset)); 6.153 + masm.movl(rax, Address(rsi, count_offset)); 6.154 + masm.cmpl(rcx, rax); 6.155 + masm.jcc(Assembler::notEqual, RET_FALSE); 6.156 + masm.testl(rax, rax); 6.157 + masm.jcc(Assembler::zero, RET_TRUE); 6.158 + 6.159 + // get source string offset and value 6.160 + masm.movptr(rbx, Address(rsi, value_offset)); 6.161 + masm.movl(rax, Address(rsi, offset_offset)); 6.162 + masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset)); 6.163 + 6.164 + // get compare string offset and value 6.165 + masm.movptr(rbx, Address(rdi, value_offset)); 6.166 + masm.movl(rax, Address(rdi, offset_offset)); 6.167 + masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset)); 6.168 + 6.169 + // Set byte count 6.170 + masm.shll(rcx, 1); 6.171 + masm.movl(rax, rcx); 6.172 + 6.173 + if (UseSSE42Intrinsics) { 6.174 + // With SSE4.2, use double quad vector compare 6.175 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 6.176 + // Compare 16-byte vectors 6.177 + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) 6.178 + masm.andl(rax, 0x0000000e); // tail count (in bytes) 6.179 + masm.testl(rcx, rcx); 6.180 + masm.jccb(Assembler::zero, COMPARE_TAIL); 6.181 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 6.182 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 6.183 + masm.negl(rcx); 6.184 + 6.185 + masm.bind(COMPARE_WIDE_VECTORS); 6.186 + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); 6.187 + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); 6.188 + masm.pxor(tmp1Reg, tmp2Reg); 6.189 + masm.ptest(tmp1Reg, tmp1Reg); 6.190 + masm.jccb(Assembler::notZero, RET_FALSE); 6.191 + masm.addl(rcx, 16); 6.192 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 6.193 + masm.bind(COMPARE_TAIL); 6.194 + masm.movl(rcx, rax); 6.195 + // Fallthru to tail compare 6.196 + } 6.197 + 6.198 + // Compare 
4-byte vectors 6.199 + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) 6.200 + masm.andl(rax, 0x00000002); // tail char (in bytes) 6.201 + masm.testl(rcx, rcx); 6.202 + masm.jccb(Assembler::zero, COMPARE_CHAR); 6.203 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 6.204 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 6.205 + masm.negl(rcx); 6.206 + 6.207 + masm.bind(COMPARE_VECTORS); 6.208 + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); 6.209 + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); 6.210 + masm.jccb(Assembler::notEqual, RET_FALSE); 6.211 + masm.addl(rcx, 4); 6.212 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 6.213 + 6.214 + // Compare trailing char (final 2 bytes), if any 6.215 + masm.bind(COMPARE_CHAR); 6.216 + masm.testl(rax, rax); 6.217 + masm.jccb(Assembler::zero, RET_TRUE); 6.218 + masm.load_unsigned_short(rbx, Address(rdi, 0)); 6.219 + masm.load_unsigned_short(rcx, Address(rsi, 0)); 6.220 + masm.cmpl(rbx, rcx); 6.221 + masm.jccb(Assembler::notEqual, RET_FALSE); 6.222 + 6.223 + masm.bind(RET_TRUE); 6.224 + masm.movl(rax, 1); // return true 6.225 + masm.jmpb(DONE); 6.226 + 6.227 + masm.bind(RET_FALSE); 6.228 + masm.xorl(rax, rax); // return false 6.229 + 6.230 + masm.bind(DONE); 6.231 + %} 6.232 + 6.233 + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, 6.234 + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{ 6.235 + // SSE4.2 version 6.236 + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, 6.237 + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; 6.238 + MacroAssembler masm(&cbuf); 6.239 + 6.240 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 6.241 + 6.242 + // Get the first character position in both strings 6.243 + // [8] char array, [12] offset, [16] count 6.244 + int value_offset = java_lang_String::value_offset_in_bytes(); 6.245 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 6.246 + int count_offset = java_lang_String::count_offset_in_bytes(); 6.247 
+ int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 6.248 + 6.249 + // Get counts for string and substr 6.250 + masm.movl(rdx, Address(rsi, count_offset)); 6.251 + masm.movl(rax, Address(rdi, count_offset)); 6.252 + // Check for substr count > string count 6.253 + masm.cmpl(rax, rdx); 6.254 + masm.jcc(Assembler::greater, RET_NEG_ONE); 6.255 + 6.256 + // Start the indexOf operation 6.257 + // Get start addr of string 6.258 + masm.movptr(rbx, Address(rsi, value_offset)); 6.259 + masm.movl(rcx, Address(rsi, offset_offset)); 6.260 + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); 6.261 + masm.push(rsi); 6.262 + 6.263 + // Get start addr of substr 6.264 + masm.movptr(rbx, Address(rdi, value_offset)); 6.265 + masm.movl(rcx, Address(rdi, offset_offset)); 6.266 + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); 6.267 + masm.push(rdi); 6.268 + masm.push(rax); 6.269 + masm.jmpb(PREP_FOR_SCAN); 6.270 + 6.271 + // Substr count saved at sp 6.272 + // Substr saved at sp+4 6.273 + // String saved at sp+8 6.274 + 6.275 + // Prep to load substr for scan 6.276 + masm.bind(LOAD_SUBSTR); 6.277 + masm.movptr(rdi, Address(rsp, 4)); 6.278 + masm.movl(rax, Address(rsp, 0)); 6.279 + 6.280 + // Load substr 6.281 + masm.bind(PREP_FOR_SCAN); 6.282 + masm.movdqu(tmp1Reg, Address(rdi, 0)); 6.283 + masm.addl(rdx, 8); // prime the loop 6.284 + masm.subptr(rsi, 16); 6.285 + 6.286 + // Scan string for substr in 16-byte vectors 6.287 + masm.bind(SCAN_TO_SUBSTR); 6.288 + masm.subl(rdx, 8); 6.289 + masm.addptr(rsi, 16); 6.290 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 6.291 + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 6.292 + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 6.293 + 6.294 + // Fallthru: found a potential substr 6.295 + 6.296 + // Make sure string is still long enough 6.297 + masm.subl(rdx, rcx); 6.298 + masm.cmpl(rdx, rax); 6.299 + masm.jccb(Assembler::negative, RET_NOT_FOUND); 6.300 + // Compute 
start addr of substr 6.301 + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); 6.302 + masm.movptr(rbx, rsi); 6.303 + 6.304 + // Compare potential substr 6.305 + masm.addl(rdx, 8); // prime the loop 6.306 + masm.addl(rax, 8); 6.307 + masm.subptr(rsi, 16); 6.308 + masm.subptr(rdi, 16); 6.309 + 6.310 + // Scan 16-byte vectors of string and substr 6.311 + masm.bind(SCAN_SUBSTR); 6.312 + masm.subl(rax, 8); 6.313 + masm.subl(rdx, 8); 6.314 + masm.addptr(rsi, 16); 6.315 + masm.addptr(rdi, 16); 6.316 + masm.movdqu(tmp1Reg, Address(rdi, 0)); 6.317 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 6.318 + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 6.319 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 6.320 + 6.321 + // Compute substr offset 6.322 + masm.movptr(rsi, Address(rsp, 8)); 6.323 + masm.subptr(rbx, rsi); 6.324 + masm.shrl(rbx, 1); 6.325 + masm.jmpb(CLEANUP); 6.326 + 6.327 + masm.bind(RET_NEG_ONE); 6.328 + masm.movl(rbx, -1); 6.329 + masm.jmpb(DONE); 6.330 + 6.331 + masm.bind(RET_NOT_FOUND); 6.332 + masm.movl(rbx, -1); 6.333 + 6.334 + masm.bind(CLEANUP); 6.335 + masm.addptr(rsp, 12); 6.336 + 6.337 + masm.bind(DONE); 6.338 + %} 6.339 + 6.340 + enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, 6.341 + eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{ 6.342 + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 6.343 + MacroAssembler masm(&cbuf); 6.344 + 6.345 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 6.346 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 6.347 + Register ary1Reg = as_Register($ary1$$reg); 6.348 + Register ary2Reg = as_Register($ary2$$reg); 6.349 + Register tmp3Reg = as_Register($tmp3$$reg); 6.350 + Register tmp4Reg = as_Register($tmp4$$reg); 6.351 + Register resultReg = as_Register($result$$reg); 6.352 6.353 int length_offset = arrayOopDesc::length_offset_in_bytes(); 6.354 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 6.355 6.356 // Check the input args 
6.357 - masm.cmpl(ary1Reg, ary2Reg); 6.358 + masm.cmpptr(ary1Reg, ary2Reg); 6.359 masm.jcc(Assembler::equal, TRUE_LABEL); 6.360 - masm.testl(ary1Reg, ary1Reg); 6.361 + masm.testptr(ary1Reg, ary1Reg); 6.362 masm.jcc(Assembler::zero, FALSE_LABEL); 6.363 - masm.testl(ary2Reg, ary2Reg); 6.364 + masm.testptr(ary2Reg, ary2Reg); 6.365 masm.jcc(Assembler::zero, FALSE_LABEL); 6.366 6.367 // Check the lengths 6.368 - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); 6.369 + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); 6.370 masm.movl(resultReg, Address(ary2Reg, length_offset)); 6.371 - masm.cmpl(tmp2Reg, resultReg); 6.372 + masm.cmpl(tmp4Reg, resultReg); 6.373 masm.jcc(Assembler::notEqual, FALSE_LABEL); 6.374 masm.testl(resultReg, resultReg); 6.375 masm.jcc(Assembler::zero, TRUE_LABEL); 6.376 6.377 - // Get the number of 4 byte vectors to compare 6.378 - masm.shrl(resultReg, 1); 6.379 - 6.380 - // Check for odd-length arrays 6.381 - masm.andl(tmp2Reg, 1); 6.382 - masm.testl(tmp2Reg, tmp2Reg); 6.383 - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); 6.384 - 6.385 - // Compare 2-byte "tail" at end of arrays 6.386 - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 6.387 - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 6.388 - masm.cmpl(tmp1Reg, tmp2Reg); 6.389 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 6.390 + // Load array addrs 6.391 + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); 6.392 + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); 6.393 + 6.394 + // Set byte count 6.395 + masm.shll(tmp4Reg, 1); 6.396 + masm.movl(resultReg, tmp4Reg); 6.397 + 6.398 + if (UseSSE42Intrinsics) { 6.399 + // With SSE4.2, use double quad vector compare 6.400 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 6.401 + // Compare 16-byte vectors 6.402 + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) 6.403 + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) 6.404 + 
masm.testl(tmp4Reg, tmp4Reg); 6.405 + masm.jccb(Assembler::zero, COMPARE_TAIL); 6.406 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 6.407 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 6.408 + masm.negl(tmp4Reg); 6.409 + 6.410 + masm.bind(COMPARE_WIDE_VECTORS); 6.411 + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 6.412 + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 6.413 + masm.pxor(tmp1Reg, tmp2Reg); 6.414 + masm.ptest(tmp1Reg, tmp1Reg); 6.415 + 6.416 + masm.jccb(Assembler::notZero, FALSE_LABEL); 6.417 + masm.addl(tmp4Reg, 16); 6.418 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 6.419 + masm.bind(COMPARE_TAIL); 6.420 + masm.movl(tmp4Reg, resultReg); 6.421 + // Fallthru to tail compare 6.422 + } 6.423 + 6.424 + // Compare 4-byte vectors 6.425 + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) 6.426 + masm.andl(resultReg, 0x00000002); // tail char (in bytes) 6.427 + masm.testl(tmp4Reg, tmp4Reg); 6.428 + masm.jccb(Assembler::zero, COMPARE_CHAR); 6.429 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 6.430 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 6.431 + masm.negl(tmp4Reg); 6.432 + 6.433 + masm.bind(COMPARE_VECTORS); 6.434 + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 6.435 + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 6.436 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 6.437 + masm.addl(tmp4Reg, 4); 6.438 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 6.439 + 6.440 + // Compare trailing char (final 2 bytes), if any 6.441 + masm.bind(COMPARE_CHAR); 6.442 masm.testl(resultReg, resultReg); 6.443 - masm.jcc(Assembler::zero, TRUE_LABEL); 6.444 - 6.445 - // Setup compare loop 6.446 - masm.bind(COMPARE_LOOP_HDR); 6.447 - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays 6.448 - masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 6.449 - 
masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 6.450 - masm.negl(resultReg); 6.451 - 6.452 - // 4-byte-wide compare loop 6.453 - masm.bind(COMPARE_LOOP); 6.454 - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); 6.455 - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); 6.456 - masm.cmpl(ary1Reg, ary2Reg); 6.457 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 6.458 - masm.increment(resultReg); 6.459 - masm.jcc(Assembler::notZero, COMPARE_LOOP); 6.460 + masm.jccb(Assembler::zero, TRUE_LABEL); 6.461 + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); 6.462 + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); 6.463 + masm.cmpl(tmp3Reg, tmp4Reg); 6.464 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 6.465 6.466 masm.bind(TRUE_LABEL); 6.467 masm.movl(resultReg, 1); // return true 6.468 - masm.jmp(DONE_LABEL); 6.469 + masm.jmpb(DONE); 6.470 6.471 masm.bind(FALSE_LABEL); 6.472 masm.xorl(resultReg, resultReg); // return false 6.473 6.474 // That's it 6.475 - masm.bind(DONE_LABEL); 6.476 + masm.bind(DONE); 6.477 %} 6.478 6.479 enc_class enc_pop_rdx() %{ 6.480 @@ -12074,11 +12356,8 @@ 6.481 ins_pipe( fpu_reg_reg ); 6.482 %} 6.483 6.484 - 6.485 - 6.486 // ======================================================================= 6.487 // fast clearing of an array 6.488 - 6.489 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 6.490 match(Set dummy (ClearArray cnt base)); 6.491 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 6.492 @@ -12092,24 +12371,48 @@ 6.493 ins_pipe( pipe_slow ); 6.494 %} 6.495 6.496 -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 6.497 +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 6.498 + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{ 6.499 match(Set result (StrComp str1 str2)); 6.500 - effect(USE_KILL str1, USE_KILL 
str2, KILL tmp1, KILL tmp2, KILL cr); 6.501 + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); 6.502 //ins_cost(300); 6.503 6.504 format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %} 6.505 - ins_encode( enc_String_Compare() ); 6.506 + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 6.507 + ins_pipe( pipe_slow ); 6.508 +%} 6.509 + 6.510 +// fast string equals (ins_encode operands must follow the enc_class parameter order: they bind by position) 6.511 +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, 6.512 + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ 6.513 + match(Set result (StrEquals str1 str2)); 6.514 + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); 6.515 + 6.516 + format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX" %} 6.517 + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 6.518 + ins_pipe( pipe_slow ); 6.519 +%} 6.520 + 6.521 +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, 6.522 + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{ 6.523 + predicate(UseSSE42Intrinsics); 6.524 + match(Set result (StrIndexOf str1 str2)); 6.525 + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); 6.526 + 6.527 + format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX" %} 6.528 + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 6.529 ins_pipe( pipe_slow ); 6.530 %} 6.531 6.532 // fast array equals 6.533 -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ 6.534 +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3, 6.535 + eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ 6.536 match(Set result (AryEq ary1 ary2)); 6.537 - effect(USE_KILL ary1, USE_KILL ary2, KILL 
tmp1, KILL tmp2, KILL cr); 6.538 + effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, 
USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 6.539 //ins_cost(300); 6.540 6.541 - format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %} 6.542 - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); 6.543 + format %{ "Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX" %} 6.544 + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); 6.545 ins_pipe( pipe_slow ); 6.546 %} 6.547
7.1 --- a/src/cpu/x86/vm/x86_64.ad Thu Apr 02 05:22:02 2009 -0700 7.2 +++ b/src/cpu/x86/vm/x86_64.ad Thu Apr 02 10:49:41 2009 -0700 7.3 @@ -3694,13 +3694,16 @@ 7.4 } 7.5 %} 7.6 7.7 - enc_class enc_String_Compare() 7.8 - %{ 7.9 + enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, 7.10 + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ 7.11 Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL, 7.12 POP_LABEL, DONE_LABEL, CONT_LABEL, 7.13 WHILE_HEAD_LABEL; 7.14 MacroAssembler masm(&cbuf); 7.15 7.16 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 7.17 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 7.18 + 7.19 // Get the first character position in both strings 7.20 // [8] char array, [12] offset, [16] count 7.21 int value_offset = java_lang_String::value_offset_in_bytes(); 7.22 @@ -3718,6 +3721,7 @@ 7.23 // Compute the minimum of the string lengths(rsi) and the 7.24 // difference of the string lengths (stack) 7.25 7.26 + // do the conditional move stuff 7.27 masm.movl(rdi, Address(rdi, count_offset)); 7.28 masm.movl(rsi, Address(rsi, count_offset)); 7.29 masm.movl(rcx, rdi); 7.30 @@ -3745,7 +3749,7 @@ 7.31 Label LSkip2; 7.32 // Check if the strings start at same location 7.33 masm.cmpptr(rbx, rax); 7.34 - masm.jcc(Assembler::notEqual, LSkip2); 7.35 + masm.jccb(Assembler::notEqual, LSkip2); 7.36 7.37 // Check if the length difference is zero (from stack) 7.38 masm.cmpl(Address(rsp, 0), 0x0); 7.39 @@ -3755,9 +3759,52 @@ 7.40 masm.bind(LSkip2); 7.41 } 7.42 7.43 + // Advance to next character 7.44 + masm.addptr(rax, 2); 7.45 + masm.addptr(rbx, 2); 7.46 + 7.47 + if (UseSSE42Intrinsics) { 7.48 + // With SSE4.2, use double quad vector compare 7.49 + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 7.50 + // Setup to compare 16-byte vectors 7.51 + masm.movl(rdi, rsi); 7.52 + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count 7.53 + masm.andl(rdi, 0x00000007); // rdi holds the tail count 7.54 + masm.testl(rsi, rsi); 7.55 + 
masm.jccb(Assembler::zero, COMPARE_TAIL); 7.56 + 7.57 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 7.58 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 7.59 + masm.negptr(rsi); 7.60 + 7.61 + masm.bind(COMPARE_VECTORS); 7.62 + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); 7.63 + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); 7.64 + masm.pxor(tmp1Reg, tmp2Reg); 7.65 + masm.ptest(tmp1Reg, tmp1Reg); 7.66 + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); 7.67 + masm.addptr(rsi, 8); 7.68 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 7.69 + masm.jmpb(COMPARE_TAIL); 7.70 + 7.71 + // Mismatched characters in the vectors 7.72 + masm.bind(VECTOR_NOT_EQUAL); 7.73 + masm.lea(rax, Address(rax, rsi, Address::times_2)); 7.74 + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); 7.75 + masm.movl(rdi, 8); 7.76 + 7.77 + // Compare tail (< 8 chars), or rescan last vectors to 7.78 + // find 1st mismatched characters 7.79 + masm.bind(COMPARE_TAIL); 7.80 + masm.testl(rdi, rdi); 7.81 + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); 7.82 + masm.movl(rsi, rdi); 7.83 + // Fallthru to tail compare 7.84 + } 7.85 + 7.86 // Shift RAX and RBX to the end of the arrays, negate min 7.87 - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); 7.88 - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); 7.89 + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); 7.90 + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); 7.91 masm.negptr(rsi); 7.92 7.93 // Compare the rest of the characters 7.94 @@ -3765,93 +3812,329 @@ 7.95 masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); 7.96 masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); 7.97 masm.subl(rcx, rdi); 7.98 - masm.jcc(Assembler::notZero, POP_LABEL); 7.99 + masm.jccb(Assembler::notZero, POP_LABEL); 7.100 masm.increment(rsi); 7.101 masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); 7.102 7.103 // Strings are equal up to min length. Return the length difference. 
7.104 masm.bind(LENGTH_DIFF_LABEL); 7.105 masm.pop(rcx); 7.106 - masm.jmp(DONE_LABEL); 7.107 + masm.jmpb(DONE_LABEL); 7.108 7.109 // Discard the stored length difference 7.110 masm.bind(POP_LABEL); 7.111 masm.addptr(rsp, 8); 7.112 - 7.113 + 7.114 // That's it 7.115 masm.bind(DONE_LABEL); 7.116 %} 7.117 7.118 - enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{ 7.119 - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; 7.120 + enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, 7.121 + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{ 7.122 + // SSE4.2 version 7.123 + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, 7.124 + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; 7.125 MacroAssembler masm(&cbuf); 7.126 7.127 - Register ary1Reg = as_Register($ary1$$reg); 7.128 - Register ary2Reg = as_Register($ary2$$reg); 7.129 - Register tmp1Reg = as_Register($tmp1$$reg); 7.130 - Register tmp2Reg = as_Register($tmp2$$reg); 7.131 - Register resultReg = as_Register($result$$reg); 7.132 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 7.133 + 7.134 + // Get the first character position in both strings 7.135 + // [8] char array, [12] offset, [16] count 7.136 + int value_offset = java_lang_String::value_offset_in_bytes(); 7.137 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 7.138 + int count_offset = java_lang_String::count_offset_in_bytes(); 7.139 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 7.140 + 7.141 + // Get counts for string and substr 7.142 + masm.movl(rdx, Address(rsi, count_offset)); 7.143 + masm.movl(rax, Address(rdi, count_offset)); 7.144 + // Check for substr count > string count 7.145 + masm.cmpl(rax, rdx); 7.146 + masm.jcc(Assembler::greater, RET_NEG_ONE); 7.147 + 7.148 + // Start the indexOf operation 7.149 + // Get start addr of string 7.150 + masm.load_heap_oop(rbx, Address(rsi, value_offset)); 7.151 
+ masm.movl(rcx, Address(rsi, offset_offset)); 7.152 + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); 7.153 + masm.push(rsi); 7.154 + 7.155 + // Get start addr of substr 7.156 + masm.load_heap_oop(rbx, Address(rdi, value_offset)); 7.157 + masm.movl(rcx, Address(rdi, offset_offset)); 7.158 + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); 7.159 + masm.push(rdi); 7.160 + masm.push(rax); 7.161 + masm.jmpb(PREP_FOR_SCAN); 7.162 + 7.163 + // Substr count saved at sp 7.164 + // Substr saved at sp+8 7.165 + // String saved at sp+16 7.166 + 7.167 + // Prep to load substr for scan 7.168 + masm.bind(LOAD_SUBSTR); 7.169 + masm.movptr(rdi, Address(rsp, 8)); 7.170 + masm.movl(rax, Address(rsp, 0)); 7.171 + 7.172 + // Load substr 7.173 + masm.bind(PREP_FOR_SCAN); 7.174 + masm.movdqu(tmp1Reg, Address(rdi, 0)); 7.175 + masm.addq(rdx, 8); // prime the loop 7.176 + masm.subptr(rsi, 16); 7.177 + 7.178 + // Scan string for substr in 16-byte vectors 7.179 + masm.bind(SCAN_TO_SUBSTR); 7.180 + masm.subq(rdx, 8); 7.181 + masm.addptr(rsi, 16); 7.182 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 7.183 + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); 7.184 + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); 7.185 + 7.186 + // Fallthru: found a potential substr 7.187 + 7.188 + //Make sure string is still long enough 7.189 + masm.subl(rdx, rcx); 7.190 + masm.cmpl(rdx, rax); 7.191 + masm.jccb(Assembler::negative, RET_NOT_FOUND); 7.192 + // Compute start addr of substr 7.193 + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); 7.194 + masm.movptr(rbx, rsi); 7.195 + 7.196 + // Compare potential substr 7.197 + masm.addq(rdx, 8); // prime the loop 7.198 + masm.addq(rax, 8); 7.199 + masm.subptr(rsi, 16); 7.200 + masm.subptr(rdi, 16); 7.201 + 7.202 + // Scan 16-byte vectors of string and substr 7.203 + masm.bind(SCAN_SUBSTR); 7.204 + masm.subq(rax, 8); 7.205 + masm.subq(rdx, 8); 7.206 + masm.addptr(rsi, 16); 7.207 + masm.addptr(rdi, 16); 7.208 + 
masm.movdqu(tmp1Reg, Address(rdi, 0)); 7.209 + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); 7.210 + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 7.211 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 7.212 + 7.213 + // Compute substr offset 7.214 + masm.movptr(rsi, Address(rsp, 16)); 7.215 + masm.subptr(rbx, rsi); 7.216 + masm.shrl(rbx, 1); 7.217 + masm.jmpb(CLEANUP); 7.218 + 7.219 + masm.bind(RET_NEG_ONE); 7.220 + masm.movl(rbx, -1); 7.221 + masm.jmpb(DONE); 7.222 + 7.223 + masm.bind(RET_NOT_FOUND); 7.224 + masm.movl(rbx, -1); 7.225 + 7.226 + masm.bind(CLEANUP); 7.227 + masm.addptr(rsp, 24); 7.228 + 7.229 + masm.bind(DONE); 7.230 + %} 7.231 + 7.232 + enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, 7.233 + rbx_RegI tmp3, rcx_RegI tmp2, rax_RegI result) %{ 7.234 + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; 7.235 + MacroAssembler masm(&cbuf); 7.236 + 7.237 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 7.238 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 7.239 + 7.240 + int value_offset = java_lang_String::value_offset_in_bytes(); 7.241 + int offset_offset = java_lang_String::offset_offset_in_bytes(); 7.242 + int count_offset = java_lang_String::count_offset_in_bytes(); 7.243 + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 7.244 + 7.245 + // does source == target string? 
7.246 + masm.cmpptr(rdi, rsi); 7.247 + masm.jcc(Assembler::equal, RET_TRUE); 7.248 + 7.249 + // get and compare counts 7.250 + masm.movl(rcx, Address(rdi, count_offset)); 7.251 + masm.movl(rax, Address(rsi, count_offset)); 7.252 + masm.cmpl(rcx, rax); 7.253 + masm.jcc(Assembler::notEqual, RET_FALSE); 7.254 + masm.testl(rax, rax); 7.255 + masm.jcc(Assembler::zero, RET_TRUE); 7.256 + 7.257 + // get source string offset and value 7.258 + masm.load_heap_oop(rbx, Address(rsi, value_offset)); 7.259 + masm.movl(rax, Address(rsi, offset_offset)); 7.260 + masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset)); 7.261 + 7.262 + // get compare string offset and value 7.263 + masm.load_heap_oop(rbx, Address(rdi, value_offset)); 7.264 + masm.movl(rax, Address(rdi, offset_offset)); 7.265 + masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset)); 7.266 + 7.267 + // Set byte count 7.268 + masm.shll(rcx, 1); 7.269 + masm.movl(rax, rcx); 7.270 + 7.271 + if (UseSSE42Intrinsics) { 7.272 + // With SSE4.2, use double quad vector compare 7.273 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 7.274 + // Compare 16-byte vectors 7.275 + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) 7.276 + masm.andl(rax, 0x0000000e); // tail count (in bytes) 7.277 + masm.testl(rcx, rcx); 7.278 + masm.jccb(Assembler::zero, COMPARE_TAIL); 7.279 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 7.280 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 7.281 + masm.negptr(rcx); 7.282 + 7.283 + masm.bind(COMPARE_WIDE_VECTORS); 7.284 + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); 7.285 + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); 7.286 + masm.pxor(tmp1Reg, tmp2Reg); 7.287 + masm.ptest(tmp1Reg, tmp1Reg); 7.288 + masm.jccb(Assembler::notZero, RET_FALSE); 7.289 + masm.addptr(rcx, 16); 7.290 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 7.291 + masm.bind(COMPARE_TAIL); 7.292 + masm.movl(rcx, rax); 7.293 + // Fallthru to tail compare 7.294 + } 7.295 + 
7.296 + // Compare 4-byte vectors 7.297 + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) 7.298 + masm.andl(rax, 0x00000002); // tail char (in bytes) 7.299 + masm.testl(rcx, rcx); 7.300 + masm.jccb(Assembler::zero, COMPARE_CHAR); 7.301 + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); 7.302 + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); 7.303 + masm.negptr(rcx); 7.304 + 7.305 + masm.bind(COMPARE_VECTORS); 7.306 + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); 7.307 + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); 7.308 + masm.jccb(Assembler::notEqual, RET_FALSE); 7.309 + masm.addptr(rcx, 4); 7.310 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 7.311 + 7.312 + // Compare trailing char (final 2 bytes), if any 7.313 + masm.bind(COMPARE_CHAR); 7.314 + masm.testl(rax, rax); 7.315 + masm.jccb(Assembler::zero, RET_TRUE); 7.316 + masm.load_unsigned_short(rbx, Address(rdi, 0)); 7.317 + masm.load_unsigned_short(rcx, Address(rsi, 0)); 7.318 + masm.cmpl(rbx, rcx); 7.319 + masm.jccb(Assembler::notEqual, RET_FALSE); 7.320 + 7.321 + masm.bind(RET_TRUE); 7.322 + masm.movl(rax, 1); // return true 7.323 + masm.jmpb(DONE); 7.324 + 7.325 + masm.bind(RET_FALSE); 7.326 + masm.xorl(rax, rax); // return false 7.327 + 7.328 + masm.bind(DONE); 7.329 + %} 7.330 + 7.331 + enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, 7.332 + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ 7.333 + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 7.334 + MacroAssembler masm(&cbuf); 7.335 + 7.336 + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); 7.337 + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); 7.338 + Register ary1Reg = as_Register($ary1$$reg); 7.339 + Register ary2Reg = as_Register($ary2$$reg); 7.340 + Register tmp3Reg = as_Register($tmp3$$reg); 7.341 + Register tmp4Reg = as_Register($tmp4$$reg); 7.342 + Register resultReg = as_Register($result$$reg); 7.343 7.344 int length_offset = 
arrayOopDesc::length_offset_in_bytes(); 7.345 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 7.346 7.347 // Check the input args 7.348 - masm.cmpq(ary1Reg, ary2Reg); 7.349 + masm.cmpq(ary1Reg, ary2Reg); 7.350 masm.jcc(Assembler::equal, TRUE_LABEL); 7.351 - masm.testq(ary1Reg, ary1Reg); 7.352 + masm.testq(ary1Reg, ary1Reg); 7.353 masm.jcc(Assembler::zero, FALSE_LABEL); 7.354 - masm.testq(ary2Reg, ary2Reg); 7.355 + masm.testq(ary2Reg, ary2Reg); 7.356 masm.jcc(Assembler::zero, FALSE_LABEL); 7.357 7.358 // Check the lengths 7.359 - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); 7.360 + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); 7.361 masm.movl(resultReg, Address(ary2Reg, length_offset)); 7.362 - masm.cmpl(tmp2Reg, resultReg); 7.363 + masm.cmpl(tmp4Reg, resultReg); 7.364 masm.jcc(Assembler::notEqual, FALSE_LABEL); 7.365 masm.testl(resultReg, resultReg); 7.366 masm.jcc(Assembler::zero, TRUE_LABEL); 7.367 7.368 - // Get the number of 4 byte vectors to compare 7.369 - masm.shrl(resultReg, 1); 7.370 - 7.371 - // Check for odd-length arrays 7.372 - masm.andl(tmp2Reg, 1); 7.373 - masm.testl(tmp2Reg, tmp2Reg); 7.374 - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); 7.375 - 7.376 - // Compare 2-byte "tail" at end of arrays 7.377 - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 7.378 - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 7.379 - masm.cmpl(tmp1Reg, tmp2Reg); 7.380 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 7.381 + //load array address 7.382 + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); 7.383 + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); 7.384 + 7.385 + //set byte count 7.386 + masm.shll(tmp4Reg, 1); 7.387 + masm.movl(resultReg,tmp4Reg); 7.388 + 7.389 + if (UseSSE42Intrinsics){ 7.390 + // With SSE4.2, use double quad vector compare 7.391 + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 7.392 + // Compare 16-byte vectors 7.393 + 
masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) 7.394 + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) 7.395 + masm.testl(tmp4Reg, tmp4Reg); 7.396 + masm.jccb(Assembler::zero, COMPARE_TAIL); 7.397 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 7.398 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 7.399 + masm.negptr(tmp4Reg); 7.400 + 7.401 + masm.bind(COMPARE_WIDE_VECTORS); 7.402 + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 7.403 + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 7.404 + masm.pxor(tmp1Reg, tmp2Reg); 7.405 + masm.ptest(tmp1Reg, tmp1Reg); 7.406 + 7.407 + masm.jccb(Assembler::notZero, FALSE_LABEL); 7.408 + masm.addptr(tmp4Reg, 16); 7.409 + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 7.410 + masm.bind(COMPARE_TAIL); 7.411 + masm.movl(tmp4Reg, resultReg); 7.412 + // Fallthru to tail compare 7.413 + } 7.414 + 7.415 + // Compare 4-byte vectors 7.416 + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) 7.417 + masm.andl(resultReg, 0x00000002); // tail char (in bytes) 7.418 + masm.testl(tmp4Reg, tmp4Reg); //if tmp2 == 0, only compare char 7.419 + masm.jccb(Assembler::zero, COMPARE_CHAR); 7.420 + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 7.421 + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 7.422 + masm.negptr(tmp4Reg); 7.423 + 7.424 + masm.bind(COMPARE_VECTORS); 7.425 + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); 7.426 + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); 7.427 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 7.428 + masm.addptr(tmp4Reg, 4); 7.429 + masm.jcc(Assembler::notZero, COMPARE_VECTORS); 7.430 + 7.431 + // Compare trailing char (final 2 bytes), if any 7.432 + masm.bind(COMPARE_CHAR); 7.433 masm.testl(resultReg, resultReg); 7.434 - masm.jcc(Assembler::zero, TRUE_LABEL); 7.435 - 7.436 - // Setup compare loop 7.437 - masm.bind(COMPARE_LOOP_HDR); 
7.438 - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays 7.439 - masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); 7.440 - masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); 7.441 - masm.negq(resultReg); 7.442 - 7.443 - // 4-byte-wide compare loop 7.444 - masm.bind(COMPARE_LOOP); 7.445 - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); 7.446 - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); 7.447 - masm.cmpl(ary1Reg, ary2Reg); 7.448 - masm.jcc(Assembler::notEqual, FALSE_LABEL); 7.449 - masm.incrementq(resultReg); 7.450 - masm.jcc(Assembler::notZero, COMPARE_LOOP); 7.451 + masm.jccb(Assembler::zero, TRUE_LABEL); 7.452 + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); 7.453 + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); 7.454 + masm.cmpl(tmp3Reg, tmp4Reg); 7.455 + masm.jccb(Assembler::notEqual, FALSE_LABEL); 7.456 7.457 masm.bind(TRUE_LABEL); 7.458 masm.movl(resultReg, 1); // return true 7.459 - masm.jmp(DONE_LABEL); 7.460 + masm.jmpb(DONE); 7.461 7.462 masm.bind(FALSE_LABEL); 7.463 masm.xorl(resultReg, resultReg); // return false 7.464 7.465 // That's it 7.466 - masm.bind(DONE_LABEL); 7.467 + masm.bind(DONE); 7.468 %} 7.469 7.470 enc_class enc_rethrow() 7.471 @@ -5087,7 +5370,7 @@ 7.472 %} 7.473 7.474 // Double register operands 7.475 -operand regD() 7.476 +operand regD() 7.477 %{ 7.478 constraint(ALLOC_IN_RC(double_reg)); 7.479 match(RegD); 7.480 @@ -11540,27 +11823,52 @@ 7.481 ins_pipe(pipe_slow); 7.482 %} 7.483 7.484 -instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1, 7.485 - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) 7.486 +instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, 7.487 + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) 7.488 %{ 7.489 match(Set result (StrComp str1 str2)); 7.490 - effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); 7.491 
+ effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); 7.492 //ins_cost(300); 7.493 7.494 format %{ "String Compare $str1, $str2 -> $result // XXX KILL RAX, RBX" %} 7.495 - ins_encode( enc_String_Compare() ); 7.496 + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 7.497 ins_pipe( pipe_slow ); 7.498 %} 7.499 7.500 +instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, 7.501 + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr) 7.502 +%{ 7.503 + predicate(UseSSE42Intrinsics); 7.504 + match(Set result (StrIndexOf str1 str2)); 7.505 + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); 7.506 + 7.507 + format %{ "String IndexOf $str1,$str2 -> $result // KILL RAX, RCX, RDX" %} 7.508 + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 7.509 + ins_pipe( pipe_slow ); 7.510 +%} 7.511 + 7.512 +// fast string equals 7.513 +instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3, 7.514 + rcx_RegI tmp4, rax_RegI result, rFlagsReg cr) 7.515 +%{ 7.516 + match(Set result (StrEquals str1 str2)); 7.517 + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); 7.518 + 7.519 + format %{ "String Equals $str1,$str2 -> $result // KILL RBX, RCX" %} 7.520 + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); 7.521 + ins_pipe( pipe_slow ); 7.522 +%} 7.523 + 7.524 // fast array equals 7.525 -instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, 7.526 - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{ 7.527 +instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3, 7.528 + rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) 7.529 +%{ 7.530 match(Set result (AryEq ary1 ary2)); 7.531 - effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); 7.532 + effect(TEMP tmp1, TEMP tmp2, USE_KILL 
ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 7.533 //ins_cost(300); 7.534 7.535 - format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} 7.536 - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); 7.537 + format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} 7.538 + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); 7.539 ins_pipe( pipe_slow ); 7.540 %} 7.541
8.1 --- a/src/share/vm/adlc/formssel.cpp Thu Apr 02 05:22:02 2009 -0700 8.2 +++ b/src/share/vm/adlc/formssel.cpp Thu Apr 02 10:49:41 2009 -0700 8.3 @@ -574,9 +574,13 @@ 8.4 // TEMPORARY 8.5 // if( is_simple_chain_rule(globals) ) return false; 8.6 8.7 - // String-compare uses many memorys edges, but writes none 8.8 + // String.(compareTo/equals/indexOf) and Arrays.equals use many memorys edges, 8.9 + // but writes none 8.10 if( _matrule && _matrule->_rChild && 8.11 - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) 8.12 + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || 8.13 + strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || 8.14 + strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 || 8.15 + strcmp(_matrule->_rChild->_opType,"AryEq" )==0 )) 8.16 return true; 8.17 8.18 // Check if instruction has a USE of a memory operand class, but no defs 8.19 @@ -815,8 +819,10 @@ 8.20 return AdlcVMDeps::Parms; // Skip the machine-state edges 8.21 8.22 if( _matrule->_rChild && 8.23 - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) { 8.24 - // String compare takes 1 control and 4 memory edges. 8.25 + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || 8.26 + strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || 8.27 + strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) { 8.28 + // String.(compareTo/equals/indexOf) take 1 control and 4 memory edges. 8.29 return 5; 8.30 } 8.31
9.1 --- a/src/share/vm/classfile/vmSymbols.hpp Thu Apr 02 05:22:02 2009 -0700 9.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Thu Apr 02 10:49:41 2009 -0700 9.3 @@ -288,6 +288,7 @@ 9.4 template(stringCacheEnabled_name, "stringCacheEnabled") \ 9.5 template(bitCount_name, "bitCount") \ 9.6 template(profile_name, "profile") \ 9.7 + template(equals_name, "equals") \ 9.8 \ 9.9 /* non-intrinsic name/signature pairs: */ \ 9.10 template(register_method_name, "register") \ 9.11 @@ -579,7 +580,6 @@ 9.12 do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \ 9.13 \ 9.14 do_intrinsic(_equalsC, java_util_Arrays, equals_name, equalsC_signature, F_S) \ 9.15 - do_name( equals_name, "equals") \ 9.16 do_signature(equalsC_signature, "([C[C)Z") \ 9.17 \ 9.18 do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \ 9.19 @@ -589,6 +589,7 @@ 9.20 do_name( compareTo_name, "compareTo") \ 9.21 do_intrinsic(_indexOf, java_lang_String, indexOf_name, string_int_signature, F_R) \ 9.22 do_name( indexOf_name, "indexOf") \ 9.23 + do_intrinsic(_equals, java_lang_String, equals_name, object_boolean_signature, F_R) \ 9.24 \ 9.25 do_class(java_nio_Buffer, "java/nio/Buffer") \ 9.26 do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \
10.1 --- a/src/share/vm/opto/classes.hpp Thu Apr 02 05:22:02 2009 -0700 10.2 +++ b/src/share/vm/opto/classes.hpp Thu Apr 02 10:49:41 2009 -0700 10.3 @@ -218,6 +218,8 @@ 10.4 macro(StoreP) 10.5 macro(StoreN) 10.6 macro(StrComp) 10.7 +macro(StrEquals) 10.8 +macro(StrIndexOf) 10.9 macro(SubD) 10.10 macro(SubF) 10.11 macro(SubI)
11.1 --- a/src/share/vm/opto/gcm.cpp Thu Apr 02 05:22:02 2009 -0700 11.2 +++ b/src/share/vm/opto/gcm.cpp Thu Apr 02 10:49:41 2009 -0700 11.3 @@ -438,6 +438,12 @@ 11.4 #endif 11.5 assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp), 11.6 "String compare is only known 'load' that does not conflict with any stores"); 11.7 + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals), 11.8 + "String equals is a 'load' that does not conflict with any stores"); 11.9 + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf), 11.10 + "String indexOf is a 'load' that does not conflict with any stores"); 11.11 + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq), 11.12 + "Arrays equals is a 'load' that do not conflict with any stores"); 11.13 11.14 if (!C->alias_type(load_alias_idx)->is_rewritable()) { 11.15 // It is impossible to spoil this load by putting stores before it,
12.1 --- a/src/share/vm/opto/lcm.cpp Thu Apr 02 05:22:02 2009 -0700 12.2 +++ b/src/share/vm/opto/lcm.cpp Thu Apr 02 10:49:41 2009 -0700 12.3 @@ -137,6 +137,8 @@ 12.4 if( mach->in(2) != val ) continue; 12.5 break; // Found a memory op? 12.6 case Op_StrComp: 12.7 + case Op_StrEquals: 12.8 + case Op_StrIndexOf: 12.9 case Op_AryEq: 12.10 // Not a legit memory op for implicit null check regardless of 12.11 // embedded loads
13.1 --- a/src/share/vm/opto/library_call.cpp Thu Apr 02 05:22:02 2009 -0700 13.2 +++ b/src/share/vm/opto/library_call.cpp Thu Apr 02 10:49:41 2009 -0700 13.3 @@ -136,6 +136,7 @@ 13.4 bool inline_string_compareTo(); 13.5 bool inline_string_indexOf(); 13.6 Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i); 13.7 + bool inline_string_equals(); 13.8 Node* pop_math_arg(); 13.9 bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); 13.10 bool inline_math_native(vmIntrinsics::ID id); 13.11 @@ -261,6 +262,7 @@ 13.12 switch (id) { 13.13 case vmIntrinsics::_indexOf: 13.14 case vmIntrinsics::_compareTo: 13.15 + case vmIntrinsics::_equals: 13.16 case vmIntrinsics::_equalsC: 13.17 break; // InlineNatives does not control String.compareTo 13.18 default: 13.19 @@ -275,6 +277,9 @@ 13.20 case vmIntrinsics::_indexOf: 13.21 if (!SpecialStringIndexOf) return NULL; 13.22 break; 13.23 + case vmIntrinsics::_equals: 13.24 + if (!SpecialStringEquals) return NULL; 13.25 + break; 13.26 case vmIntrinsics::_equalsC: 13.27 if (!SpecialArraysEquals) return NULL; 13.28 break; 13.29 @@ -442,6 +447,8 @@ 13.30 return inline_string_compareTo(); 13.31 case vmIntrinsics::_indexOf: 13.32 return inline_string_indexOf(); 13.33 + case vmIntrinsics::_equals: 13.34 + return inline_string_equals(); 13.35 13.36 case vmIntrinsics::_getObject: 13.37 return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false); 13.38 @@ -793,6 +800,8 @@ 13.39 //------------------------------inline_string_compareTo------------------------ 13.40 bool LibraryCallKit::inline_string_compareTo() { 13.41 13.42 + if (!Matcher::has_match_rule(Op_StrComp)) return false; 13.43 + 13.44 const int value_offset = java_lang_String::value_offset_in_bytes(); 13.45 const int count_offset = java_lang_String::count_offset_in_bytes(); 13.46 const int offset_offset = java_lang_String::offset_offset_in_bytes(); 13.47 @@ -830,6 +839,82 @@ 13.48 return 
true; 13.49 } 13.50 13.51 +//------------------------------inline_string_equals------------------------ 13.52 +bool LibraryCallKit::inline_string_equals() { 13.53 + 13.54 + if (!Matcher::has_match_rule(Op_StrEquals)) return false; 13.55 + 13.56 + const int value_offset = java_lang_String::value_offset_in_bytes(); 13.57 + const int count_offset = java_lang_String::count_offset_in_bytes(); 13.58 + const int offset_offset = java_lang_String::offset_offset_in_bytes(); 13.59 + 13.60 + _sp += 2; 13.61 + Node* argument = pop(); // pop non-receiver first: it was pushed second 13.62 + Node* receiver = pop(); 13.63 + 13.64 + // Null check on self without removing any arguments. The argument 13.65 + // null check technically happens in the wrong place, which can lead to 13.66 + // invalid stack traces when string compare is inlined into a method 13.67 + // which handles NullPointerExceptions. 13.68 + _sp += 2; 13.69 + receiver = do_null_check(receiver, T_OBJECT); 13.70 + //should not do null check for argument for String.equals(), because spec 13.71 + //allows to specify NULL as argument. 
13.72 + _sp -= 2; 13.73 + 13.74 + if (stopped()) { 13.75 + return true; 13.76 + } 13.77 + 13.78 + // get String klass for instanceOf 13.79 + ciInstanceKlass* klass = env()->String_klass(); 13.80 + 13.81 + // two paths (plus control) merge 13.82 + RegionNode* region = new (C, 3) RegionNode(3); 13.83 + Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL); 13.84 + 13.85 + Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); 13.86 + Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1))); 13.87 + Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq)); 13.88 + 13.89 + IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); 13.90 + 13.91 + Node* if_true = _gvn.transform(new (C, 1) IfTrueNode(iff)); 13.92 + set_control(if_true); 13.93 + 13.94 + const TypeInstPtr* string_type = 13.95 + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); 13.96 + 13.97 + // instanceOf == true 13.98 + Node* equals = 13.99 + _gvn.transform(new (C, 7) StrEqualsNode( 13.100 + control(), 13.101 + memory(TypeAryPtr::CHARS), 13.102 + memory(string_type->add_offset(value_offset)), 13.103 + memory(string_type->add_offset(count_offset)), 13.104 + memory(string_type->add_offset(offset_offset)), 13.105 + receiver, 13.106 + argument)); 13.107 + 13.108 + phi->init_req(1, _gvn.transform(equals)); 13.109 + region->init_req(1, if_true); 13.110 + 13.111 + //instanceOf == false, fallthrough 13.112 + Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff)); 13.113 + set_control(if_false); 13.114 + 13.115 + phi->init_req(2, _gvn.transform(intcon(0))); 13.116 + region->init_req(2, if_false); 13.117 + 13.118 + // post merge 13.119 + set_control(_gvn.transform(region)); 13.120 + record_for_igvn(region); 13.121 + 13.122 + push(_gvn.transform(phi)); 13.123 + 13.124 + return true; 13.125 +} 13.126 + 13.127 //------------------------------inline_array_equals---------------------------- 13.128 bool LibraryCallKit::inline_array_equals() { 13.129 
13.130 @@ -994,80 +1079,115 @@ 13.131 return result; 13.132 } 13.133 13.134 - 13.135 //------------------------------inline_string_indexOf------------------------ 13.136 bool LibraryCallKit::inline_string_indexOf() { 13.137 13.138 + const int value_offset = java_lang_String::value_offset_in_bytes(); 13.139 + const int count_offset = java_lang_String::count_offset_in_bytes(); 13.140 + const int offset_offset = java_lang_String::offset_offset_in_bytes(); 13.141 + 13.142 _sp += 2; 13.143 Node *argument = pop(); // pop non-receiver first: it was pushed second 13.144 Node *receiver = pop(); 13.145 13.146 - // don't intrinsify if argument isn't a constant string. 13.147 - if (!argument->is_Con()) { 13.148 - return false; 13.149 + Node* result; 13.150 + if (Matcher::has_match_rule(Op_StrIndexOf) && 13.151 + UseSSE42Intrinsics) { 13.152 + // Generate SSE4.2 version of indexOf 13.153 + // We currently only have match rules that use SSE4.2 13.154 + 13.155 + // Null check on self without removing any arguments. The argument 13.156 + // null check technically happens in the wrong place, which can lead to 13.157 + // invalid stack traces when string compare is inlined into a method 13.158 + // which handles NullPointerExceptions. 
13.159 + _sp += 2; 13.160 + receiver = do_null_check(receiver, T_OBJECT); 13.161 + argument = do_null_check(argument, T_OBJECT); 13.162 + _sp -= 2; 13.163 + 13.164 + if (stopped()) { 13.165 + return true; 13.166 + } 13.167 + 13.168 + ciInstanceKlass* klass = env()->String_klass(); 13.169 + const TypeInstPtr* string_type = 13.170 + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); 13.171 + 13.172 + result = 13.173 + _gvn.transform(new (C, 7) 13.174 + StrIndexOfNode(control(), 13.175 + memory(TypeAryPtr::CHARS), 13.176 + memory(string_type->add_offset(value_offset)), 13.177 + memory(string_type->add_offset(count_offset)), 13.178 + memory(string_type->add_offset(offset_offset)), 13.179 + receiver, 13.180 + argument)); 13.181 + } else { //Use LibraryCallKit::string_indexOf 13.182 + // don't intrinsify if argument isn't a constant string. 13.183 + if (!argument->is_Con()) { 13.184 + return false; 13.185 + } 13.186 + const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); 13.187 + if (str_type == NULL) { 13.188 + return false; 13.189 + } 13.190 + ciInstanceKlass* klass = env()->String_klass(); 13.191 + ciObject* str_const = str_type->const_oop(); 13.192 + if (str_const == NULL || str_const->klass() != klass) { 13.193 + return false; 13.194 + } 13.195 + ciInstance* str = str_const->as_instance(); 13.196 + assert(str != NULL, "must be instance"); 13.197 + 13.198 + ciObject* v = str->field_value_by_offset(value_offset).as_object(); 13.199 + int o = str->field_value_by_offset(offset_offset).as_int(); 13.200 + int c = str->field_value_by_offset(count_offset).as_int(); 13.201 + ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array 13.202 + 13.203 + // constant strings have no offset and count == length which 13.204 + // simplifies the resulting code somewhat so lets optimize for that. 13.205 + if (o != 0 || c != pat->length()) { 13.206 + return false; 13.207 + } 13.208 + 13.209 + // Null check on self without removing any arguments. 
The argument 13.210 + // null check technically happens in the wrong place, which can lead to 13.211 + // invalid stack traces when string compare is inlined into a method 13.212 + // which handles NullPointerExceptions. 13.213 + _sp += 2; 13.214 + receiver = do_null_check(receiver, T_OBJECT); 13.215 + // No null check on the argument is needed since it's a constant String oop. 13.216 + _sp -= 2; 13.217 + if (stopped()) { 13.218 + return true; 13.219 + } 13.220 + 13.221 + // The null string as a pattern always returns 0 (match at beginning of string) 13.222 + if (c == 0) { 13.223 + push(intcon(0)); 13.224 + return true; 13.225 + } 13.226 + 13.227 + // Generate default indexOf 13.228 + jchar lastChar = pat->char_at(o + (c - 1)); 13.229 + int cache = 0; 13.230 + int i; 13.231 + for (i = 0; i < c - 1; i++) { 13.232 + assert(i < pat->length(), "out of range"); 13.233 + cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); 13.234 + } 13.235 + 13.236 + int md2 = c; 13.237 + for (i = 0; i < c - 1; i++) { 13.238 + assert(i < pat->length(), "out of range"); 13.239 + if (pat->char_at(o + i) == lastChar) { 13.240 + md2 = (c - 1) - i; 13.241 + } 13.242 + } 13.243 + 13.244 + result = string_indexOf(receiver, pat, o, cache, md2); 13.245 } 13.246 - const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); 13.247 - if (str_type == NULL) { 13.248 - return false; 13.249 - } 13.250 - ciInstanceKlass* klass = env()->String_klass(); 13.251 - ciObject* str_const = str_type->const_oop(); 13.252 - if (str_const == NULL || str_const->klass() != klass) { 13.253 - return false; 13.254 - } 13.255 - ciInstance* str = str_const->as_instance(); 13.256 - assert(str != NULL, "must be instance"); 13.257 - 13.258 - const int value_offset = java_lang_String::value_offset_in_bytes(); 13.259 - const int count_offset = java_lang_String::count_offset_in_bytes(); 13.260 - const int offset_offset = java_lang_String::offset_offset_in_bytes(); 13.261 - 13.262 - ciObject* v = 
str->field_value_by_offset(value_offset).as_object(); 13.263 - int o = str->field_value_by_offset(offset_offset).as_int(); 13.264 - int c = str->field_value_by_offset(count_offset).as_int(); 13.265 - ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array 13.266 - 13.267 - // constant strings have no offset and count == length which 13.268 - // simplifies the resulting code somewhat so lets optimize for that. 13.269 - if (o != 0 || c != pat->length()) { 13.270 - return false; 13.271 - } 13.272 - 13.273 - // Null check on self without removing any arguments. The argument 13.274 - // null check technically happens in the wrong place, which can lead to 13.275 - // invalid stack traces when string compare is inlined into a method 13.276 - // which handles NullPointerExceptions. 13.277 - _sp += 2; 13.278 - receiver = do_null_check(receiver, T_OBJECT); 13.279 - // No null check on the argument is needed since it's a constant String oop. 13.280 - _sp -= 2; 13.281 - if (stopped()) { 13.282 - return true; 13.283 - } 13.284 - 13.285 - // The null string as a pattern always returns 0 (match at beginning of string) 13.286 - if (c == 0) { 13.287 - push(intcon(0)); 13.288 - return true; 13.289 - } 13.290 - 13.291 - jchar lastChar = pat->char_at(o + (c - 1)); 13.292 - int cache = 0; 13.293 - int i; 13.294 - for (i = 0; i < c - 1; i++) { 13.295 - assert(i < pat->length(), "out of range"); 13.296 - cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); 13.297 - } 13.298 - 13.299 - int md2 = c; 13.300 - for (i = 0; i < c - 1; i++) { 13.301 - assert(i < pat->length(), "out of range"); 13.302 - if (pat->char_at(o + i) == lastChar) { 13.303 - md2 = (c - 1) - i; 13.304 - } 13.305 - } 13.306 - 13.307 - Node* result = string_indexOf(receiver, pat, o, cache, md2); 13.308 + 13.309 push(result); 13.310 return true; 13.311 }
14.1 --- a/src/share/vm/opto/loopnode.cpp Thu Apr 02 05:22:02 2009 -0700 14.2 +++ b/src/share/vm/opto/loopnode.cpp Thu Apr 02 10:49:41 2009 -0700 14.3 @@ -2668,6 +2668,8 @@ 14.4 case Op_LoadD_unaligned: 14.5 case Op_LoadL_unaligned: 14.6 case Op_StrComp: // Does a bunch of load-like effects 14.7 + case Op_StrEquals: 14.8 + case Op_StrIndexOf: 14.9 case Op_AryEq: 14.10 pinned = false; 14.11 }
15.1 --- a/src/share/vm/opto/matcher.cpp Thu Apr 02 05:22:02 2009 -0700 15.2 +++ b/src/share/vm/opto/matcher.cpp Thu Apr 02 10:49:41 2009 -0700 15.3 @@ -746,6 +746,8 @@ 15.4 if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) { 15.5 switch (n->Opcode()) { 15.6 case Op_StrComp: 15.7 + case Op_StrEquals: 15.8 + case Op_StrIndexOf: 15.9 case Op_AryEq: 15.10 case Op_MemBarVolatile: 15.11 case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type? 15.12 @@ -1788,6 +1790,8 @@ 15.13 mstack.push(n->in(0), Pre_Visit); // Visit Control input 15.14 continue; // while (mstack.is_nonempty()) 15.15 case Op_StrComp: 15.16 + case Op_StrEquals: 15.17 + case Op_StrIndexOf: 15.18 case Op_AryEq: 15.19 set_shared(n); // Force result into register (it will be anyways) 15.20 break;
16.1 --- a/src/share/vm/opto/memnode.cpp Thu Apr 02 05:22:02 2009 -0700 16.2 +++ b/src/share/vm/opto/memnode.cpp Thu Apr 02 10:49:41 2009 -0700 16.3 @@ -2481,6 +2481,31 @@ 16.4 return remove_dead_region(phase, can_reshape) ? this : NULL; 16.5 } 16.6 16.7 +// Do we match on this edge? No memory edges 16.8 +uint StrEqualsNode::match_edge(uint idx) const { 16.9 + return idx == 5 || idx == 6; 16.10 +} 16.11 + 16.12 +//------------------------------Ideal------------------------------------------ 16.13 +// Return a node which is more "ideal" than the current node. Strip out 16.14 +// control copies 16.15 +Node *StrEqualsNode::Ideal(PhaseGVN *phase, bool can_reshape){ 16.16 + return remove_dead_region(phase, can_reshape) ? this : NULL; 16.17 +} 16.18 + 16.19 +//============================================================================= 16.20 +// Do we match on this edge? No memory edges 16.21 +uint StrIndexOfNode::match_edge(uint idx) const { 16.22 + return idx == 5 || idx == 6; 16.23 +} 16.24 + 16.25 +//------------------------------Ideal------------------------------------------ 16.26 +// Return a node which is more "ideal" than the current node. Strip out 16.27 +// control copies 16.28 +Node *StrIndexOfNode::Ideal(PhaseGVN *phase, bool can_reshape){ 16.29 + return remove_dead_region(phase, can_reshape) ? this : NULL; 16.30 +} 16.31 + 16.32 //------------------------------Ideal------------------------------------------ 16.33 // Return a node which is more "ideal" than the current node. Strip out 16.34 // control copies 16.35 @@ -2488,7 +2513,6 @@ 16.36 return remove_dead_region(phase, can_reshape) ? this : NULL; 16.37 } 16.38 16.39 - 16.40 //============================================================================= 16.41 MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) 16.42 : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
17.1 --- a/src/share/vm/opto/memnode.hpp Thu Apr 02 05:22:02 2009 -0700 17.2 +++ b/src/share/vm/opto/memnode.hpp Thu Apr 02 10:49:41 2009 -0700 17.3 @@ -765,6 +765,54 @@ 17.4 virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); 17.5 }; 17.6 17.7 +//------------------------------StrEquals------------------------------------- 17.8 +class StrEqualsNode: public Node { 17.9 +public: 17.10 + StrEqualsNode(Node *control, 17.11 + Node* char_array_mem, 17.12 + Node* value_mem, 17.13 + Node* count_mem, 17.14 + Node* offset_mem, 17.15 + Node* s1, Node* s2): Node(control, 17.16 + char_array_mem, 17.17 + value_mem, 17.18 + count_mem, 17.19 + offset_mem, 17.20 + s1, s2) {}; 17.21 + virtual int Opcode() const; 17.22 + virtual bool depends_only_on_test() const { return false; } 17.23 + virtual const Type* bottom_type() const { return TypeInt::BOOL; } 17.24 + // a StrEqualsNode (conservatively) aliases with everything: 17.25 + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } 17.26 + virtual uint match_edge(uint idx) const; 17.27 + virtual uint ideal_reg() const { return Op_RegI; } 17.28 + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); 17.29 +}; 17.30 + 17.31 +//------------------------------StrIndexOf------------------------------------- 17.32 +class StrIndexOfNode: public Node { 17.33 +public: 17.34 + StrIndexOfNode(Node *control, 17.35 + Node* char_array_mem, 17.36 + Node* value_mem, 17.37 + Node* count_mem, 17.38 + Node* offset_mem, 17.39 + Node* s1, Node* s2): Node(control, 17.40 + char_array_mem, 17.41 + value_mem, 17.42 + count_mem, 17.43 + offset_mem, 17.44 + s1, s2) {}; 17.45 + virtual int Opcode() const; 17.46 + virtual bool depends_only_on_test() const { return false; } 17.47 + virtual const Type* bottom_type() const { return TypeInt::INT; } 17.48 + // a StrIndexOfNode (conservatively) aliases with everything: 17.49 + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } 17.50 + virtual uint match_edge(uint idx) const; 17.51 + 
virtual uint ideal_reg() const { return Op_RegI; } 17.52 + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); 17.53 +}; 17.54 + 17.55 //------------------------------AryEq--------------------------------------- 17.56 class AryEqNode: public Node { 17.57 public:
18.1 --- a/src/share/vm/opto/parse1.cpp Thu Apr 02 05:22:02 2009 -0700 18.2 +++ b/src/share/vm/opto/parse1.cpp Thu Apr 02 10:49:41 2009 -0700 18.3 @@ -95,7 +95,7 @@ 18.4 switch( bt ) { // Signature is flattened 18.5 case T_INT: l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; 18.6 case T_FLOAT: l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; 18.7 - case T_ADDRESS: 18.8 + case T_ADDRESS: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM ); break; 18.9 case T_OBJECT: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break; 18.10 case T_LONG: 18.11 case T_DOUBLE: {
19.1 --- a/src/share/vm/runtime/arguments.cpp Thu Apr 02 05:22:02 2009 -0700 19.2 +++ b/src/share/vm/runtime/arguments.cpp Thu Apr 02 10:49:41 2009 -0700 19.3 @@ -1366,9 +1366,6 @@ 19.4 if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) { 19.5 FLAG_SET_DEFAULT(DoEscapeAnalysis, true); 19.6 } 19.7 - if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) { 19.8 - FLAG_SET_DEFAULT(SpecialArraysEquals, true); 19.9 - } 19.10 if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { 19.11 FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); 19.12 }
20.1 --- a/src/share/vm/runtime/globals.hpp Thu Apr 02 05:22:02 2009 -0700 20.2 +++ b/src/share/vm/runtime/globals.hpp Thu Apr 02 10:49:41 2009 -0700 20.3 @@ -491,9 +491,15 @@ 20.4 develop(bool, SpecialStringIndexOf, true, \ 20.5 "special version of string indexOf") \ 20.6 \ 20.7 - product(bool, SpecialArraysEquals, false, \ 20.8 + develop(bool, SpecialStringEquals, true, \ 20.9 + "special version of string equals") \ 20.10 + \ 20.11 + develop(bool, SpecialArraysEquals, true, \ 20.12 "special version of Arrays.equals(char[],char[])") \ 20.13 \ 20.14 + product(bool, UseSSE42Intrinsics, false, \ 20.15 + "SSE4.2 versions of intrinsics") \ 20.16 + \ 20.17 develop(bool, TraceCallFixup, false, \ 20.18 "traces all call fixups") \ 20.19 \ 20.20 @@ -1316,10 +1322,10 @@ 20.21 \ 20.22 product(intx, ParGCArrayScanChunk, 50, \ 20.23 "Scan a subset and push remainder, if array is bigger than this") \ 20.24 + \ 20.25 product(bool, ParGCTrimOverflow, true, \ 20.26 "Eagerly trim the overflow lists (useful for UseCompressedOops") \ 20.27 \ 20.28 - \ 20.29 notproduct(bool, ParGCWorkQueueOverflowALot, false, \ 20.30 "Whether we should simulate work queue overflow in ParNew") \ 20.31 \