Merge

Thu, 02 Apr 2009 10:49:41 -0700

author
never
date
Thu, 02 Apr 2009 10:49:41 -0700
changeset 1120
4e35bfab60a5
parent 1115
a80d48f6fde1
parent 1119
7230de7c4610
child 1121
a9d9d7e06593
child 1128
2c1dbb844832
child 1139
ad8c635e757e

Merge

src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/sparc.ad	Thu Apr 02 05:22:02 2009 -0700
     1.2 +++ b/src/cpu/sparc/vm/sparc.ad	Thu Apr 02 10:49:41 2009 -0700
     1.3 @@ -3003,6 +3003,202 @@
     1.4      __ bind(Ldone);
     1.5    %}
     1.6  
     1.7 +enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ // emits inline String equality: result = 1 when counts and chars match, 0 otherwise
     1.8 +    Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
     1.9 +    MacroAssembler _masm(&cbuf);
    1.10 +
    1.11 +    Register   str1_reg = reg_to_register_object($str1$$reg);
    1.12 +    Register   str2_reg = reg_to_register_object($str2$$reg);
    1.13 +    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
    1.14 +    Register   tmp2_reg = reg_to_register_object($tmp2$$reg);
    1.15 +    Register result_reg = reg_to_register_object($result$$reg);
    1.16 +
    1.17 +    // Get the first character position in both strings
    1.18 +    //         [8] char array, [12] offset, [16] count
    1.19 +    int  value_offset = java_lang_String:: value_offset_in_bytes();
    1.20 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
    1.21 +    int  count_offset = java_lang_String:: count_offset_in_bytes();
    1.22 +
    1.23 +    // load str1 (jchar*) base address into tmp1_reg
    1.24 +    __ load_heap_oop(Address(str1_reg, 0,  value_offset), tmp1_reg);
    1.25 +    __ ld(Address(str1_reg, 0, offset_offset), result_reg); // result_reg is borrowed to hold str1's offset
    1.26 +    __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg);
    1.27 +    __    ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted; str1_reg now holds str1's count, not the oop
    1.28 +    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); // offset in chars -> offset in bytes
    1.29 +    __    load_heap_oop(Address(str2_reg, 0,  value_offset), tmp2_reg); // hoisted
    1.30 +    __ add(result_reg, tmp1_reg, tmp1_reg); // tmp1_reg = address of str1's first char
    1.31 +
    1.32 +    // load str2 (jchar*) base address into tmp2_reg
    1.33 +    // __ ld_ptr(Address(str2_reg, 0,  value_offset), tmp2_reg); // hoisted
    1.34 +    __ ld(Address(str2_reg, 0, offset_offset), result_reg);
    1.35 +    __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg);
    1.36 +    __    ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted; str2_reg now holds str2's count, not the oop
    1.37 +    __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg);
    1.38 +    __   cmp(str1_reg, str2_reg); // hoisted; compares the two counts, consumed by the br below
    1.39 +    __ add(result_reg, tmp2_reg, tmp2_reg); // tmp2_reg = address of str2's first char
    1.40 +
    1.41 +    __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); // count -> length in bytes
    1.42 +    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
    1.43 +    __ delayed()->mov(G0, result_reg);    // not equal
    1.44 +
    1.45 +    __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone); // both strings are empty
    1.46 +    __ delayed()->add(G0, 1, result_reg); //equals
    1.47 +
    1.48 +    __ cmp(tmp1_reg, tmp2_reg); //same string ?
    1.49 +    __ brx(Assembler::equal, true, Assembler::pn, Ldone); // brx: this is a pointer comparison
    1.50 +    __ delayed()->add(G0, 1, result_reg);
    1.51 +
    1.52 +    //rename registers
    1.53 +    Register limit_reg =   str1_reg;
    1.54 +    Register  chr2_reg =   str2_reg;
    1.55 +    Register  chr1_reg = result_reg;
    1.56 +    // tmp{12} are the base pointers
    1.57 +
    1.58 +    //check for alignment and position the pointers to the ends
    1.59 +    __ or3(tmp1_reg, tmp2_reg, chr1_reg);
    1.60 +    __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned
    1.61 +    __ br(Assembler::notZero, false, Assembler::pn, Lchar);
    1.62 +    __ delayed()->nop();
    1.63 +
    1.64 +    __ bind(Lword); // both pointers are 4-byte aligned: compare two chars per 32-bit load
    1.65 +    __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2); O7 is written directly, the matching instruct lists an o7RegI operand as KILL
    1.66 +    __ andn(limit_reg, 0x3, limit_reg); // round the byte length down to a multiple of 4
    1.67 +    __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word); // no whole word to compare
    1.68 +    __ delayed()->nop();
    1.69 +
    1.70 +    __ add(tmp1_reg, limit_reg, tmp1_reg);
    1.71 +    __ add(tmp2_reg, limit_reg, tmp2_reg);
    1.72 +    __ neg(limit_reg); // pointers now sit past the word region; limit_reg counts up from -length to 0
    1.73 +
    1.74 +    __ lduw(tmp1_reg, limit_reg, chr1_reg);
    1.75 +    __ bind(Lword_loop);
    1.76 +    __ lduw(tmp2_reg, limit_reg, chr2_reg);
    1.77 +    __ cmp(chr1_reg, chr2_reg);
    1.78 +    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
    1.79 +    __ delayed()->mov(G0, result_reg);
    1.80 +    __ inccc(limit_reg, 2*sizeof(jchar));
    1.81 +    // annul LDUW if branch is not taken to prevent access past end of string
    1.82 +    __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); // annul bit set: delay slot runs only when the branch is taken
    1.83 +    __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted
    1.84 +
    1.85 +    __ bind(Lpost_word);
    1.86 +    __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone); // no odd trailing char left
    1.87 +    __ delayed()->add(G0, 1, result_reg);
    1.88 +
    1.89 +    __ lduh(tmp1_reg, 0, chr1_reg); // compare the single trailing char
    1.90 +    __ lduh(tmp2_reg, 0, chr2_reg);
    1.91 +    __ cmp (chr1_reg, chr2_reg);
    1.92 +    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
    1.93 +    __ delayed()->mov(G0, result_reg);
    1.94 +    __ ba(false,Ldone);
    1.95 +    __ delayed()->add(G0, 1, result_reg);
    1.96 +
    1.97 +    __ bind(Lchar); // at least one pointer is not 4-byte aligned: compare one char at a time
    1.98 +    __ add(tmp1_reg, limit_reg, tmp1_reg);
    1.99 +    __ add(tmp2_reg, limit_reg, tmp2_reg);
   1.100 +    __ neg(limit_reg); //negate count
   1.101 +
   1.102 +    __ lduh(tmp1_reg, limit_reg, chr1_reg);
   1.103 +    __ bind(Lchar_loop);
   1.104 +    __ lduh(tmp2_reg, limit_reg, chr2_reg);
   1.105 +    __ cmp(chr1_reg, chr2_reg);
   1.106 +    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
   1.107 +    __ delayed()->mov(G0, result_reg); //not equal
   1.108 +    __ inccc(limit_reg, sizeof(jchar));
   1.109 +    // annul LDUH if branch is not taken to prevent access past end of string
   1.110 +    __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); // annul bit set: delay slot runs only when the branch is taken
   1.111 +    __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted
   1.112 +
   1.113 +    __ add(G0, 1, result_reg);  //equal
   1.114 +
   1.115 +    __ bind(Ldone);
   1.116 +  %}
   1.117 +
   1.118 +enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ // emits inline char[] equality: result = 1 when both arrays are identical or have equal length and contents, 0 otherwise (including when exactly one is null)
   1.119 +    Label Lvector, Ldone, Lloop;
   1.120 +    MacroAssembler _masm(&cbuf);
   1.121 +
   1.122 +    Register   ary1_reg = reg_to_register_object($ary1$$reg);
   1.123 +    Register   ary2_reg = reg_to_register_object($ary2$$reg);
   1.124 +    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
   1.125 +    Register   tmp2_reg = reg_to_register_object($tmp2$$reg);
   1.126 +    Register result_reg = reg_to_register_object($result$$reg);
   1.127 +
   1.128 +    int length_offset  = arrayOopDesc::length_offset_in_bytes();
   1.129 +    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   1.130 +
   1.131 +    // return true if the same array
   1.132 +    __ cmp(ary1_reg, ary2_reg);
   1.133 +    __ brx(Assembler::equal, true, Assembler::pn, Ldone); // brx, not br: pointers must be compared at full register width
   1.134 +    __ delayed()->add(G0, 1, result_reg); // equal
   1.135 +
   1.136 +    __ br_null(ary1_reg, true, Assembler::pn, Ldone);
   1.137 +    __ delayed()->mov(G0, result_reg);    // not equal
   1.138 +
   1.139 +    __ br_null(ary2_reg, true, Assembler::pn, Ldone);
   1.140 +    __ delayed()->mov(G0, result_reg);    // not equal
   1.141 +
   1.142 +    //load the lengths of arrays
   1.143 +    __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg);
   1.144 +    __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg);
   1.145 +
   1.146 +    // return false if the two arrays are not equal length
   1.147 +    __ cmp(tmp1_reg, tmp2_reg);
   1.148 +    __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
   1.149 +    __ delayed()->mov(G0, result_reg);     // not equal
   1.150 +
   1.151 +    __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone);
   1.152 +    __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
   1.153 +
   1.154 +    // load array addresses
   1.155 +    __ add(ary1_reg, base_offset, ary1_reg);
   1.156 +    __ add(ary2_reg, base_offset, ary2_reg);
   1.157 +
   1.158 +    // renaming registers
   1.159 +    Register chr1_reg  =  tmp2_reg;   // for characters in ary1
   1.160 +    Register chr2_reg  =  result_reg; // for characters in ary2
   1.161 +    Register limit_reg =  tmp1_reg;   // length
   1.162 +
   1.163 +    // set byte count
   1.164 +    __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
   1.165 +    __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ?
   1.166 +    __ br(Assembler::zero, false, Assembler::pt, Lvector); // even char count: go straight to the word loop
   1.167 +    __ delayed()->nop();
   1.168 +
   1.169 +    //compare the trailing char
   1.170 +    __ sub(limit_reg, sizeof(jchar), limit_reg); // limit_reg = byte offset of the last char, and the byte count still to compare
   1.171 +    __ lduh(ary1_reg, limit_reg, chr1_reg);
   1.172 +    __ lduh(ary2_reg, limit_reg, chr2_reg);
   1.173 +    __ cmp(chr1_reg, chr2_reg);
   1.174 +    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
   1.175 +    __ delayed()->mov(G0, result_reg);     // not equal
   1.176 +
   1.177 +    // only one char ?
   1.178 +    __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone);
   1.179 +    __ delayed()->add(G0, 1, result_reg); // the single char matched: arrays are equal
   1.180 +
   1.181 +    __ bind(Lvector);
   1.182 +    // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit
   1.183 +    __ add(ary1_reg, limit_reg, ary1_reg);
   1.184 +    __ add(ary2_reg, limit_reg, ary2_reg);
   1.185 +    __ neg(limit_reg, limit_reg);
   1.186 +
   1.187 +    __ lduw(ary1_reg, limit_reg, chr1_reg);
   1.188 +    __ bind(Lloop);
   1.189 +    __ lduw(ary2_reg, limit_reg, chr2_reg);
   1.190 +    __ cmp(chr1_reg, chr2_reg);
   1.191 +    __ br(Assembler::notEqual, false, Assembler::pt, Ldone); // not annulled: the delay-slot mov also runs on fall-through, harmless because chr2_reg (== result_reg) is reloaded at Lloop
   1.192 +    __ delayed()->mov(G0, result_reg);     // not equal
   1.193 +    __ inccc(limit_reg, 2*sizeof(jchar));
   1.194 +    // annul LDUW if branch is not taken to prevent access past end of array
   1.195 +    __ br(Assembler::notZero, true, Assembler::pt, Lloop); // annul bit set: delay slot runs only when the branch is taken
   1.196 +    __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted
   1.197 +
   1.198 +    __ add(G0, 1, result_reg); // equals
   1.199 +
   1.200 +    __ bind(Ldone);
   1.201 +  %}
   1.202 +
   1.203    enc_class enc_rethrow() %{
   1.204      cbuf.set_inst_mark();
   1.205      Register temp_reg = G3;
   1.206 @@ -9015,6 +9211,25 @@
   1.207    ins_pipe(long_memory_op);
   1.208  %}
   1.209  
   1.210 +instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
   1.211 +                       o7RegI tmp3, flagsReg ccr) %{ // matcher rule: StrEquals node -> enc_String_Equals
   1.212 +  match(Set result (StrEquals str1 str2));
   1.213 +  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); // the encoding overwrites both inputs; tmp3 (O7) is killed because the encoding writes O7 directly
   1.214 +  ins_cost(300);
   1.215 +  format %{ "String Equals $str1,$str2 -> $result" %}
   1.216 +  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) ); // tmp3 and ccr are not passed; they appear only in the effect list
   1.217 +  ins_pipe(long_memory_op);
   1.218 +%}
   1.219 +
   1.220 +instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result,
   1.221 +                        flagsReg ccr) %{ // matcher rule: AryEq node -> enc_Array_Equals
   1.222 +  match(Set result (AryEq ary1 ary2));
   1.223 +  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); // the encoding advances both array pointers and uses tmp1/tmp2 as length and char scratch
   1.224 +  ins_cost(300);
   1.225 +  format %{ "Array Equals $ary1,$ary2 -> $result" %}
   1.226 +  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result));
   1.227 +  ins_pipe(long_memory_op);
   1.228 +%}
   1.229  
   1.230  //---------- Population Count Instructions -------------------------------------
   1.231  
     2.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Apr 02 05:22:02 2009 -0700
     2.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Apr 02 10:49:41 2009 -0700
     2.3 @@ -2173,6 +2173,31 @@
     2.4    emit_arith(0x0B, 0xC0, dst, src);
     2.5  }
     2.6  
     2.7 +void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { // emits PCMPESTRI xmm, m128, imm8 (bytes 66 [prefix] 0F 3A 61 /r ib)
     2.8 +  assert(VM_Version::supports_sse4_2(), "");
     2.9 +
    2.10 +  InstructionMark im(this);
    2.11 +  emit_byte(0x66); // operand-size prefix, emitted ahead of any prefix produced by prefix() below
    2.12 +  prefix(src, dst);
    2.13 +  emit_byte(0x0F);
    2.14 +  emit_byte(0x3A);
    2.15 +  emit_byte(0x61);
    2.16 +  emit_operand(dst, src);
    2.17 +  emit_byte(imm8); // comparison-control immediate follows the operand bytes
    2.18 +}
    2.19 +
    2.20 +void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { // emits PCMPESTRI xmm, xmm, imm8 (bytes 66 [prefix] 0F 3A 61 /r ib)
    2.21 +  assert(VM_Version::supports_sse4_2(), "");
    2.22 +
    2.23 +  emit_byte(0x66); // operand-size prefix, emitted ahead of any prefix produced below
    2.24 +  int encode = prefix_and_encode(dst->encoding(), src->encoding()); // no REX.W: matches the memory-operand overload and keeps the 32-bit length registers
    2.25 +  emit_byte(0x0F);
    2.26 +  emit_byte(0x3A);
    2.27 +  emit_byte(0x61);
    2.28 +  emit_byte(0xC0 | encode); // ModRM, register-direct form
    2.29 +  emit_byte(imm8); // comparison-control immediate
    2.30 +}
    2.31 +
    2.32  // generic
    2.33  void Assembler::pop(Register dst) {
    2.34    int encode = prefix_and_encode(dst->encoding());
    2.35 @@ -2330,6 +2355,29 @@
    2.36    emit_byte(shift);
    2.37  }
    2.38  
    2.39 +void Assembler::ptest(XMMRegister dst, Address src) { // emits PTEST xmm, m128 (bytes 66 [prefix] 0F 38 17 /r)
    2.40 +  assert(VM_Version::supports_sse4_1(), "");
    2.41 +
    2.42 +  InstructionMark im(this);
    2.43 +  emit_byte(0x66); // operand-size prefix, emitted ahead of any prefix produced by prefix() below
    2.44 +  prefix(src, dst);
    2.45 +  emit_byte(0x0F);
    2.46 +  emit_byte(0x38);
    2.47 +  emit_byte(0x17);
    2.48 +  emit_operand(dst, src);
    2.49 +}
    2.50 +
    2.51 +void Assembler::ptest(XMMRegister dst, XMMRegister src) { // emits PTEST xmm, xmm (bytes 66 [prefix] 0F 38 17 /r)
    2.52 +  assert(VM_Version::supports_sse4_1(), "");
    2.53 +
    2.54 +  emit_byte(0x66); // operand-size prefix, emitted ahead of any prefix produced below
    2.55 +  int encode = prefix_and_encode(dst->encoding(), src->encoding()); // no REX.W: matches the memory-operand overload
    2.56 +  emit_byte(0x0F);
    2.57 +  emit_byte(0x38);
    2.58 +  emit_byte(0x17);
    2.59 +  emit_byte(0xC0 | encode); // ModRM, register-direct form
    2.60 +}
    2.61 +
    2.62  void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
    2.63    NOT_LP64(assert(VM_Version::supports_sse2(), ""));
    2.64    emit_byte(0x66);
     3.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Apr 02 05:22:02 2009 -0700
     3.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Apr 02 10:49:41 2009 -0700
     3.3 @@ -1226,6 +1226,10 @@
     3.4    void orq(Register dst, Address src);
     3.5    void orq(Register dst, Register src);
     3.6  
     3.7 +  // SSE4.2 string instructions
     3.8 +  void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
     3.9 +  void pcmpestri(XMMRegister xmm1, Address src, int imm8);
    3.10 +
    3.11    void popl(Address dst);
    3.12  
    3.13  #ifdef _LP64
    3.14 @@ -1260,6 +1264,10 @@
    3.15    // Shift Right Logical Quadword Immediate
    3.16    void psrlq(XMMRegister dst, int shift);
    3.17  
    3.18 +  // Logical Compare Double Quadword
    3.19 +  void ptest(XMMRegister dst, XMMRegister src);
    3.20 +  void ptest(XMMRegister dst, Address src);
    3.21 +
    3.22    // Interleave Low Bytes
    3.23    void punpcklbw(XMMRegister dst, XMMRegister src);
    3.24  
     4.1 --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Apr 02 05:22:02 2009 -0700
     4.2 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Apr 02 10:49:41 2009 -0700
     4.3 @@ -2691,7 +2691,7 @@
     4.4    __ mov(rdi, rax);
     4.5  
     4.6     Label noException;
     4.7 -  __ cmpl(r12, Deoptimization::Unpack_exception);   // Was exception pending?
     4.8 +  __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
     4.9    __ jcc(Assembler::notEqual, noException);
    4.10    __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
    4.11    // QQQ this is useless it was NULL above
     5.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Apr 02 05:22:02 2009 -0700
     5.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Apr 02 10:49:41 2009 -0700
     5.3 @@ -408,6 +408,11 @@
     5.4            UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
     5.5          }
     5.6        }
     5.7 +      if( supports_sse4_2() && UseSSE >= 4 ) {
     5.8 +        if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
     5.9 +          UseSSE42Intrinsics = true;
    5.10 +        }
    5.11 +      }
    5.12      }
    5.13    }
    5.14  
     6.1 --- a/src/cpu/x86/vm/x86_32.ad	Thu Apr 02 05:22:02 2009 -0700
     6.2 +++ b/src/cpu/x86/vm/x86_32.ad	Thu Apr 02 10:49:41 2009 -0700
     6.3 @@ -3694,12 +3694,16 @@
     6.4      }
     6.5    %}
     6.6  
     6.7 -  enc_class enc_String_Compare() %{
     6.8 +  enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
     6.9 +                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{
    6.10      Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL,
    6.11            POP_LABEL, DONE_LABEL, CONT_LABEL,
    6.12            WHILE_HEAD_LABEL;
    6.13      MacroAssembler masm(&cbuf);
    6.14  
    6.15 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
    6.16 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
    6.17 +
    6.18      // Get the first character position in both strings
    6.19      //         [8] char array, [12] offset, [16] count
    6.20      int value_offset  = java_lang_String::value_offset_in_bytes();
    6.21 @@ -3717,7 +3721,6 @@
    6.22      // Compute the minimum of the string lengths(rsi) and the
    6.23      // difference of the string lengths (stack)
    6.24  
    6.25 -
    6.26      if (VM_Version::supports_cmov()) {
    6.27        masm.movl(rdi, Address(rdi, count_offset));
    6.28        masm.movl(rsi, Address(rsi, count_offset));
    6.29 @@ -3731,7 +3734,7 @@
    6.30        masm.movl(rsi, rdi);
    6.31        masm.subl(rdi, rcx);
    6.32        masm.push(rdi);
    6.33 -      masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL);
    6.34 +      masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL);
    6.35        masm.movl(rsi, rcx);
    6.36        // rsi holds min, rcx is unused
    6.37      }
    6.38 @@ -3756,7 +3759,7 @@
    6.39        Label LSkip2;
    6.40        // Check if the strings start at same location
    6.41        masm.cmpptr(rbx,rax);
    6.42 -      masm.jcc(Assembler::notEqual, LSkip2);
    6.43 +      masm.jccb(Assembler::notEqual, LSkip2);
    6.44  
    6.45        // Check if the length difference is zero (from stack)
    6.46        masm.cmpl(Address(rsp, 0), 0x0);
    6.47 @@ -3766,9 +3769,52 @@
    6.48        masm.bind(LSkip2);
    6.49      }
    6.50  
    6.51 -    // Shift rax, and rbx, to the end of the arrays, negate min
    6.52 -    masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
    6.53 -    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
    6.54 +   // Advance to next character
    6.55 +    masm.addptr(rax, 2);
    6.56 +    masm.addptr(rbx, 2);
    6.57 +
    6.58 +    if (UseSSE42Intrinsics) {
    6.59 +      // With SSE4.2, use double quad vector compare
    6.60 +      Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    6.61 +      // Setup to compare 16-byte vectors
    6.62 +      masm.movl(rdi, rsi);
    6.63 +      masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
    6.64 +      masm.andl(rdi, 0x00000007); // rdi holds the tail count
    6.65 +      masm.testl(rsi, rsi);
    6.66 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
    6.67 +
    6.68 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    6.69 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    6.70 +      masm.negl(rsi);
    6.71 +
    6.72 +      masm.bind(COMPARE_VECTORS);
    6.73 +      masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
    6.74 +      masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
    6.75 +      masm.pxor(tmp1Reg, tmp2Reg);
    6.76 +      masm.ptest(tmp1Reg, tmp1Reg);
    6.77 +      masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    6.78 +      masm.addl(rsi, 8);
    6.79 +      masm.jcc(Assembler::notZero, COMPARE_VECTORS);
    6.80 +      masm.jmpb(COMPARE_TAIL);
    6.81 +
    6.82 +      // Mismatched characters in the vectors
    6.83 +      masm.bind(VECTOR_NOT_EQUAL);
    6.84 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    6.85 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    6.86 +      masm.movl(rdi, 8);
    6.87 +
    6.88 +      // Compare tail (< 8 chars), or rescan last vectors to
    6.89 +      // find 1st mismatched characters
    6.90 +      masm.bind(COMPARE_TAIL);
    6.91 +      masm.testl(rdi, rdi);
    6.92 +      masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    6.93 +      masm.movl(rsi, rdi);
    6.94 +      // Fallthru to tail compare
    6.95 +    }
    6.96 +
    6.97 +    //Shift rax, and rbx, to the end of the arrays, negate min
    6.98 +    masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
    6.99 +    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
   6.100      masm.negl(rsi);
   6.101  
   6.102      // Compare the rest of the characters
   6.103 @@ -3776,93 +3822,329 @@
   6.104      masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
   6.105      masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
   6.106      masm.subl(rcx, rdi);
   6.107 -    masm.jcc(Assembler::notZero, POP_LABEL);
   6.108 +    masm.jccb(Assembler::notZero, POP_LABEL);
   6.109      masm.incrementl(rsi);
   6.110      masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
   6.111  
   6.112      // Strings are equal up to min length.  Return the length difference.
   6.113      masm.bind(LENGTH_DIFF_LABEL);
   6.114      masm.pop(rcx);
   6.115 -    masm.jmp(DONE_LABEL);
   6.116 +    masm.jmpb(DONE_LABEL);
   6.117  
   6.118      // Discard the stored length difference
   6.119      masm.bind(POP_LABEL);
   6.120      masm.addptr(rsp, 4);
   6.121 -       
   6.122 +
   6.123      // That's it
   6.124      masm.bind(DONE_LABEL);
   6.125    %}
   6.126  
   6.127 -  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
   6.128 -    Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
    6.129 + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
    6.130 +                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{ // emits inline String equality: rax = 1 when counts and chars match, 0 otherwise; general registers are referenced by name (rdi, rsi, rax, rbx, rcx) rather than through the operands
    6.131 +    Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
    6.132      MacroAssembler masm(&cbuf);
    6.133  
    6.134 -    Register ary1Reg   = as_Register($ary1$$reg);
    6.135 -    Register ary2Reg   = as_Register($ary2$$reg);
    6.136 -    Register tmp1Reg   = as_Register($tmp1$$reg);
    6.137 -    Register tmp2Reg   = as_Register($tmp2$$reg);
    6.138 -    Register resultReg = as_Register($result$$reg);
    6.139 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
    6.140 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
    6.141 +
    6.142 +    int value_offset  = java_lang_String::value_offset_in_bytes();
    6.143 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
    6.144 +    int count_offset  = java_lang_String::count_offset_in_bytes();
    6.145 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
    6.146 +
    6.147 +    // does source == target string?
    6.148 +    masm.cmpptr(rdi, rsi);
    6.149 +    masm.jcc(Assembler::equal, RET_TRUE);
    6.150 +
    6.151 +    // get and compare counts
    6.152 +    masm.movl(rcx, Address(rdi, count_offset));
    6.153 +    masm.movl(rax, Address(rsi, count_offset));
    6.154 +    masm.cmpl(rcx, rax);
    6.155 +    masm.jcc(Assembler::notEqual, RET_FALSE);
    6.156 +    masm.testl(rax, rax);
    6.157 +    masm.jcc(Assembler::zero, RET_TRUE); // both strings are empty
    6.158 +
    6.159 +    // get source string offset and value
    6.160 +    masm.movptr(rbx, Address(rsi, value_offset));
    6.161 +    masm.movl(rax, Address(rsi, offset_offset));
    6.162 +    masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset)); // rsi = address of the first char of the string that was in rsi
    6.163 +
    6.164 +    // get compare string offset and value
    6.165 +    masm.movptr(rbx, Address(rdi, value_offset));
    6.166 +    masm.movl(rax, Address(rdi, offset_offset));
    6.167 +    masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset)); // rdi = address of the first char of the string that was in rdi
    6.168 +
    6.169 +    // Set byte count
    6.170 +    masm.shll(rcx, 1); // count in chars -> count in bytes
    6.171 +    masm.movl(rax, rcx); // rax keeps a copy so the tail size can be derived from it
    6.172 +
    6.173 +    if (UseSSE42Intrinsics) {
    6.174 +      // With SSE4.2, use double quad vector compare
    6.175 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
    6.176 +      // Compare 16-byte vectors
    6.177 +      masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
    6.178 +      masm.andl(rax, 0x0000000e);  // tail count (in bytes)
    6.179 +      masm.testl(rcx, rcx);
    6.180 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
    6.181 +      masm.lea(rdi, Address(rdi, rcx, Address::times_1));
    6.182 +      masm.lea(rsi, Address(rsi, rcx, Address::times_1));
    6.183 +      masm.negl(rcx); // pointers sit past the vector region; rcx counts up from -bytes to 0
    6.184 +
    6.185 +      masm.bind(COMPARE_WIDE_VECTORS);
    6.186 +      masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
    6.187 +      masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
    6.188 +      masm.pxor(tmp1Reg, tmp2Reg); // all-zero result means the two 16-byte chunks are identical
    6.189 +      masm.ptest(tmp1Reg, tmp1Reg);
    6.190 +      masm.jccb(Assembler::notZero, RET_FALSE);
    6.191 +      masm.addl(rcx, 16);
    6.192 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
    6.193 +      masm.bind(COMPARE_TAIL);
    6.194 +      masm.movl(rcx, rax); // remaining bytes (0..14) are handled by the 4-byte and 2-byte steps below
    6.195 +      // Fallthru to tail compare
    6.196 +    }
    6.197 +
    6.198 +    // Compare 4-byte vectors
    6.199 +    masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
    6.200 +    masm.andl(rax, 0x00000002);  // tail char (in bytes)
    6.201 +    masm.testl(rcx, rcx);
    6.202 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
    6.203 +    masm.lea(rdi, Address(rdi, rcx, Address::times_1));
    6.204 +    masm.lea(rsi, Address(rsi, rcx, Address::times_1));
    6.205 +    masm.negl(rcx); // same negative-index scheme as the wide loop above
    6.206 +
    6.207 +    masm.bind(COMPARE_VECTORS);
    6.208 +    masm.movl(rbx, Address(rdi, rcx, Address::times_1));
    6.209 +    masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
    6.210 +    masm.jccb(Assembler::notEqual, RET_FALSE);
    6.211 +    masm.addl(rcx, 4);
    6.212 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
    6.213 +
    6.214 +    // Compare trailing char (final 2 bytes), if any
    6.215 +    masm.bind(COMPARE_CHAR);
    6.216 +    masm.testl(rax, rax);
    6.217 +    masm.jccb(Assembler::zero, RET_TRUE);
    6.218 +    masm.load_unsigned_short(rbx, Address(rdi, 0));
    6.219 +    masm.load_unsigned_short(rcx, Address(rsi, 0));
    6.220 +    masm.cmpl(rbx, rcx);
    6.221 +    masm.jccb(Assembler::notEqual, RET_FALSE);
    6.222 +
    6.223 +    masm.bind(RET_TRUE);
    6.224 +    masm.movl(rax, 1);   // return true
    6.225 +    masm.jmpb(DONE);
    6.226 +
    6.227 +    masm.bind(RET_FALSE);
    6.228 +    masm.xorl(rax, rax); // return false
    6.229 +
    6.230 +    masm.bind(DONE);
    6.231 +    %}
   6.232 +
    6.233 + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
    6.234 +                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{ // emits inline indexOf: rbx = char index of the substring (originally in rdi) within the string (originally in rsi), or -1
    6.235 +    // SSE4.2 version
    6.236 +    Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
    6.237 +          SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
    6.238 +    MacroAssembler masm(&cbuf);
    6.239 +
    6.240 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
    6.241 +
    6.242 +    // Get the first character position in both strings
    6.243 +    //         [8] char array, [12] offset, [16] count
    6.244 +    int value_offset  = java_lang_String::value_offset_in_bytes();
    6.245 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
    6.246 +    int count_offset  = java_lang_String::count_offset_in_bytes();
    6.247 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
    6.248 +
    6.249 +    // Get counts for string and substr
    6.250 +    masm.movl(rdx, Address(rsi, count_offset));
    6.251 +    masm.movl(rax, Address(rdi, count_offset));
    6.252 +    // Check for substr count > string count
    6.253 +    masm.cmpl(rax, rdx);
    6.254 +    masm.jcc(Assembler::greater, RET_NEG_ONE); // nothing has been pushed yet, so this exit skips CLEANUP
    6.255 +
    6.256 +    // Start the indexOf operation
    6.257 +    // Get start addr of string
    6.258 +    masm.movptr(rbx, Address(rsi, value_offset));
    6.259 +    masm.movl(rcx, Address(rsi, offset_offset));
    6.260 +    masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
    6.261 +    masm.push(rsi);
    6.262 +
    6.263 +    // Get start addr of substr
    6.264 +    masm.movptr(rbx, Address(rdi, value_offset));
    6.265 +    masm.movl(rcx, Address(rdi, offset_offset));
    6.266 +    masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
    6.267 +    masm.push(rdi);
    6.268 +    masm.push(rax);
    6.269 +    masm.jmpb(PREP_FOR_SCAN);
    6.270 +
    6.271 +    // Substr count saved at sp
    6.272 +    // Substr saved at sp+4
    6.273 +    // String saved at sp+8
    6.274 +
    6.275 +    // Prep to load substr for scan
    6.276 +    masm.bind(LOAD_SUBSTR); // re-entry point after a candidate match failed: restore substr pointer and count from the stack
    6.277 +    masm.movptr(rdi, Address(rsp, 4));
    6.278 +    masm.movl(rax, Address(rsp, 0));
    6.279 +
    6.280 +    // Load substr
    6.281 +    masm.bind(PREP_FOR_SCAN);
    6.282 +    masm.movdqu(tmp1Reg, Address(rdi, 0)); // NOTE(review): loads 16 bytes regardless of the substr count in rax - confirm reading past a short substr is safe
    6.283 +    masm.addl(rdx, 8);        // prime the loop
    6.284 +    masm.subptr(rsi, 16);
    6.285 +
    6.286 +    // Scan string for substr in 16-byte vectors
    6.287 +    masm.bind(SCAN_TO_SUBSTR);
    6.288 +    masm.subl(rdx, 8);
    6.289 +    masm.addptr(rsi, 16);
    6.290 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); // imm8 0x0d: unsigned 16-bit elements, "equal ordered" (substring) compare; lengths are taken from rax/rdx and the index is returned in rcx (per the Intel SDM). NOTE(review): memory operand is 16 bytes wide - confirm it cannot run past the end of the string
    6.291 +    masm.jcc(Assembler::above, SCAN_TO_SUBSTR);     // CF == 0 && ZF == 0
    6.292 +    masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0
    6.293 +
    6.294 +    // Fallthru: found a potential substr
    6.295 +
    6.296 +    // Make sure string is still long enough
    6.297 +    masm.subl(rdx, rcx);
    6.298 +    masm.cmpl(rdx, rax);
    6.299 +    masm.jccb(Assembler::negative, RET_NOT_FOUND);
    6.300 +    // Compute start addr of substr
    6.301 +    masm.lea(rsi, Address(rsi, rcx, Address::times_2));
    6.302 +    masm.movptr(rbx, rsi); // remember the candidate start; it becomes the result if the full compare succeeds
    6.303 +
    6.304 +    // Compare potential substr
    6.305 +    masm.addl(rdx, 8);        // prime the loop
    6.306 +    masm.addl(rax, 8);
    6.307 +    masm.subptr(rsi, 16);
    6.308 +    masm.subptr(rdi, 16);
    6.309 +
    6.310 +    // Scan 16-byte vectors of string and substr
    6.311 +    masm.bind(SCAN_SUBSTR);
    6.312 +    masm.subl(rax, 8);
    6.313 +    masm.subl(rdx, 8);
    6.314 +    masm.addptr(rsi, 16);
    6.315 +    masm.addptr(rdi, 16);
    6.316 +    masm.movdqu(tmp1Reg, Address(rdi, 0));
    6.317 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
    6.318 +    masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
    6.319 +    masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
    6.320 +
    6.321 +    // Compute substr offset
    6.322 +    masm.movptr(rsi, Address(rsp, 8)); // reload the string's start address saved by the first push
    6.323 +    masm.subptr(rbx, rsi);
    6.324 +    masm.shrl(rbx, 1); // byte offset -> char index
    6.325 +    masm.jmpb(CLEANUP);
    6.326 +
    6.327 +    masm.bind(RET_NEG_ONE);
    6.328 +    masm.movl(rbx, -1);
    6.329 +    masm.jmpb(DONE);
    6.330 +
    6.331 +    masm.bind(RET_NOT_FOUND);
    6.332 +    masm.movl(rbx, -1);
    6.333 +
    6.334 +    masm.bind(CLEANUP);
    6.335 +    masm.addptr(rsp, 12); // drop the three 4-byte slots pushed above (string, substr, substr count)
    6.336 +
    6.337 +    masm.bind(DONE);
    6.338 +  %}
   6.339 +
   6.340 +  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2,
   6.341 +                             eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{
   6.342 +    Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
   6.343 +    MacroAssembler masm(&cbuf);
   6.344 +
   6.345 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   6.346 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
   6.347 +    Register ary1Reg      = as_Register($ary1$$reg);
   6.348 +    Register ary2Reg      = as_Register($ary2$$reg);
   6.349 +    Register tmp3Reg      = as_Register($tmp3$$reg);
   6.350 +    Register tmp4Reg      = as_Register($tmp4$$reg);
   6.351 +    Register resultReg    = as_Register($result$$reg);
   6.352  
   6.353      int length_offset  = arrayOopDesc::length_offset_in_bytes();
   6.354      int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   6.355  
   6.356      // Check the input args
   6.357 -    masm.cmpl(ary1Reg, ary2Reg);
   6.358 +    masm.cmpptr(ary1Reg, ary2Reg);
   6.359      masm.jcc(Assembler::equal, TRUE_LABEL);
   6.360 -    masm.testl(ary1Reg, ary1Reg);
   6.361 +    masm.testptr(ary1Reg, ary1Reg);
   6.362      masm.jcc(Assembler::zero, FALSE_LABEL);
   6.363 -    masm.testl(ary2Reg, ary2Reg);
   6.364 +    masm.testptr(ary2Reg, ary2Reg);
   6.365      masm.jcc(Assembler::zero, FALSE_LABEL);
   6.366  
   6.367      // Check the lengths
   6.368 -    masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
   6.369 +    masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
   6.370      masm.movl(resultReg, Address(ary2Reg, length_offset));
   6.371 -    masm.cmpl(tmp2Reg, resultReg);
   6.372 +    masm.cmpl(tmp4Reg, resultReg);
   6.373      masm.jcc(Assembler::notEqual, FALSE_LABEL);
   6.374      masm.testl(resultReg, resultReg);
   6.375      masm.jcc(Assembler::zero, TRUE_LABEL);
   6.376  
   6.377 -    // Get the number of 4 byte vectors to compare
   6.378 -    masm.shrl(resultReg, 1);
   6.379 -
   6.380 -    // Check for odd-length arrays
   6.381 -    masm.andl(tmp2Reg, 1);
   6.382 -    masm.testl(tmp2Reg, tmp2Reg);
   6.383 -    masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
   6.384 -
   6.385 -    // Compare 2-byte "tail" at end of arrays
   6.386 -    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   6.387 -    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   6.388 -    masm.cmpl(tmp1Reg, tmp2Reg);
   6.389 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   6.390 +    // Load array addrs
   6.391 +    masm.lea(ary1Reg, Address(ary1Reg, base_offset));
   6.392 +    masm.lea(ary2Reg, Address(ary2Reg, base_offset));
   6.393 +
   6.394 +    // Set byte count
   6.395 +    masm.shll(tmp4Reg, 1);
   6.396 +    masm.movl(resultReg, tmp4Reg);
   6.397 +
   6.398 +    if (UseSSE42Intrinsics) {
   6.399 +      // With SSE4.2, use double quad vector compare
   6.400 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   6.401 +      // Compare 16-byte vectors
   6.402 +      masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
   6.403 +      masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
   6.404 +      masm.testl(tmp4Reg, tmp4Reg);
   6.405 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
   6.406 +      masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   6.407 +      masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   6.408 +      masm.negl(tmp4Reg);
   6.409 +
   6.410 +      masm.bind(COMPARE_WIDE_VECTORS);
   6.411 +      masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   6.412 +      masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   6.413 +      masm.pxor(tmp1Reg, tmp2Reg);
   6.414 +      masm.ptest(tmp1Reg, tmp1Reg);
   6.415 +
   6.416 +      masm.jccb(Assembler::notZero, FALSE_LABEL);
   6.417 +      masm.addl(tmp4Reg, 16);
   6.418 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   6.419 +      masm.bind(COMPARE_TAIL);
   6.420 +      masm.movl(tmp4Reg, resultReg);
   6.421 +      // Fallthru to tail compare
   6.422 +    }
   6.423 +
   6.424 +    // Compare 4-byte vectors
   6.425 +    masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
   6.426 +    masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
   6.427 +    masm.testl(tmp4Reg, tmp4Reg);
   6.428 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
   6.429 +    masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   6.430 +    masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   6.431 +    masm.negl(tmp4Reg);
   6.432 +
   6.433 +    masm.bind(COMPARE_VECTORS);
   6.434 +    masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   6.435 +    masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   6.436 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   6.437 +    masm.addl(tmp4Reg, 4);
   6.438 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
   6.439 +
   6.440 +    // Compare trailing char (final 2 bytes), if any
   6.441 +    masm.bind(COMPARE_CHAR);
   6.442      masm.testl(resultReg, resultReg);
   6.443 -    masm.jcc(Assembler::zero, TRUE_LABEL);
   6.444 -
   6.445 -    // Setup compare loop
   6.446 -    masm.bind(COMPARE_LOOP_HDR);
   6.447 -    // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
   6.448 -    masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   6.449 -    masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   6.450 -    masm.negl(resultReg);
   6.451 -
   6.452 -    // 4-byte-wide compare loop
   6.453 -    masm.bind(COMPARE_LOOP);
   6.454 -    masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
   6.455 -    masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
   6.456 -    masm.cmpl(ary1Reg, ary2Reg);
   6.457 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   6.458 -    masm.increment(resultReg);
   6.459 -    masm.jcc(Assembler::notZero, COMPARE_LOOP);
   6.460 +    masm.jccb(Assembler::zero, TRUE_LABEL);
   6.461 +    masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
   6.462 +    masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
   6.463 +    masm.cmpl(tmp3Reg, tmp4Reg);
   6.464 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   6.465  
   6.466      masm.bind(TRUE_LABEL);
   6.467      masm.movl(resultReg, 1);   // return true
   6.468 -    masm.jmp(DONE_LABEL);
   6.469 +    masm.jmpb(DONE);
   6.470  
   6.471      masm.bind(FALSE_LABEL);
   6.472      masm.xorl(resultReg, resultReg); // return false
   6.473  
   6.474      // That's it
   6.475 -    masm.bind(DONE_LABEL);
   6.476 +    masm.bind(DONE);
   6.477    %}
   6.478  
   6.479    enc_class enc_pop_rdx() %{
   6.480 @@ -12074,11 +12356,8 @@
   6.481    ins_pipe( fpu_reg_reg );
   6.482  %}
   6.483  
   6.484 -
   6.485 -
   6.486  // =======================================================================
   6.487  // fast clearing of an array
   6.488 -
   6.489  instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
   6.490    match(Set dummy (ClearArray cnt base));
   6.491    effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
   6.492 @@ -12092,24 +12371,48 @@
   6.493    ins_pipe( pipe_slow );
   6.494  %}
   6.495  
   6.496 -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
   6.497 +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
   6.498 +                        eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{
   6.499    match(Set result (StrComp str1 str2));
   6.500 -  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
   6.501 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
   6.502    //ins_cost(300);
   6.503  
   6.504    format %{ "String Compare $str1,$str2 -> $result    // KILL EAX, EBX" %}
   6.505 -  ins_encode( enc_String_Compare() );
   6.506 +  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
   6.507 +  ins_pipe( pipe_slow );
   6.508 +%}
   6.509 +
   6.510 +// fast string equals
   6.511 +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2,
   6.512 +                       eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
   6.513 +  match(Set result (StrEquals str1 str2));
   6.514 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
   6.515 +
   6.516 +  format %{ "String Equals $str1,$str2 -> $result    // KILL EBX, ECX" %}
   6.517 +  ins_encode( enc_String_Equals(tmp1, tmp2, str1, str2, tmp3, tmp4, result) );
   6.518 +  ins_pipe( pipe_slow );
   6.519 +%}
   6.520 +
   6.521 +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2,
   6.522 +                        eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{
   6.523 +  predicate(UseSSE42Intrinsics);
   6.524 +  match(Set result (StrIndexOf str1 str2));
   6.525 +  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
   6.526 +
   6.527 +  format %{ "String IndexOf $str1,$str2 -> $result    // KILL EAX, ECX, EDX" %}
   6.528 +  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
   6.529    ins_pipe( pipe_slow );
   6.530  %}
   6.531  
   6.532  // fast array equals
   6.533 -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
   6.534 +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3,
   6.535 +                      eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{
   6.536    match(Set result (AryEq ary1 ary2));
   6.537 -  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
   6.538 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
   6.539    //ins_cost(300);
   6.540  
   6.541 -  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EAX, EBX" %}
   6.542 -  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
   6.543 +  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EBX, EDX" %}
   6.544 +  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
   6.545    ins_pipe( pipe_slow );
   6.546  %}
   6.547  
     7.1 --- a/src/cpu/x86/vm/x86_64.ad	Thu Apr 02 05:22:02 2009 -0700
     7.2 +++ b/src/cpu/x86/vm/x86_64.ad	Thu Apr 02 10:49:41 2009 -0700
     7.3 @@ -3694,13 +3694,16 @@
     7.4      }
     7.5    %}
     7.6  
     7.7 -  enc_class enc_String_Compare()
     7.8 -  %{
     7.9 +  enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
    7.10 +                        rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
    7.11      Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL,
    7.12            POP_LABEL, DONE_LABEL, CONT_LABEL,
    7.13            WHILE_HEAD_LABEL;
    7.14      MacroAssembler masm(&cbuf);
    7.15  
    7.16 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
    7.17 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
    7.18 +
    7.19      // Get the first character position in both strings
    7.20      //         [8] char array, [12] offset, [16] count
    7.21      int value_offset  = java_lang_String::value_offset_in_bytes();
    7.22 @@ -3718,6 +3721,7 @@
    7.23      // Compute the minimum of the string lengths(rsi) and the
    7.24      // difference of the string lengths (stack)
    7.25  
    7.26 +    // do the conditional move stuff
    7.27      masm.movl(rdi, Address(rdi, count_offset));
    7.28      masm.movl(rsi, Address(rsi, count_offset));
    7.29      masm.movl(rcx, rdi);
    7.30 @@ -3745,7 +3749,7 @@
    7.31        Label LSkip2;
    7.32        // Check if the strings start at same location
    7.33        masm.cmpptr(rbx, rax);
    7.34 -      masm.jcc(Assembler::notEqual, LSkip2);
    7.35 +      masm.jccb(Assembler::notEqual, LSkip2);
    7.36  
    7.37        // Check if the length difference is zero (from stack)
    7.38        masm.cmpl(Address(rsp, 0), 0x0);
    7.39 @@ -3755,9 +3759,52 @@
    7.40        masm.bind(LSkip2);
    7.41      }
    7.42  
    7.43 +    // Advance to next character
    7.44 +    masm.addptr(rax, 2);
    7.45 +    masm.addptr(rbx, 2);
    7.46 +
    7.47 +    if (UseSSE42Intrinsics) {
    7.48 +      // With SSE4.2, use double quad vector compare
    7.49 +      Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    7.50 +      // Setup to compare 16-byte vectors
    7.51 +      masm.movl(rdi, rsi);
    7.52 +      masm.andl(rsi, 0xfffffff8); // rsi holds the vector count
    7.53 +      masm.andl(rdi, 0x00000007); // rdi holds the tail count
    7.54 +      masm.testl(rsi, rsi);
    7.55 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
    7.56 +
    7.57 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    7.58 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    7.59 +      masm.negptr(rsi);
    7.60 +
    7.61 +      masm.bind(COMPARE_VECTORS);
    7.62 +      masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2));
    7.63 +      masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2));
    7.64 +      masm.pxor(tmp1Reg, tmp2Reg);
    7.65 +      masm.ptest(tmp1Reg, tmp1Reg);
    7.66 +      masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    7.67 +      masm.addptr(rsi, 8);
    7.68 +      masm.jcc(Assembler::notZero, COMPARE_VECTORS);
    7.69 +      masm.jmpb(COMPARE_TAIL);
    7.70 +
    7.71 +      // Mismatched characters in the vectors
    7.72 +      masm.bind(VECTOR_NOT_EQUAL);
    7.73 +      masm.lea(rax, Address(rax, rsi, Address::times_2));
    7.74 +      masm.lea(rbx, Address(rbx, rsi, Address::times_2));
    7.75 +      masm.movl(rdi, 8);
    7.76 +
    7.77 +      // Compare tail (< 8 chars), or rescan last vectors to
    7.78 +      // find 1st mismatched characters
    7.79 +      masm.bind(COMPARE_TAIL);
    7.80 +      masm.testl(rdi, rdi);
    7.81 +      masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    7.82 +      masm.movl(rsi, rdi);
    7.83 +      // Fallthru to tail compare
    7.84 +    }
    7.85 +
    7.86      // Shift RAX and RBX to the end of the arrays, negate min
    7.87 -    masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
    7.88 -    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
    7.89 +    masm.lea(rax, Address(rax, rsi, Address::times_2, 0));
    7.90 +    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0));
    7.91      masm.negptr(rsi);
    7.92  
    7.93      // Compare the rest of the characters
    7.94 @@ -3765,93 +3812,329 @@
    7.95      masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0));
    7.96      masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0));
    7.97      masm.subl(rcx, rdi);
    7.98 -    masm.jcc(Assembler::notZero, POP_LABEL);
    7.99 +    masm.jccb(Assembler::notZero, POP_LABEL);
   7.100      masm.increment(rsi);
   7.101      masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
   7.102  
   7.103      // Strings are equal up to min length.  Return the length difference.
   7.104      masm.bind(LENGTH_DIFF_LABEL);
   7.105      masm.pop(rcx);
   7.106 -    masm.jmp(DONE_LABEL);
   7.107 +    masm.jmpb(DONE_LABEL);
   7.108  
   7.109      // Discard the stored length difference
   7.110      masm.bind(POP_LABEL);
   7.111      masm.addptr(rsp, 8);
   7.112 -       
   7.113 +
   7.114      // That's it
   7.115      masm.bind(DONE_LABEL);
   7.116    %}
   7.117  
   7.118 -  enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{
   7.119 -    Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
   7.120 + enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
   7.121 +                        rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{
   7.122 +    // SSE4.2 version
   7.123 +    Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR,
   7.124 +          SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE;
   7.125      MacroAssembler masm(&cbuf);
   7.126  
   7.127 -    Register ary1Reg   = as_Register($ary1$$reg);
   7.128 -    Register ary2Reg   = as_Register($ary2$$reg);
   7.129 -    Register tmp1Reg   = as_Register($tmp1$$reg);
   7.130 -    Register tmp2Reg   = as_Register($tmp2$$reg);
   7.131 -    Register resultReg = as_Register($result$$reg);
   7.132 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   7.133 +
   7.134 +    // Get the first character position in both strings
   7.135 +    //         [8] char array, [12] offset, [16] count
   7.136 +    int value_offset  = java_lang_String::value_offset_in_bytes();
   7.137 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
   7.138 +    int count_offset  = java_lang_String::count_offset_in_bytes();
   7.139 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   7.140 +
   7.141 +    // Get counts for string and substr
   7.142 +    masm.movl(rdx, Address(rsi, count_offset));
   7.143 +    masm.movl(rax, Address(rdi, count_offset));
   7.144 +    // Check for substr count > string count
   7.145 +    masm.cmpl(rax, rdx);
   7.146 +    masm.jcc(Assembler::greater, RET_NEG_ONE);
   7.147 +
   7.148 +    // Start the indexOf operation
   7.149 +    // Get start addr of string
   7.150 +    masm.load_heap_oop(rbx, Address(rsi, value_offset));
   7.151 +    masm.movl(rcx, Address(rsi, offset_offset));
   7.152 +    masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset));
   7.153 +    masm.push(rsi);
   7.154 +
   7.155 +    // Get start addr of substr
   7.156 +    masm.load_heap_oop(rbx, Address(rdi, value_offset));
   7.157 +    masm.movl(rcx, Address(rdi, offset_offset));
   7.158 +    masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset));
   7.159 +    masm.push(rdi);
   7.160 +    masm.push(rax);
   7.161 +    masm.jmpb(PREP_FOR_SCAN);
   7.162 +
   7.163 +    // Substr count saved at sp
   7.164 +    // Substr saved at sp+8
   7.165 +    // String saved at sp+16
   7.166 +
   7.167 +    // Prep to load substr for scan
   7.168 +    masm.bind(LOAD_SUBSTR);
   7.169 +    masm.movptr(rdi, Address(rsp, 8));
   7.170 +    masm.movl(rax, Address(rsp, 0));
   7.171 +
   7.172 +    // Load substr
   7.173 +    masm.bind(PREP_FOR_SCAN);
   7.174 +    masm.movdqu(tmp1Reg, Address(rdi, 0));
   7.175 +    masm.addq(rdx, 8);    // prime the loop
   7.176 +    masm.subptr(rsi, 16);
   7.177 +
   7.178 +    // Scan string for substr in 16-byte vectors
   7.179 +    masm.bind(SCAN_TO_SUBSTR);
   7.180 +    masm.subq(rdx, 8);
   7.181 +    masm.addptr(rsi, 16);
   7.182 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
   7.183 +    masm.jcc(Assembler::above, SCAN_TO_SUBSTR);
   7.184 +    masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND);
   7.185 +
   7.186 +    // Fallthru: found a potential substr
   7.187 +
   7.188 +    //Make sure string is still long enough
   7.189 +    masm.subl(rdx, rcx);
   7.190 +    masm.cmpl(rdx, rax);
   7.191 +    masm.jccb(Assembler::negative, RET_NOT_FOUND);
   7.192 +    // Compute start addr of substr
   7.193 +    masm.lea(rsi, Address(rsi, rcx, Address::times_2));
   7.194 +    masm.movptr(rbx, rsi);
   7.195 +
   7.196 +    // Compare potential substr
   7.197 +    masm.addq(rdx, 8);        // prime the loop
   7.198 +    masm.addq(rax, 8);
   7.199 +    masm.subptr(rsi, 16);
   7.200 +    masm.subptr(rdi, 16);
   7.201 +
   7.202 +    // Scan 16-byte vectors of string and substr
   7.203 +    masm.bind(SCAN_SUBSTR);
   7.204 +    masm.subq(rax, 8);
   7.205 +    masm.subq(rdx, 8);
   7.206 +    masm.addptr(rsi, 16);
   7.207 +    masm.addptr(rdi, 16);
   7.208 +    masm.movdqu(tmp1Reg, Address(rdi, 0));
   7.209 +    masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d);
   7.210 +    masm.jcc(Assembler::noOverflow, LOAD_SUBSTR);   // OF == 0
   7.211 +    masm.jcc(Assembler::positive, SCAN_SUBSTR);     // SF == 0
   7.212 +
   7.213 +    // Compute substr offset
   7.214 +    masm.movptr(rsi, Address(rsp, 16));
   7.215 +    masm.subptr(rbx, rsi);
   7.216 +    masm.shrl(rbx, 1);
   7.217 +    masm.jmpb(CLEANUP);
   7.218 +
   7.219 +    masm.bind(RET_NEG_ONE);
   7.220 +    masm.movl(rbx, -1);
   7.221 +    masm.jmpb(DONE);
   7.222 +
   7.223 +    masm.bind(RET_NOT_FOUND);
   7.224 +    masm.movl(rbx, -1);
   7.225 +
   7.226 +    masm.bind(CLEANUP);
   7.227 +    masm.addptr(rsp, 24);
   7.228 +
   7.229 +    masm.bind(DONE);
   7.230 +  %}
   7.231 +
   7.232 +  enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
   7.233 +                              rbx_RegI tmp3, rcx_RegI tmp2, rax_RegI result) %{
   7.234 +    Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR;
   7.235 +    MacroAssembler masm(&cbuf);
   7.236 +
   7.237 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   7.238 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
   7.239 +
   7.240 +    int value_offset  = java_lang_String::value_offset_in_bytes();
   7.241 +    int offset_offset = java_lang_String::offset_offset_in_bytes();
   7.242 +    int count_offset  = java_lang_String::count_offset_in_bytes();
   7.243 +    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   7.244 +
   7.245 +    // does source == target string?
   7.246 +    masm.cmpptr(rdi, rsi);
   7.247 +    masm.jcc(Assembler::equal, RET_TRUE);
   7.248 +
   7.249 +    // get and compare counts
   7.250 +    masm.movl(rcx, Address(rdi, count_offset));
   7.251 +    masm.movl(rax, Address(rsi, count_offset));
   7.252 +    masm.cmpl(rcx, rax);
   7.253 +    masm.jcc(Assembler::notEqual, RET_FALSE);
   7.254 +    masm.testl(rax, rax);
   7.255 +    masm.jcc(Assembler::zero, RET_TRUE);
   7.256 +
   7.257 +    // get source string offset and value
   7.258 +    masm.load_heap_oop(rbx, Address(rsi, value_offset));
   7.259 +    masm.movl(rax, Address(rsi, offset_offset));
   7.260 +    masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset));
   7.261 +
   7.262 +    // get compare string offset and value
   7.263 +    masm.load_heap_oop(rbx, Address(rdi, value_offset));
   7.264 +    masm.movl(rax, Address(rdi, offset_offset));
   7.265 +    masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset));
   7.266 +
   7.267 +    // Set byte count
   7.268 +    masm.shll(rcx, 1);
   7.269 +    masm.movl(rax, rcx);
   7.270 +
   7.271 +    if (UseSSE42Intrinsics) {
   7.272 +      // With SSE4.2, use double quad vector compare
   7.273 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   7.274 +      // Compare 16-byte vectors
   7.275 +      masm.andl(rcx, 0xfffffff0);  // vector count (in bytes)
   7.276 +      masm.andl(rax, 0x0000000e);  // tail count (in bytes)
   7.277 +      masm.testl(rcx, rcx);
   7.278 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
   7.279 +      masm.lea(rdi, Address(rdi, rcx, Address::times_1));
   7.280 +      masm.lea(rsi, Address(rsi, rcx, Address::times_1));
   7.281 +      masm.negptr(rcx);
   7.282 +
   7.283 +      masm.bind(COMPARE_WIDE_VECTORS);
   7.284 +      masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1));
   7.285 +      masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1));
   7.286 +      masm.pxor(tmp1Reg, tmp2Reg);
   7.287 +      masm.ptest(tmp1Reg, tmp1Reg);
   7.288 +      masm.jccb(Assembler::notZero, RET_FALSE);
   7.289 +      masm.addptr(rcx, 16);
   7.290 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   7.291 +      masm.bind(COMPARE_TAIL);
   7.292 +      masm.movl(rcx, rax);
   7.293 +      // Fallthru to tail compare
   7.294 +    }
   7.295 +
   7.296 +    // Compare 4-byte vectors
   7.297 +    masm.andl(rcx, 0xfffffffc);  // vector count (in bytes)
   7.298 +    masm.andl(rax, 0x00000002);  // tail char (in bytes)
   7.299 +    masm.testl(rcx, rcx);
   7.300 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
   7.301 +    masm.lea(rdi, Address(rdi, rcx, Address::times_1));
   7.302 +    masm.lea(rsi, Address(rsi, rcx, Address::times_1));
   7.303 +    masm.negptr(rcx);
   7.304 +
   7.305 +    masm.bind(COMPARE_VECTORS);
   7.306 +    masm.movl(rbx, Address(rdi, rcx, Address::times_1));
   7.307 +    masm.cmpl(rbx, Address(rsi, rcx, Address::times_1));
   7.308 +    masm.jccb(Assembler::notEqual, RET_FALSE);
   7.309 +    masm.addptr(rcx, 4);
   7.310 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
   7.311 +
   7.312 +    // Compare trailing char (final 2 bytes), if any
   7.313 +    masm.bind(COMPARE_CHAR);
   7.314 +    masm.testl(rax, rax);
   7.315 +    masm.jccb(Assembler::zero, RET_TRUE);
   7.316 +    masm.load_unsigned_short(rbx, Address(rdi, 0));
   7.317 +    masm.load_unsigned_short(rcx, Address(rsi, 0));
   7.318 +    masm.cmpl(rbx, rcx);
   7.319 +    masm.jccb(Assembler::notEqual, RET_FALSE);
   7.320 +
   7.321 +    masm.bind(RET_TRUE);
   7.322 +    masm.movl(rax, 1);   // return true
   7.323 +    masm.jmpb(DONE);
   7.324 +
   7.325 +    masm.bind(RET_FALSE);
   7.326 +    masm.xorl(rax, rax); // return false
   7.327 +
   7.328 +    masm.bind(DONE);
   7.329 +  %}
   7.330 +
   7.331 +  enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2,
   7.332 +                             rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{
   7.333 +    Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
   7.334 +    MacroAssembler masm(&cbuf);
   7.335 +
   7.336 +    XMMRegister tmp1Reg   = as_XMMRegister($tmp1$$reg);
   7.337 +    XMMRegister tmp2Reg   = as_XMMRegister($tmp2$$reg);
   7.338 +    Register ary1Reg      = as_Register($ary1$$reg);
   7.339 +    Register ary2Reg      = as_Register($ary2$$reg);
   7.340 +    Register tmp3Reg      = as_Register($tmp3$$reg);
   7.341 +    Register tmp4Reg      = as_Register($tmp4$$reg);
   7.342 +    Register resultReg    = as_Register($result$$reg);
   7.343  
   7.344      int length_offset  = arrayOopDesc::length_offset_in_bytes();
   7.345      int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
   7.346  
   7.347      // Check the input args
   7.348 -    masm.cmpq(ary1Reg, ary2Reg);                        
   7.349 +    masm.cmpq(ary1Reg, ary2Reg);
   7.350      masm.jcc(Assembler::equal, TRUE_LABEL);
   7.351 -    masm.testq(ary1Reg, ary1Reg);                       
   7.352 +    masm.testq(ary1Reg, ary1Reg);
   7.353      masm.jcc(Assembler::zero, FALSE_LABEL);
   7.354 -    masm.testq(ary2Reg, ary2Reg);                       
   7.355 +    masm.testq(ary2Reg, ary2Reg);
   7.356      masm.jcc(Assembler::zero, FALSE_LABEL);
   7.357  
   7.358      // Check the lengths
   7.359 -    masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
   7.360 +    masm.movl(tmp4Reg, Address(ary1Reg, length_offset));
   7.361      masm.movl(resultReg, Address(ary2Reg, length_offset));
   7.362 -    masm.cmpl(tmp2Reg, resultReg);
   7.363 +    masm.cmpl(tmp4Reg, resultReg);
   7.364      masm.jcc(Assembler::notEqual, FALSE_LABEL);
   7.365      masm.testl(resultReg, resultReg);
   7.366      masm.jcc(Assembler::zero, TRUE_LABEL);
   7.367  
   7.368 -    // Get the number of 4 byte vectors to compare
   7.369 -    masm.shrl(resultReg, 1);
   7.370 -
   7.371 -    // Check for odd-length arrays
   7.372 -    masm.andl(tmp2Reg, 1);
   7.373 -    masm.testl(tmp2Reg, tmp2Reg);
   7.374 -    masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
   7.375 -
   7.376 -    // Compare 2-byte "tail" at end of arrays
   7.377 -    masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   7.378 -    masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   7.379 -    masm.cmpl(tmp1Reg, tmp2Reg);
   7.380 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   7.381 +    //load array address
   7.382 +    masm.lea(ary1Reg, Address(ary1Reg, base_offset));
   7.383 +    masm.lea(ary2Reg, Address(ary2Reg, base_offset));
   7.384 +
   7.385 +    //set byte count
   7.386 +    masm.shll(tmp4Reg, 1);
   7.387 +    masm.movl(resultReg,tmp4Reg);
   7.388 +
   7.389 +    if (UseSSE42Intrinsics){
   7.390 +      // With SSE4.2, use double quad vector compare
   7.391 +      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
   7.392 +      // Compare 16-byte vectors
   7.393 +      masm.andl(tmp4Reg, 0xfffffff0);    // vector count (in bytes)
   7.394 +      masm.andl(resultReg, 0x0000000e);  // tail count (in bytes)
   7.395 +      masm.testl(tmp4Reg, tmp4Reg);
   7.396 +      masm.jccb(Assembler::zero, COMPARE_TAIL);
   7.397 +      masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   7.398 +      masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   7.399 +      masm.negptr(tmp4Reg);
   7.400 +
   7.401 +      masm.bind(COMPARE_WIDE_VECTORS);
   7.402 +      masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   7.403 +      masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   7.404 +      masm.pxor(tmp1Reg, tmp2Reg);
   7.405 +      masm.ptest(tmp1Reg, tmp1Reg);
   7.406 +
   7.407 +      masm.jccb(Assembler::notZero, FALSE_LABEL);
   7.408 +      masm.addptr(tmp4Reg, 16);
   7.409 +      masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
   7.410 +      masm.bind(COMPARE_TAIL);
   7.411 +      masm.movl(tmp4Reg, resultReg);
   7.412 +      // Fallthru to tail compare
   7.413 +    }
   7.414 +
   7.415 +   // Compare 4-byte vectors
   7.416 +    masm.andl(tmp4Reg, 0xfffffffc);    // vector count (in bytes)
   7.417 +    masm.andl(resultReg, 0x00000002);  // tail char (in bytes)
   7.418 +    masm.testl(tmp4Reg, tmp4Reg); //if tmp2 == 0, only compare char
   7.419 +    masm.jccb(Assembler::zero, COMPARE_CHAR);
   7.420 +    masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   7.421 +    masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   7.422 +    masm.negptr(tmp4Reg);
   7.423 +
   7.424 +    masm.bind(COMPARE_VECTORS);
   7.425 +    masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1));
   7.426 +    masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1));
   7.427 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   7.428 +    masm.addptr(tmp4Reg, 4);
   7.429 +    masm.jcc(Assembler::notZero, COMPARE_VECTORS);
   7.430 +
   7.431 +    // Compare trailing char (final 2 bytes), if any
   7.432 +    masm.bind(COMPARE_CHAR);
   7.433      masm.testl(resultReg, resultReg);
   7.434 -    masm.jcc(Assembler::zero, TRUE_LABEL);
   7.435 -
   7.436 -    // Setup compare loop
   7.437 -    masm.bind(COMPARE_LOOP_HDR);
   7.438 -    // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
   7.439 -    masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
   7.440 -    masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
   7.441 -    masm.negq(resultReg);
   7.442 -
   7.443 -    // 4-byte-wide compare loop
   7.444 -    masm.bind(COMPARE_LOOP);
   7.445 -    masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
   7.446 -    masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
   7.447 -    masm.cmpl(ary1Reg, ary2Reg);
   7.448 -    masm.jcc(Assembler::notEqual, FALSE_LABEL);
   7.449 -    masm.incrementq(resultReg);
   7.450 -    masm.jcc(Assembler::notZero, COMPARE_LOOP);
   7.451 +    masm.jccb(Assembler::zero, TRUE_LABEL);
   7.452 +    masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0));
   7.453 +    masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0));
   7.454 +    masm.cmpl(tmp3Reg, tmp4Reg);
   7.455 +    masm.jccb(Assembler::notEqual, FALSE_LABEL);
   7.456  
   7.457      masm.bind(TRUE_LABEL);
   7.458      masm.movl(resultReg, 1);   // return true
   7.459 -    masm.jmp(DONE_LABEL);
   7.460 +    masm.jmpb(DONE);
   7.461  
   7.462      masm.bind(FALSE_LABEL);
   7.463      masm.xorl(resultReg, resultReg); // return false
   7.464  
   7.465      // That's it
   7.466 -    masm.bind(DONE_LABEL);
   7.467 +    masm.bind(DONE);
   7.468    %}
   7.469  
   7.470    enc_class enc_rethrow()
   7.471 @@ -5087,7 +5370,7 @@
   7.472  %}
   7.473  
   7.474  // Double register operands
   7.475 -operand regD()
   7.476 +operand regD() 
   7.477  %{
   7.478    constraint(ALLOC_IN_RC(double_reg));
   7.479    match(RegD);
   7.480 @@ -11540,27 +11823,52 @@
   7.481    ins_pipe(pipe_slow);
   7.482  %}
   7.483  
   7.484 -instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1,
   7.485 -                        rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr)
   7.486 +instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2,
   7.487 +                        rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
   7.488  %{
   7.489    match(Set result (StrComp str1 str2));
   7.490 -  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
   7.491 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
   7.492    //ins_cost(300);
   7.493  
   7.494    format %{ "String Compare $str1, $str2 -> $result    // XXX KILL RAX, RBX" %}
   7.495 -  ins_encode( enc_String_Compare() );
   7.496 +  ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
   7.497    ins_pipe( pipe_slow );
   7.498  %}
   7.499  
   7.500 +instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2,
   7.501 +                        rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr)
   7.502 +%{
   7.503 +  predicate(UseSSE42Intrinsics);  // rule is only usable when the UseSSE42Intrinsics flag is set
   7.504 +  match(Set result (StrIndexOf str1 str2));
   7.505 +  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr);
   7.506 +  // The KILL list printed by the format string corresponds to tmp2 (rax_RegI), tmp3 (rcx_RegI) and tmp4 (rdx_RegI).
   7.507 +  format %{ "String IndexOf $str1,$str2 -> $result   // KILL RAX, RCX, RDX" %}
   7.508 +  ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
   7.509 +  ins_pipe( pipe_slow );
   7.510 +%}
   7.511 +
   7.512 +// fast string equals: matches the ideal StrEquals node and encodes it with enc_String_Equals
   7.513 +instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3,
   7.514 +                       rcx_RegI tmp4, rax_RegI result, rFlagsReg cr)
   7.515 +%{
   7.516 +  match(Set result (StrEquals str1 str2));
   7.517 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr);
   7.518 +  // The KILL list printed by the format string corresponds to tmp3 (rbx_RegI) and tmp4 (rcx_RegI).
   7.519 +  format %{ "String Equals $str1,$str2 -> $result    // KILL RBX, RCX" %}
   7.520 +  ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) );
   7.521 +  ins_pipe( pipe_slow );
   7.522 +%}
   7.523 +
   7.524  // fast array equals
   7.525 -instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, 
   7.526 -                      rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{
   7.527 +instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3,
   7.528 +                      rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr)
   7.529 +%{
   7.530    match(Set result (AryEq ary1 ary2));
   7.531 -  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
   7.532 +  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
   7.533    //ins_cost(300);
   7.534  
   7.535 -  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL RAX, RBX" %}
   7.536 -  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
   7.537 +  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL RAX, RBX" %}
   7.538 +  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) );
   7.539    ins_pipe( pipe_slow );
   7.540  %}
   7.541  
     8.1 --- a/src/share/vm/adlc/formssel.cpp	Thu Apr 02 05:22:02 2009 -0700
     8.2 +++ b/src/share/vm/adlc/formssel.cpp	Thu Apr 02 10:49:41 2009 -0700
     8.3 @@ -574,9 +574,13 @@
     8.4    // TEMPORARY
     8.5    // if( is_simple_chain_rule(globals) )  return false;
     8.6  
     8.7 -  // String-compare uses many memorys edges, but writes none
     8.8 +  // String.(compareTo/equals/indexOf) and Arrays.equals use many memory edges,
     8.9 +  // but write none
    8.10    if( _matrule && _matrule->_rChild &&
    8.11 -      strcmp(_matrule->_rChild->_opType,"StrComp")==0 )
    8.12 +      ( strcmp(_matrule->_rChild->_opType,"StrComp"    )==0 ||
    8.13 +        strcmp(_matrule->_rChild->_opType,"StrEquals"  )==0 ||
    8.14 +        strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 ||
    8.15 +        strcmp(_matrule->_rChild->_opType,"AryEq"      )==0 ))
    8.16      return true;
    8.17  
    8.18    // Check if instruction has a USE of a memory operand class, but no defs
    8.19 @@ -815,8 +819,10 @@
    8.20      return AdlcVMDeps::Parms;   // Skip the machine-state edges
    8.21  
    8.22    if( _matrule->_rChild &&
    8.23 -          strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) {
    8.24 -        // String compare takes 1 control and 4 memory edges.
    8.25 +      ( strcmp(_matrule->_rChild->_opType,"StrComp"   )==0 ||
    8.26 +        strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 ||
    8.27 +        strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) {
    8.28 +        // String.(compareTo/equals/indexOf) take 1 control and 4 memory edges.
    8.29      return 5;
    8.30    }
    8.31  
     9.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Thu Apr 02 05:22:02 2009 -0700
     9.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Apr 02 10:49:41 2009 -0700
     9.3 @@ -288,6 +288,7 @@
     9.4    template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
     9.5    template(bitCount_name,                             "bitCount")                                 \
     9.6    template(profile_name,                              "profile")                                  \
     9.7 +  template(equals_name,                               "equals")                                   \
     9.8                                                                                                    \
     9.9    /* non-intrinsic name/signature pairs: */                                                       \
    9.10    template(register_method_name,                      "register")                                 \
    9.11 @@ -579,7 +580,6 @@
    9.12     do_signature(copyOfRange_signature,        "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;")            \
    9.13                                                                                                                          \
    9.14    do_intrinsic(_equalsC,                  java_util_Arrays,       equals_name,    equalsC_signature,             F_S)   \
    9.15 -   do_name(     equals_name,                                     "equals")                                              \
    9.16     do_signature(equalsC_signature,                               "([C[C)Z")                                             \
    9.17                                                                                                                          \
    9.18    do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
    9.19 @@ -589,6 +589,7 @@
    9.20     do_name(     compareTo_name,                                  "compareTo")                                           \
    9.21    do_intrinsic(_indexOf,                  java_lang_String,       indexOf_name, string_int_signature,            F_R)   \
    9.22     do_name(     indexOf_name,                                    "indexOf")                                             \
    9.23 +  do_intrinsic(_equals,                   java_lang_String,       equals_name, object_boolean_signature,         F_R)   \
    9.24                                                                                                                          \
    9.25    do_class(java_nio_Buffer,               "java/nio/Buffer")                                                            \
    9.26    do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
    10.1 --- a/src/share/vm/opto/classes.hpp	Thu Apr 02 05:22:02 2009 -0700
    10.2 +++ b/src/share/vm/opto/classes.hpp	Thu Apr 02 10:49:41 2009 -0700
    10.3 @@ -218,6 +218,8 @@
    10.4  macro(StoreP)
    10.5  macro(StoreN)
    10.6  macro(StrComp)
    10.7 +macro(StrEquals)
    10.8 +macro(StrIndexOf)
    10.9  macro(SubD)
   10.10  macro(SubF)
   10.11  macro(SubI)
    11.1 --- a/src/share/vm/opto/gcm.cpp	Thu Apr 02 05:22:02 2009 -0700
    11.2 +++ b/src/share/vm/opto/gcm.cpp	Thu Apr 02 10:49:41 2009 -0700
    11.3 @@ -438,6 +438,10 @@
    11.4  #endif
    11.5 -  assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
    11.6 -         "String compare is only known 'load' that does not conflict with any stores");
    11.7 +  assert(load_alias_idx || (load->is_Mach() &&   // single assert: separate per-opcode asserts cannot all hold when load_alias_idx is 0
    11.8 +          (load->as_Mach()->ideal_Opcode() == Op_StrComp ||
    11.9 +           load->as_Mach()->ideal_Opcode() == Op_StrEquals ||
   11.10 +           load->as_Mach()->ideal_Opcode() == Op_StrIndexOf ||
   11.11 +           load->as_Mach()->ideal_Opcode() == Op_AryEq)),
   11.12 +         "String compare/equals/indexOf and Arrays equals are the only 'loads' that do not conflict with any stores");
   11.13  
   11.14    if (!C->alias_type(load_alias_idx)->is_rewritable()) {
   11.15      // It is impossible to spoil this load by putting stores before it,
    12.1 --- a/src/share/vm/opto/lcm.cpp	Thu Apr 02 05:22:02 2009 -0700
    12.2 +++ b/src/share/vm/opto/lcm.cpp	Thu Apr 02 10:49:41 2009 -0700
    12.3 @@ -137,6 +137,8 @@
    12.4        if( mach->in(2) != val ) continue;
    12.5        break;                    // Found a memory op?
    12.6      case Op_StrComp:
    12.7 +    case Op_StrEquals:
    12.8 +    case Op_StrIndexOf:
    12.9      case Op_AryEq:
   12.10        // Not a legit memory op for implicit null check regardless of
   12.11        // embedded loads
    13.1 --- a/src/share/vm/opto/library_call.cpp	Thu Apr 02 05:22:02 2009 -0700
    13.2 +++ b/src/share/vm/opto/library_call.cpp	Thu Apr 02 10:49:41 2009 -0700
    13.3 @@ -136,6 +136,7 @@
    13.4    bool inline_string_compareTo();
    13.5    bool inline_string_indexOf();
    13.6    Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
    13.7 +  bool inline_string_equals();
    13.8    Node* pop_math_arg();
    13.9    bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
   13.10    bool inline_math_native(vmIntrinsics::ID id);
   13.11 @@ -261,6 +262,7 @@
   13.12      switch (id) {
   13.13      case vmIntrinsics::_indexOf:
   13.14      case vmIntrinsics::_compareTo:
   13.15 +    case vmIntrinsics::_equals:
   13.16      case vmIntrinsics::_equalsC:
   13.17        break;  // InlineNatives does not control String.compareTo
   13.18      default:
   13.19 @@ -275,6 +277,9 @@
   13.20    case vmIntrinsics::_indexOf:
   13.21      if (!SpecialStringIndexOf)  return NULL;
   13.22      break;
   13.23 +  case vmIntrinsics::_equals:
   13.24 +    if (!SpecialStringEquals)  return NULL;
   13.25 +    break;
   13.26    case vmIntrinsics::_equalsC:
   13.27      if (!SpecialArraysEquals)  return NULL;
   13.28      break;
   13.29 @@ -442,6 +447,8 @@
   13.30      return inline_string_compareTo();
   13.31    case vmIntrinsics::_indexOf:
   13.32      return inline_string_indexOf();
   13.33 +  case vmIntrinsics::_equals:
   13.34 +    return inline_string_equals();
   13.35  
   13.36    case vmIntrinsics::_getObject:
   13.37      return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
   13.38 @@ -793,6 +800,8 @@
   13.39  //------------------------------inline_string_compareTo------------------------
   13.40  bool LibraryCallKit::inline_string_compareTo() {
   13.41  
   13.42 +  if (!Matcher::has_match_rule(Op_StrComp)) return false;
   13.43 +
   13.44    const int value_offset = java_lang_String::value_offset_in_bytes();
   13.45    const int count_offset = java_lang_String::count_offset_in_bytes();
   13.46    const int offset_offset = java_lang_String::offset_offset_in_bytes();
   13.47 @@ -830,6 +839,82 @@
   13.48    return true;
   13.49  }
   13.50  
   13.51 +//------------------------------inline_string_equals------------------------
   13.52 +bool LibraryCallKit::inline_string_equals() {
   13.53 +  // Builds the graph for the String.equals intrinsic: an instanceof test on the argument selects between a StrEquals node and the constant 0.
   13.54 +  if (!Matcher::has_match_rule(Op_StrEquals)) return false;
   13.55 +
   13.56 +  const int value_offset = java_lang_String::value_offset_in_bytes();
   13.57 +  const int count_offset = java_lang_String::count_offset_in_bytes();
   13.58 +  const int offset_offset = java_lang_String::offset_offset_in_bytes();
   13.59 +
   13.60 +  _sp += 2;
   13.61 +  Node* argument = pop();  // pop non-receiver first:  it was pushed second
   13.62 +  Node* receiver = pop();
   13.63 +
   13.64 +  // Null check on self without removing any arguments: _sp is raised
   13.65 +  // back over both operands around the check and lowered again after it.
   13.66 +  // Only the receiver is checked here; the argument is tested by the
   13.67 +  // instanceof below instead.
   13.68 +  _sp += 2;
   13.69 +  receiver = do_null_check(receiver, T_OBJECT);
   13.70 +  // Do not null check the argument: the spec for String.equals() allows
   13.71 +  // NULL to be passed as the argument.
   13.72 +  _sp -= 2;
   13.73 +
   13.74 +  if (stopped()) {
   13.75 +    return true;
   13.76 +  }
   13.77 +
   13.78 +  // get String klass for instanceOf
   13.79 +  ciInstanceKlass* klass = env()->String_klass();
   13.80 +
   13.81 +  // Merge point for the two paths: slot 1 = instanceOf true, slot 2 = instanceOf false
   13.82 +  RegionNode* region = new (C, 3) RegionNode(3);
   13.83 +  Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL);
   13.84 +
   13.85 +  Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass)));
   13.86 +  Node* cmp  = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1)));
   13.87 +  Node* bol  = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq));
   13.88 +
   13.89 +  IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
   13.90 +
   13.91 +  Node* if_true  = _gvn.transform(new (C, 1) IfTrueNode(iff));
   13.92 +  set_control(if_true);
   13.93 +
   13.94 +  const TypeInstPtr* string_type =
   13.95 +    TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
   13.96 +
   13.97 +  // instanceOf == true: compare the strings with a StrEquals node whose control input is this path
   13.98 +  Node* equals =
   13.99 +    _gvn.transform(new (C, 7) StrEqualsNode(
  13.100 +                        control(),
  13.101 +                        memory(TypeAryPtr::CHARS),
  13.102 +                        memory(string_type->add_offset(value_offset)),
  13.103 +                        memory(string_type->add_offset(count_offset)),
  13.104 +                        memory(string_type->add_offset(offset_offset)),
  13.105 +                        receiver,
  13.106 +                        argument));
  13.107 +
  13.108 +  phi->init_req(1, _gvn.transform(equals));
  13.109 +  region->init_req(1, if_true);
  13.110 +
  13.111 +  // instanceOf == false: the result on this path is the constant 0 (false)
  13.112 +  Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff));
  13.113 +  set_control(if_false);
  13.114 +
  13.115 +  phi->init_req(2, _gvn.transform(intcon(0)));
  13.116 +  region->init_req(2, if_false);
  13.117 +
  13.118 +  // post merge: continue from the region and push the merged result
  13.119 +  set_control(_gvn.transform(region));
  13.120 +  record_for_igvn(region);
  13.121 +
  13.122 +  push(_gvn.transform(phi));
  13.123 +
  13.124 +  return true;
  13.125 +}
  13.126 +
  13.127  //------------------------------inline_array_equals----------------------------
  13.128  bool LibraryCallKit::inline_array_equals() {
  13.129  
  13.130 @@ -994,80 +1079,115 @@
  13.131    return result;
  13.132  }
  13.133  
  13.134 -
  13.135  //------------------------------inline_string_indexOf------------------------
  13.136  bool LibraryCallKit::inline_string_indexOf() {
  13.137  
  13.138 +  const int value_offset  = java_lang_String::value_offset_in_bytes();
  13.139 +  const int count_offset  = java_lang_String::count_offset_in_bytes();
  13.140 +  const int offset_offset = java_lang_String::offset_offset_in_bytes();
  13.141 +
  13.142    _sp += 2;
  13.143    Node *argument = pop();  // pop non-receiver first:  it was pushed second
  13.144    Node *receiver = pop();
  13.145  
  13.146 -  // don't intrinsify if argument isn't a constant string.
  13.147 -  if (!argument->is_Con()) {
  13.148 -    return false;
  13.149 +  Node* result;
  13.150 +  if (Matcher::has_match_rule(Op_StrIndexOf) &&
  13.151 +      UseSSE42Intrinsics) {
  13.152 +    // Generate SSE4.2 version of indexOf
  13.153 +    // We currently only have match rules that use SSE4.2
  13.154 +
  13.155 +    // Null check on self without removing any arguments.  The argument
  13.156 +    // null check technically happens in the wrong place, which can lead to
  13.157 +    // invalid stack traces when string compare is inlined into a method
  13.158 +    // which handles NullPointerExceptions.
  13.159 +    _sp += 2;
  13.160 +    receiver = do_null_check(receiver, T_OBJECT);
  13.161 +    argument = do_null_check(argument, T_OBJECT);
  13.162 +    _sp -= 2;
  13.163 +
  13.164 +    if (stopped()) {
  13.165 +      return true;
  13.166 +    }
  13.167 +
  13.168 +    ciInstanceKlass* klass = env()->String_klass();
  13.169 +    const TypeInstPtr* string_type =
  13.170 +      TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
  13.171 +
  13.172 +    result =
  13.173 +      _gvn.transform(new (C, 7)
  13.174 +                     StrIndexOfNode(control(),
  13.175 +                                    memory(TypeAryPtr::CHARS),
  13.176 +                                    memory(string_type->add_offset(value_offset)),
  13.177 +                                    memory(string_type->add_offset(count_offset)),
  13.178 +                                    memory(string_type->add_offset(offset_offset)),
  13.179 +                                    receiver,
  13.180 +                                    argument));
  13.181 +  } else { //Use LibraryCallKit::string_indexOf
  13.182 +    // don't intrinsify if argument isn't a constant string.
  13.183 +    if (!argument->is_Con()) {
  13.184 +     return false;
  13.185 +    }
  13.186 +    const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
  13.187 +    if (str_type == NULL) {
  13.188 +      return false;
  13.189 +    }
  13.190 +    ciInstanceKlass* klass = env()->String_klass();
  13.191 +    ciObject* str_const = str_type->const_oop();
  13.192 +    if (str_const == NULL || str_const->klass() != klass) {
  13.193 +      return false;
  13.194 +    }
  13.195 +    ciInstance* str = str_const->as_instance();
  13.196 +    assert(str != NULL, "must be instance");
  13.197 +
  13.198 +    ciObject* v = str->field_value_by_offset(value_offset).as_object();
  13.199 +    int       o = str->field_value_by_offset(offset_offset).as_int();
  13.200 +    int       c = str->field_value_by_offset(count_offset).as_int();
  13.201 +    ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
  13.202 +
  13.203 +    // constant strings have no offset and count == length which
  13.204 +    // simplifies the resulting code somewhat so lets optimize for that.
  13.205 +    if (o != 0 || c != pat->length()) {
  13.206 +     return false;
  13.207 +    }
  13.208 +
  13.209 +    // Null check on self without removing any arguments.  The argument
  13.210 +    // null check technically happens in the wrong place, which can lead to
  13.211 +    // invalid stack traces when string compare is inlined into a method
  13.212 +    // which handles NullPointerExceptions.
  13.213 +    _sp += 2;
  13.214 +    receiver = do_null_check(receiver, T_OBJECT);
  13.215 +    // No null check on the argument is needed since it's a constant String oop.
  13.216 +    _sp -= 2;
  13.217 +    if (stopped()) {
  13.218 +     return true;
  13.219 +    }
  13.220 +
  13.221 +    // The empty string as a pattern always returns 0 (match at beginning of string)
  13.222 +    if (c == 0) {
  13.223 +      push(intcon(0));
  13.224 +      return true;
  13.225 +    }
  13.226 +
  13.227 +    // Generate default indexOf
  13.228 +    jchar lastChar = pat->char_at(o + (c - 1));
  13.229 +    int cache = 0;
  13.230 +    int i;
  13.231 +    for (i = 0; i < c - 1; i++) {
  13.232 +      assert(i < pat->length(), "out of range");
  13.233 +      cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
  13.234 +    }
  13.235 +
  13.236 +    int md2 = c;
  13.237 +    for (i = 0; i < c - 1; i++) {
  13.238 +      assert(i < pat->length(), "out of range");
  13.239 +      if (pat->char_at(o + i) == lastChar) {
  13.240 +        md2 = (c - 1) - i;
  13.241 +      }
  13.242 +    }
  13.243 +
  13.244 +    result = string_indexOf(receiver, pat, o, cache, md2);
  13.245    }
  13.246 -  const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
  13.247 -  if (str_type == NULL) {
  13.248 -    return false;
  13.249 -  }
  13.250 -  ciInstanceKlass* klass = env()->String_klass();
  13.251 -  ciObject* str_const = str_type->const_oop();
  13.252 -  if (str_const == NULL || str_const->klass() != klass) {
  13.253 -    return false;
  13.254 -  }
  13.255 -  ciInstance* str = str_const->as_instance();
  13.256 -  assert(str != NULL, "must be instance");
  13.257 -
  13.258 -  const int value_offset  = java_lang_String::value_offset_in_bytes();
  13.259 -  const int count_offset  = java_lang_String::count_offset_in_bytes();
  13.260 -  const int offset_offset = java_lang_String::offset_offset_in_bytes();
  13.261 -
  13.262 -  ciObject* v = str->field_value_by_offset(value_offset).as_object();
  13.263 -  int       o = str->field_value_by_offset(offset_offset).as_int();
  13.264 -  int       c = str->field_value_by_offset(count_offset).as_int();
  13.265 -  ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
  13.266 -
  13.267 -  // constant strings have no offset and count == length which
  13.268 -  // simplifies the resulting code somewhat so lets optimize for that.
  13.269 -  if (o != 0 || c != pat->length()) {
  13.270 -    return false;
  13.271 -  }
  13.272 -
  13.273 -  // Null check on self without removing any arguments.  The argument
  13.274 -  // null check technically happens in the wrong place, which can lead to
  13.275 -  // invalid stack traces when string compare is inlined into a method
  13.276 -  // which handles NullPointerExceptions.
  13.277 -  _sp += 2;
  13.278 -  receiver = do_null_check(receiver, T_OBJECT);
  13.279 -  // No null check on the argument is needed since it's a constant String oop.
  13.280 -  _sp -= 2;
  13.281 -  if (stopped()) {
  13.282 -    return true;
  13.283 -  }
  13.284 -
  13.285 -  // The null string as a pattern always returns 0 (match at beginning of string)
  13.286 -  if (c == 0) {
  13.287 -    push(intcon(0));
  13.288 -    return true;
  13.289 -  }
  13.290 -
  13.291 -  jchar lastChar = pat->char_at(o + (c - 1));
  13.292 -  int cache = 0;
  13.293 -  int i;
  13.294 -  for (i = 0; i < c - 1; i++) {
  13.295 -    assert(i < pat->length(), "out of range");
  13.296 -    cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
  13.297 -  }
  13.298 -
  13.299 -  int md2 = c;
  13.300 -  for (i = 0; i < c - 1; i++) {
  13.301 -    assert(i < pat->length(), "out of range");
  13.302 -    if (pat->char_at(o + i) == lastChar) {
  13.303 -      md2 = (c - 1) - i;
  13.304 -    }
  13.305 -  }
  13.306 -
  13.307 -  Node* result = string_indexOf(receiver, pat, o, cache, md2);
  13.308 +
  13.309    push(result);
  13.310    return true;
  13.311  }
    14.1 --- a/src/share/vm/opto/loopnode.cpp	Thu Apr 02 05:22:02 2009 -0700
    14.2 +++ b/src/share/vm/opto/loopnode.cpp	Thu Apr 02 10:49:41 2009 -0700
    14.3 @@ -2668,6 +2668,8 @@
    14.4      case Op_LoadD_unaligned:
    14.5      case Op_LoadL_unaligned:
    14.6      case Op_StrComp:            // Does a bunch of load-like effects
    14.7 +    case Op_StrEquals:
    14.8 +    case Op_StrIndexOf:
    14.9      case Op_AryEq:
   14.10        pinned = false;
   14.11      }
    15.1 --- a/src/share/vm/opto/matcher.cpp	Thu Apr 02 05:22:02 2009 -0700
    15.2 +++ b/src/share/vm/opto/matcher.cpp	Thu Apr 02 10:49:41 2009 -0700
    15.3 @@ -746,6 +746,8 @@
    15.4    if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
    15.5      switch (n->Opcode()) {
    15.6      case Op_StrComp:
    15.7 +    case Op_StrEquals:
    15.8 +    case Op_StrIndexOf:
    15.9      case Op_AryEq:
   15.10      case Op_MemBarVolatile:
   15.11      case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
   15.12 @@ -1788,6 +1790,8 @@
   15.13          mstack.push(n->in(0), Pre_Visit);     // Visit Control input
   15.14          continue;                             // while (mstack.is_nonempty())
   15.15        case Op_StrComp:
   15.16 +      case Op_StrEquals:
   15.17 +      case Op_StrIndexOf:
   15.18        case Op_AryEq:
   15.19          set_shared(n); // Force result into register (it will be anyways)
   15.20          break;
    16.1 --- a/src/share/vm/opto/memnode.cpp	Thu Apr 02 05:22:02 2009 -0700
    16.2 +++ b/src/share/vm/opto/memnode.cpp	Thu Apr 02 10:49:41 2009 -0700
    16.3 @@ -2481,6 +2481,31 @@
    16.4    return remove_dead_region(phase, can_reshape) ? this : NULL;
    16.5  }
    16.6  
    16.7 +// Do we match on this edge? Only inputs 5 and 6 are matched; control and memory edges are not
    16.8 +uint StrEqualsNode::match_edge(uint idx) const {
    16.9 +  return idx == 5 || idx == 6;  // presumably s1 and s2, the last two constructor inputs -- confirm against Node input numbering
   16.10 +}
   16.11 +
   16.12 +//------------------------------Ideal------------------------------------------
   16.13 +// Return a node which is more "ideal" than the current node.  Strip out
   16.14 +// control copies
   16.15 +Node *StrEqualsNode::Ideal(PhaseGVN *phase, bool can_reshape){
   16.16 +  return remove_dead_region(phase, can_reshape) ? this : NULL;  // 'this' when remove_dead_region() returns true, NULL otherwise
   16.17 +}
   16.18 +
   16.19 +//=============================================================================
   16.20 +// Do we match on this edge? Only inputs 5 and 6 are matched; control and memory edges are not
   16.21 +uint StrIndexOfNode::match_edge(uint idx) const {
   16.22 +  return idx == 5 || idx == 6;  // presumably s1 and s2, the last two constructor inputs -- confirm against Node input numbering
   16.23 +}
   16.24 +
   16.25 +//------------------------------Ideal------------------------------------------
   16.26 +// Return a node which is more "ideal" than the current node.  Strip out
   16.27 +// control copies
   16.28 +Node *StrIndexOfNode::Ideal(PhaseGVN *phase, bool can_reshape){
   16.29 +  return remove_dead_region(phase, can_reshape) ? this : NULL;  // 'this' when remove_dead_region() returns true, NULL otherwise
   16.30 +}
   16.31 +
   16.32  //------------------------------Ideal------------------------------------------
   16.33  // Return a node which is more "ideal" than the current node.  Strip out
   16.34  // control copies
   16.35 @@ -2488,7 +2513,6 @@
   16.36    return remove_dead_region(phase, can_reshape) ? this : NULL;
   16.37  }
   16.38  
   16.39 -
   16.40  //=============================================================================
   16.41  MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
   16.42    : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
    17.1 --- a/src/share/vm/opto/memnode.hpp	Thu Apr 02 05:22:02 2009 -0700
    17.2 +++ b/src/share/vm/opto/memnode.hpp	Thu Apr 02 10:49:41 2009 -0700
    17.3 @@ -765,6 +765,54 @@
    17.4    virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
    17.5  };
    17.6  
    17.7 +//------------------------------StrEquals-------------------------------------
    17.8 +class StrEqualsNode: public Node {  // ideal node created by LibraryCallKit::inline_string_equals
    17.9 +public:
   17.10 +  StrEqualsNode(Node *control,
   17.11 +                Node* char_array_mem,  // caller passes memory(TypeAryPtr::CHARS)
   17.12 +                Node* value_mem,       // caller passes the memory state for the String value field offset
   17.13 +                Node* count_mem,       // caller passes the memory state for the String count field offset
   17.14 +                Node* offset_mem,      // caller passes the memory state for the String offset field offset
   17.15 +                Node* s1, Node* s2): Node(control,  // s1 = receiver, s2 = argument at the call site
   17.16 +                                          char_array_mem,
   17.17 +                                          value_mem,
   17.18 +                                          count_mem,
   17.19 +                                          offset_mem,
   17.20 +                                          s1, s2) {};
   17.21 +  virtual int Opcode() const;
   17.22 +  virtual bool depends_only_on_test() const { return false; }
   17.23 +  virtual const Type* bottom_type() const { return TypeInt::BOOL; }  // result is a boolean int
   17.24 +  // a StrEqualsNode (conservatively) aliases with everything:
   17.25 +  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
   17.26 +  virtual uint match_edge(uint idx) const;  // defined in memnode.cpp: true only for idx 5 and 6
   17.27 +  virtual uint ideal_reg() const { return Op_RegI; }
   17.28 +  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   17.29 +};
   17.30 +
   17.31 +//------------------------------StrIndexOf-------------------------------------
   17.32 +class StrIndexOfNode: public Node {  // ideal node created by LibraryCallKit::inline_string_indexOf on its UseSSE42Intrinsics path
   17.33 +public:
   17.34 +  StrIndexOfNode(Node *control,
   17.35 +                 Node* char_array_mem,  // caller passes memory(TypeAryPtr::CHARS)
   17.36 +                 Node* value_mem,       // caller passes the memory state for the String value field offset
   17.37 +                 Node* count_mem,       // caller passes the memory state for the String count field offset
   17.38 +                 Node* offset_mem,      // caller passes the memory state for the String offset field offset
   17.39 +                 Node* s1, Node* s2): Node(control,  // s1 = receiver, s2 = argument at the call site
   17.40 +                                           char_array_mem,
   17.41 +                                           value_mem,
   17.42 +                                           count_mem,
   17.43 +                                           offset_mem,
   17.44 +                                           s1, s2) {};
   17.45 +  virtual int Opcode() const;
   17.46 +  virtual bool depends_only_on_test() const { return false; }
   17.47 +  virtual const Type* bottom_type() const { return TypeInt::INT; }  // result is an int
   17.48 +  // a StrIndexOfNode (conservatively) aliases with everything:
   17.49 +  virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
   17.50 +  virtual uint match_edge(uint idx) const;  // defined in memnode.cpp: true only for idx 5 and 6
   17.51 +  virtual uint ideal_reg() const { return Op_RegI; }
   17.52 +  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   17.53 +};
   17.54 +
   17.55  //------------------------------AryEq---------------------------------------
   17.56  class AryEqNode: public Node {
   17.57  public:
    18.1 --- a/src/share/vm/opto/parse1.cpp	Thu Apr 02 05:22:02 2009 -0700
    18.2 +++ b/src/share/vm/opto/parse1.cpp	Thu Apr 02 10:49:41 2009 -0700
    18.3 @@ -95,7 +95,7 @@
    18.4    switch( bt ) {                // Signature is flattened
    18.5    case T_INT:     l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
    18.6    case T_FLOAT:   l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
    18.7 -  case T_ADDRESS:
    18.8 +  case T_ADDRESS: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM  ); break;
    18.9    case T_OBJECT:  l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break;
   18.10    case T_LONG:
   18.11    case T_DOUBLE: {
    19.1 --- a/src/share/vm/runtime/arguments.cpp	Thu Apr 02 05:22:02 2009 -0700
    19.2 +++ b/src/share/vm/runtime/arguments.cpp	Thu Apr 02 10:49:41 2009 -0700
    19.3 @@ -1366,9 +1366,6 @@
    19.4    if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
    19.5      FLAG_SET_DEFAULT(DoEscapeAnalysis, true);
    19.6    }
    19.7 -  if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) {
    19.8 -    FLAG_SET_DEFAULT(SpecialArraysEquals, true);
    19.9 -  }
   19.10    if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
   19.11      FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
   19.12    }
    20.1 --- a/src/share/vm/runtime/globals.hpp	Thu Apr 02 05:22:02 2009 -0700
    20.2 +++ b/src/share/vm/runtime/globals.hpp	Thu Apr 02 10:49:41 2009 -0700
    20.3 @@ -491,9 +491,15 @@
    20.4    develop(bool, SpecialStringIndexOf, true,                                 \
    20.5            "special version of string indexOf")                              \
    20.6                                                                              \
    20.7 -  product(bool, SpecialArraysEquals, false,                                 \
    20.8 +  develop(bool, SpecialStringEquals, true,                                  \
    20.9 +          "special version of string equals")                               \
   20.10 +                                                                            \
   20.11 +  develop(bool, SpecialArraysEquals, true,                                  \
   20.12            "special version of Arrays.equals(char[],char[])")                \
   20.13                                                                              \
   20.14 +  product(bool, UseSSE42Intrinsics, false,                                  \
   20.15 +          "SSE4.2 versions of intrinsics")                                  \
   20.16 +                                                                            \
   20.17    develop(bool, TraceCallFixup, false,                                      \
   20.18            "traces all call fixups")                                         \
   20.19                                                                              \
   20.20 @@ -1316,10 +1322,10 @@
   20.21                                                                              \
   20.22    product(intx, ParGCArrayScanChunk, 50,                                    \
   20.23            "Scan a subset and push remainder, if array is bigger than this") \
   20.24 +                                                                            \
   20.25    product(bool, ParGCTrimOverflow, true,                                    \
   20.26            "Eagerly trim the overflow lists (useful for UseCompressedOops")  \
   20.27                                                                              \
   20.28 -                                                                            \
   20.29    notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
   20.30            "Whether we should simulate work queue overflow in ParNew")       \
   20.31                                                                              \

mercurial