6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()

Wed, 06 May 2009 00:27:52 -0700

author
twisti
date
Wed, 06 May 2009 00:27:52 -0700
changeset 1210
93c14e5562c4
parent 1190
36ee9b69616e
child 1211
e85af0c0c94b

6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
Summary: These methods can be intrinsified by using bit scan, bit test, and population count instructions.
Reviewed-by: kvn, never

src/cpu/sparc/vm/sparc.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_64.ad file | annotate | diff | comparison | revisions
src/share/vm/classfile/vmSymbols.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/classes.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/connode.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/connode.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/matcher.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
test/compiler/6823354/Test6823354.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/sparc.ad	Tue May 05 11:02:10 2009 -0700
     1.2 +++ b/src/cpu/sparc/vm/sparc.ad	Wed May 06 00:27:52 2009 -0700
     1.3 @@ -1712,6 +1712,23 @@
     1.4    return as_DoubleFloatRegister(register_encoding);
     1.5  }
     1.6  
     1.7 +const bool Matcher::match_rule_supported(int opcode) {
     1.8 +  if (!has_match_rule(opcode))
     1.9 +    return false;
    1.10 +
    1.11 +  switch (opcode) {
    1.12 +  case Op_CountLeadingZerosI:
    1.13 +  case Op_CountLeadingZerosL:
    1.14 +  case Op_CountTrailingZerosI:
    1.15 +  case Op_CountTrailingZerosL:
    1.16 +    if (!UsePopCountInstruction)
    1.17 +      return false;
    1.18 +    break;
    1.19 +  }
    1.20 +
    1.21 +  return true;  // Per default match rules are supported.
    1.22 +}
    1.23 +
    1.24  int Matcher::regnum_to_fpu_offset(int regnum) {
    1.25    return regnum - 32; // The FP registers are in the second chunk
    1.26  }
    1.27 @@ -9188,6 +9205,145 @@
    1.28    ins_pipe(long_memory_op);
    1.29  %}
    1.30  
    1.31 +
    1.32 +//---------- Zeros Count Instructions ------------------------------------------
    1.33 +
    1.34 +instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
    1.35 +  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
    1.36 +  match(Set dst (CountLeadingZerosI src));
    1.37 +  effect(TEMP dst, TEMP tmp, KILL cr);
    1.38 +
    1.39 +  // nlz(int): x = zext32(x); x |= (x >> 1); x |= (x >> 2); x |= (x >> 4);
    1.40 +  // x |= (x >> 8); x |= (x >> 16); return (WORDBITS - popc(x));
    1.41 +  // POPC counts the whole 64-bit register, so the upper word of $src is
    1.42 +  // cleared first (countTrailingZerosI does the same with SRL ..,R_G0,..).
    1.43 +  format %{ "SRL     $src,1,$tmp\t! count leading zeros (int)\n\t"
    1.44 +            "SRL     $src,0,$dst\t! 32-bit zero extend\n\t"
    1.45 +            "OR      $dst,$tmp,$dst\n\t"
    1.46 +            "SRL     $dst,2,$tmp\n\t"
    1.47 +            "OR      $dst,$tmp,$dst\n\t"
    1.48 +            "SRL     $dst,4,$tmp\n\t"
    1.49 +            "OR      $dst,$tmp,$dst\n\t"
    1.50 +            "SRL     $dst,8,$tmp\n\t"
    1.51 +            "OR      $dst,$tmp,$dst\n\t"
    1.52 +            "SRL     $dst,16,$tmp\n\t"
    1.53 +            "OR      $dst,$tmp,$dst\n\t"
    1.54 +            "POPC    $dst,$dst\n\t"
    1.55 +            "MOV     32,$tmp\n\t"
    1.56 +            "SUB     $tmp,$dst,$dst" %}
    1.57 +  ins_encode %{
    1.58 +    Register Rdst = $dst$$Register;
    1.59 +    Register Rsrc = $src$$Register;
    1.60 +    Register Rtmp = $tmp$$Register;
    1.61 +    __ srl(Rsrc, 1, Rtmp);       // tmp = zext32(x) >> 1
    1.62 +    __ srl(Rsrc, 0, Rdst);       // dst = zext32(x); drops stale upper bits
    1.63 +    __ or3(Rdst, Rtmp, Rdst);    // x |= (x >> 1)
    1.64 +    __ srl(Rdst, 2, Rtmp);
    1.65 +    __ or3(Rdst, Rtmp, Rdst);
    1.66 +    __ srl(Rdst, 4, Rtmp);
    1.67 +    __ or3(Rdst, Rtmp, Rdst);
    1.68 +    __ srl(Rdst, 8, Rtmp);
    1.69 +    __ or3(Rdst, Rtmp, Rdst);
    1.70 +    __ srl(Rdst, 16, Rtmp);
    1.71 +    __ or3(Rdst, Rtmp, Rdst);
    1.72 +    __ popc(Rdst, Rdst);         // number of bits at or below the top set bit
    1.73 +    __ mov(BitsPerInt, Rtmp);
    1.74 +    __ sub(Rtmp, Rdst, Rdst);    // 32 - popc(x); yields 32 for x == 0
    1.75 +  %}
    1.76 +  ins_pipe(ialu_reg);
    1.77 +%}
    1.78 +
    1.79 +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
    1.80 +  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
    1.81 +  match(Set dst (CountLeadingZerosL src));
    1.82 +  effect(TEMP dst, TEMP tmp, KILL cr);
    1.83 +
    1.84 +  // nlz(long): smear the highest set bit into every lower bit position,
    1.85 +  // then subtract the number of set bits from the word size:
    1.86 +  //   x |= (x >> 1);  x |= (x >> 2);  x |= (x >> 4);
    1.87 +  //   x |= (x >> 8);  x |= (x >> 16); x |= (x >> 32);
    1.88 +  //   return (WORDBITS - popc(x));
    1.89 +  // Each shift goes into $tmp and is then ORed into $dst; $src is only
    1.90 +  // read by the first two instructions.  The result is 64 for x == 0.
    1.91 +  format %{ "SRLX    $src,1,$tmp\t! count leading zeros (long)\n\t"
    1.92 +            "OR      $src,$tmp,$dst\n\t"
    1.93 +            "SRLX    $dst,2,$tmp\n\t"
    1.94 +            "OR      $dst,$tmp,$dst\n\t"
    1.95 +            "SRLX    $dst,4,$tmp\n\t"
    1.96 +            "OR      $dst,$tmp,$dst\n\t"
    1.97 +            "SRLX    $dst,8,$tmp\n\t"
    1.98 +            "OR      $dst,$tmp,$dst\n\t"
    1.99 +            "SRLX    $dst,16,$tmp\n\t"
   1.100 +            "OR      $dst,$tmp,$dst\n\t"
   1.101 +            "SRLX    $dst,32,$tmp\n\t"
   1.102 +            "OR      $dst,$tmp,$dst\n\t"
   1.103 +            "POPC    $dst,$dst\n\t"
   1.104 +            "MOV     64,$tmp\n\t"
   1.105 +            "SUB     $tmp,$dst,$dst" %}
   1.106 +  ins_encode %{
   1.107 +    Register Rdst = $dst$$Register;
   1.108 +    Register Rsrc = $src$$Register;
   1.109 +    Register Rtmp = $tmp$$Register;
   1.110 +    __ srlx(Rsrc, 1, Rtmp);      // tmp = x >> 1
   1.111 +    __ or3(Rsrc, Rtmp, Rdst);    // dst = x | (x >> 1)
   1.112 +    __ srlx(Rdst, 2, Rtmp);
   1.113 +    __ or3(Rdst, Rtmp, Rdst);
   1.114 +    __ srlx(Rdst, 4, Rtmp);
   1.115 +    __ or3(Rdst, Rtmp, Rdst);
   1.116 +    __ srlx(Rdst, 8, Rtmp);
   1.117 +    __ or3(Rdst, Rtmp, Rdst);
   1.118 +    __ srlx(Rdst, 16, Rtmp);
   1.119 +    __ or3(Rdst, Rtmp, Rdst);
   1.120 +    __ srlx(Rdst, 32, Rtmp);
   1.121 +    __ or3(Rdst, Rtmp, Rdst);
   1.122 +    __ popc(Rdst, Rdst);         // number of bits at or below the top set bit
   1.123 +    __ mov(BitsPerLong, Rtmp);
   1.124 +    __ sub(Rtmp, Rdst, Rdst);    // 64 - popc(x)
   1.125 +  %}
   1.126 +  ins_pipe(ialu_reg);
   1.127 +%}
   1.128 +
   1.129 +instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
   1.130 +  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   1.131 +  match(Set dst (CountTrailingZerosI src));
   1.132 +  effect(TEMP dst, KILL cr);
   1.133 +
   1.134 +  // return popc(~x & (x - 1));
   1.135 +  format %{ "SUB     $src,1,$dst\t! count trailing zeros (int)\n\t"
   1.136 +            "ANDN    $dst,$src,$dst\n\t"
   1.137 +            "SRL     $dst,R_G0,$dst\n\t"
   1.138 +            "POPC    $dst,$dst" %}
   1.139 +  ins_encode %{
   1.140 +    Register Rdst = $dst$$Register;
   1.141 +    Register Rsrc = $src$$Register;
   1.142 +    __ sub(Rsrc, 1, Rdst);
   1.143 +    __ andn(Rdst, Rsrc, Rdst);
   1.144 +    __ srl(Rdst, G0, Rdst);
   1.145 +    __ popc(Rdst, Rdst);
   1.146 +  %}
   1.147 +  ins_pipe(ialu_reg);
   1.148 +%}
   1.149 +
   1.150 +instruct countTrailingZerosL(iRegI dst, iRegL src, flagsReg cr) %{
   1.151 +  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
   1.152 +  match(Set dst (CountTrailingZerosL src));
   1.153 +  effect(TEMP dst, KILL cr);
   1.154 +
   1.155 +  // return popc(~x & (x - 1));
   1.156 +  format %{ "SUB     $src,1,$dst\t! count trailing zeros (long)\n\t"
   1.157 +            "ANDN    $dst,$src,$dst\n\t"
   1.158 +            "POPC    $dst,$dst" %}
   1.159 +  ins_encode %{
   1.160 +    Register Rdst = $dst$$Register;
   1.161 +    Register Rsrc = $src$$Register;
   1.162 +    __ sub(Rsrc, 1, Rdst);
   1.163 +    __ andn(Rdst, Rsrc, Rdst);
   1.164 +    __ popc(Rdst, Rdst);
   1.165 +  %}
   1.166 +  ins_pipe(ialu_reg);
   1.167 +%}
   1.168 +
   1.169 +
   1.170  //---------- Population Count Instructions -------------------------------------
   1.171  
   1.172  instruct popCountI(iRegI dst, iRegI src) %{
     2.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Tue May 05 11:02:10 2009 -0700
     2.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed May 06 00:27:52 2009 -0700
     2.3 @@ -952,6 +952,21 @@
     2.4    emit_operand(dst, src);
     2.5  }
     2.6  
     2.7 +void Assembler::bsfl(Register dst, Register src) {
     2.8 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
     2.9 +  emit_byte(0x0F);
    2.10 +  emit_byte(0xBC);
    2.11 +  emit_byte(0xC0 | encode);
    2.12 +}
    2.13 +
    2.14 +void Assembler::bsrl(Register dst, Register src) {
    2.15 +  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
    2.16 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
    2.17 +  emit_byte(0x0F);
    2.18 +  emit_byte(0xBD);
    2.19 +  emit_byte(0xC0 | encode);
    2.20 +}
    2.21 +
    2.22  void Assembler::bswapl(Register reg) { // bswap
    2.23    int encode = prefix_and_encode(reg->encoding());
    2.24    emit_byte(0x0F);
    2.25 @@ -1438,6 +1453,15 @@
    2.26    }
    2.27  }
    2.28  
    2.29 +void Assembler::lzcntl(Register dst, Register src) {
    2.30 +  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
    2.31 +  emit_byte(0xF3);
    2.32 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
    2.33 +  emit_byte(0x0F);
    2.34 +  emit_byte(0xBD);
    2.35 +  emit_byte(0xC0 | encode);
    2.36 +}
    2.37 +
    2.38  // Emit mfence instruction
    2.39  void Assembler::mfence() {
    2.40    NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
    2.41 @@ -3688,6 +3712,21 @@
    2.42    emit_arith(0x23, 0xC0, dst, src);
    2.43  }
    2.44  
    2.45 +void Assembler::bsfq(Register dst, Register src) {
    2.46 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
    2.47 +  emit_byte(0x0F);
    2.48 +  emit_byte(0xBC);
    2.49 +  emit_byte(0xC0 | encode);
    2.50 +}
    2.51 +
    2.52 +void Assembler::bsrq(Register dst, Register src) {
    2.53 +  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
    2.54 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
    2.55 +  emit_byte(0x0F);
    2.56 +  emit_byte(0xBD);
    2.57 +  emit_byte(0xC0 | encode);
    2.58 +}
    2.59 +
    2.60  void Assembler::bswapq(Register reg) {
    2.61    int encode = prefixq_and_encode(reg->encoding());
    2.62    emit_byte(0x0F);
    2.63 @@ -3941,6 +3980,15 @@
    2.64    emit_data((int)imm32, rspec, narrow_oop_operand);
    2.65  }
    2.66  
    2.67 +void Assembler::lzcntq(Register dst, Register src) {
    2.68 +  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
    2.69 +  emit_byte(0xF3);
    2.70 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
    2.71 +  emit_byte(0x0F);
    2.72 +  emit_byte(0xBD);
    2.73 +  emit_byte(0xC0 | encode);
    2.74 +}
    2.75 +
    2.76  void Assembler::movdq(XMMRegister dst, Register src) {
    2.77    // table D-1 says MMX/SSE2
    2.78    NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
     3.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Tue May 05 11:02:10 2009 -0700
     3.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed May 06 00:27:52 2009 -0700
     3.3 @@ -757,6 +757,14 @@
     3.4    void andpd(XMMRegister dst, Address src);
     3.5    void andpd(XMMRegister dst, XMMRegister src);
     3.6  
     3.7 +  void bsfl(Register dst, Register src);
     3.8 +  void bsrl(Register dst, Register src);
     3.9 +
    3.10 +#ifdef _LP64
    3.11 +  void bsfq(Register dst, Register src);
    3.12 +  void bsrq(Register dst, Register src);
    3.13 +#endif
    3.14 +
    3.15    void bswapl(Register reg);
    3.16  
    3.17    void bswapq(Register reg);
    3.18 @@ -1061,6 +1069,12 @@
    3.19  
    3.20    void lock();
    3.21  
    3.22 +  void lzcntl(Register dst, Register src);
    3.23 +
    3.24 +#ifdef _LP64
    3.25 +  void lzcntq(Register dst, Register src);
    3.26 +#endif
    3.27 +
    3.28    enum Membar_mask_bits {
    3.29      StoreStore = 1 << 3,
    3.30      LoadStore  = 1 << 2,
     4.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue May 05 11:02:10 2009 -0700
     4.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed May 06 00:27:52 2009 -0700
     4.3 @@ -284,7 +284,7 @@
     4.4    }
     4.5  
     4.6    char buf[256];
     4.7 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.8 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.9                 cores_per_cpu(), threads_per_core(),
    4.10                 cpu_family(), _model, _stepping,
    4.11                 (supports_cmov() ? ", cmov" : ""),
    4.12 @@ -301,6 +301,7 @@
    4.13                 (supports_mmx_ext() ? ", mmxext" : ""),
    4.14                 (supports_3dnow()   ? ", 3dnow"  : ""),
    4.15                 (supports_3dnow2()  ? ", 3dnowext" : ""),
    4.16 +               (supports_lzcnt()   ? ", lzcnt": ""),
    4.17                 (supports_sse4a()   ? ", sse4a": ""),
    4.18                 (supports_ht() ? ", ht": ""));
    4.19    _features_str = strdup(buf);
    4.20 @@ -364,6 +365,13 @@
    4.21          UseXmmI2D = false;
    4.22        }
    4.23      }
    4.24 +
    4.25 +    // Use the count leading zeros instruction (LZCNT) if available.
    4.26 +    if (supports_lzcnt()) {
    4.27 +      if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
    4.28 +        UseCountLeadingZerosInstruction = true;
    4.29 +      }
    4.30 +    }
    4.31    }
    4.32  
    4.33    if( is_intel() ) { // Intel cpus specific settings
     5.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Tue May 05 11:02:10 2009 -0700
     5.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Wed May 06 00:27:52 2009 -0700
     5.3 @@ -120,7 +120,7 @@
     5.4        uint32_t LahfSahf     : 1,
     5.5                 CmpLegacy    : 1,
     5.6                              : 4,
     5.7 -               abm          : 1,
     5.8 +               lzcnt        : 1,
     5.9                 sse4a        : 1,
    5.10                 misalignsse  : 1,
    5.11                 prefetchw    : 1,
    5.12 @@ -182,7 +182,8 @@
    5.13       CPU_SSE4A  = (1 << 10),
    5.14       CPU_SSE4_1 = (1 << 11),
    5.15       CPU_SSE4_2 = (1 << 12),
    5.16 -     CPU_POPCNT = (1 << 13)
    5.17 +     CPU_POPCNT = (1 << 13),
    5.18 +     CPU_LZCNT  = (1 << 14)
    5.19     } cpuFeatureFlags;
    5.20  
    5.21    // cpuid information block.  All info derived from executing cpuid with
    5.22 @@ -277,8 +278,6 @@
    5.23      if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
    5.24          _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
    5.25        result |= CPU_MMX;
    5.26 -    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
    5.27 -      result |= CPU_3DNOW;
    5.28      if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
    5.29        result |= CPU_SSE;
    5.30      if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
    5.31 @@ -287,14 +286,23 @@
    5.32        result |= CPU_SSE3;
    5.33      if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
    5.34        result |= CPU_SSSE3;
    5.35 -    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
    5.36 -      result |= CPU_SSE4A;
    5.37      if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
    5.38        result |= CPU_SSE4_1;
    5.39      if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
    5.40        result |= CPU_SSE4_2;
    5.41      if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
    5.42        result |= CPU_POPCNT;
    5.43 +
    5.44 +    // AMD features.
    5.45 +    if (is_amd()) {
    5.46 +      if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
    5.47 +        result |= CPU_3DNOW;
    5.48 +      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
    5.49 +        result |= CPU_LZCNT;
    5.50 +      if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
    5.51 +        result |= CPU_SSE4A;
    5.52 +    }
    5.53 +
    5.54      return result;
    5.55    }
    5.56  
    5.57 @@ -391,6 +399,7 @@
    5.58    static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
    5.59    static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
    5.60    static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
    5.61 +  static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
    5.62    static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
    5.63  
    5.64    static bool supports_compare_and_exchange() { return true; }
     6.1 --- a/src/cpu/x86/vm/x86_32.ad	Tue May 05 11:02:10 2009 -0700
     6.2 +++ b/src/cpu/x86/vm/x86_32.ad	Wed May 06 00:27:52 2009 -0700
     6.3 @@ -1281,6 +1281,13 @@
     6.4  }
     6.5  
     6.6  
     6.7 +const bool Matcher::match_rule_supported(int opcode) {
     6.8 +  if (!has_match_rule(opcode))
     6.9 +    return false;
    6.10 +
    6.11 +  return true;  // Per default match rules are supported.
    6.12 +}
    6.13 +
    6.14  int Matcher::regnum_to_fpu_offset(int regnum) {
    6.15    return regnum - 32; // The FP registers are in the second chunk
    6.16  }
    6.17 @@ -6644,6 +6651,153 @@
    6.18  %}
    6.19  
    6.20  
    6.21 +//---------- Zeros Count Instructions ------------------------------------------
    6.22 +
    6.23 +instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
    6.24 +  predicate(UseCountLeadingZerosInstruction);
    6.25 +  match(Set dst (CountLeadingZerosI src));
    6.26 +  effect(KILL cr);
    6.27 +
    6.28 +  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
    6.29 +  ins_encode %{
    6.30 +    __ lzcntl($dst$$Register, $src$$Register);
    6.31 +  %}
    6.32 +  ins_pipe(ialu_reg);
    6.33 +%}
    6.34 +
    6.35 +instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
    6.36 +  predicate(!UseCountLeadingZerosInstruction);
    6.37 +  match(Set dst (CountLeadingZerosI src));
    6.38 +  effect(KILL cr);
    6.39 +
    6.40 +  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
    6.41 +            "JNZ    skip\n\t"
    6.42 +            "MOV    $dst, -1\n"
    6.43 +      "skip:\n\t"
    6.44 +            "NEG    $dst\n\t"
    6.45 +            "ADD    $dst, 31" %}
    6.46 +  ins_encode %{
    6.47 +    Register Rdst = $dst$$Register;
    6.48 +    Register Rsrc = $src$$Register;
    6.49 +    Label skip;
    6.50 +    __ bsrl(Rdst, Rsrc);
    6.51 +    __ jccb(Assembler::notZero, skip);
    6.52 +    __ movl(Rdst, -1);
    6.53 +    __ bind(skip);
    6.54 +    __ negl(Rdst);
    6.55 +    __ addl(Rdst, BitsPerInt - 1);
    6.56 +  %}
    6.57 +  ins_pipe(ialu_reg);
    6.58 +%}
    6.59 +
    6.60 +instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
    6.61 +  predicate(UseCountLeadingZerosInstruction);
    6.62 +  match(Set dst (CountLeadingZerosL src));
    6.63 +  effect(TEMP dst, KILL cr);
    6.64 +
    6.65 +  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
    6.66 +            "JNC    done\n\t"
    6.67 +            "LZCNT  $dst, $src.lo\n\t"
    6.68 +            "ADD    $dst, 32\n"
    6.69 +      "done:" %}
    6.70 +  ins_encode %{
    6.71 +    Register Rdst = $dst$$Register;
    6.72 +    Register Rsrc = $src$$Register;
    6.73 +    Label done;
    6.74 +    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    6.75 +    __ jccb(Assembler::carryClear, done);
    6.76 +    __ lzcntl(Rdst, Rsrc);
    6.77 +    __ addl(Rdst, BitsPerInt);
    6.78 +    __ bind(done);
    6.79 +  %}
    6.80 +  ins_pipe(ialu_reg);
    6.81 +%}
    6.82 +
    6.83 +instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
    6.84 +  predicate(!UseCountLeadingZerosInstruction);
    6.85 +  match(Set dst (CountLeadingZerosL src));
    6.86 +  effect(TEMP dst, KILL cr);
    6.87 +
    6.88 +  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
    6.89 +            "JZ     msw_is_zero\n\t"
    6.90 +            "ADD    $dst, 32\n\t"
    6.91 +            "JMP    not_zero\n"
    6.92 +      "msw_is_zero:\n\t"
    6.93 +            "BSR    $dst, $src.lo\n\t"
    6.94 +            "JNZ    not_zero\n\t"
    6.95 +            "MOV    $dst, -1\n"
    6.96 +      "not_zero:\n\t"
    6.97 +            "NEG    $dst\n\t"
    6.98 +            "ADD    $dst, 63\n" %}
    6.99 + ins_encode %{
   6.100 +    Register Rdst = $dst$$Register;
   6.101 +    Register Rsrc = $src$$Register;
   6.102 +    Label msw_is_zero;
   6.103 +    Label not_zero;
   6.104 +    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
   6.105 +    __ jccb(Assembler::zero, msw_is_zero);
   6.106 +    __ addl(Rdst, BitsPerInt);
   6.107 +    __ jmpb(not_zero);
   6.108 +    __ bind(msw_is_zero);
   6.109 +    __ bsrl(Rdst, Rsrc);
   6.110 +    __ jccb(Assembler::notZero, not_zero);
   6.111 +    __ movl(Rdst, -1);
   6.112 +    __ bind(not_zero);
   6.113 +    __ negl(Rdst);
   6.114 +    __ addl(Rdst, BitsPerLong - 1);
   6.115 +  %}
   6.116 +  ins_pipe(ialu_reg);
   6.117 +%}
   6.118 +
   6.119 +instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
   6.120 +  match(Set dst (CountTrailingZerosI src));
   6.121 +  effect(KILL cr);
   6.122 +
   6.123 +  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
   6.124 +            "JNZ    done\n\t"
   6.125 +            "MOV    $dst, 32\n"
   6.126 +      "done:" %}
   6.127 +  ins_encode %{
   6.128 +    Register Rdst = $dst$$Register;
   6.129 +    Label done;
   6.130 +    __ bsfl(Rdst, $src$$Register);
   6.131 +    __ jccb(Assembler::notZero, done);
   6.132 +    __ movl(Rdst, BitsPerInt);
   6.133 +    __ bind(done);
   6.134 +  %}
   6.135 +  ins_pipe(ialu_reg);
   6.136 +%}
   6.137 +
   6.138 +instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
   6.139 +  match(Set dst (CountTrailingZerosL src));
   6.140 +  effect(TEMP dst, KILL cr);
   6.141 +
   6.142 +  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
   6.143 +            "JNZ    done\n\t"
   6.144 +            "BSF    $dst, $src.hi\n\t"
   6.145 +            "JNZ    msw_not_zero\n\t"
   6.146 +            "MOV    $dst, 32\n"
   6.147 +      "msw_not_zero:\n\t"
   6.148 +            "ADD    $dst, 32\n"
   6.149 +      "done:" %}
   6.150 +  ins_encode %{
   6.151 +    Register Rdst = $dst$$Register;
   6.152 +    Register Rsrc = $src$$Register;
   6.153 +    Label msw_not_zero;
   6.154 +    Label done;
   6.155 +    __ bsfl(Rdst, Rsrc);
   6.156 +    __ jccb(Assembler::notZero, done);
   6.157 +    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
   6.158 +    __ jccb(Assembler::notZero, msw_not_zero);
   6.159 +    __ movl(Rdst, BitsPerInt);
   6.160 +    __ bind(msw_not_zero);
   6.161 +    __ addl(Rdst, BitsPerInt);
   6.162 +    __ bind(done);
   6.163 +  %}
   6.164 +  ins_pipe(ialu_reg);
   6.165 +%}
   6.166 +
   6.167 +
   6.168  //---------- Population Count Instructions -------------------------------------
   6.169  
   6.170  instruct popCountI(eRegI dst, eRegI src) %{
     7.1 --- a/src/cpu/x86/vm/x86_64.ad	Tue May 05 11:02:10 2009 -0700
     7.2 +++ b/src/cpu/x86/vm/x86_64.ad	Wed May 06 00:27:52 2009 -0700
     7.3 @@ -1980,6 +1980,13 @@
     7.4  }
     7.5  
     7.6  
     7.7 +const bool Matcher::match_rule_supported(int opcode) {
     7.8 +  if (!has_match_rule(opcode))
     7.9 +    return false;
    7.10 +
    7.11 +  return true;  // Per default match rules are supported.
    7.12 +}
    7.13 +
    7.14  int Matcher::regnum_to_fpu_offset(int regnum)
    7.15  {
    7.16    return regnum - 32; // The FP registers are in the second chunk
    7.17 @@ -7656,6 +7663,121 @@
    7.18  %}
    7.19  
    7.20  
    7.21 +//---------- Zeros Count Instructions ------------------------------------------
    7.22 +
    7.23 +instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
    7.24 +  predicate(UseCountLeadingZerosInstruction);
    7.25 +  match(Set dst (CountLeadingZerosI src));
    7.26 +  effect(KILL cr);
    7.27 +
    7.28 +  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
    7.29 +  ins_encode %{
    7.30 +    __ lzcntl($dst$$Register, $src$$Register);
    7.31 +  %}
    7.32 +  ins_pipe(ialu_reg);
    7.33 +%}
    7.34 +
    7.35 +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
    7.36 +  predicate(!UseCountLeadingZerosInstruction);
    7.37 +  match(Set dst (CountLeadingZerosI src));
    7.38 +  effect(KILL cr);
    7.39 +
    7.40 +  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
    7.41 +            "jnz     skip\n\t"
    7.42 +            "movl    $dst, -1\n"
    7.43 +      "skip:\n\t"
    7.44 +            "negl    $dst\n\t"
    7.45 +            "addl    $dst, 31" %}
    7.46 +  ins_encode %{
    7.47 +    Register Rdst = $dst$$Register;
    7.48 +    Register Rsrc = $src$$Register;
    7.49 +    Label skip;
    7.50 +    __ bsrl(Rdst, Rsrc);
    7.51 +    __ jccb(Assembler::notZero, skip);
    7.52 +    __ movl(Rdst, -1);
    7.53 +    __ bind(skip);
    7.54 +    __ negl(Rdst);
    7.55 +    __ addl(Rdst, BitsPerInt - 1);
    7.56 +  %}
    7.57 +  ins_pipe(ialu_reg);
    7.58 +%}
    7.59 +
    7.60 +instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
    7.61 +  predicate(UseCountLeadingZerosInstruction);
    7.62 +  match(Set dst (CountLeadingZerosL src));
    7.63 +  effect(KILL cr);
    7.64 +
    7.65 +  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
    7.66 +  ins_encode %{
    7.67 +    __ lzcntq($dst$$Register, $src$$Register);
    7.68 +  %}
    7.69 +  ins_pipe(ialu_reg);
    7.70 +%}
    7.71 +
    7.72 +instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
    7.73 +  predicate(!UseCountLeadingZerosInstruction);
    7.74 +  match(Set dst (CountLeadingZerosL src));
    7.75 +  effect(KILL cr);
    7.76 +
    7.77 +  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
    7.78 +            "jnz     skip\n\t"
    7.79 +            "movl    $dst, -1\n"
    7.80 +      "skip:\n\t"
    7.81 +            "negl    $dst\n\t"
    7.82 +            "addl    $dst, 63" %}
    7.83 +  ins_encode %{
    7.84 +    Register Rdst = $dst$$Register;
    7.85 +    Register Rsrc = $src$$Register;
    7.86 +    Label skip;
    7.87 +    __ bsrq(Rdst, Rsrc);
    7.88 +    __ jccb(Assembler::notZero, skip);
    7.89 +    __ movl(Rdst, -1);
    7.90 +    __ bind(skip);
    7.91 +    __ negl(Rdst);
    7.92 +    __ addl(Rdst, BitsPerLong - 1);
    7.93 +  %}
    7.94 +  ins_pipe(ialu_reg);
    7.95 +%}
    7.96 +
    7.97 +instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
    7.98 +  match(Set dst (CountTrailingZerosI src));
    7.99 +  effect(KILL cr);
   7.100 +
   7.101 +  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
   7.102 +            "jnz     done\n\t"
   7.103 +            "movl    $dst, 32\n"
   7.104 +      "done:" %}
   7.105 +  ins_encode %{
   7.106 +    Register Rdst = $dst$$Register;
   7.107 +    Label done;
   7.108 +    __ bsfl(Rdst, $src$$Register);
   7.109 +    __ jccb(Assembler::notZero, done);
   7.110 +    __ movl(Rdst, BitsPerInt);
   7.111 +    __ bind(done);
   7.112 +  %}
   7.113 +  ins_pipe(ialu_reg);
   7.114 +%}
   7.115 +
   7.116 +instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
   7.117 +  match(Set dst (CountTrailingZerosL src));
   7.118 +  effect(KILL cr);
   7.119 +
   7.120 +  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
   7.121 +            "jnz     done\n\t"
   7.122 +            "movl    $dst, 64\n"
   7.123 +      "done:" %}
   7.124 +  ins_encode %{
   7.125 +    Register Rdst = $dst$$Register;
   7.126 +    Label done;
   7.127 +    __ bsfq(Rdst, $src$$Register);
   7.128 +    __ jccb(Assembler::notZero, done);
   7.129 +    __ movl(Rdst, BitsPerLong);
   7.130 +    __ bind(done);
   7.131 +  %}
   7.132 +  ins_pipe(ialu_reg);
   7.133 +%}
   7.134 +
   7.135 +
   7.136  //---------- Population Count Instructions -------------------------------------
   7.137  
   7.138  instruct popCountI(rRegI dst, rRegI src) %{
     8.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Tue May 05 11:02:10 2009 -0700
     8.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Wed May 06 00:27:52 2009 -0700
     8.3 @@ -313,6 +313,8 @@
     8.4    template(value_name,                                "value")                                    \
     8.5    template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
     8.6    template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
     8.7 +  template(numberOfLeadingZeros_name,                 "numberOfLeadingZeros")                     \
     8.8 +  template(numberOfTrailingZeros_name,                "numberOfTrailingZeros")                    \
     8.9    template(bitCount_name,                             "bitCount")                                 \
    8.10    template(profile_name,                              "profile")                                  \
    8.11    template(equals_name,                               "equals")                                   \
    8.12 @@ -559,6 +561,12 @@
    8.13    do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
    8.14     do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
    8.15                                                                                                                          \
    8.16 +  do_intrinsic(_numberOfLeadingZeros_i,   java_lang_Integer,      numberOfLeadingZeros_name,int_int_signature,   F_S)   \
    8.17 +  do_intrinsic(_numberOfLeadingZeros_l,   java_lang_Long,         numberOfLeadingZeros_name,long_int_signature,  F_S)   \
    8.18 +                                                                                                                        \
    8.19 +  do_intrinsic(_numberOfTrailingZeros_i,  java_lang_Integer,      numberOfTrailingZeros_name,int_int_signature,  F_S)   \
    8.20 +  do_intrinsic(_numberOfTrailingZeros_l,  java_lang_Long,         numberOfTrailingZeros_name,long_int_signature, F_S)   \
    8.21 +                                                                                                                        \
    8.22    do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
    8.23    do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
    8.24                                                                                                                          \
     9.1 --- a/src/share/vm/opto/classes.hpp	Tue May 05 11:02:10 2009 -0700
     9.2 +++ b/src/share/vm/opto/classes.hpp	Wed May 06 00:27:52 2009 -0700
     9.3 @@ -104,6 +104,10 @@
     9.4  macro(CosD)
     9.5  macro(CountedLoop)
     9.6  macro(CountedLoopEnd)
     9.7 +macro(CountLeadingZerosI)
     9.8 +macro(CountLeadingZerosL)
     9.9 +macro(CountTrailingZerosI)
    9.10 +macro(CountTrailingZerosL)
    9.11  macro(CreateEx)
    9.12  macro(DecodeN)
    9.13  macro(DivD)
    10.1 --- a/src/share/vm/opto/connode.cpp	Tue May 05 11:02:10 2009 -0700
    10.2 +++ b/src/share/vm/opto/connode.cpp	Wed May 06 00:27:52 2009 -0700
    10.3 @@ -1255,3 +1255,93 @@
    10.4    v.set_jdouble(td->getd());
    10.5    return TypeLong::make( v.get_jlong() );
    10.6  }
    10.7 +
    10.8 +//------------------------------Value------------------------------------------
    10.9 +const Type* CountLeadingZerosINode::Value(PhaseTransform* phase) const {
   10.10 +  const Type* t = phase->type(in(1));
   10.11 +  if (t == Type::TOP) return Type::TOP;
   10.12 +  const TypeInt* ti = t->isa_int();
   10.13 +  if (ti && ti->is_con()) {
   10.14 +    jint i = ti->get_con();
   10.15 +    // Hacker's Delight, Figure 5-6
   10.16 +    if (i == 0)
   10.17 +      return TypeInt::make(BitsPerInt);
   10.18 +    int n = 1;
   10.19 +    unsigned int x = i;
   10.20 +    if (x >> 16 == 0) { n += 16; x <<= 16; }
   10.21 +    if (x >> 24 == 0) { n +=  8; x <<=  8; }
   10.22 +    if (x >> 28 == 0) { n +=  4; x <<=  4; }
   10.23 +    if (x >> 30 == 0) { n +=  2; x <<=  2; }
   10.24 +    n -= x >> 31;
   10.25 +    return TypeInt::make(n);
   10.26 +  }
   10.27 +  return TypeInt::INT;
   10.28 +}
   10.29 +
   10.30 +//------------------------------Value------------------------------------------
   10.31 +const Type* CountLeadingZerosLNode::Value(PhaseTransform* phase) const {
   10.32 +  const Type* t = phase->type(in(1));
   10.33 +  if (t == Type::TOP) return Type::TOP;
   10.34 +  const TypeLong* tl = t->isa_long();
   10.35 +  if (tl && tl->is_con()) {
   10.36 +    jlong l = tl->get_con();
   10.37 +    // Hacker's Delight, Figure 5-6
   10.38 +    if (l == 0)
   10.39 +      return TypeInt::make(BitsPerLong);
   10.40 +    int n = 1;
   10.41 +    unsigned int x = (((julong) l) >> 32);
   10.42 +    if (x == 0) { n += 32; x = (int) l; }
   10.43 +    if (x >> 16 == 0) { n += 16; x <<= 16; }
   10.44 +    if (x >> 24 == 0) { n +=  8; x <<=  8; }
   10.45 +    if (x >> 28 == 0) { n +=  4; x <<=  4; }
   10.46 +    if (x >> 30 == 0) { n +=  2; x <<=  2; }
   10.47 +    n -= x >> 31;
   10.48 +    return TypeInt::make(n);
   10.49 +  }
   10.50 +  return TypeInt::INT;
   10.51 +}
   10.52 +
   10.53 +//------------------------------Value------------------------------------------
   10.54 +const Type* CountTrailingZerosINode::Value(PhaseTransform* phase) const {
   10.55 +  const Type* t = phase->type(in(1));
   10.56 +  if (t == Type::TOP) return Type::TOP;
   10.57 +  const TypeInt* ti = t->isa_int();
   10.58 +  if (ti && ti->is_con()) {
   10.59 +    jint i = ti->get_con();
   10.60 +    // Hacker's Delight, Figure 5-14
   10.61 +    int y;
   10.62 +    if (i == 0)
   10.63 +      return TypeInt::make(BitsPerInt);
   10.64 +    int n = 31;
   10.65 +    y = i << 16; if (y != 0) { n = n - 16; i = y; }
   10.66 +    y = i <<  8; if (y != 0) { n = n -  8; i = y; }
   10.67 +    y = i <<  4; if (y != 0) { n = n -  4; i = y; }
   10.68 +    y = i <<  2; if (y != 0) { n = n -  2; i = y; }
   10.69 +    y = i <<  1; if (y != 0) { n = n -  1; }
   10.70 +    return TypeInt::make(n);
   10.71 +  }
   10.72 +  return TypeInt::INT;
   10.73 +}
   10.74 +
   10.75 +//------------------------------Value------------------------------------------
   10.76 +const Type* CountTrailingZerosLNode::Value(PhaseTransform* phase) const {
   10.77 +  const Type* t = phase->type(in(1));
   10.78 +  if (t == Type::TOP) return Type::TOP;
   10.79 +  const TypeLong* tl = t->isa_long();
   10.80 +  if (tl && tl->is_con()) {
   10.81 +    jlong l = tl->get_con();
   10.82 +    // Hacker's Delight, Figure 5-14
   10.83 +    int x, y;
   10.84 +    if (l == 0)
   10.85 +      return TypeInt::make(BitsPerLong);
   10.86 +    int n = 63;
   10.87 +    y = (int) l; if (y != 0) { n = n - 32; x = y; } else x = (((julong) l) >> 32);
   10.88 +    y = x << 16; if (y != 0) { n = n - 16; x = y; }
   10.89 +    y = x <<  8; if (y != 0) { n = n -  8; x = y; }
   10.90 +    y = x <<  4; if (y != 0) { n = n -  4; x = y; }
   10.91 +    y = x <<  2; if (y != 0) { n = n -  2; x = y; }
   10.92 +    y = x <<  1; if (y != 0) { n = n -  1; }
   10.93 +    return TypeInt::make(n);
   10.94 +  }
   10.95 +  return TypeInt::INT;
   10.96 +}
    11.1 --- a/src/share/vm/opto/connode.hpp	Tue May 05 11:02:10 2009 -0700
    11.2 +++ b/src/share/vm/opto/connode.hpp	Wed May 06 00:27:52 2009 -0700
    11.3 @@ -636,22 +636,62 @@
    11.4    virtual const Type* Value( PhaseTransform *phase ) const;
    11.5  };
    11.6  
    11.7 -//---------- PopCountINode -----------------------------------------------------
    11.8 -// Population count (bit count) of an integer.
    11.9 -class PopCountINode : public Node {
   11.10 +//---------- CountBitsNode -----------------------------------------------------
   11.11 +class CountBitsNode : public Node {
   11.12  public:
   11.13 -  PopCountINode(Node* in1) : Node(0, in1) {}
   11.14 -  virtual int Opcode() const;
   11.15 +  CountBitsNode(Node* in1) : Node(0, in1) {}
   11.16    const Type* bottom_type() const { return TypeInt::INT; }
   11.17    virtual uint ideal_reg() const { return Op_RegI; }
   11.18  };
   11.19  
   11.20 +//---------- CountLeadingZerosINode --------------------------------------------
   11.21 +// Count leading zeros (0-bit count starting from MSB) of an integer.
   11.22 +class CountLeadingZerosINode : public CountBitsNode {
   11.23 +public:
   11.24 +  CountLeadingZerosINode(Node* in1) : CountBitsNode(in1) {}
   11.25 +  virtual int Opcode() const;
   11.26 +  virtual const Type* Value(PhaseTransform* phase) const;
   11.27 +};
   11.28 +
   11.29 +//---------- CountLeadingZerosLNode --------------------------------------------
   11.30 +// Count leading zeros (0-bit count starting from MSB) of a long.
   11.31 +class CountLeadingZerosLNode : public CountBitsNode {
   11.32 +public:
   11.33 +  CountLeadingZerosLNode(Node* in1) : CountBitsNode(in1) {}
   11.34 +  virtual int Opcode() const;
   11.35 +  virtual const Type* Value(PhaseTransform* phase) const;
   11.36 +};
   11.37 +
   11.38 +//---------- CountTrailingZerosINode -------------------------------------------
   11.39 +// Count trailing zeros (0-bit count starting from LSB) of an integer.
   11.40 +class CountTrailingZerosINode : public CountBitsNode {
   11.41 +public:
   11.42 +  CountTrailingZerosINode(Node* in1) : CountBitsNode(in1) {}
   11.43 +  virtual int Opcode() const;
   11.44 +  virtual const Type* Value(PhaseTransform* phase) const;
   11.45 +};
   11.46 +
   11.47 +//---------- CountTrailingZerosLNode -------------------------------------------
   11.48 +// Count trailing zeros (0-bit count starting from LSB) of a long.
   11.49 +class CountTrailingZerosLNode : public CountBitsNode {
   11.50 +public:
   11.51 +  CountTrailingZerosLNode(Node* in1) : CountBitsNode(in1) {}
   11.52 +  virtual int Opcode() const;
   11.53 +  virtual const Type* Value(PhaseTransform* phase) const;
   11.54 +};
   11.55 +
   11.56 +//---------- PopCountINode -----------------------------------------------------
   11.57 +// Population count (bit count) of an integer.
   11.58 +class PopCountINode : public CountBitsNode {
   11.59 +public:
   11.60 +  PopCountINode(Node* in1) : CountBitsNode(in1) {}
   11.61 +  virtual int Opcode() const;
   11.62 +};
   11.63 +
   11.64  //---------- PopCountLNode -----------------------------------------------------
   11.65  // Population count (bit count) of a long.
   11.66 -class PopCountLNode : public Node {
   11.67 +class PopCountLNode : public CountBitsNode {
   11.68  public:
   11.69 -  PopCountLNode(Node* in1) : Node(0, in1) {}
   11.70 +  PopCountLNode(Node* in1) : CountBitsNode(in1) {}
   11.71    virtual int Opcode() const;
   11.72 -  const Type* bottom_type() const { return TypeInt::INT; }
   11.73 -  virtual uint ideal_reg() const { return Op_RegI; }
   11.74  };
    12.1 --- a/src/share/vm/opto/library_call.cpp	Tue May 05 11:02:10 2009 -0700
    12.2 +++ b/src/share/vm/opto/library_call.cpp	Wed May 06 00:27:52 2009 -0700
    12.3 @@ -222,6 +222,8 @@
    12.4    bool inline_unsafe_CAS(BasicType type);
    12.5    bool inline_unsafe_ordered_store(BasicType type);
    12.6    bool inline_fp_conversions(vmIntrinsics::ID id);
    12.7 +  bool inline_numberOfLeadingZeros(vmIntrinsics::ID id);
    12.8 +  bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
    12.9    bool inline_bitCount(vmIntrinsics::ID id);
   12.10    bool inline_reverseBytes(vmIntrinsics::ID id);
   12.11  };
   12.12 @@ -630,6 +632,14 @@
   12.13    case vmIntrinsics::_longBitsToDouble:
   12.14      return inline_fp_conversions(intrinsic_id());
   12.15  
   12.16 +  case vmIntrinsics::_numberOfLeadingZeros_i:
   12.17 +  case vmIntrinsics::_numberOfLeadingZeros_l:
   12.18 +    return inline_numberOfLeadingZeros(intrinsic_id());
   12.19 +
   12.20 +  case vmIntrinsics::_numberOfTrailingZeros_i:
   12.21 +  case vmIntrinsics::_numberOfTrailingZeros_l:
   12.22 +    return inline_numberOfTrailingZeros(intrinsic_id());
   12.23 +
   12.24    case vmIntrinsics::_bitCount_i:
   12.25    case vmIntrinsics::_bitCount_l:
   12.26      return inline_bitCount(intrinsic_id());
   12.27 @@ -1844,6 +1854,48 @@
   12.28    }
   12.29  }
   12.30  
   12.31 +//-------------------inline_numberOfLeadingZeros_int/long-----------------------
   12.32 +// inline int Integer.numberOfLeadingZeros(int)
   12.33 +// inline int Long.numberOfLeadingZeros(long)
   12.34 +bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) {
   12.35 +  assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros");
   12.36 +  if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
   12.37 +  if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;
   12.38 +  _sp += arg_size();  // restore stack pointer
   12.39 +  switch (id) {
   12.40 +  case vmIntrinsics::_numberOfLeadingZeros_i:
   12.41 +    push(_gvn.transform(new (C, 2) CountLeadingZerosINode(pop())));
   12.42 +    break;
   12.43 +  case vmIntrinsics::_numberOfLeadingZeros_l:
   12.44 +    push(_gvn.transform(new (C, 2) CountLeadingZerosLNode(pop_pair())));
   12.45 +    break;
   12.46 +  default:
   12.47 +    ShouldNotReachHere();
   12.48 +  }
   12.49 +  return true;
   12.50 +}
   12.51 +
   12.52 +//-------------------inline_numberOfTrailingZeros_int/long----------------------
   12.53 +// inline int Integer.numberOfTrailingZeros(int)
   12.54 +// inline int Long.numberOfTrailingZeros(long)
   12.55 +bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) {
   12.56 +  assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros");
   12.57 +  if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
   12.58 +  if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
   12.59 +  _sp += arg_size();  // restore stack pointer
   12.60 +  switch (id) {
   12.61 +  case vmIntrinsics::_numberOfTrailingZeros_i:
   12.62 +    push(_gvn.transform(new (C, 2) CountTrailingZerosINode(pop())));
   12.63 +    break;
   12.64 +  case vmIntrinsics::_numberOfTrailingZeros_l:
   12.65 +    push(_gvn.transform(new (C, 2) CountTrailingZerosLNode(pop_pair())));
   12.66 +    break;
   12.67 +  default:
   12.68 +    ShouldNotReachHere();
   12.69 +  }
   12.70 +  return true;
   12.71 +}
   12.72 +
   12.73  //----------------------------inline_bitCount_int/long-----------------------
   12.74  // inline int Integer.bitCount(int)
   12.75  // inline int Long.bitCount(long)
    13.1 --- a/src/share/vm/opto/matcher.hpp	Tue May 05 11:02:10 2009 -0700
    13.2 +++ b/src/share/vm/opto/matcher.hpp	Wed May 06 00:27:52 2009 -0700
    13.3 @@ -225,10 +225,16 @@
    13.4    OptoRegPair *_parm_regs;        // Array of machine registers per argument
    13.5    RegMask *_calling_convention_mask; // Array of RegMasks per argument
    13.6  
    13.7 -  // Does matcher support this ideal node?
    13.8 +  // Does matcher have a match rule for this ideal node?
    13.9    static const bool has_match_rule(int opcode);
   13.10    static const bool _hasMatchRule[_last_opcode];
   13.11  
   13.12 +  // Does matcher have a match rule for this ideal node and is the
   13.13 +  // predicate (if there is one) true?
   13.14 +  // NOTE: If this function is used more commonly in the future, ADLC
   13.15 +  // should generate this one.
   13.16 +  static const bool match_rule_supported(int opcode);
   13.17 +
   13.18    // Used to determine if we have fast l2f conversion
   13.19    // USII has it, USIII doesn't
   13.20    static const bool convL2FSupported(void);
    14.1 --- a/src/share/vm/runtime/globals.hpp	Tue May 05 11:02:10 2009 -0700
    14.2 +++ b/src/share/vm/runtime/globals.hpp	Wed May 06 00:27:52 2009 -0700
    14.3 @@ -2185,6 +2185,9 @@
    14.4    diagnostic(bool, PrintIntrinsics, false,                                  \
    14.5            "prints attempted and successful inlining of intrinsics")         \
    14.6                                                                              \
    14.7 +  product(bool, UseCountLeadingZerosInstruction, false,                     \
    14.8 +          "Use count leading zeros instruction")                            \
    14.9 +                                                                            \
   14.10    product(bool, UsePopCountInstruction, false,                              \
   14.11            "Use population count instruction")                               \
   14.12                                                                              \
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/test/compiler/6823354/Test6823354.java	Wed May 06 00:27:52 2009 -0700
    15.3 @@ -0,0 +1,266 @@
    15.4 +/*
    15.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
    15.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    15.7 + *
    15.8 + * This code is free software; you can redistribute it and/or modify it
    15.9 + * under the terms of the GNU General Public License version 2 only, as
   15.10 + * published by the Free Software Foundation.
   15.11 + *
   15.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   15.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   15.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   15.15 + * version 2 for more details (a copy is included in the LICENSE file that
   15.16 + * accompanied this code).
   15.17 + *
   15.18 + * You should have received a copy of the GNU General Public License version
   15.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   15.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   15.21 + *
   15.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   15.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
   15.24 + * have any questions.
   15.25 + */
   15.26 +
   15.27 +/**
   15.28 + * @test
   15.29 + * @bug 6823354
   15.30 + * @summary These methods can be intrinsified by using bit scan, bit test, and population count instructions.
   15.31 + *
   15.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6823354.lzcomp,Test6823354.tzcomp,.dolzcomp,.dotzcomp Test6823354
   15.33 + */
   15.34 +
   15.35 +import java.net.URLClassLoader;
   15.36 +
   15.37 +public class Test6823354 {
   15.38 +    // Arrays of corner case values.
   15.39 +    static final int[]  ia = new int[]  { 0,  1,  -1,  Integer.MIN_VALUE, Integer.MAX_VALUE };
   15.40 +    static final long[] la = new long[] { 0L, 1L, -1L, Long.MIN_VALUE,    Long.MAX_VALUE    };
   15.41 +
   15.42 +    public static void main(String[] args) throws Exception {
   15.43 +        // Load the classes and the methods.
   15.44 +        Integer.numberOfLeadingZeros(0);
   15.45 +        Integer.numberOfTrailingZeros(0);
   15.46 +        Long.numberOfLeadingZeros(0);
   15.47 +        Long.numberOfTrailingZeros(0);
   15.48 +
   15.49 +        lz();
   15.50 +        tz();
   15.51 +    }
   15.52 +
   15.53 +    static void lz() throws Exception {
   15.54 +        // int
   15.55 +
   15.56 +        // Test corner cases.
   15.57 +        for (int i = 0; i < ia.length; i++) {
   15.58 +            int x = ia[i];
   15.59 +            check(x, lzcomp(x), lzint(x));
   15.60 +        }
   15.61 +
   15.62 +        // Test all possible return values.
   15.63 +        for (int i = 0; i < Integer.SIZE; i++) {
   15.64 +            int x = 1 << i;
   15.65 +            check(x, lzcomp(x), lzint(x));
   15.66 +        }
   15.67 +
   15.68 +        String classname = "Test6823354$lzconI";
   15.69 +
   15.70 +        // Test Ideal optimizations (constant values).
   15.71 +        for (int i = 0; i < ia.length; i++) {
   15.72 +            testclass(classname, ia[i]);
   15.73 +        }
   15.74 +
   15.75 +        // Test Ideal optimizations (constant values).
   15.76 +        for (int i = 0; i < Integer.SIZE; i++) {
   15.77 +            int x = 1 << i;
   15.78 +            testclass(classname, x);
   15.79 +        }
   15.80 +
   15.81 +
   15.82 +        // long
   15.83 +
   15.84 +        // Test corner cases.
   15.85 +        for (int i = 0; i < ia.length; i++) {
   15.86 +            long x = la[i];
   15.87 +            check(x, lzcomp(x), lzint(x));
   15.88 +        }
   15.89 +
   15.90 +        // Test all possible return values.
   15.91 +        for (int i = 0; i < Long.SIZE; i++) {
   15.92 +            long x = 1L << i;
   15.93 +            check(x, lzcomp(x), lzint(x));
   15.94 +        }
   15.95 +
   15.96 +        classname = "Test6823354$lzconL";
   15.97 +
   15.98 +        // Test Ideal optimizations (constant values).
   15.99 +        for (int i = 0; i < la.length; i++) {
  15.100 +            testclass(classname, la[i]);
  15.101 +        }
  15.102 +
  15.103 +        // Test Ideal optimizations (constant values).
  15.104 +        for (int i = 0; i < Long.SIZE; i++) {
  15.105 +            long x = 1L << i;
  15.106 +            testclass(classname, x);
  15.107 +        }
  15.108 +    }
  15.109 +
  15.110 +    static void tz() throws Exception {
  15.111 +        // int
  15.112 +
  15.113 +        // Test corner cases.
  15.114 +        for (int i = 0; i < ia.length; i++) {
  15.115 +            int x = ia[i];
  15.116 +            check(x, tzcomp(x), tzint(x));
  15.117 +        }
  15.118 +
  15.119 +        // Test all possible return values.
  15.120 +        for (int i = 0; i < Integer.SIZE; i++) {
  15.121 +            int x = 1 << i;
  15.122 +            check(x, tzcomp(x), tzint(x));
  15.123 +        }
  15.124 +
  15.125 +        String classname = "Test6823354$tzconI";
  15.126 +
  15.127 +        // Test Ideal optimizations (constant values).
  15.128 +        for (int i = 0; i < ia.length; i++) {
  15.129 +            testclass(classname, ia[i]);
  15.130 +        }
  15.131 +
  15.132 +        // Test Ideal optimizations (constant values).
  15.133 +        for (int i = 0; i < Integer.SIZE; i++) {
  15.134 +            int x = 1 << i;
  15.135 +            testclass(classname, x);
  15.136 +        }
  15.137 +
  15.138 +
  15.139 +        // long
  15.140 +
  15.141 +        // Test corner cases.
  15.142 +        for (int i = 0; i < la.length; i++) {
  15.143 +            long x = la[i];
  15.144 +            check(x, tzcomp(x), tzint(x));
  15.145 +        }
  15.146 +
  15.147 +        // Test all possible return values.
  15.148 +        for (int i = 0; i < Long.SIZE; i++) {
  15.149 +            long x = 1L << i;
  15.150 +            check(x, tzcomp(x), tzint(x));
  15.151 +        }
  15.152 +
  15.153 +        classname = "Test6823354$tzconL";
  15.154 +
  15.155 +        // Test Ideal optimizations (constant values).
  15.156 +        for (int i = 0; i < la.length; i++) {
  15.157 +            testclass(classname, la[i]);
  15.158 +        }
  15.159 +
  15.160 +        // Test Ideal optimizations (constant values).
  15.161 +        for (int i = 0; i < Long.SIZE; i++) {
  15.162 +            long x = 1L << i;
  15.163 +            testclass(classname, x);
  15.164 +        }
  15.165 +    }
  15.166 +
  15.167 +    static void check(int value, int result, int expected) {
  15.168 +        //System.out.println(value + ": " + result + ", " + expected);
  15.169 +        if (result != expected)
  15.170 +            throw new InternalError(value + " failed: " + result + " != " + expected);
  15.171 +    }
  15.172 +
  15.173 +    static void check(long value, long result, long expected) {
  15.174 +        //System.out.println(value + ": " + result + ", " + expected);
  15.175 +        if (result != expected)
  15.176 +            throw new InternalError(value + " failed: " + result + " != " + expected);
  15.177 +    }
  15.178 +
  15.179 +    static int lzint( int i)  { return Integer.numberOfLeadingZeros(i); }
  15.180 +    static int lzcomp(int i)  { return Integer.numberOfLeadingZeros(i); }
  15.181 +
  15.182 +    static int lzint( long l) { return Long.numberOfLeadingZeros(l); }
  15.183 +    static int lzcomp(long l) { return Long.numberOfLeadingZeros(l); }
  15.184 +
  15.185 +    static int tzint( int i)  { return Integer.numberOfTrailingZeros(i); }
  15.186 +    static int tzcomp(int i)  { return Integer.numberOfTrailingZeros(i); }
  15.187 +
  15.188 +    static int tzint( long l) { return Long.numberOfTrailingZeros(l); }
  15.189 +    static int tzcomp(long l) { return Long.numberOfTrailingZeros(l); }
  15.190 +
  15.191 +    static void testclass(String classname, int x) throws Exception {
  15.192 +        System.setProperty("value", "" + x);
  15.193 +        loadandrunclass(classname);
  15.194 +    }
  15.195 +
  15.196 +    static void testclass(String classname, long x) throws Exception {
  15.197 +        System.setProperty("value", "" + x);
  15.198 +        loadandrunclass(classname);
  15.199 +    }
  15.200 +
  15.201 +    static void loadandrunclass(String classname) throws Exception {
  15.202 +        Class cl = Class.forName(classname);
  15.203 +        URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
  15.204 +        ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
  15.205 +        Class c = loader.loadClass(classname);
  15.206 +        Runnable r = (Runnable) c.newInstance();
  15.207 +        r.run();
  15.208 +    }
  15.209 +
  15.210 +    public static class lzconI implements Runnable {
  15.211 +        static final int VALUE;
  15.212 +
  15.213 +        static {
  15.214 +            int value = 0;
  15.215 +            try {
  15.216 +                value = Integer.decode(System.getProperty("value"));
  15.217 +            } catch (Throwable e) {}
  15.218 +            VALUE = value;
  15.219 +        }
  15.220 +
  15.221 +        public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
  15.222 +        static int dolzcomp() { return lzcomp(VALUE); }
  15.223 +    }
  15.224 +
  15.225 +    public static class lzconL implements Runnable {
  15.226 +        static final long VALUE;
  15.227 +
  15.228 +        static {
  15.229 +            long value = 0;
  15.230 +            try {
  15.231 +                value = Long.decode(System.getProperty("value"));
  15.232 +            } catch (Throwable e) {}
  15.233 +            VALUE = value;
  15.234 +        }
  15.235 +
  15.236 +        public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
  15.237 +        static int dolzcomp() { return lzcomp(VALUE); }
  15.238 +    }
  15.239 +
  15.240 +    public static class tzconI implements Runnable {
  15.241 +        static final int VALUE;
  15.242 +
  15.243 +        static {
  15.244 +            int value = 0;
  15.245 +            try {
  15.246 +                value = Integer.decode(System.getProperty("value"));
  15.247 +            } catch (Throwable e) {}
  15.248 +            VALUE = value;
  15.249 +        }
  15.250 +
  15.251 +        public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
  15.252 +        static int dotzcomp() { return tzcomp(VALUE); }
  15.253 +    }
  15.254 +
  15.255 +    public static class tzconL implements Runnable {
  15.256 +        static final long VALUE;
  15.257 +
  15.258 +        static {
  15.259 +            long value = 0;
  15.260 +            try {
  15.261 +                value = Long.decode(System.getProperty("value"));
  15.262 +            } catch (Throwable e) {}
  15.263 +            VALUE = value;
  15.264 +        }
  15.265 +
  15.266 +        public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
  15.267 +        static int dotzcomp() { return tzcomp(VALUE); }
  15.268 +    }
  15.269 +}

mercurial