6378821: bitCount() should use POPC on SPARC processors and AMD+10h

Fri, 13 Mar 2009 11:35:17 -0700

author
twisti
date
Fri, 13 Mar 2009 11:35:17 -0700
changeset 1078
c771b7f43bbf
parent 1077
660978a2a31a
child 1079
c517646eef23

6378821: bitCount() should use POPC on SPARC processors and AMD+10h
Summary: Integer.bitCount() and Long.bitCount() should use POPC on SPARC processors, and POPCNT on x86 processors, where the instruction is implemented directly in hardware.
Reviewed-by: kvn, never

src/cpu/sparc/vm/sparc.ad file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/vm_version_sparc.cpp file | annotate | diff | comparison | revisions
src/cpu/sparc/vm/vm_version_sparc.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_64.ad file | annotate | diff | comparison | revisions
src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp file | annotate | diff | comparison | revisions
src/share/vm/classfile/vmSymbols.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/classes.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/connode.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/library_call.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
test/compiler/6378821/Test6378821.java file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/sparc/vm/sparc.ad	Thu Mar 12 10:37:46 2009 -0700
     1.2 +++ b/src/cpu/sparc/vm/sparc.ad	Fri Mar 13 11:35:17 2009 -0700
     1.3 @@ -9015,6 +9015,33 @@
     1.4    ins_pipe(long_memory_op);
     1.5  %}
     1.6  
     1.7 +
     1.8 +//---------- Population Count Instructions -------------------------------------
     1.9 +
    1.10 +instruct popCountI(iRegI dst, iRegI src) %{  // Integer.bitCount: count only the low 32 bits of src.
    1.11 +  predicate(UsePopCountInstruction);
    1.12 +  match(Set dst (PopCountI src));  // POPC counts all 64 register bits, so the upper word must be cleared first
    1.13 +  format %{ "SRL    $src, G0, $dst\t! clear upper word for 64 bit POPC\n\tPOPC   $dst, $dst" %}
    1.14 +  ins_encode %{
    1.15 +    __ srl($src$$Register, G0, $dst$$Register);  // 32-bit logical shift by zero: zero-extends src into dst
    1.16 +    __ popc($dst$$Register, $dst$$Register);     // NOTE(review): on a 32-bit VM confirm dst keeps its upper word between these two instructions
    1.17 +  %}
    1.18 +  ins_pipe(ialu_reg);
    1.19 +%}
    1.20 +
    1.21 +// Note: Long.bitCount(long) returns an int.
    1.22 +instruct popCountL(iRegI dst, iRegL src) %{  // Long.bitCount: long source register, int destination register.
    1.23 +  predicate(UsePopCountInstruction);  // rule is only selectable when the UsePopCountInstruction flag is set
    1.24 +  match(Set dst (PopCountL src));
    1.25 +
    1.26 +  format %{ "POPC   $src, $dst" %}
    1.27 +  ins_encode %{
    1.28 +    __ popc($src$$Register, $dst$$Register);  // a single POPC over the whole source register
    1.29 +  %}
    1.30 +  ins_pipe(ialu_reg);
    1.31 +%}
    1.32 +
    1.33 +
    1.34  // ============================================================================
    1.35  //------------Bytes reverse--------------------------------------------------
    1.36  
     2.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Mar 12 10:37:46 2009 -0700
     2.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Mar 13 11:35:17 2009 -0700
     2.3 @@ -92,10 +92,18 @@
     2.4  #endif
     2.5    }
     2.6  
     2.7 +  // Use hardware population count instruction if available.
     2.8 +  if (has_hardware_popc()) {
     2.9 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
    2.10 +      UsePopCountInstruction = true;
    2.11 +    }
    2.12 +  }
    2.13 +
    2.14    char buf[512];
    2.15 -  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s",
    2.16 +  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
    2.17                 (has_v8() ? ", has_v8" : ""),
    2.18                 (has_v9() ? ", has_v9" : ""),
    2.19 +               (has_hardware_popc() ? ", popc" : ""),
    2.20                 (has_vis1() ? ", has_vis1" : ""),
    2.21                 (has_vis2() ? ", has_vis2" : ""),
    2.22                 (is_ultra3() ? ", is_ultra3" : ""),
     3.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Mar 12 10:37:46 2009 -0700
     3.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Mar 13 11:35:17 2009 -0700
     3.3 @@ -29,10 +29,11 @@
     3.4      hardware_mul32     = 1,
     3.5      hardware_div32     = 2,
     3.6      hardware_fsmuld    = 3,
     3.7 -    v9_instructions    = 4,
     3.8 -    vis1_instructions  = 5,
     3.9 -    vis2_instructions  = 6,
    3.10 -    sun4v_instructions = 7
    3.11 +    hardware_popc      = 4,
    3.12 +    v9_instructions    = 5,
    3.13 +    vis1_instructions  = 6,
    3.14 +    vis2_instructions  = 7,
    3.15 +    sun4v_instructions = 8
    3.16    };
    3.17  
    3.18    enum Feature_Flag_Set {
    3.19 @@ -43,6 +44,7 @@
    3.20      hardware_mul32_m    = 1 << hardware_mul32,
    3.21      hardware_div32_m    = 1 << hardware_div32,
    3.22      hardware_fsmuld_m   = 1 << hardware_fsmuld,
    3.23 +    hardware_popc_m     = 1 << hardware_popc,
    3.24      v9_instructions_m   = 1 << v9_instructions,
    3.25      vis1_instructions_m = 1 << vis1_instructions,
    3.26      vis2_instructions_m = 1 << vis2_instructions,
    3.27 @@ -81,6 +83,7 @@
    3.28    static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
    3.29    static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
    3.30    static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
    3.31 +  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }
    3.32    static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
    3.33    static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
    3.34  
     4.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 12 10:37:46 2009 -0700
     4.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 13 11:35:17 2009 -0700
     4.3 @@ -2193,6 +2193,25 @@
     4.4    emit_byte(0x58 | encode);
     4.5  }
     4.6  
     4.7 +void Assembler::popcntl(Register dst, Address src) {  // Emit POPCNT dst, [src]: byte sequence F3, optional prefix, 0F B8, operand.
     4.8 +  assert(VM_Version::supports_popcnt(), "must support");  // debug-build guard: the CPU must report POPCNT
     4.9 +  InstructionMark im(this);
    4.10 +  emit_byte(0xF3);  // emitted ahead of whatever prefix() produces
    4.11 +  prefix(src, dst);
    4.12 +  emit_byte(0x0F);  // two-byte opcode 0F B8
    4.13 +  emit_byte(0xB8);
    4.14 +  emit_operand(dst, src);  // register field = dst, memory operand = src
    4.15 +}
    4.16 +
    4.17 +void Assembler::popcntl(Register dst, Register src) {  // Emit POPCNT dst, src (register-to-register form).
    4.18 +  assert(VM_Version::supports_popcnt(), "must support");  // debug-build guard: the CPU must report POPCNT
    4.19 +  emit_byte(0xF3);  // emitted ahead of whatever prefix_and_encode() produces
    4.20 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());  // may emit a prefix; returns the combined register bits
    4.21 +  emit_byte(0x0F);  // two-byte opcode 0F B8
    4.22 +  emit_byte(0xB8);
    4.23 +  emit_byte(0xC0 | encode);  // final byte: top two bits set (register-direct) plus the register bits
    4.24 +}
    4.25 +
    4.26  void Assembler::popf() {
    4.27    emit_byte(0x9D);
    4.28  }
    4.29 @@ -4080,6 +4099,25 @@
    4.30    addq(rsp, 16 * wordSize);
    4.31  }
    4.32  
    4.33 +void Assembler::popcntq(Register dst, Address src) {  // 64-bit variant of popcntl(Register, Address); differs only in using prefixq().
    4.34 +  assert(VM_Version::supports_popcnt(), "must support");  // debug-build guard: the CPU must report POPCNT
    4.35 +  InstructionMark im(this);
    4.36 +  emit_byte(0xF3);  // emitted ahead of whatever prefixq() produces
    4.37 +  prefixq(src, dst);  // presumably emits the REX.W prefix selecting 64-bit operand size -- confirm in prefixq()
    4.38 +  emit_byte(0x0F);  // two-byte opcode 0F B8
    4.39 +  emit_byte(0xB8);
    4.40 +  emit_operand(dst, src);  // register field = dst, memory operand = src
    4.41 +}
    4.42 +
    4.43 +void Assembler::popcntq(Register dst, Register src) {  // 64-bit variant of popcntl(Register, Register); differs only in using prefixq_and_encode().
    4.44 +  assert(VM_Version::supports_popcnt(), "must support");  // debug-build guard: the CPU must report POPCNT
    4.45 +  emit_byte(0xF3);  // emitted ahead of whatever prefixq_and_encode() produces
    4.46 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());  // presumably emits REX.W; returns the combined register bits
    4.47 +  emit_byte(0x0F);  // two-byte opcode 0F B8
    4.48 +  emit_byte(0xB8);
    4.49 +  emit_byte(0xC0 | encode);  // final byte: top two bits set (register-direct) plus the register bits
    4.50 +}
    4.51 +
    4.52  void Assembler::popq(Address dst) {
    4.53    InstructionMark im(this);
    4.54    prefixq(dst);
     5.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 12 10:37:46 2009 -0700
     5.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 13 11:35:17 2009 -0700
     5.3 @@ -1224,6 +1224,14 @@
     5.4    void popq(Address dst);
     5.5  #endif
     5.6  
     5.7 +  void popcntl(Register dst, Address src);
     5.8 +  void popcntl(Register dst, Register src);
     5.9 +
    5.10 +#ifdef _LP64
    5.11 +  void popcntq(Register dst, Address src);
    5.12 +  void popcntq(Register dst, Register src);
    5.13 +#endif
    5.14 +
    5.15    // Prefetches (SSE, SSE2, 3DNOW only)
    5.16  
    5.17    void prefetchnta(Address src);
     6.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 12 10:37:46 2009 -0700
     6.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 13 11:35:17 2009 -0700
     6.3 @@ -284,7 +284,7 @@
     6.4    }
     6.5  
     6.6    char buf[256];
     6.7 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     6.8 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     6.9                 cores_per_cpu(), threads_per_core(),
    6.10                 cpu_family(), _model, _stepping,
    6.11                 (supports_cmov() ? ", cmov" : ""),
    6.12 @@ -297,6 +297,7 @@
    6.13                 (supports_ssse3()? ", ssse3": ""),
    6.14                 (supports_sse4_1() ? ", sse4.1" : ""),
    6.15                 (supports_sse4_2() ? ", sse4.2" : ""),
    6.16 +               (supports_popcnt() ? ", popcnt" : ""),
    6.17                 (supports_mmx_ext() ? ", mmxext" : ""),
    6.18                 (supports_3dnow()   ? ", 3dnow"  : ""),
    6.19                 (supports_3dnow2()  ? ", 3dnowext" : ""),
    6.20 @@ -410,6 +411,13 @@
    6.21      }
    6.22    }
    6.23  
    6.24 +  // Use population count instruction if available.
    6.25 +  if (supports_popcnt()) {
    6.26 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
    6.27 +      UsePopCountInstruction = true;
    6.28 +    }
    6.29 +  }
    6.30 +
    6.31    assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
    6.32    assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
    6.33  
     7.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 12 10:37:46 2009 -0700
     7.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Mar 13 11:35:17 2009 -0700
     7.3 @@ -70,7 +70,9 @@
     7.4                 dca      : 1,
     7.5                 sse4_1   : 1,
     7.6                 sse4_2   : 1,
     7.7 -                        : 11;
     7.8 +                        : 2,
     7.9 +               popcnt   : 1,
    7.10 +                        : 8;
    7.11      } bits;
    7.12    };
    7.13  
    7.14 @@ -179,7 +181,8 @@
    7.15       CPU_SSSE3  = (1 << 9),
    7.16       CPU_SSE4A  = (1 << 10),
    7.17       CPU_SSE4_1 = (1 << 11),
    7.18 -     CPU_SSE4_2 = (1 << 12)
    7.19 +     CPU_SSE4_2 = (1 << 12),
    7.20 +     CPU_POPCNT = (1 << 13)
    7.21     } cpuFeatureFlags;
    7.22  
    7.23    // cpuid information block.  All info derived from executing cpuid with
    7.24 @@ -290,6 +293,8 @@
    7.25        result |= CPU_SSE4_1;
    7.26      if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
    7.27        result |= CPU_SSE4_2;
    7.28 +    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
    7.29 +      result |= CPU_POPCNT;
    7.30      return result;
    7.31    }
    7.32  
    7.33 @@ -379,6 +384,7 @@
    7.34    static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
    7.35    static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
    7.36    static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
    7.37 +  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
    7.38    //
    7.39    // AMD features
    7.40    //
     8.1 --- a/src/cpu/x86/vm/x86_32.ad	Thu Mar 12 10:37:46 2009 -0700
     8.2 +++ b/src/cpu/x86/vm/x86_32.ad	Fri Mar 13 11:35:17 2009 -0700
     8.3 @@ -1483,16 +1483,20 @@
     8.4    // main source block for now.  In future, we can generalize this by
     8.5    // adding a syntax that specifies the sizes of fields in an order,
     8.6    // so that the adlc can build the emit functions automagically
     8.7 -  enc_class OpcP %{             // Emit opcode
     8.8 -    emit_opcode(cbuf,$primary);
     8.9 -  %}
    8.10 -
    8.11 -  enc_class OpcS %{             // Emit opcode
    8.12 -    emit_opcode(cbuf,$secondary);
    8.13 -  %}
    8.14 -
    8.15 -  enc_class Opcode(immI d8 ) %{ // Emit opcode
    8.16 -    emit_opcode(cbuf,$d8$$constant);
    8.17 +
    8.18 +  // Emit primary opcode
    8.19 +  enc_class OpcP %{
    8.20 +    emit_opcode(cbuf, $primary);
    8.21 +  %}
    8.22 +
    8.23 +  // Emit secondary opcode
    8.24 +  enc_class OpcS %{
    8.25 +    emit_opcode(cbuf, $secondary);
    8.26 +  %}
    8.27 +
    8.28 +  // Emit opcode directly
    8.29 +  enc_class Opcode(immI d8) %{
    8.30 +    emit_opcode(cbuf, $d8$$constant);
    8.31    %}
    8.32  
    8.33    enc_class SizePrefix %{
    8.34 @@ -6387,6 +6391,67 @@
    8.35  %}
    8.36  
    8.37  
    8.38 +//---------- Population Count Instructions -------------------------------------
    8.39 +
    8.40 +instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{  // Integer.bitCount with a register source: one 32-bit POPCNT.
    8.41 +  predicate(UsePopCountInstruction);
    8.42 +  match(Set dst (PopCountI src));
    8.43 +  effect(KILL cr);  // POPCNT writes EFLAGS (ZF reflects the source, the other status flags are cleared)
    8.44 +  format %{ "POPCNT $dst, $src" %}
    8.45 +  ins_encode %{
    8.46 +    __ popcntl($dst$$Register, $src$$Register);
    8.47 +  %}
    8.48 +  ins_pipe(ialu_reg);
    8.49 +%}
    8.50 +
    8.51 +instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{  // Integer.bitCount of a loaded int: the load is folded into POPCNT.
    8.52 +  predicate(UsePopCountInstruction);
    8.53 +  match(Set dst (PopCountI (LoadI mem)));
    8.54 +  effect(KILL cr);  // POPCNT writes EFLAGS (ZF reflects the source, the other status flags are cleared)
    8.55 +  format %{ "POPCNT $dst, $mem" %}
    8.56 +  ins_encode %{
    8.57 +    __ popcntl($dst$$Register, $mem$$Address);
    8.58 +  %}
    8.59 +  ins_pipe(ialu_reg);
    8.60 +%}
    8.61 +
    8.62 +// Note: Long.bitCount(long) returns an int.
    8.63 +instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{  // Long.bitCount on 32-bit x86: POPCNT each half of src, then add the two counts.
    8.64 +  predicate(UsePopCountInstruction);
    8.65 +  match(Set dst (PopCountL src));
    8.66 +  effect(KILL cr, TEMP tmp, TEMP dst);  // flags are clobbered; dst is TEMP presumably so it cannot share a register with src, which is read again after dst is written
    8.67 +
    8.68 +  format %{ "POPCNT $dst, $src.lo\n\t"
    8.69 +            "POPCNT $tmp, $src.hi\n\t"
    8.70 +            "ADD    $dst, $tmp" %}
    8.71 +  ins_encode %{
    8.72 +    __ popcntl($dst$$Register, $src$$Register);                 // count of the low word
    8.73 +    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));  // count of the high word
    8.74 +    __ addl($dst$$Register, $tmp$$Register);                    // dst = low count + high count
    8.75 +  %}
    8.76 +  ins_pipe(ialu_reg);
    8.77 +%}
    8.78 +
    8.79 +// Note: Long.bitCount(long) returns an int.
    8.80 +instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{  // Long.bitCount of a loaded long on 32-bit x86: POPCNT the two 32-bit words in memory, then add.
    8.81 +  predicate(UsePopCountInstruction);
    8.82 +  match(Set dst (PopCountL (LoadL mem)));
    8.83 +  effect(KILL cr, TEMP tmp, TEMP dst);  // flags are clobbered; dst is TEMP presumably so it cannot share a register with the address registers used by the second access
    8.84 +
    8.85 +  format %{ "POPCNT $dst, $mem\n\t"
    8.86 +            "POPCNT $tmp, $mem+4\n\t"
    8.87 +            "ADD    $dst, $tmp" %}
    8.88 +  ins_encode %{
    8.89 +    //__ popcntl($dst$$Register, $mem$$Address$$first);
    8.90 +    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    8.91 +    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));  // word at the operand's own displacement
    8.92 +    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));  // word four bytes further on
    8.93 +    __ addl($dst$$Register, $tmp$$Register);  // dst = sum of the two counts
    8.94 +  %}
    8.95 +  ins_pipe(ialu_reg);
    8.96 +%}
    8.97 +
    8.98 +
    8.99  //----------Load/Store/Move Instructions---------------------------------------
   8.100  //----------Load Instructions--------------------------------------------------
   8.101  // Load Byte (8bit signed)
     9.1 --- a/src/cpu/x86/vm/x86_64.ad	Thu Mar 12 10:37:46 2009 -0700
     9.2 +++ b/src/cpu/x86/vm/x86_64.ad	Fri Mar 13 11:35:17 2009 -0700
     9.3 @@ -7429,6 +7429,56 @@
     9.4    ins_pipe( ialu_mem_reg );
     9.5  %}
     9.6  
     9.7 +
     9.8 +//---------- Population Count Instructions -------------------------------------
     9.9 +
    9.10 +instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{  // Integer.bitCount with a register source: one 32-bit POPCNT.
    9.11 +  predicate(UsePopCountInstruction);
    9.12 +  match(Set dst (PopCountI src));
    9.13 +  effect(KILL cr);  // POPCNT writes RFLAGS (ZF reflects the source, the other status flags are cleared)
    9.14 +  format %{ "popcnt  $dst, $src" %}
    9.15 +  ins_encode %{
    9.16 +    __ popcntl($dst$$Register, $src$$Register);
    9.17 +  %}
    9.18 +  ins_pipe(ialu_reg);
    9.19 +%}
    9.20 +
    9.21 +instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{  // Integer.bitCount of a loaded int: the load is folded into POPCNT.
    9.22 +  predicate(UsePopCountInstruction);
    9.23 +  match(Set dst (PopCountI (LoadI mem)));
    9.24 +  effect(KILL cr);  // POPCNT writes RFLAGS (ZF reflects the source, the other status flags are cleared)
    9.25 +  format %{ "popcnt  $dst, $mem" %}
    9.26 +  ins_encode %{
    9.27 +    __ popcntl($dst$$Register, $mem$$Address);
    9.28 +  %}
    9.29 +  ins_pipe(ialu_reg);
    9.30 +%}
    9.31 +
    9.32 +// Note: Long.bitCount(long) returns an int.
    9.33 +instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{  // Long.bitCount with a register source: one 64-bit POPCNT into an int register.
    9.34 +  predicate(UsePopCountInstruction);
    9.35 +  match(Set dst (PopCountL src));
    9.36 +  effect(KILL cr);  // POPCNT writes RFLAGS (ZF reflects the source, the other status flags are cleared)
    9.37 +  format %{ "popcnt  $dst, $src" %}
    9.38 +  ins_encode %{
    9.39 +    __ popcntq($dst$$Register, $src$$Register);
    9.40 +  %}
    9.41 +  ins_pipe(ialu_reg);
    9.42 +%}
    9.43 +
    9.44 +// Note: Long.bitCount(long) returns an int.
    9.45 +instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{  // Long.bitCount of a loaded long: the load is folded into a 64-bit POPCNT.
    9.46 +  predicate(UsePopCountInstruction);
    9.47 +  match(Set dst (PopCountL (LoadL mem)));
    9.48 +  effect(KILL cr);  // POPCNT writes RFLAGS (ZF reflects the source, the other status flags are cleared)
    9.49 +  format %{ "popcnt  $dst, $mem" %}
    9.50 +  ins_encode %{
    9.51 +    __ popcntq($dst$$Register, $mem$$Address);
    9.52 +  %}
    9.53 +  ins_pipe(ialu_reg);
    9.54 +%}
    9.55 +
    9.56 +
    9.57  //----------MemBar Instructions-----------------------------------------------
    9.58  // Memory barrier flavors
    9.59  
    10.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Mar 12 10:37:46 2009 -0700
    10.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Mar 13 11:35:17 2009 -0700
    10.3 @@ -85,6 +85,7 @@
    10.4      if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
    10.5      if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
    10.6      if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
    10.7 +    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
    10.8      if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
    10.9      if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
   10.10    } else {
    11.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Thu Mar 12 10:37:46 2009 -0700
    11.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Mar 13 11:35:17 2009 -0700
    11.3 @@ -1,5 +1,5 @@
    11.4  /*
    11.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    11.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    11.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    11.8   *
    11.9   * This code is free software; you can redistribute it and/or modify it
   11.10 @@ -284,6 +284,7 @@
   11.11    template(value_name,                                "value")                                    \
   11.12    template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
   11.13    template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
   11.14 +  template(bitCount_name,                             "bitCount")                                 \
   11.15                                                                                                    \
   11.16    /* non-intrinsic name/signature pairs: */                                                       \
   11.17    template(register_method_name,                      "register")                                 \
   11.18 @@ -304,6 +305,7 @@
   11.19    template(double_long_signature,                     "(D)J")                                     \
   11.20    template(double_double_signature,                   "(D)D")                                     \
   11.21    template(int_float_signature,                       "(I)F")                                     \
   11.22 +  template(long_int_signature,                        "(J)I")                                     \
   11.23    template(long_long_signature,                       "(J)J")                                     \
   11.24    template(long_double_signature,                     "(J)D")                                     \
   11.25    template(byte_signature,                            "B")                                        \
   11.26 @@ -507,6 +509,10 @@
   11.27     do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   11.28    do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
   11.29     do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
   11.30 +                                                                                                                        \
   11.31 +  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
   11.32 +  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
   11.33 +                                                                                                                        \
   11.34    do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
   11.35     do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   11.36    do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
   11.37 @@ -696,7 +702,6 @@
   11.38    do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   11.39    do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   11.40    do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
   11.41 -  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   11.42    do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
   11.43        do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
   11.44        do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
   11.45 @@ -713,7 +718,7 @@
   11.46    do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   11.47    do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   11.48    do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
   11.49 -  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
   11.50 +  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   11.51    do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   11.52    do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   11.53    do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
    12.1 --- a/src/share/vm/opto/classes.hpp	Thu Mar 12 10:37:46 2009 -0700
    12.2 +++ b/src/share/vm/opto/classes.hpp	Fri Mar 13 11:35:17 2009 -0700
    12.3 @@ -184,6 +184,8 @@
    12.4  macro(Parm)
    12.5  macro(PartialSubtypeCheck)
    12.6  macro(Phi)
    12.7 +macro(PopCountI)
    12.8 +macro(PopCountL)
    12.9  macro(PowD)
   12.10  macro(PrefetchRead)
   12.11  macro(PrefetchWrite)
    13.1 --- a/src/share/vm/opto/connode.hpp	Thu Mar 12 10:37:46 2009 -0700
    13.2 +++ b/src/share/vm/opto/connode.hpp	Fri Mar 13 11:35:17 2009 -0700
    13.3 @@ -1,5 +1,5 @@
    13.4  /*
    13.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    13.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    13.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    13.8   *
    13.9   * This code is free software; you can redistribute it and/or modify it
   13.10 @@ -635,3 +635,23 @@
   13.11    virtual uint ideal_reg() const { return Op_RegL; }
   13.12    virtual const Type* Value( PhaseTransform *phase ) const;
   13.13  };
   13.14 +
   13.15 +//---------- PopCountINode -----------------------------------------------------
   13.16 +// Population count (bit count) of an integer.
   13.17 +class PopCountINode : public Node {  // Node whose value is the bit count of its single int input.
   13.18 +public:
   13.19 +  PopCountINode(Node* in1) : Node(0, in1) {}  // first Node argument is 0 (presumably "no control input" -- confirm); in1 is the value to count
   13.20 +  virtual int Opcode() const;
   13.21 +  const Type* bottom_type() const { return TypeInt::INT; }  // result is typed as TypeInt::INT
   13.22 +  virtual uint ideal_reg() const { return Op_RegI; }  // result is assigned an int register
   13.23 +};
   13.24 +
   13.25 +//---------- PopCountLNode -----------------------------------------------------
   13.26 +// Population count (bit count) of a long.
   13.27 +class PopCountLNode : public Node {  // Node whose value is the bit count of its single long input; the result is still an int.
   13.28 +public:
   13.29 +  PopCountLNode(Node* in1) : Node(0, in1) {}  // first Node argument is 0 (presumably "no control input" -- confirm); in1 is the value to count
   13.30 +  virtual int Opcode() const;
   13.31 +  const Type* bottom_type() const { return TypeInt::INT; }  // int-typed even though the input is a long
   13.32 +  virtual uint ideal_reg() const { return Op_RegI; }  // result is assigned an int register
   13.33 +};
    14.1 --- a/src/share/vm/opto/library_call.cpp	Thu Mar 12 10:37:46 2009 -0700
    14.2 +++ b/src/share/vm/opto/library_call.cpp	Fri Mar 13 11:35:17 2009 -0700
    14.3 @@ -1,5 +1,5 @@
    14.4  /*
    14.5 - * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
    14.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
    14.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    14.8   *
    14.9   * This code is free software; you can redistribute it and/or modify it
   14.10 @@ -221,6 +221,7 @@
   14.11    bool inline_unsafe_CAS(BasicType type);
   14.12    bool inline_unsafe_ordered_store(BasicType type);
   14.13    bool inline_fp_conversions(vmIntrinsics::ID id);
   14.14 +  bool inline_bitCount(vmIntrinsics::ID id);
   14.15    bool inline_reverseBytes(vmIntrinsics::ID id);
   14.16  };
   14.17  
   14.18 @@ -314,6 +315,11 @@
   14.19      if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
   14.20      break;
   14.21  
   14.22 +  case vmIntrinsics::_bitCount_i:
   14.23 +  case vmIntrinsics::_bitCount_l:
   14.24 +    if (!UsePopCountInstruction)  return NULL;
   14.25 +    break;
   14.26 +
   14.27   default:
   14.28      break;
   14.29    }
   14.30 @@ -617,6 +623,10 @@
   14.31    case vmIntrinsics::_longBitsToDouble:
   14.32      return inline_fp_conversions(intrinsic_id());
   14.33  
   14.34 +  case vmIntrinsics::_bitCount_i:
   14.35 +  case vmIntrinsics::_bitCount_l:
   14.36 +    return inline_bitCount(intrinsic_id());
   14.37 +
   14.38    case vmIntrinsics::_reverseBytes_i:
   14.39    case vmIntrinsics::_reverseBytes_l:
   14.40      return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
   14.41 @@ -1714,6 +1724,27 @@
   14.42    }
   14.43  }
   14.44  
   14.45 +//----------------------------inline_bitCount_int/long-----------------------
   14.46 +// inline int Integer.bitCount(int)
   14.47 +// inline int Long.bitCount(long)
   14.48 +bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {  // Replace a bitCount call with a PopCountI/PopCountL node; returns false to decline, true once the node is pushed.
   14.49 +  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
   14.50 +  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;  // matcher has no rule for PopCountI: do not intrinsify
   14.51 +  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;  // matcher has no rule for PopCountL: do not intrinsify
   14.52 +  _sp += arg_size();  // restore stack pointer
   14.53 +  switch (id) {
   14.54 +  case vmIntrinsics::_bitCount_i:
   14.55 +    push(_gvn.transform(new (C, 2) PopCountINode(pop())));  // int argument is popped with pop(); the result is pushed with push()
   14.56 +    break;
   14.57 +  case vmIntrinsics::_bitCount_l:
   14.58 +    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));  // long argument is popped with pop_pair(); the int result is still pushed with push()
   14.59 +    break;
   14.60 +  default:
   14.61 +    ShouldNotReachHere();  // ids other than the two bitCount intrinsics are already rejected by the assert above
   14.62 +  }
   14.63 +  return true;
   14.64 +}
   14.65 +
   14.66  //----------------------------inline_reverseBytes_int/long-------------------
   14.67  // inline Integer.reverseBytes(int)
   14.68  // inline Long.reverseBytes(long)
    15.1 --- a/src/share/vm/runtime/globals.hpp	Thu Mar 12 10:37:46 2009 -0700
    15.2 +++ b/src/share/vm/runtime/globals.hpp	Fri Mar 13 11:35:17 2009 -0700
    15.3 @@ -1,5 +1,5 @@
    15.4  /*
    15.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    15.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    15.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    15.8   *
    15.9   * This code is free software; you can redistribute it and/or modify it
   15.10 @@ -2172,6 +2172,9 @@
   15.11    diagnostic(bool, PrintIntrinsics, false,                                  \
   15.12            "prints attempted and successful inlining of intrinsics")         \
   15.13                                                                              \
   15.14 +  product(bool, UsePopCountInstruction, false,                              \
   15.15 +          "Use population count instruction")                               \
   15.16 +                                                                            \
   15.17    diagnostic(ccstrlist, DisableIntrinsic, "",                               \
   15.18            "do not expand intrinsics whose (internal) names appear here")    \
   15.19                                                                              \
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/test/compiler/6378821/Test6378821.java	Fri Mar 13 11:35:17 2009 -0700
    16.3 @@ -0,0 +1,75 @@
    16.4 +/*
    16.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
    16.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    16.7 + *
    16.8 + * This code is free software; you can redistribute it and/or modify it
    16.9 + * under the terms of the GNU General Public License version 2 only, as
   16.10 + * published by the Free Software Foundation.
   16.11 + *
   16.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   16.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   16.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   16.15 + * version 2 for more details (a copy is included in the LICENSE file that
   16.16 + * accompanied this code).
   16.17 + *
   16.18 + * You should have received a copy of the GNU General Public License version
   16.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   16.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   16.21 + *
   16.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   16.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
   16.24 + * have any questions.
   16.25 + */
   16.26 +
   16.27 +/**
   16.28 + * @test
   16.29 + * @bug 6378821
   16.30 + * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
   16.31 + *
   16.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
   16.33 + */
   16.34 +
   16.35 +public class Test6378821 {  // Compares bitCount() from the fint methods against the fcomp methods (the only ones named in -XX:CompileOnly).
   16.36 +    static final int[]  ia = new int[]  { 0x87654321, 0x12345678, -1, 0 };  // [0] has the sign bit set, so counting sign-extension bits is detected
   16.37 +    static final long[] la = new long[] { 0x8765432112345678L, 0x12345678abcdefL, -1L, 0L };  // includes the original sample plus sign-bit, all-ones and zero
   16.38 +
   16.39 +    public static void main(String [] args) {
   16.40 +        // Resolve the class and the method.
   16.41 +        Integer.bitCount(1);
   16.42 +        Long.bitCount(1);
   16.43 +
   16.44 +        for (int i : ia)  { sub(i); }  // value-argument form, every int sample
   16.45 +        for (long l : la) { sub(l); }  // value-argument form, every long sample
   16.46 +        sub(ia);                       // array-load form, reads element [0]
   16.47 +        sub(la);                       // array-load form, reads element [0]
   16.48 +    }
   16.49 +
   16.50 +    static void check(int i, int expected, int result) {  // throws InternalError when the two counts for i differ
   16.51 +        if (result != expected) {
   16.52 +            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
   16.53 +        }
   16.54 +    }
   16.55 +
   16.56 +    static void check(long l, int expected, int result) {  // throws InternalError when the two counts for l differ
   16.57 +        if (result != expected) {
   16.58 +            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
   16.59 +        }
   16.60 +    }
   16.61 +
   16.62 +    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }  // expected = fint, result = fcomp
   16.63 +    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
   16.64 +    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
   16.65 +    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
   16.66 +
   16.67 +    static int fint (int i)     { return Integer.bitCount(i); }
   16.68 +    static int fcomp(int i)     { return Integer.bitCount(i); }
   16.69 +
   16.70 +    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
   16.71 +    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
   16.72 +
   16.73 +    static int fint (long l)    { return Long.bitCount(l); }
   16.74 +    static int fcomp(long l)    { return Long.bitCount(l); }
   16.75 +
   16.76 +    static int fint (long[] la) { return Long.bitCount(la[0]); }
   16.77 +    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
   16.78 +}

mercurial