Fri, 13 Mar 2009 11:35:17 -0700
6378821: bitCount() should use POPC on SPARC processors and AMD+10h
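Summary: bitCount() should use POPC on SPARC processors where POPC is implemented directly in hardware, and POPCNT on x86 processors that report support for it via CPUID (e.g. AMD family 10h and later). Both are enabled through the new UsePopCountInstruction flag.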
Reviewed-by: kvn, never
1.1 --- a/src/cpu/sparc/vm/sparc.ad Thu Mar 12 10:37:46 2009 -0700 1.2 +++ b/src/cpu/sparc/vm/sparc.ad Fri Mar 13 11:35:17 2009 -0700 1.3 @@ -9015,6 +9015,33 @@ 1.4 ins_pipe(long_memory_op); 1.5 %} 1.6 1.7 + 1.8 +//---------- Population Count Instructions ------------------------------------- 1.9 + 1.10 +instruct popCountI(iRegI dst, iRegI src) %{ 1.11 + predicate(UsePopCountInstruction); 1.12 + match(Set dst (PopCountI src)); 1.13 + 1.14 + format %{ "POPC $src, $dst" %} 1.15 + ins_encode %{ 1.16 + __ popc($src$$Register, $dst$$Register); 1.17 + %} 1.18 + ins_pipe(ialu_reg); 1.19 +%} 1.20 + 1.21 +// Note: Long.bitCount(long) returns an int. 1.22 +instruct popCountL(iRegI dst, iRegL src) %{ 1.23 + predicate(UsePopCountInstruction); 1.24 + match(Set dst (PopCountL src)); 1.25 + 1.26 + format %{ "POPC $src, $dst" %} 1.27 + ins_encode %{ 1.28 + __ popc($src$$Register, $dst$$Register); 1.29 + %} 1.30 + ins_pipe(ialu_reg); 1.31 +%} 1.32 + 1.33 + 1.34 // ============================================================================ 1.35 //------------Bytes reverse-------------------------------------------------- 1.36
2.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Mar 12 10:37:46 2009 -0700 2.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 2.3 @@ -92,10 +92,18 @@ 2.4 #endif 2.5 } 2.6 2.7 + // Use hardware population count instruction if available. 2.8 + if (has_hardware_popc()) { 2.9 + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 2.10 + UsePopCountInstruction = true; 2.11 + } 2.12 + } 2.13 + 2.14 char buf[512]; 2.15 - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s", 2.16 + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s", 2.17 (has_v8() ? ", has_v8" : ""), 2.18 (has_v9() ? ", has_v9" : ""), 2.19 + (has_hardware_popc() ? ", popc" : ""), 2.20 (has_vis1() ? ", has_vis1" : ""), 2.21 (has_vis2() ? ", has_vis2" : ""), 2.22 (is_ultra3() ? ", is_ultra3" : ""),
3.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Thu Mar 12 10:37:46 2009 -0700 3.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Mar 13 11:35:17 2009 -0700 3.3 @@ -29,10 +29,11 @@ 3.4 hardware_mul32 = 1, 3.5 hardware_div32 = 2, 3.6 hardware_fsmuld = 3, 3.7 - v9_instructions = 4, 3.8 - vis1_instructions = 5, 3.9 - vis2_instructions = 6, 3.10 - sun4v_instructions = 7 3.11 + hardware_popc = 4, 3.12 + v9_instructions = 5, 3.13 + vis1_instructions = 6, 3.14 + vis2_instructions = 7, 3.15 + sun4v_instructions = 8 3.16 }; 3.17 3.18 enum Feature_Flag_Set { 3.19 @@ -43,6 +44,7 @@ 3.20 hardware_mul32_m = 1 << hardware_mul32, 3.21 hardware_div32_m = 1 << hardware_div32, 3.22 hardware_fsmuld_m = 1 << hardware_fsmuld, 3.23 + hardware_popc_m = 1 << hardware_popc, 3.24 v9_instructions_m = 1 << v9_instructions, 3.25 vis1_instructions_m = 1 << vis1_instructions, 3.26 vis2_instructions_m = 1 << vis2_instructions, 3.27 @@ -81,6 +83,7 @@ 3.28 static bool has_hardware_mul32() { return (_features & hardware_mul32_m) != 0; } 3.29 static bool has_hardware_div32() { return (_features & hardware_div32_m) != 0; } 3.30 static bool has_hardware_fsmuld() { return (_features & hardware_fsmuld_m) != 0; } 3.31 + static bool has_hardware_popc() { return (_features & hardware_popc_m) != 0; } 3.32 static bool has_vis1() { return (_features & vis1_instructions_m) != 0; } 3.33 static bool has_vis2() { return (_features & vis2_instructions_m) != 0; } 3.34
4.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Thu Mar 12 10:37:46 2009 -0700 4.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Mar 13 11:35:17 2009 -0700 4.3 @@ -2193,6 +2193,25 @@ 4.4 emit_byte(0x58 | encode); 4.5 } 4.6 4.7 +void Assembler::popcntl(Register dst, Address src) { 4.8 + assert(VM_Version::supports_popcnt(), "must support"); 4.9 + InstructionMark im(this); 4.10 + emit_byte(0xF3); 4.11 + prefix(src, dst); 4.12 + emit_byte(0x0F); 4.13 + emit_byte(0xB8); 4.14 + emit_operand(dst, src); 4.15 +} 4.16 + 4.17 +void Assembler::popcntl(Register dst, Register src) { 4.18 + assert(VM_Version::supports_popcnt(), "must support"); 4.19 + emit_byte(0xF3); 4.20 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 4.21 + emit_byte(0x0F); 4.22 + emit_byte(0xB8); 4.23 + emit_byte(0xC0 | encode); 4.24 +} 4.25 + 4.26 void Assembler::popf() { 4.27 emit_byte(0x9D); 4.28 } 4.29 @@ -4080,6 +4099,25 @@ 4.30 addq(rsp, 16 * wordSize); 4.31 } 4.32 4.33 +void Assembler::popcntq(Register dst, Address src) { 4.34 + assert(VM_Version::supports_popcnt(), "must support"); 4.35 + InstructionMark im(this); 4.36 + emit_byte(0xF3); 4.37 + prefixq(src, dst); 4.38 + emit_byte(0x0F); 4.39 + emit_byte(0xB8); 4.40 + emit_operand(dst, src); 4.41 +} 4.42 + 4.43 +void Assembler::popcntq(Register dst, Register src) { 4.44 + assert(VM_Version::supports_popcnt(), "must support"); 4.45 + emit_byte(0xF3); 4.46 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4.47 + emit_byte(0x0F); 4.48 + emit_byte(0xB8); 4.49 + emit_byte(0xC0 | encode); 4.50 +} 4.51 + 4.52 void Assembler::popq(Address dst) { 4.53 InstructionMark im(this); 4.54 prefixq(dst);
5.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Thu Mar 12 10:37:46 2009 -0700 5.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri Mar 13 11:35:17 2009 -0700 5.3 @@ -1224,6 +1224,14 @@ 5.4 void popq(Address dst); 5.5 #endif 5.6 5.7 + void popcntl(Register dst, Address src); 5.8 + void popcntl(Register dst, Register src); 5.9 + 5.10 +#ifdef _LP64 5.11 + void popcntq(Register dst, Address src); 5.12 + void popcntq(Register dst, Register src); 5.13 +#endif 5.14 + 5.15 // Prefetches (SSE, SSE2, 3DNOW only) 5.16 5.17 void prefetchnta(Address src);
6.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Thu Mar 12 10:37:46 2009 -0700 6.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Fri Mar 13 11:35:17 2009 -0700 6.3 @@ -284,7 +284,7 @@ 6.4 } 6.5 6.6 char buf[256]; 6.7 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 6.8 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 6.9 cores_per_cpu(), threads_per_core(), 6.10 cpu_family(), _model, _stepping, 6.11 (supports_cmov() ? ", cmov" : ""), 6.12 @@ -297,6 +297,7 @@ 6.13 (supports_ssse3()? ", ssse3": ""), 6.14 (supports_sse4_1() ? ", sse4.1" : ""), 6.15 (supports_sse4_2() ? ", sse4.2" : ""), 6.16 + (supports_popcnt() ? ", popcnt" : ""), 6.17 (supports_mmx_ext() ? ", mmxext" : ""), 6.18 (supports_3dnow() ? ", 3dnow" : ""), 6.19 (supports_3dnow2() ? ", 3dnowext" : ""), 6.20 @@ -410,6 +411,13 @@ 6.21 } 6.22 } 6.23 6.24 + // Use population count instruction if available. 6.25 + if (supports_popcnt()) { 6.26 + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 6.27 + UsePopCountInstruction = true; 6.28 + } 6.29 + } 6.30 + 6.31 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 6.32 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 6.33
7.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Thu Mar 12 10:37:46 2009 -0700 7.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Fri Mar 13 11:35:17 2009 -0700 7.3 @@ -70,7 +70,9 @@ 7.4 dca : 1, 7.5 sse4_1 : 1, 7.6 sse4_2 : 1, 7.7 - : 11; 7.8 + : 2, 7.9 + popcnt : 1, 7.10 + : 8; 7.11 } bits; 7.12 }; 7.13 7.14 @@ -179,7 +181,8 @@ 7.15 CPU_SSSE3 = (1 << 9), 7.16 CPU_SSE4A = (1 << 10), 7.17 CPU_SSE4_1 = (1 << 11), 7.18 - CPU_SSE4_2 = (1 << 12) 7.19 + CPU_SSE4_2 = (1 << 12), 7.20 + CPU_POPCNT = (1 << 13) 7.21 } cpuFeatureFlags; 7.22 7.23 // cpuid information block. All info derived from executing cpuid with 7.24 @@ -290,6 +293,8 @@ 7.25 result |= CPU_SSE4_1; 7.26 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 7.27 result |= CPU_SSE4_2; 7.28 + if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 7.29 + result |= CPU_POPCNT; 7.30 return result; 7.31 } 7.32 7.33 @@ -379,6 +384,7 @@ 7.34 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 7.35 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 7.36 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 7.37 + static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } 7.38 // 7.39 // AMD features 7.40 //
8.1 --- a/src/cpu/x86/vm/x86_32.ad Thu Mar 12 10:37:46 2009 -0700 8.2 +++ b/src/cpu/x86/vm/x86_32.ad Fri Mar 13 11:35:17 2009 -0700 8.3 @@ -1483,16 +1483,20 @@ 8.4 // main source block for now. In future, we can generalize this by 8.5 // adding a syntax that specifies the sizes of fields in an order, 8.6 // so that the adlc can build the emit functions automagically 8.7 - enc_class OpcP %{ // Emit opcode 8.8 - emit_opcode(cbuf,$primary); 8.9 - %} 8.10 - 8.11 - enc_class OpcS %{ // Emit opcode 8.12 - emit_opcode(cbuf,$secondary); 8.13 - %} 8.14 - 8.15 - enc_class Opcode(immI d8 ) %{ // Emit opcode 8.16 - emit_opcode(cbuf,$d8$$constant); 8.17 + 8.18 + // Emit primary opcode 8.19 + enc_class OpcP %{ 8.20 + emit_opcode(cbuf, $primary); 8.21 + %} 8.22 + 8.23 + // Emit secondary opcode 8.24 + enc_class OpcS %{ 8.25 + emit_opcode(cbuf, $secondary); 8.26 + %} 8.27 + 8.28 + // Emit opcode directly 8.29 + enc_class Opcode(immI d8) %{ 8.30 + emit_opcode(cbuf, $d8$$constant); 8.31 %} 8.32 8.33 enc_class SizePrefix %{ 8.34 @@ -6387,6 +6391,67 @@ 8.35 %} 8.36 8.37 8.38 +//---------- Population Count Instructions ------------------------------------- 8.39 + 8.40 +instruct popCountI(eRegI dst, eRegI src) %{ 8.41 + predicate(UsePopCountInstruction); 8.42 + match(Set dst (PopCountI src)); 8.43 + 8.44 + format %{ "POPCNT $dst, $src" %} 8.45 + ins_encode %{ 8.46 + __ popcntl($dst$$Register, $src$$Register); 8.47 + %} 8.48 + ins_pipe(ialu_reg); 8.49 +%} 8.50 + 8.51 +instruct popCountI_mem(eRegI dst, memory mem) %{ 8.52 + predicate(UsePopCountInstruction); 8.53 + match(Set dst (PopCountI (LoadI mem))); 8.54 + 8.55 + format %{ "POPCNT $dst, $mem" %} 8.56 + ins_encode %{ 8.57 + __ popcntl($dst$$Register, $mem$$Address); 8.58 + %} 8.59 + ins_pipe(ialu_reg); 8.60 +%} 8.61 + 8.62 +// Note: Long.bitCount(long) returns an int. 
8.63 +instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 8.64 + predicate(UsePopCountInstruction); 8.65 + match(Set dst (PopCountL src)); 8.66 + effect(KILL cr, TEMP tmp, TEMP dst); 8.67 + 8.68 + format %{ "POPCNT $dst, $src.lo\n\t" 8.69 + "POPCNT $tmp, $src.hi\n\t" 8.70 + "ADD $dst, $tmp" %} 8.71 + ins_encode %{ 8.72 + __ popcntl($dst$$Register, $src$$Register); 8.73 + __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 8.74 + __ addl($dst$$Register, $tmp$$Register); 8.75 + %} 8.76 + ins_pipe(ialu_reg); 8.77 +%} 8.78 + 8.79 +// Note: Long.bitCount(long) returns an int. 8.80 +instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{ 8.81 + predicate(UsePopCountInstruction); 8.82 + match(Set dst (PopCountL (LoadL mem))); 8.83 + effect(KILL cr, TEMP tmp, TEMP dst); 8.84 + 8.85 + format %{ "POPCNT $dst, $mem\n\t" 8.86 + "POPCNT $tmp, $mem+4\n\t" 8.87 + "ADD $dst, $tmp" %} 8.88 + ins_encode %{ 8.89 + //__ popcntl($dst$$Register, $mem$$Address$$first); 8.90 + //__ popcntl($tmp$$Register, $mem$$Address$$second); 8.91 + __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false)); 8.92 + __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false)); 8.93 + __ addl($dst$$Register, $tmp$$Register); 8.94 + %} 8.95 + ins_pipe(ialu_reg); 8.96 +%} 8.97 + 8.98 + 8.99 //----------Load/Store/Move Instructions--------------------------------------- 8.100 //----------Load Instructions-------------------------------------------------- 8.101 // Load Byte (8bit signed)
9.1 --- a/src/cpu/x86/vm/x86_64.ad Thu Mar 12 10:37:46 2009 -0700 9.2 +++ b/src/cpu/x86/vm/x86_64.ad Fri Mar 13 11:35:17 2009 -0700 9.3 @@ -7429,6 +7429,56 @@ 9.4 ins_pipe( ialu_mem_reg ); 9.5 %} 9.6 9.7 + 9.8 +//---------- Population Count Instructions ------------------------------------- 9.9 + 9.10 +instruct popCountI(rRegI dst, rRegI src) %{ 9.11 + predicate(UsePopCountInstruction); 9.12 + match(Set dst (PopCountI src)); 9.13 + 9.14 + format %{ "popcnt $dst, $src" %} 9.15 + ins_encode %{ 9.16 + __ popcntl($dst$$Register, $src$$Register); 9.17 + %} 9.18 + ins_pipe(ialu_reg); 9.19 +%} 9.20 + 9.21 +instruct popCountI_mem(rRegI dst, memory mem) %{ 9.22 + predicate(UsePopCountInstruction); 9.23 + match(Set dst (PopCountI (LoadI mem))); 9.24 + 9.25 + format %{ "popcnt $dst, $mem" %} 9.26 + ins_encode %{ 9.27 + __ popcntl($dst$$Register, $mem$$Address); 9.28 + %} 9.29 + ins_pipe(ialu_reg); 9.30 +%} 9.31 + 9.32 +// Note: Long.bitCount(long) returns an int. 9.33 +instruct popCountL(rRegI dst, rRegL src) %{ 9.34 + predicate(UsePopCountInstruction); 9.35 + match(Set dst (PopCountL src)); 9.36 + 9.37 + format %{ "popcnt $dst, $src" %} 9.38 + ins_encode %{ 9.39 + __ popcntq($dst$$Register, $src$$Register); 9.40 + %} 9.41 + ins_pipe(ialu_reg); 9.42 +%} 9.43 + 9.44 +// Note: Long.bitCount(long) returns an int. 9.45 +instruct popCountL_mem(rRegI dst, memory mem) %{ 9.46 + predicate(UsePopCountInstruction); 9.47 + match(Set dst (PopCountL (LoadL mem))); 9.48 + 9.49 + format %{ "popcnt $dst, $mem" %} 9.50 + ins_encode %{ 9.51 + __ popcntq($dst$$Register, $mem$$Address); 9.52 + %} 9.53 + ins_pipe(ialu_reg); 9.54 +%} 9.55 + 9.56 + 9.57 //----------MemBar Instructions----------------------------------------------- 9.58 // Memory barrier flavors 9.59
10.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Thu Mar 12 10:37:46 2009 -0700 10.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 10.3 @@ -85,6 +85,7 @@ 10.4 if (av & AV_SPARC_DIV32) features |= hardware_div32_m; 10.5 if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m; 10.6 if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m; 10.7 + if (av & AV_SPARC_POPC) features |= hardware_popc_m; 10.8 if (av & AV_SPARC_VIS) features |= vis1_instructions_m; 10.9 if (av & AV_SPARC_VIS2) features |= vis2_instructions_m; 10.10 } else {
11.1 --- a/src/share/vm/classfile/vmSymbols.hpp Thu Mar 12 10:37:46 2009 -0700 11.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Fri Mar 13 11:35:17 2009 -0700 11.3 @@ -1,5 +1,5 @@ 11.4 /* 11.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 11.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 11.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 11.8 * 11.9 * This code is free software; you can redistribute it and/or modify it 11.10 @@ -284,6 +284,7 @@ 11.11 template(value_name, "value") \ 11.12 template(frontCacheEnabled_name, "frontCacheEnabled") \ 11.13 template(stringCacheEnabled_name, "stringCacheEnabled") \ 11.14 + template(bitCount_name, "bitCount") \ 11.15 \ 11.16 /* non-intrinsic name/signature pairs: */ \ 11.17 template(register_method_name, "register") \ 11.18 @@ -304,6 +305,7 @@ 11.19 template(double_long_signature, "(D)J") \ 11.20 template(double_double_signature, "(D)D") \ 11.21 template(int_float_signature, "(I)F") \ 11.22 + template(long_int_signature, "(J)I") \ 11.23 template(long_long_signature, "(J)J") \ 11.24 template(long_double_signature, "(J)D") \ 11.25 template(byte_signature, "B") \ 11.26 @@ -507,6 +509,10 @@ 11.27 do_name( doubleToLongBits_name, "doubleToLongBits") \ 11.28 do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ 11.29 do_name( longBitsToDouble_name, "longBitsToDouble") \ 11.30 + \ 11.31 + do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \ 11.32 + do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \ 11.33 + \ 11.34 do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \ 11.35 do_name( reverseBytes_name, "reverseBytes") \ 11.36 do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \ 11.37 @@ -696,7 +702,6 @@ 11.38 do_signature(putShort_raw_signature, "(JS)V") \ 11.39 
do_signature(getChar_raw_signature, "(J)C") \ 11.40 do_signature(putChar_raw_signature, "(JC)V") \ 11.41 - do_signature(getInt_raw_signature, "(J)I") \ 11.42 do_signature(putInt_raw_signature, "(JI)V") \ 11.43 do_alias(getLong_raw_signature, /*(J)J*/ long_long_signature) \ 11.44 do_alias(putLong_raw_signature, /*(JJ)V*/ long_long_void_signature) \ 11.45 @@ -713,7 +718,7 @@ 11.46 do_intrinsic(_getByte_raw, sun_misc_Unsafe, getByte_name, getByte_raw_signature, F_RN) \ 11.47 do_intrinsic(_getShort_raw, sun_misc_Unsafe, getShort_name, getShort_raw_signature, F_RN) \ 11.48 do_intrinsic(_getChar_raw, sun_misc_Unsafe, getChar_name, getChar_raw_signature, F_RN) \ 11.49 - do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, getInt_raw_signature, F_RN) \ 11.50 + do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, long_int_signature, F_RN) \ 11.51 do_intrinsic(_getLong_raw, sun_misc_Unsafe, getLong_name, getLong_raw_signature, F_RN) \ 11.52 do_intrinsic(_getFloat_raw, sun_misc_Unsafe, getFloat_name, getFloat_raw_signature, F_RN) \ 11.53 do_intrinsic(_getDouble_raw, sun_misc_Unsafe, getDouble_name, getDouble_raw_signature, F_RN) \
12.1 --- a/src/share/vm/opto/classes.hpp Thu Mar 12 10:37:46 2009 -0700 12.2 +++ b/src/share/vm/opto/classes.hpp Fri Mar 13 11:35:17 2009 -0700 12.3 @@ -184,6 +184,8 @@ 12.4 macro(Parm) 12.5 macro(PartialSubtypeCheck) 12.6 macro(Phi) 12.7 +macro(PopCountI) 12.8 +macro(PopCountL) 12.9 macro(PowD) 12.10 macro(PrefetchRead) 12.11 macro(PrefetchWrite)
13.1 --- a/src/share/vm/opto/connode.hpp Thu Mar 12 10:37:46 2009 -0700 13.2 +++ b/src/share/vm/opto/connode.hpp Fri Mar 13 11:35:17 2009 -0700 13.3 @@ -1,5 +1,5 @@ 13.4 /* 13.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 13.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 13.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 13.8 * 13.9 * This code is free software; you can redistribute it and/or modify it 13.10 @@ -635,3 +635,23 @@ 13.11 virtual uint ideal_reg() const { return Op_RegL; } 13.12 virtual const Type* Value( PhaseTransform *phase ) const; 13.13 }; 13.14 + 13.15 +//---------- PopCountINode ----------------------------------------------------- 13.16 +// Population count (bit count) of an integer. 13.17 +class PopCountINode : public Node { 13.18 +public: 13.19 + PopCountINode(Node* in1) : Node(0, in1) {} 13.20 + virtual int Opcode() const; 13.21 + const Type* bottom_type() const { return TypeInt::INT; } 13.22 + virtual uint ideal_reg() const { return Op_RegI; } 13.23 +}; 13.24 + 13.25 +//---------- PopCountLNode ----------------------------------------------------- 13.26 +// Population count (bit count) of a long. 13.27 +class PopCountLNode : public Node { 13.28 +public: 13.29 + PopCountLNode(Node* in1) : Node(0, in1) {} 13.30 + virtual int Opcode() const; 13.31 + const Type* bottom_type() const { return TypeInt::INT; } 13.32 + virtual uint ideal_reg() const { return Op_RegI; } 13.33 +};
14.1 --- a/src/share/vm/opto/library_call.cpp Thu Mar 12 10:37:46 2009 -0700 14.2 +++ b/src/share/vm/opto/library_call.cpp Fri Mar 13 11:35:17 2009 -0700 14.3 @@ -1,5 +1,5 @@ 14.4 /* 14.5 - * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved. 14.6 + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. 14.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 14.8 * 14.9 * This code is free software; you can redistribute it and/or modify it 14.10 @@ -221,6 +221,7 @@ 14.11 bool inline_unsafe_CAS(BasicType type); 14.12 bool inline_unsafe_ordered_store(BasicType type); 14.13 bool inline_fp_conversions(vmIntrinsics::ID id); 14.14 + bool inline_bitCount(vmIntrinsics::ID id); 14.15 bool inline_reverseBytes(vmIntrinsics::ID id); 14.16 }; 14.17 14.18 @@ -314,6 +315,11 @@ 14.19 if (!JDK_Version::is_gte_jdk14x_version()) return NULL; 14.20 break; 14.21 14.22 + case vmIntrinsics::_bitCount_i: 14.23 + case vmIntrinsics::_bitCount_l: 14.24 + if (!UsePopCountInstruction) return NULL; 14.25 + break; 14.26 + 14.27 default: 14.28 break; 14.29 } 14.30 @@ -617,6 +623,10 @@ 14.31 case vmIntrinsics::_longBitsToDouble: 14.32 return inline_fp_conversions(intrinsic_id()); 14.33 14.34 + case vmIntrinsics::_bitCount_i: 14.35 + case vmIntrinsics::_bitCount_l: 14.36 + return inline_bitCount(intrinsic_id()); 14.37 + 14.38 case vmIntrinsics::_reverseBytes_i: 14.39 case vmIntrinsics::_reverseBytes_l: 14.40 return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id()); 14.41 @@ -1714,6 +1724,27 @@ 14.42 } 14.43 } 14.44 14.45 +//----------------------------inline_bitCount_int/long----------------------- 14.46 +// inline int Integer.bitCount(int) 14.47 +// inline int Long.bitCount(long) 14.48 +bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) { 14.49 + assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount"); 14.50 + if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false; 14.51 + if 
(id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false; 14.52 + _sp += arg_size(); // restore stack pointer 14.53 + switch (id) { 14.54 + case vmIntrinsics::_bitCount_i: 14.55 + push(_gvn.transform(new (C, 2) PopCountINode(pop()))); 14.56 + break; 14.57 + case vmIntrinsics::_bitCount_l: 14.58 + push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair()))); 14.59 + break; 14.60 + default: 14.61 + ShouldNotReachHere(); 14.62 + } 14.63 + return true; 14.64 +} 14.65 + 14.66 //----------------------------inline_reverseBytes_int/long------------------- 14.67 // inline Integer.reverseBytes(int) 14.68 // inline Long.reverseBytes(long)
15.1 --- a/src/share/vm/runtime/globals.hpp Thu Mar 12 10:37:46 2009 -0700 15.2 +++ b/src/share/vm/runtime/globals.hpp Fri Mar 13 11:35:17 2009 -0700 15.3 @@ -1,5 +1,5 @@ 15.4 /* 15.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 15.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 15.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 15.8 * 15.9 * This code is free software; you can redistribute it and/or modify it 15.10 @@ -2172,6 +2172,9 @@ 15.11 diagnostic(bool, PrintIntrinsics, false, \ 15.12 "prints attempted and successful inlining of intrinsics") \ 15.13 \ 15.14 + product(bool, UsePopCountInstruction, false, \ 15.15 + "Use population count instruction") \ 15.16 + \ 15.17 diagnostic(ccstrlist, DisableIntrinsic, "", \ 15.18 "do not expand intrinsics whose (internal) names appear here") \ 15.19 \
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/test/compiler/6378821/Test6378821.java Fri Mar 13 11:35:17 2009 -0700 16.3 @@ -0,0 +1,75 @@ 16.4 +/* 16.5 + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. 16.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 16.7 + * 16.8 + * This code is free software; you can redistribute it and/or modify it 16.9 + * under the terms of the GNU General Public License version 2 only, as 16.10 + * published by the Free Software Foundation. 16.11 + * 16.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 16.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 16.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16.15 + * version 2 for more details (a copy is included in the LICENSE file that 16.16 + * accompanied this code). 16.17 + * 16.18 + * You should have received a copy of the GNU General Public License version 16.19 + * 2 along with this work; if not, write to the Free Software Foundation, 16.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 16.21 + * 16.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 16.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 16.24 + * have any questions. 16.25 + */ 16.26 + 16.27 +/** 16.28 + * @test 16.29 + * @bug 6378821 16.30 + * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h 16.31 + * 16.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821 16.33 + */ 16.34 + 16.35 +public class Test6378821 { 16.36 + static final int[] ia = new int[] { 0x12345678 }; 16.37 + static final long[] la = new long[] { 0x12345678abcdefL }; 16.38 + 16.39 + public static void main(String [] args) { 16.40 + // Resolve the class and the method. 
16.41 + Integer.bitCount(1); 16.42 + Long.bitCount(1); 16.43 + 16.44 + sub(ia[0]); 16.45 + sub(la[0]); 16.46 + sub(ia); 16.47 + sub(la); 16.48 + } 16.49 + 16.50 + static void check(int i, int expected, int result) { 16.51 + if (result != expected) { 16.52 + throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected); 16.53 + } 16.54 + } 16.55 + 16.56 + static void check(long l, int expected, int result) { 16.57 + if (result != expected) { 16.58 + throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected); 16.59 + } 16.60 + } 16.61 + 16.62 + static void sub(int i) { check(i, fint(i), fcomp(i) ); } 16.63 + static void sub(int[] ia) { check(ia[0], fint(ia), fcomp(ia)); } 16.64 + static void sub(long l) { check(l, fint(l), fcomp(l) ); } 16.65 + static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); } 16.66 + 16.67 + static int fint (int i) { return Integer.bitCount(i); } 16.68 + static int fcomp(int i) { return Integer.bitCount(i); } 16.69 + 16.70 + static int fint (int[] ia) { return Integer.bitCount(ia[0]); } 16.71 + static int fcomp(int[] ia) { return Integer.bitCount(ia[0]); } 16.72 + 16.73 + static int fint (long l) { return Long.bitCount(l); } 16.74 + static int fcomp(long l) { return Long.bitCount(l); } 16.75 + 16.76 + static int fint (long[] la) { return Long.bitCount(la[0]); } 16.77 + static int fcomp(long[] la) { return Long.bitCount(la[0]); } 16.78 +}