Wed, 06 May 2009 00:27:52 -0700
6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
Summary: These methods can be intrinsified by using bit scan, bit test, and population count instructions.
Reviewed-by: kvn, never
1.1 --- a/src/cpu/sparc/vm/sparc.ad Tue May 05 11:02:10 2009 -0700 1.2 +++ b/src/cpu/sparc/vm/sparc.ad Wed May 06 00:27:52 2009 -0700 1.3 @@ -1712,6 +1712,23 @@ 1.4 return as_DoubleFloatRegister(register_encoding); 1.5 } 1.6 1.7 +const bool Matcher::match_rule_supported(int opcode) { 1.8 + if (!has_match_rule(opcode)) 1.9 + return false; 1.10 + 1.11 + switch (opcode) { 1.12 + case Op_CountLeadingZerosI: 1.13 + case Op_CountLeadingZerosL: 1.14 + case Op_CountTrailingZerosI: 1.15 + case Op_CountTrailingZerosL: 1.16 + if (!UsePopCountInstruction) 1.17 + return false; 1.18 + break; 1.19 + } 1.20 + 1.21 + return true; // Per default match rules are supported. 1.22 +} 1.23 + 1.24 int Matcher::regnum_to_fpu_offset(int regnum) { 1.25 return regnum - 32; // The FP registers are in the second chunk 1.26 } 1.27 @@ -9188,6 +9205,145 @@ 1.28 ins_pipe(long_memory_op); 1.29 %} 1.30 1.31 + 1.32 +//---------- Zeros Count Instructions ------------------------------------------ 1.33 + 1.34 +instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ 1.35 + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported 1.36 + match(Set dst (CountLeadingZerosI src)); 1.37 + effect(TEMP dst, TEMP tmp, KILL cr); 1.38 + 1.39 + // x |= (x >> 1); 1.40 + // x |= (x >> 2); 1.41 + // x |= (x >> 4); 1.42 + // x |= (x >> 8); 1.43 + // x |= (x >> 16); 1.44 + // return (WORDBITS - popc(x)); 1.45 + format %{ "SRL $src,1,$dst\t! 
count leading zeros (int)\n\t" 1.46 + "OR $src,$tmp,$dst\n\t" 1.47 + "SRL $dst,2,$tmp\n\t" 1.48 + "OR $dst,$tmp,$dst\n\t" 1.49 + "SRL $dst,4,$tmp\n\t" 1.50 + "OR $dst,$tmp,$dst\n\t" 1.51 + "SRL $dst,8,$tmp\n\t" 1.52 + "OR $dst,$tmp,$dst\n\t" 1.53 + "SRL $dst,16,$tmp\n\t" 1.54 + "OR $dst,$tmp,$dst\n\t" 1.55 + "POPC $dst,$dst\n\t" 1.56 + "MOV 32,$tmp\n\t" 1.57 + "SUB $tmp,$dst,$dst" %} 1.58 + ins_encode %{ 1.59 + Register Rdst = $dst$$Register; 1.60 + Register Rsrc = $src$$Register; 1.61 + Register Rtmp = $tmp$$Register; 1.62 + __ srl(Rsrc, 1, Rtmp); 1.63 + __ or3(Rsrc, Rtmp, Rdst); 1.64 + __ srl(Rdst, 2, Rtmp); 1.65 + __ or3(Rdst, Rtmp, Rdst); 1.66 + __ srl(Rdst, 4, Rtmp); 1.67 + __ or3(Rdst, Rtmp, Rdst); 1.68 + __ srl(Rdst, 8, Rtmp); 1.69 + __ or3(Rdst, Rtmp, Rdst); 1.70 + __ srl(Rdst, 16, Rtmp); 1.71 + __ or3(Rdst, Rtmp, Rdst); 1.72 + __ popc(Rdst, Rdst); 1.73 + __ mov(BitsPerInt, Rtmp); 1.74 + __ sub(Rtmp, Rdst, Rdst); 1.75 + %} 1.76 + ins_pipe(ialu_reg); 1.77 +%} 1.78 + 1.79 +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{ 1.80 + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported 1.81 + match(Set dst (CountLeadingZerosL src)); 1.82 + effect(TEMP dst, TEMP tmp, KILL cr); 1.83 + 1.84 + // x |= (x >> 1); 1.85 + // x |= (x >> 2); 1.86 + // x |= (x >> 4); 1.87 + // x |= (x >> 8); 1.88 + // x |= (x >> 16); 1.89 + // x |= (x >> 32); 1.90 + // return (WORDBITS - popc(x)); 1.91 + format %{ "SRLX $src,1,$dst\t! 
count leading zeros (long)\n\t" 1.92 + "OR $src,$tmp,$dst\n\t" 1.93 + "SRLX $dst,2,$tmp\n\t" 1.94 + "OR $dst,$tmp,$dst\n\t" 1.95 + "SRLX $dst,4,$tmp\n\t" 1.96 + "OR $dst,$tmp,$dst\n\t" 1.97 + "SRLX $dst,8,$tmp\n\t" 1.98 + "OR $dst,$tmp,$dst\n\t" 1.99 + "SRLX $dst,16,$tmp\n\t" 1.100 + "OR $dst,$tmp,$dst\n\t" 1.101 + "SRLX $dst,32,$tmp\n\t" 1.102 + "OR $dst,$tmp,$dst\n\t" 1.103 + "POPC $dst,$dst\n\t" 1.104 + "MOV 64,$tmp\n\t" 1.105 + "SUB $tmp,$dst,$dst" %} 1.106 + ins_encode %{ 1.107 + Register Rdst = $dst$$Register; 1.108 + Register Rsrc = $src$$Register; 1.109 + Register Rtmp = $tmp$$Register; 1.110 + __ srlx(Rsrc, 1, Rtmp); 1.111 + __ or3(Rsrc, Rtmp, Rdst); 1.112 + __ srlx(Rdst, 2, Rtmp); 1.113 + __ or3(Rdst, Rtmp, Rdst); 1.114 + __ srlx(Rdst, 4, Rtmp); 1.115 + __ or3(Rdst, Rtmp, Rdst); 1.116 + __ srlx(Rdst, 8, Rtmp); 1.117 + __ or3(Rdst, Rtmp, Rdst); 1.118 + __ srlx(Rdst, 16, Rtmp); 1.119 + __ or3(Rdst, Rtmp, Rdst); 1.120 + __ srlx(Rdst, 32, Rtmp); 1.121 + __ or3(Rdst, Rtmp, Rdst); 1.122 + __ popc(Rdst, Rdst); 1.123 + __ mov(BitsPerLong, Rtmp); 1.124 + __ sub(Rtmp, Rdst, Rdst); 1.125 + %} 1.126 + ins_pipe(ialu_reg); 1.127 +%} 1.128 + 1.129 +instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{ 1.130 + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported 1.131 + match(Set dst (CountTrailingZerosI src)); 1.132 + effect(TEMP dst, KILL cr); 1.133 + 1.134 + // return popc(~x & (x - 1)); 1.135 + format %{ "SUB $src,1,$dst\t! 
count trailing zeros (int)\n\t" 1.136 + "ANDN $dst,$src,$dst\n\t" 1.137 + "SRL $dst,R_G0,$dst\n\t" 1.138 + "POPC $dst,$dst" %} 1.139 + ins_encode %{ 1.140 + Register Rdst = $dst$$Register; 1.141 + Register Rsrc = $src$$Register; 1.142 + __ sub(Rsrc, 1, Rdst); 1.143 + __ andn(Rdst, Rsrc, Rdst); 1.144 + __ srl(Rdst, G0, Rdst); 1.145 + __ popc(Rdst, Rdst); 1.146 + %} 1.147 + ins_pipe(ialu_reg); 1.148 +%} 1.149 + 1.150 +instruct countTrailingZerosL(iRegI dst, iRegL src, flagsReg cr) %{ 1.151 + predicate(UsePopCountInstruction); // See Matcher::match_rule_supported 1.152 + match(Set dst (CountTrailingZerosL src)); 1.153 + effect(TEMP dst, KILL cr); 1.154 + 1.155 + // return popc(~x & (x - 1)); 1.156 + format %{ "SUB $src,1,$dst\t! count trailing zeros (long)\n\t" 1.157 + "ANDN $dst,$src,$dst\n\t" 1.158 + "POPC $dst,$dst" %} 1.159 + ins_encode %{ 1.160 + Register Rdst = $dst$$Register; 1.161 + Register Rsrc = $src$$Register; 1.162 + __ sub(Rsrc, 1, Rdst); 1.163 + __ andn(Rdst, Rsrc, Rdst); 1.164 + __ popc(Rdst, Rdst); 1.165 + %} 1.166 + ins_pipe(ialu_reg); 1.167 +%} 1.168 + 1.169 + 1.170 //---------- Population Count Instructions ------------------------------------- 1.171 1.172 instruct popCountI(iRegI dst, iRegI src) %{
2.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Tue May 05 11:02:10 2009 -0700 2.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Wed May 06 00:27:52 2009 -0700 2.3 @@ -952,6 +952,21 @@ 2.4 emit_operand(dst, src); 2.5 } 2.6 2.7 +void Assembler::bsfl(Register dst, Register src) { 2.8 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2.9 + emit_byte(0x0F); 2.10 + emit_byte(0xBC); 2.11 + emit_byte(0xC0 | encode); 2.12 +} 2.13 + 2.14 +void Assembler::bsrl(Register dst, Register src) { 2.15 + assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 2.16 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2.17 + emit_byte(0x0F); 2.18 + emit_byte(0xBD); 2.19 + emit_byte(0xC0 | encode); 2.20 +} 2.21 + 2.22 void Assembler::bswapl(Register reg) { // bswap 2.23 int encode = prefix_and_encode(reg->encoding()); 2.24 emit_byte(0x0F); 2.25 @@ -1438,6 +1453,15 @@ 2.26 } 2.27 } 2.28 2.29 +void Assembler::lzcntl(Register dst, Register src) { 2.30 + assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 2.31 + emit_byte(0xF3); 2.32 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2.33 + emit_byte(0x0F); 2.34 + emit_byte(0xBD); 2.35 + emit_byte(0xC0 | encode); 2.36 +} 2.37 + 2.38 // Emit mfence instruction 2.39 void Assembler::mfence() { 2.40 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 2.41 @@ -3688,6 +3712,21 @@ 2.42 emit_arith(0x23, 0xC0, dst, src); 2.43 } 2.44 2.45 +void Assembler::bsfq(Register dst, Register src) { 2.46 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2.47 + emit_byte(0x0F); 2.48 + emit_byte(0xBC); 2.49 + emit_byte(0xC0 | encode); 2.50 +} 2.51 + 2.52 +void Assembler::bsrq(Register dst, Register src) { 2.53 + assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 2.54 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2.55 + emit_byte(0x0F); 2.56 + emit_byte(0xBD); 2.57 + emit_byte(0xC0 | encode); 2.58 +} 2.59 + 2.60 void 
Assembler::bswapq(Register reg) { 2.61 int encode = prefixq_and_encode(reg->encoding()); 2.62 emit_byte(0x0F); 2.63 @@ -3941,6 +3980,15 @@ 2.64 emit_data((int)imm32, rspec, narrow_oop_operand); 2.65 } 2.66 2.67 +void Assembler::lzcntq(Register dst, Register src) { 2.68 + assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 2.69 + emit_byte(0xF3); 2.70 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2.71 + emit_byte(0x0F); 2.72 + emit_byte(0xBD); 2.73 + emit_byte(0xC0 | encode); 2.74 +} 2.75 + 2.76 void Assembler::movdq(XMMRegister dst, Register src) { 2.77 // table D-1 says MMX/SSE2 2.78 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
3.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Tue May 05 11:02:10 2009 -0700 3.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Wed May 06 00:27:52 2009 -0700 3.3 @@ -757,6 +757,14 @@ 3.4 void andpd(XMMRegister dst, Address src); 3.5 void andpd(XMMRegister dst, XMMRegister src); 3.6 3.7 + void bsfl(Register dst, Register src); 3.8 + void bsrl(Register dst, Register src); 3.9 + 3.10 +#ifdef _LP64 3.11 + void bsfq(Register dst, Register src); 3.12 + void bsrq(Register dst, Register src); 3.13 +#endif 3.14 + 3.15 void bswapl(Register reg); 3.16 3.17 void bswapq(Register reg); 3.18 @@ -1061,6 +1069,12 @@ 3.19 3.20 void lock(); 3.21 3.22 + void lzcntl(Register dst, Register src); 3.23 + 3.24 +#ifdef _LP64 3.25 + void lzcntq(Register dst, Register src); 3.26 +#endif 3.27 + 3.28 enum Membar_mask_bits { 3.29 StoreStore = 1 << 3, 3.30 LoadStore = 1 << 2,
4.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue May 05 11:02:10 2009 -0700 4.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed May 06 00:27:52 2009 -0700 4.3 @@ -284,7 +284,7 @@ 4.4 } 4.5 4.6 char buf[256]; 4.7 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4.8 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4.9 cores_per_cpu(), threads_per_core(), 4.10 cpu_family(), _model, _stepping, 4.11 (supports_cmov() ? ", cmov" : ""), 4.12 @@ -301,6 +301,7 @@ 4.13 (supports_mmx_ext() ? ", mmxext" : ""), 4.14 (supports_3dnow() ? ", 3dnow" : ""), 4.15 (supports_3dnow2() ? ", 3dnowext" : ""), 4.16 + (supports_lzcnt() ? ", lzcnt": ""), 4.17 (supports_sse4a() ? ", sse4a": ""), 4.18 (supports_ht() ? ", ht": "")); 4.19 _features_str = strdup(buf); 4.20 @@ -364,6 +365,13 @@ 4.21 UseXmmI2D = false; 4.22 } 4.23 } 4.24 + 4.25 + // Use count leading zeros count instruction if available. 4.26 + if (supports_lzcnt()) { 4.27 + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 4.28 + UseCountLeadingZerosInstruction = true; 4.29 + } 4.30 + } 4.31 } 4.32 4.33 if( is_intel() ) { // Intel cpus specific settings
5.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Tue May 05 11:02:10 2009 -0700 5.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Wed May 06 00:27:52 2009 -0700 5.3 @@ -120,7 +120,7 @@ 5.4 uint32_t LahfSahf : 1, 5.5 CmpLegacy : 1, 5.6 : 4, 5.7 - abm : 1, 5.8 + lzcnt : 1, 5.9 sse4a : 1, 5.10 misalignsse : 1, 5.11 prefetchw : 1, 5.12 @@ -182,7 +182,8 @@ 5.13 CPU_SSE4A = (1 << 10), 5.14 CPU_SSE4_1 = (1 << 11), 5.15 CPU_SSE4_2 = (1 << 12), 5.16 - CPU_POPCNT = (1 << 13) 5.17 + CPU_POPCNT = (1 << 13), 5.18 + CPU_LZCNT = (1 << 14) 5.19 } cpuFeatureFlags; 5.20 5.21 // cpuid information block. All info derived from executing cpuid with 5.22 @@ -277,8 +278,6 @@ 5.23 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && 5.24 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) 5.25 result |= CPU_MMX; 5.26 - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 5.27 - result |= CPU_3DNOW; 5.28 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 5.29 result |= CPU_SSE; 5.30 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 5.31 @@ -287,14 +286,23 @@ 5.32 result |= CPU_SSE3; 5.33 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 5.34 result |= CPU_SSSE3; 5.35 - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 5.36 - result |= CPU_SSE4A; 5.37 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 5.38 result |= CPU_SSE4_1; 5.39 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 5.40 result |= CPU_SSE4_2; 5.41 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 5.42 result |= CPU_POPCNT; 5.43 + 5.44 + // AMD features. 
5.45 + if (is_amd()) { 5.46 + if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 5.47 + result |= CPU_3DNOW; 5.48 + if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) 5.49 + result |= CPU_LZCNT; 5.50 + if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 5.51 + result |= CPU_SSE4A; 5.52 + } 5.53 + 5.54 return result; 5.55 } 5.56 5.57 @@ -391,6 +399,7 @@ 5.58 static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 5.59 static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } 5.60 static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } 5.61 + static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } 5.62 static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 5.63 5.64 static bool supports_compare_and_exchange() { return true; }
6.1 --- a/src/cpu/x86/vm/x86_32.ad Tue May 05 11:02:10 2009 -0700 6.2 +++ b/src/cpu/x86/vm/x86_32.ad Wed May 06 00:27:52 2009 -0700 6.3 @@ -1281,6 +1281,13 @@ 6.4 } 6.5 6.6 6.7 +const bool Matcher::match_rule_supported(int opcode) { 6.8 + if (!has_match_rule(opcode)) 6.9 + return false; 6.10 + 6.11 + return true; // Per default match rules are supported. 6.12 +} 6.13 + 6.14 int Matcher::regnum_to_fpu_offset(int regnum) { 6.15 return regnum - 32; // The FP registers are in the second chunk 6.16 } 6.17 @@ -6644,6 +6651,153 @@ 6.18 %} 6.19 6.20 6.21 +//---------- Zeros Count Instructions ------------------------------------------ 6.22 + 6.23 +instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ 6.24 + predicate(UseCountLeadingZerosInstruction); 6.25 + match(Set dst (CountLeadingZerosI src)); 6.26 + effect(KILL cr); 6.27 + 6.28 + format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} 6.29 + ins_encode %{ 6.30 + __ lzcntl($dst$$Register, $src$$Register); 6.31 + %} 6.32 + ins_pipe(ialu_reg); 6.33 +%} 6.34 + 6.35 +instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{ 6.36 + predicate(!UseCountLeadingZerosInstruction); 6.37 + match(Set dst (CountLeadingZerosI src)); 6.38 + effect(KILL cr); 6.39 + 6.40 + format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 6.41 + "JNZ skip\n\t" 6.42 + "MOV $dst, -1\n" 6.43 + "skip:\n\t" 6.44 + "NEG $dst\n\t" 6.45 + "ADD $dst, 31" %} 6.46 + ins_encode %{ 6.47 + Register Rdst = $dst$$Register; 6.48 + Register Rsrc = $src$$Register; 6.49 + Label skip; 6.50 + __ bsrl(Rdst, Rsrc); 6.51 + __ jccb(Assembler::notZero, skip); 6.52 + __ movl(Rdst, -1); 6.53 + __ bind(skip); 6.54 + __ negl(Rdst); 6.55 + __ addl(Rdst, BitsPerInt - 1); 6.56 + %} 6.57 + ins_pipe(ialu_reg); 6.58 +%} 6.59 + 6.60 +instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ 6.61 + predicate(UseCountLeadingZerosInstruction); 6.62 + match(Set dst (CountLeadingZerosL src)); 6.63 + effect(TEMP dst, KILL cr); 6.64 + 6.65 + format 
%{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 6.66 + "JNC done\n\t" 6.67 + "LZCNT $dst, $src.lo\n\t" 6.68 + "ADD $dst, 32\n" 6.69 + "done:" %} 6.70 + ins_encode %{ 6.71 + Register Rdst = $dst$$Register; 6.72 + Register Rsrc = $src$$Register; 6.73 + Label done; 6.74 + __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 6.75 + __ jccb(Assembler::carryClear, done); 6.76 + __ lzcntl(Rdst, Rsrc); 6.77 + __ addl(Rdst, BitsPerInt); 6.78 + __ bind(done); 6.79 + %} 6.80 + ins_pipe(ialu_reg); 6.81 +%} 6.82 + 6.83 +instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{ 6.84 + predicate(!UseCountLeadingZerosInstruction); 6.85 + match(Set dst (CountLeadingZerosL src)); 6.86 + effect(TEMP dst, KILL cr); 6.87 + 6.88 + format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 6.89 + "JZ msw_is_zero\n\t" 6.90 + "ADD $dst, 32\n\t" 6.91 + "JMP not_zero\n" 6.92 + "msw_is_zero:\n\t" 6.93 + "BSR $dst, $src.lo\n\t" 6.94 + "JNZ not_zero\n\t" 6.95 + "MOV $dst, -1\n" 6.96 + "not_zero:\n\t" 6.97 + "NEG $dst\n\t" 6.98 + "ADD $dst, 63\n" %} 6.99 + ins_encode %{ 6.100 + Register Rdst = $dst$$Register; 6.101 + Register Rsrc = $src$$Register; 6.102 + Label msw_is_zero; 6.103 + Label not_zero; 6.104 + __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 6.105 + __ jccb(Assembler::zero, msw_is_zero); 6.106 + __ addl(Rdst, BitsPerInt); 6.107 + __ jmpb(not_zero); 6.108 + __ bind(msw_is_zero); 6.109 + __ bsrl(Rdst, Rsrc); 6.110 + __ jccb(Assembler::notZero, not_zero); 6.111 + __ movl(Rdst, -1); 6.112 + __ bind(not_zero); 6.113 + __ negl(Rdst); 6.114 + __ addl(Rdst, BitsPerLong - 1); 6.115 + %} 6.116 + ins_pipe(ialu_reg); 6.117 +%} 6.118 + 6.119 +instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{ 6.120 + match(Set dst (CountTrailingZerosI src)); 6.121 + effect(KILL cr); 6.122 + 6.123 + format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 6.124 + "JNZ done\n\t" 6.125 + "MOV $dst, 32\n" 6.126 + "done:" %} 6.127 + ins_encode %{ 6.128 + Register Rdst = $dst$$Register; 6.129 + 
Label done; 6.130 + __ bsfl(Rdst, $src$$Register); 6.131 + __ jccb(Assembler::notZero, done); 6.132 + __ movl(Rdst, BitsPerInt); 6.133 + __ bind(done); 6.134 + %} 6.135 + ins_pipe(ialu_reg); 6.136 +%} 6.137 + 6.138 +instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{ 6.139 + match(Set dst (CountTrailingZerosL src)); 6.140 + effect(TEMP dst, KILL cr); 6.141 + 6.142 + format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 6.143 + "JNZ done\n\t" 6.144 + "BSF $dst, $src.hi\n\t" 6.145 + "JNZ msw_not_zero\n\t" 6.146 + "MOV $dst, 32\n" 6.147 + "msw_not_zero:\n\t" 6.148 + "ADD $dst, 32\n" 6.149 + "done:" %} 6.150 + ins_encode %{ 6.151 + Register Rdst = $dst$$Register; 6.152 + Register Rsrc = $src$$Register; 6.153 + Label msw_not_zero; 6.154 + Label done; 6.155 + __ bsfl(Rdst, Rsrc); 6.156 + __ jccb(Assembler::notZero, done); 6.157 + __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 6.158 + __ jccb(Assembler::notZero, msw_not_zero); 6.159 + __ movl(Rdst, BitsPerInt); 6.160 + __ bind(msw_not_zero); 6.161 + __ addl(Rdst, BitsPerInt); 6.162 + __ bind(done); 6.163 + %} 6.164 + ins_pipe(ialu_reg); 6.165 +%} 6.166 + 6.167 + 6.168 //---------- Population Count Instructions ------------------------------------- 6.169 6.170 instruct popCountI(eRegI dst, eRegI src) %{
7.1 --- a/src/cpu/x86/vm/x86_64.ad Tue May 05 11:02:10 2009 -0700 7.2 +++ b/src/cpu/x86/vm/x86_64.ad Wed May 06 00:27:52 2009 -0700 7.3 @@ -1980,6 +1980,13 @@ 7.4 } 7.5 7.6 7.7 +const bool Matcher::match_rule_supported(int opcode) { 7.8 + if (!has_match_rule(opcode)) 7.9 + return false; 7.10 + 7.11 + return true; // Per default match rules are supported. 7.12 +} 7.13 + 7.14 int Matcher::regnum_to_fpu_offset(int regnum) 7.15 { 7.16 return regnum - 32; // The FP registers are in the second chunk 7.17 @@ -7656,6 +7663,121 @@ 7.18 %} 7.19 7.20 7.21 +//---------- Zeros Count Instructions ------------------------------------------ 7.22 + 7.23 +instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ 7.24 + predicate(UseCountLeadingZerosInstruction); 7.25 + match(Set dst (CountLeadingZerosI src)); 7.26 + effect(KILL cr); 7.27 + 7.28 + format %{ "lzcntl $dst, $src\t# count leading zeros (int)" %} 7.29 + ins_encode %{ 7.30 + __ lzcntl($dst$$Register, $src$$Register); 7.31 + %} 7.32 + ins_pipe(ialu_reg); 7.33 +%} 7.34 + 7.35 +instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{ 7.36 + predicate(!UseCountLeadingZerosInstruction); 7.37 + match(Set dst (CountLeadingZerosI src)); 7.38 + effect(KILL cr); 7.39 + 7.40 + format %{ "bsrl $dst, $src\t# count leading zeros (int)\n\t" 7.41 + "jnz skip\n\t" 7.42 + "movl $dst, -1\n" 7.43 + "skip:\n\t" 7.44 + "negl $dst\n\t" 7.45 + "addl $dst, 31" %} 7.46 + ins_encode %{ 7.47 + Register Rdst = $dst$$Register; 7.48 + Register Rsrc = $src$$Register; 7.49 + Label skip; 7.50 + __ bsrl(Rdst, Rsrc); 7.51 + __ jccb(Assembler::notZero, skip); 7.52 + __ movl(Rdst, -1); 7.53 + __ bind(skip); 7.54 + __ negl(Rdst); 7.55 + __ addl(Rdst, BitsPerInt - 1); 7.56 + %} 7.57 + ins_pipe(ialu_reg); 7.58 +%} 7.59 + 7.60 +instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ 7.61 + predicate(UseCountLeadingZerosInstruction); 7.62 + match(Set dst (CountLeadingZerosL src)); 7.63 + effect(KILL cr); 7.64 + 7.65 + format %{ 
"lzcntq $dst, $src\t# count leading zeros (long)" %} 7.66 + ins_encode %{ 7.67 + __ lzcntq($dst$$Register, $src$$Register); 7.68 + %} 7.69 + ins_pipe(ialu_reg); 7.70 +%} 7.71 + 7.72 +instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{ 7.73 + predicate(!UseCountLeadingZerosInstruction); 7.74 + match(Set dst (CountLeadingZerosL src)); 7.75 + effect(KILL cr); 7.76 + 7.77 + format %{ "bsrq $dst, $src\t# count leading zeros (long)\n\t" 7.78 + "jnz skip\n\t" 7.79 + "movl $dst, -1\n" 7.80 + "skip:\n\t" 7.81 + "negl $dst\n\t" 7.82 + "addl $dst, 63" %} 7.83 + ins_encode %{ 7.84 + Register Rdst = $dst$$Register; 7.85 + Register Rsrc = $src$$Register; 7.86 + Label skip; 7.87 + __ bsrq(Rdst, Rsrc); 7.88 + __ jccb(Assembler::notZero, skip); 7.89 + __ movl(Rdst, -1); 7.90 + __ bind(skip); 7.91 + __ negl(Rdst); 7.92 + __ addl(Rdst, BitsPerLong - 1); 7.93 + %} 7.94 + ins_pipe(ialu_reg); 7.95 +%} 7.96 + 7.97 +instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{ 7.98 + match(Set dst (CountTrailingZerosI src)); 7.99 + effect(KILL cr); 7.100 + 7.101 + format %{ "bsfl $dst, $src\t# count trailing zeros (int)\n\t" 7.102 + "jnz done\n\t" 7.103 + "movl $dst, 32\n" 7.104 + "done:" %} 7.105 + ins_encode %{ 7.106 + Register Rdst = $dst$$Register; 7.107 + Label done; 7.108 + __ bsfl(Rdst, $src$$Register); 7.109 + __ jccb(Assembler::notZero, done); 7.110 + __ movl(Rdst, BitsPerInt); 7.111 + __ bind(done); 7.112 + %} 7.113 + ins_pipe(ialu_reg); 7.114 +%} 7.115 + 7.116 +instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{ 7.117 + match(Set dst (CountTrailingZerosL src)); 7.118 + effect(KILL cr); 7.119 + 7.120 + format %{ "bsfq $dst, $src\t# count trailing zeros (long)\n\t" 7.121 + "jnz done\n\t" 7.122 + "movl $dst, 64\n" 7.123 + "done:" %} 7.124 + ins_encode %{ 7.125 + Register Rdst = $dst$$Register; 7.126 + Label done; 7.127 + __ bsfq(Rdst, $src$$Register); 7.128 + __ jccb(Assembler::notZero, done); 7.129 + __ movl(Rdst, BitsPerLong); 7.130 + __ 
bind(done); 7.131 + %} 7.132 + ins_pipe(ialu_reg); 7.133 +%} 7.134 + 7.135 + 7.136 //---------- Population Count Instructions ------------------------------------- 7.137 7.138 instruct popCountI(rRegI dst, rRegI src) %{
8.1 --- a/src/share/vm/classfile/vmSymbols.hpp Tue May 05 11:02:10 2009 -0700 8.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Wed May 06 00:27:52 2009 -0700 8.3 @@ -313,6 +313,8 @@ 8.4 template(value_name, "value") \ 8.5 template(frontCacheEnabled_name, "frontCacheEnabled") \ 8.6 template(stringCacheEnabled_name, "stringCacheEnabled") \ 8.7 + template(numberOfLeadingZeros_name, "numberOfLeadingZeros") \ 8.8 + template(numberOfTrailingZeros_name, "numberOfTrailingZeros") \ 8.9 template(bitCount_name, "bitCount") \ 8.10 template(profile_name, "profile") \ 8.11 template(equals_name, "equals") \ 8.12 @@ -559,6 +561,12 @@ 8.13 do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ 8.14 do_name( longBitsToDouble_name, "longBitsToDouble") \ 8.15 \ 8.16 + do_intrinsic(_numberOfLeadingZeros_i, java_lang_Integer, numberOfLeadingZeros_name,int_int_signature, F_S) \ 8.17 + do_intrinsic(_numberOfLeadingZeros_l, java_lang_Long, numberOfLeadingZeros_name,long_int_signature, F_S) \ 8.18 + \ 8.19 + do_intrinsic(_numberOfTrailingZeros_i, java_lang_Integer, numberOfTrailingZeros_name,int_int_signature, F_S) \ 8.20 + do_intrinsic(_numberOfTrailingZeros_l, java_lang_Long, numberOfTrailingZeros_name,long_int_signature, F_S) \ 8.21 + \ 8.22 do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \ 8.23 do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \ 8.24 \
9.1 --- a/src/share/vm/opto/classes.hpp Tue May 05 11:02:10 2009 -0700 9.2 +++ b/src/share/vm/opto/classes.hpp Wed May 06 00:27:52 2009 -0700 9.3 @@ -104,6 +104,10 @@ 9.4 macro(CosD) 9.5 macro(CountedLoop) 9.6 macro(CountedLoopEnd) 9.7 +macro(CountLeadingZerosI) 9.8 +macro(CountLeadingZerosL) 9.9 +macro(CountTrailingZerosI) 9.10 +macro(CountTrailingZerosL) 9.11 macro(CreateEx) 9.12 macro(DecodeN) 9.13 macro(DivD)
10.1 --- a/src/share/vm/opto/connode.cpp Tue May 05 11:02:10 2009 -0700 10.2 +++ b/src/share/vm/opto/connode.cpp Wed May 06 00:27:52 2009 -0700 10.3 @@ -1255,3 +1255,93 @@ 10.4 v.set_jdouble(td->getd()); 10.5 return TypeLong::make( v.get_jlong() ); 10.6 } 10.7 + 10.8 +//------------------------------Value------------------------------------------ 10.9 +const Type* CountLeadingZerosINode::Value(PhaseTransform* phase) const { 10.10 + const Type* t = phase->type(in(1)); 10.11 + if (t == Type::TOP) return Type::TOP; 10.12 + const TypeInt* ti = t->isa_int(); 10.13 + if (ti && ti->is_con()) { 10.14 + jint i = ti->get_con(); 10.15 + // HD, Figure 5-6 10.16 + if (i == 0) 10.17 + return TypeInt::make(BitsPerInt); 10.18 + int n = 1; 10.19 + unsigned int x = i; 10.20 + if (x >> 16 == 0) { n += 16; x <<= 16; } 10.21 + if (x >> 24 == 0) { n += 8; x <<= 8; } 10.22 + if (x >> 28 == 0) { n += 4; x <<= 4; } 10.23 + if (x >> 30 == 0) { n += 2; x <<= 2; } 10.24 + n -= x >> 31; 10.25 + return TypeInt::make(n); 10.26 + } 10.27 + return TypeInt::INT; 10.28 +} 10.29 + 10.30 +//------------------------------Value------------------------------------------ 10.31 +const Type* CountLeadingZerosLNode::Value(PhaseTransform* phase) const { 10.32 + const Type* t = phase->type(in(1)); 10.33 + if (t == Type::TOP) return Type::TOP; 10.34 + const TypeLong* tl = t->isa_long(); 10.35 + if (tl && tl->is_con()) { 10.36 + jlong l = tl->get_con(); 10.37 + // HD, Figure 5-6 10.38 + if (l == 0) 10.39 + return TypeInt::make(BitsPerLong); 10.40 + int n = 1; 10.41 + unsigned int x = (((julong) l) >> 32); 10.42 + if (x == 0) { n += 32; x = (int) l; } 10.43 + if (x >> 16 == 0) { n += 16; x <<= 16; } 10.44 + if (x >> 24 == 0) { n += 8; x <<= 8; } 10.45 + if (x >> 28 == 0) { n += 4; x <<= 4; } 10.46 + if (x >> 30 == 0) { n += 2; x <<= 2; } 10.47 + n -= x >> 31; 10.48 + return TypeInt::make(n); 10.49 + } 10.50 + return TypeInt::INT; 10.51 +} 10.52 + 10.53 
+//------------------------------Value------------------------------------------ 10.54 +const Type* CountTrailingZerosINode::Value(PhaseTransform* phase) const { 10.55 + const Type* t = phase->type(in(1)); 10.56 + if (t == Type::TOP) return Type::TOP; 10.57 + const TypeInt* ti = t->isa_int(); 10.58 + if (ti && ti->is_con()) { 10.59 + jint i = ti->get_con(); 10.60 + // HD, Figure 5-14 10.61 + int y; 10.62 + if (i == 0) 10.63 + return TypeInt::make(BitsPerInt); 10.64 + int n = 31; 10.65 + y = i << 16; if (y != 0) { n = n - 16; i = y; } 10.66 + y = i << 8; if (y != 0) { n = n - 8; i = y; } 10.67 + y = i << 4; if (y != 0) { n = n - 4; i = y; } 10.68 + y = i << 2; if (y != 0) { n = n - 2; i = y; } 10.69 + y = i << 1; if (y != 0) { n = n - 1; } 10.70 + return TypeInt::make(n); 10.71 + } 10.72 + return TypeInt::INT; 10.73 +} 10.74 + 10.75 +//------------------------------Value------------------------------------------ 10.76 +const Type* CountTrailingZerosLNode::Value(PhaseTransform* phase) const { 10.77 + const Type* t = phase->type(in(1)); 10.78 + if (t == Type::TOP) return Type::TOP; 10.79 + const TypeLong* tl = t->isa_long(); 10.80 + if (tl && tl->is_con()) { 10.81 + jlong l = tl->get_con(); 10.82 + // HD, Figure 5-14 10.83 + int x, y; 10.84 + if (l == 0) 10.85 + return TypeInt::make(BitsPerLong); 10.86 + int n = 63; 10.87 + y = (int) l; if (y != 0) { n = n - 32; x = y; } else x = (((julong) l) >> 32); 10.88 + y = x << 16; if (y != 0) { n = n - 16; x = y; } 10.89 + y = x << 8; if (y != 0) { n = n - 8; x = y; } 10.90 + y = x << 4; if (y != 0) { n = n - 4; x = y; } 10.91 + y = x << 2; if (y != 0) { n = n - 2; x = y; } 10.92 + y = x << 1; if (y != 0) { n = n - 1; } 10.93 + return TypeInt::make(n); 10.94 + } 10.95 + return TypeInt::INT; 10.96 +}
11.1 --- a/src/share/vm/opto/connode.hpp Tue May 05 11:02:10 2009 -0700 11.2 +++ b/src/share/vm/opto/connode.hpp Wed May 06 00:27:52 2009 -0700 11.3 @@ -636,22 +636,62 @@ 11.4 virtual const Type* Value( PhaseTransform *phase ) const; 11.5 }; 11.6 11.7 -//---------- PopCountINode ----------------------------------------------------- 11.8 -// Population count (bit count) of an integer. 11.9 -class PopCountINode : public Node { 11.10 +//---------- CountBitsNode ----------------------------------------------------- 11.11 +class CountBitsNode : public Node { 11.12 public: 11.13 - PopCountINode(Node* in1) : Node(0, in1) {} 11.14 - virtual int Opcode() const; 11.15 + CountBitsNode(Node* in1) : Node(0, in1) {} 11.16 const Type* bottom_type() const { return TypeInt::INT; } 11.17 virtual uint ideal_reg() const { return Op_RegI; } 11.18 }; 11.19 11.20 +//---------- CountLeadingZerosINode -------------------------------------------- 11.21 +// Count leading zeros (0-bit count starting from MSB) of an integer. 11.22 +class CountLeadingZerosINode : public CountBitsNode { 11.23 +public: 11.24 + CountLeadingZerosINode(Node* in1) : CountBitsNode(in1) {} 11.25 + virtual int Opcode() const; 11.26 + virtual const Type* Value(PhaseTransform* phase) const; 11.27 +}; 11.28 + 11.29 +//---------- CountLeadingZerosLNode -------------------------------------------- 11.30 +// Count leading zeros (0-bit count starting from MSB) of a long. 11.31 +class CountLeadingZerosLNode : public CountBitsNode { 11.32 +public: 11.33 + CountLeadingZerosLNode(Node* in1) : CountBitsNode(in1) {} 11.34 + virtual int Opcode() const; 11.35 + virtual const Type* Value(PhaseTransform* phase) const; 11.36 +}; 11.37 + 11.38 +//---------- CountTrailingZerosINode ------------------------------------------- 11.39 +// Count trailing zeros (0-bit count starting from LSB) of an integer. 
11.40 +class CountTrailingZerosINode : public CountBitsNode { 11.41 +public: 11.42 + CountTrailingZerosINode(Node* in1) : CountBitsNode(in1) {} 11.43 + virtual int Opcode() const; 11.44 + virtual const Type* Value(PhaseTransform* phase) const; 11.45 +}; 11.46 + 11.47 +//---------- CountTrailingZerosLNode ------------------------------------------- 11.48 +// Count trailing zeros (0-bit count starting from LSB) of a long. 11.49 +class CountTrailingZerosLNode : public CountBitsNode { 11.50 +public: 11.51 + CountTrailingZerosLNode(Node* in1) : CountBitsNode(in1) {} 11.52 + virtual int Opcode() const; 11.53 + virtual const Type* Value(PhaseTransform* phase) const; 11.54 +}; 11.55 + 11.56 +//---------- PopCountINode ----------------------------------------------------- 11.57 +// Population count (bit count) of an integer. 11.58 +class PopCountINode : public CountBitsNode { 11.59 +public: 11.60 + PopCountINode(Node* in1) : CountBitsNode(in1) {} 11.61 + virtual int Opcode() const; 11.62 +}; 11.63 + 11.64 //---------- PopCountLNode ----------------------------------------------------- 11.65 // Population count (bit count) of a long. 11.66 -class PopCountLNode : public Node { 11.67 +class PopCountLNode : public CountBitsNode { 11.68 public: 11.69 - PopCountLNode(Node* in1) : Node(0, in1) {} 11.70 + PopCountLNode(Node* in1) : CountBitsNode(in1) {} 11.71 virtual int Opcode() const; 11.72 - const Type* bottom_type() const { return TypeInt::INT; } 11.73 - virtual uint ideal_reg() const { return Op_RegI; } 11.74 };
12.1 --- a/src/share/vm/opto/library_call.cpp Tue May 05 11:02:10 2009 -0700
12.2 +++ b/src/share/vm/opto/library_call.cpp Wed May 06 00:27:52 2009 -0700
12.3 @@ -222,6 +222,8 @@
12.4    bool inline_unsafe_CAS(BasicType type);
12.5    bool inline_unsafe_ordered_store(BasicType type);
12.6    bool inline_fp_conversions(vmIntrinsics::ID id);
12.7 +  bool inline_numberOfLeadingZeros(vmIntrinsics::ID id);
12.8 +  bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
12.9    bool inline_bitCount(vmIntrinsics::ID id);
12.10    bool inline_reverseBytes(vmIntrinsics::ID id);
12.11  };
12.12 @@ -630,6 +632,14 @@
12.13    case vmIntrinsics::_longBitsToDouble:
12.14      return inline_fp_conversions(intrinsic_id());
12.15
12.16 +  case vmIntrinsics::_numberOfLeadingZeros_i:
12.17 +  case vmIntrinsics::_numberOfLeadingZeros_l:
12.18 +    return inline_numberOfLeadingZeros(intrinsic_id());
12.19 +
12.20 +  case vmIntrinsics::_numberOfTrailingZeros_i:
12.21 +  case vmIntrinsics::_numberOfTrailingZeros_l:
12.22 +    return inline_numberOfTrailingZeros(intrinsic_id());
12.23 +
12.24    case vmIntrinsics::_bitCount_i:
12.25    case vmIntrinsics::_bitCount_l:
12.26      return inline_bitCount(intrinsic_id());
12.27 @@ -1844,6 +1854,48 @@
12.28    }
12.29  }
12.30
12.31 +//-------------------inline_numberOfLeadingZeros_int/long-----------------------
12.32 +// inline int Integer.numberOfLeadingZeros(int)
12.33 +// inline int Long.numberOfLeadingZeros(long)
12.34 +bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) {
12.35 +  assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros");
12.36 +  if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
12.37 +  if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;
12.38 +  _sp += arg_size(); // restore stack pointer
12.39 +  switch (id) {
12.40 +  case vmIntrinsics::_numberOfLeadingZeros_i:
12.41 +    push(_gvn.transform(new (C, 2) CountLeadingZerosINode(pop())));
12.42 +    break;
12.43 +  case vmIntrinsics::_numberOfLeadingZeros_l:
12.44 +    push(_gvn.transform(new (C, 2) CountLeadingZerosLNode(pop_pair())));
12.45 +    break;
12.46 +  default:
12.47 +    ShouldNotReachHere();
12.48 +  }
12.49 +  return true;
12.50 +}
12.51 +
12.52 +//-------------------inline_numberOfTrailingZeros_int/long----------------------
12.53 +// inline int Integer.numberOfTrailingZeros(int)
12.54 +// inline int Long.numberOfTrailingZeros(long)
12.55 +bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) {
12.56 +  assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros");
12.57 +  if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
12.58 +  if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
12.59 +  _sp += arg_size(); // restore stack pointer
12.60 +  switch (id) {
12.61 +  case vmIntrinsics::_numberOfTrailingZeros_i:
12.62 +    push(_gvn.transform(new (C, 2) CountTrailingZerosINode(pop())));
12.63 +    break;
12.64 +  case vmIntrinsics::_numberOfTrailingZeros_l:
12.65 +    push(_gvn.transform(new (C, 2) CountTrailingZerosLNode(pop_pair())));
12.66 +    break;
12.67 +  default:
12.68 +    ShouldNotReachHere();
12.69 +  }
12.70 +  return true;
12.71 +}
12.72 +
12.73  //----------------------------inline_bitCount_int/long-----------------------
12.74  // inline int Integer.bitCount(int)
12.75  // inline int Long.bitCount(long)
13.1 --- a/src/share/vm/opto/matcher.hpp Tue May 05 11:02:10 2009 -0700
13.2 +++ b/src/share/vm/opto/matcher.hpp Wed May 06 00:27:52 2009 -0700
13.3 @@ -225,10 +225,16 @@
13.4    OptoRegPair *_parm_regs; // Array of machine registers per argument
13.5    RegMask *_calling_convention_mask; // Array of RegMasks per argument
13.6
13.7 -  // Does matcher support this ideal node?
13.8 +  // Does matcher have a match rule for this ideal node?
13.9    static const bool has_match_rule(int opcode);
13.10    static const bool _hasMatchRule[_last_opcode];
13.11
13.12 +  // Does matcher have a match rule for this ideal node and is the
13.13 +  // predicate (if there is one) true?
13.14 +  // NOTE: If this function is used more commonly in the future, ADLC
13.15 +  // should generate this one.
13.16 +  static const bool match_rule_supported(int opcode);
13.17 +
13.18    // Used to determine if we have fast l2f conversion
13.19    // USII has it, USIII doesn't
13.20    static const bool convL2FSupported(void);
14.1 --- a/src/share/vm/runtime/globals.hpp Tue May 05 11:02:10 2009 -0700
14.2 +++ b/src/share/vm/runtime/globals.hpp Wed May 06 00:27:52 2009 -0700
14.3 @@ -2185,6 +2185,9 @@
14.4    diagnostic(bool, PrintIntrinsics, false, \
14.5            "prints attempted and successful inlining of intrinsics") \
14.6    \
14.7 +  product(bool, UseCountLeadingZerosInstruction, false, \
14.8 +          "Use count leading zeros instruction") \
14.9 +  \
14.10    product(bool, UsePopCountInstruction, false, \
14.11            "Use population count instruction") \
14.12    \
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
15.2 +++ b/test/compiler/6823354/Test6823354.java Wed May 06 00:27:52 2009 -0700
15.3 @@ -0,0 +1,266 @@
15.4 +/*
15.5 + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
15.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
15.7 + *
15.8 + * This code is free software; you can redistribute it and/or modify it
15.9 + * under the terms of the GNU General Public License version 2 only, as
15.10 + * published by the Free Software Foundation.
15.11 + *
15.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
15.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15.15 + * version 2 for more details (a copy is included in the LICENSE file that
15.16 + * accompanied this code).
15.17 + *
15.18 + * You should have received a copy of the GNU General Public License version
15.19 + * 2 along with this work; if not, write to the Free Software Foundation,
15.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
15.21 + *
15.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
15.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
15.24 + * have any questions.
15.25 + */
15.26 +
15.27 +/**
15.28 + * @test
15.29 + * @bug 6823354
15.30 + * @summary These methods can be instrinsified by using bit scan, bit test, and population count instructions.
15.31 + *
15.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6823354.lzcomp,Test6823354.tzcomp,.dolzcomp,.dotzcomp Test6823354
15.33 + */
15.34 +
15.35 +import java.net.URLClassLoader;
15.36 +
15.37 +public class Test6823354 {
15.38 +    // Arrays of corner case values.
15.39 +    static final int[] ia = new int[] { 0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE };
15.40 +    static final long[] la = new long[] { 0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE };
15.41 +
15.42 +    public static void main(String[] args) throws Exception {
15.43 +        // Load the classes and the methods.
15.44 +        Integer.numberOfLeadingZeros(0);
15.45 +        Integer.numberOfTrailingZeros(0);
15.46 +        Long.numberOfLeadingZeros(0);
15.47 +        Long.numberOfTrailingZeros(0);
15.48 +
15.49 +        lz();
15.50 +        tz();
15.51 +    }
15.52 +
15.53 +    static void lz() throws Exception {
15.54 +        // int
15.55 +
15.56 +        // Test corner cases.
15.57 +        for (int i = 0; i < ia.length; i++) {
15.58 +            int x = ia[i];
15.59 +            check(x, lzcomp(x), lzint(x));
15.60 +        }
15.61 +
15.62 +        // Test all possible return values.
15.63 +        for (int i = 0; i < Integer.SIZE; i++) {
15.64 +            int x = 1 << i;
15.65 +            check(x, lzcomp(x), lzint(x));
15.66 +        }
15.67 +
15.68 +        String classname = "Test6823354$lzconI";
15.69 +
15.70 +        // Test Ideal optimizations (constant values).
15.71 +        for (int i = 0; i < ia.length; i++) {
15.72 +            testclass(classname, ia[i]);
15.73 +        }
15.74 +
15.75 +        // Test Ideal optimizations (constant values).
15.76 +        for (int i = 0; i < Integer.SIZE; i++) {
15.77 +            int x = 1 << i;
15.78 +            testclass(classname, x);
15.79 +        }
15.80 +
15.81 +
15.82 +        // long
15.83 +
15.84 +        // Test corner cases.
15.85 +        for (int i = 0; i < ia.length; i++) {
15.86 +            long x = la[i];
15.87 +            check(x, lzcomp(x), lzint(x));
15.88 +        }
15.89 +
15.90 +        // Test all possible return values.
15.91 +        for (int i = 0; i < Long.SIZE; i++) {
15.92 +            long x = 1L << i;
15.93 +            check(x, lzcomp(x), lzint(x));
15.94 +        }
15.95 +
15.96 +        classname = "Test6823354$lzconL";
15.97 +
15.98 +        // Test Ideal optimizations (constant values).
15.99 +        for (int i = 0; i < la.length; i++) {
15.100 +            testclass(classname, la[i]);
15.101 +        }
15.102 +
15.103 +        // Test Ideal optimizations (constant values).
15.104 +        for (int i = 0; i < Long.SIZE; i++) {
15.105 +            long x = 1L << i;
15.106 +            testclass(classname, x);
15.107 +        }
15.108 +    }
15.109 +
15.110 +    static void tz() throws Exception {
15.111 +        // int
15.112 +
15.113 +        // Test corner cases.
15.114 +        for (int i = 0; i < ia.length; i++) {
15.115 +            int x = ia[i];
15.116 +            check(x, tzcomp(x), tzint(x));
15.117 +        }
15.118 +
15.119 +        // Test all possible return values.
15.120 +        for (int i = 0; i < Integer.SIZE; i++) {
15.121 +            int x = 1 << i;
15.122 +            check(x, tzcomp(x), tzint(x));
15.123 +        }
15.124 +
15.125 +        String classname = "Test6823354$tzconI";
15.126 +
15.127 +        // Test Ideal optimizations (constant values).
15.128 +        for (int i = 0; i < ia.length; i++) {
15.129 +            testclass(classname, ia[i]);
15.130 +        }
15.131 +
15.132 +        // Test Ideal optimizations (constant values).
15.133 +        for (int i = 0; i < Integer.SIZE; i++) {
15.134 +            int x = 1 << i;
15.135 +            testclass(classname, x);
15.136 +        }
15.137 +
15.138 +
15.139 +        // long
15.140 +
15.141 +        // Test corner cases.
15.142 +        for (int i = 0; i < la.length; i++) {
15.143 +            long x = la[i];
15.144 +            check(x, tzcomp(x), tzint(x));
15.145 +        }
15.146 +
15.147 +        // Test all possible return values.
15.148 +        for (int i = 0; i < Long.SIZE; i++) {
15.149 +            long x = 1L << i;
15.150 +            check(x, tzcomp(x), tzint(x));
15.151 +        }
15.152 +
15.153 +        classname = "Test6823354$tzconL";
15.154 +
15.155 +        // Test Ideal optimizations (constant values).
15.156 +        for (int i = 0; i < la.length; i++) {
15.157 +            testclass(classname, la[i]);
15.158 +        }
15.159 +
15.160 +        // Test Ideal optimizations (constant values).
15.161 +        for (int i = 0; i < Long.SIZE; i++) {
15.162 +            long x = 1L << i;
15.163 +            testclass(classname, x);
15.164 +        }
15.165 +    }
15.166 +
15.167 +    static void check(int value, int result, int expected) {
15.168 +        //System.out.println(value + ": " + result + ", " + expected);
15.169 +        if (result != expected)
15.170 +            throw new InternalError(value + " failed: " + result + " != " + expected);
15.171 +    }
15.172 +
15.173 +    static void check(long value, long result, long expected) {
15.174 +        //System.out.println(value + ": " + result + ", " + expected);
15.175 +        if (result != expected)
15.176 +            throw new InternalError(value + " failed: " + result + " != " + expected);
15.177 +    }
15.178 +
15.179 +    static int lzint( int i) { return Integer.numberOfLeadingZeros(i); }
15.180 +    static int lzcomp(int i) { return Integer.numberOfLeadingZeros(i); }
15.181 +
15.182 +    static int lzint( long l) { return Long.numberOfLeadingZeros(l); }
15.183 +    static int lzcomp(long l) { return Long.numberOfLeadingZeros(l); }
15.184 +
15.185 +    static int tzint( int i) { return Integer.numberOfTrailingZeros(i); }
15.186 +    static int tzcomp(int i) { return Integer.numberOfTrailingZeros(i); }
15.187 +
15.188 +    static int tzint( long l) { return Long.numberOfTrailingZeros(l); }
15.189 +    static int tzcomp(long l) { return Long.numberOfTrailingZeros(l); }
15.190 +
15.191 +    static void testclass(String classname, int x) throws Exception {
15.192 +        System.setProperty("value", "" + x);
15.193 +        loadandrunclass(classname);
15.194 +    }
15.195 +
15.196 +    static void testclass(String classname, long x) throws Exception {
15.197 +        System.setProperty("value", "" + x);
15.198 +        loadandrunclass(classname);
15.199 +    }
15.200 +
15.201 +    static void loadandrunclass(String classname) throws Exception {
15.202 +        Class cl = Class.forName(classname);
15.203 +        URLClassLoader apploader = (URLClassLoader) cl.getClassLoader();
15.204 +        ClassLoader loader = new URLClassLoader(apploader.getURLs(), apploader.getParent());
15.205 +        Class c = loader.loadClass(classname);
15.206 +        Runnable r = (Runnable) c.newInstance();
15.207 +        r.run();
15.208 +    }
15.209 +
15.210 +    public static class lzconI implements Runnable {
15.211 +        static final int VALUE;
15.212 +
15.213 +        static {
15.214 +            int value = 0;
15.215 +            try {
15.216 +                value = Integer.decode(System.getProperty("value"));
15.217 +            } catch (Throwable e) {}
15.218 +            VALUE = value;
15.219 +        }
15.220 +
15.221 +        public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
15.222 +        static int dolzcomp() { return lzcomp(VALUE); }
15.223 +    }
15.224 +
15.225 +    public static class lzconL implements Runnable {
15.226 +        static final long VALUE;
15.227 +
15.228 +        static {
15.229 +            long value = 0;
15.230 +            try {
15.231 +                value = Long.decode(System.getProperty("value"));
15.232 +            } catch (Throwable e) {}
15.233 +            VALUE = value;
15.234 +        }
15.235 +
15.236 +        public void run() { check(VALUE, lzint(VALUE), dolzcomp()); }
15.237 +        static int dolzcomp() { return lzcomp(VALUE); }
15.238 +    }
15.239 +
15.240 +    public static class tzconI implements Runnable {
15.241 +        static final int VALUE;
15.242 +
15.243 +        static {
15.244 +            int value = 0;
15.245 +            try {
15.246 +                value = Integer.decode(System.getProperty("value"));
15.247 +            } catch (Throwable e) {}
15.248 +            VALUE = value;
15.249 +        }
15.250 +
15.251 +        public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
15.252 +        static int dotzcomp() { return tzcomp(VALUE); }
15.253 +    }
15.254 +
15.255 +    public static class tzconL implements Runnable {
15.256 +        static final long VALUE;
15.257 +
15.258 +        static {
15.259 +            long value = 0;
15.260 +            try {
15.261 +                value = Long.decode(System.getProperty("value"));
15.262 +            } catch (Throwable e) {}
15.263 +            VALUE = value;
15.264 +        }
15.265 +
15.266 +        public void run() { check(VALUE, tzint(VALUE), dotzcomp()); }
15.267 +        static int dotzcomp() { return tzcomp(VALUE); }
15.268 +    }
15.269 +}