Wed, 12 Mar 2014 11:24:26 -0700
8031321: Support Intel bit manipulation instructions
Summary: Add support for BMI1 instructions
Reviewed-by: kvn, roland
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Jan 27 13:14:53 2014 +0100
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Mar 12 11:24:26 2014 -0700
@@ -1089,6 +1089,21 @@
   emit_arith(0x23, 0xC0, dst, src);
 }

+void Assembler::andnl(Register dst, Register src1, Register src2) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::andnl(Register dst, Register src1, Address src2) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_operand(dst, src2);
+}
+
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
@@ -1110,6 +1125,51 @@
   emit_int8((unsigned char)(0xC8 | encode));
 }

+void Assembler::blsil(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsil(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rbx, src);
+}
+
+void Assembler::blsmskl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsmskl(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rdx, src);
+}
+
+void Assembler::blsrl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsrl(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rcx, src);
+}
+
 void Assembler::call(Label& L, relocInfo::relocType rtype) {
   // suspect disp32 is always good
   int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
@@ -2878,6 +2938,24 @@
   emit_operand(dst, src);
 }

+void Assembler::tzcntl(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+  emit_int8((unsigned char)0xF3);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::tzcntq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
+  emit_int8((unsigned char)0xF3);
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xBC);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
@@ -4837,6 +4915,21 @@
   emit_arith(0x23, 0xC0, dst, src);
 }

+void Assembler::andnq(Register dst, Register src1, Register src2) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::andnq(Register dst, Register src1, Address src2) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(dst, src1, src2);
+  emit_int8((unsigned char)0xF2);
+  emit_operand(dst, src2);
+}
+
 void Assembler::bsfq(Register dst, Register src) {
   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   emit_int8(0x0F);
@@ -4858,6 +4951,51 @@
   emit_int8((unsigned char)(0xC8 | encode));
 }

+void Assembler::blsiq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsiq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rbx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rbx, src);
+}
+
+void Assembler::blsmskq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsmskq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rdx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rdx, src);
+}
+
+void Assembler::blsrq(Register dst, Register src) {
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::blsrq(Register dst, Address src) {
+  InstructionMark im(this);
+  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
+  vex_prefix_0F38_q(rcx, dst, src);
+  emit_int8((unsigned char)0xF3);
+  emit_operand(rcx, src);
+}
+
 void Assembler::cdqq() {
   prefix(REX_W);
   emit_int8((unsigned char)0x99);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Jan 27 13:14:53 2014 +0100
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Mar 12 11:24:26 2014 -0700
@@ -590,10 +590,35 @@
     vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   }

+  void vex_prefix_0F38(Register dst, Register nds, Address src) {
+    bool vex_w = false;
+    bool vector256 = false;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
+
+  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
+    bool vex_w = true;
+    bool vector256 = false;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
   int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                             VexSimdPrefix pre, VexOpcode opc,
                             bool vex_w, bool vector256);

+  int vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
+    bool vex_w = false;
+    bool vector256 = false;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
+  int vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
+    bool vex_w = true;
+    bool vector256 = false;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  }
   int vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                             VexSimdPrefix pre, bool vector256 = false,
                             VexOpcode opc = VEX_OPCODE_0F) {
@@ -897,6 +922,27 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);

+  // BMI instructions
+  void andnl(Register dst, Register src1, Register src2);
+  void andnl(Register dst, Register src1, Address src2);
+  void andnq(Register dst, Register src1, Register src2);
+  void andnq(Register dst, Register src1, Address src2);
+
+  void blsil(Register dst, Register src);
+  void blsil(Register dst, Address src);
+  void blsiq(Register dst, Register src);
+  void blsiq(Register dst, Address src);
+
+  void blsmskl(Register dst, Register src);
+  void blsmskl(Register dst, Address src);
+  void blsmskq(Register dst, Register src);
+  void blsmskq(Register dst, Address src);
+
+  void blsrl(Register dst, Register src);
+  void blsrl(Register dst, Address src);
+  void blsrq(Register dst, Register src);
+  void blsrq(Register dst, Address src);
+
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);

@@ -1574,6 +1620,9 @@
   void testq(Register dst, int32_t imm32);
   void testq(Register dst, Register src);

+  // BMI - count trailing zeros
+  void tzcntl(Register dst, Register src);
+  void tzcntq(Register dst, Register src);

   // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
   void ucomisd(XMMRegister dst, Address src);
3.1 --- a/src/cpu/x86/vm/globals_x86.hpp Mon Jan 27 13:14:53 2014 +0100 3.2 +++ b/src/cpu/x86/vm/globals_x86.hpp Wed Mar 12 11:24:26 2014 -0700 3.3 @@ -134,5 +134,11 @@ 3.4 \ 3.5 product(bool, UseCountLeadingZerosInstruction, false, \ 3.6 "Use count leading zeros instruction") \ 3.7 + \ 3.8 + product(bool, UseCountTrailingZerosInstruction, false, \ 3.9 + "Use count trailing zeros instruction") \ 3.10 + \ 3.11 + product(bool, UseBMI1Instructions, false, \ 3.12 + "Use BMI instructions") 3.13 3.14 #endif // CPU_X86_VM_GLOBALS_X86_HPP
4.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Mon Jan 27 13:14:53 2014 +0100 4.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed Mar 12 11:24:26 2014 -0700 4.3 @@ -429,7 +429,7 @@ 4.4 } 4.5 4.6 char buf[256]; 4.7 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4.8 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 4.9 cores_per_cpu(), threads_per_core(), 4.10 cpu_family(), _model, _stepping, 4.11 (supports_cmov() ? ", cmov" : ""), 4.12 @@ -455,7 +455,9 @@ 4.13 (supports_ht() ? ", ht": ""), 4.14 (supports_tsc() ? ", tsc": ""), 4.15 (supports_tscinv_bit() ? ", tscinvbit": ""), 4.16 - (supports_tscinv() ? ", tscinv": "")); 4.17 + (supports_tscinv() ? ", tscinv": ""), 4.18 + (supports_bmi1() ? ", bmi1" : ""), 4.19 + (supports_bmi2() ? ", bmi2" : "")); 4.20 _features_str = strdup(buf); 4.21 4.22 // UseSSE is set to the smaller of what hardware supports and what 4.23 @@ -600,13 +602,6 @@ 4.24 } 4.25 } 4.26 4.27 - // Use count leading zeros count instruction if available. 4.28 - if (supports_lzcnt()) { 4.29 - if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 4.30 - UseCountLeadingZerosInstruction = true; 4.31 - } 4.32 - } 4.33 - 4.34 // some defaults for AMD family 15h 4.35 if ( cpu_family() == 0x15 ) { 4.36 // On family 15h processors default is no sw prefetch 4.37 @@ -692,6 +687,35 @@ 4.38 } 4.39 #endif // COMPILER2 4.40 4.41 + // Use count leading zeros count instruction if available. 
4.42 + if (supports_lzcnt()) { 4.43 + if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 4.44 + UseCountLeadingZerosInstruction = true; 4.45 + } 4.46 + } else if (UseCountLeadingZerosInstruction) { 4.47 + warning("lzcnt instruction is not available on this CPU"); 4.48 + FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 4.49 + } 4.50 + 4.51 + if (supports_bmi1()) { 4.52 + if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 4.53 + UseBMI1Instructions = true; 4.54 + } 4.55 + } else if (UseBMI1Instructions) { 4.56 + warning("BMI1 instructions are not available on this CPU"); 4.57 + FLAG_SET_DEFAULT(UseBMI1Instructions, false); 4.58 + } 4.59 + 4.60 + // Use count trailing zeros instruction if available 4.61 + if (supports_bmi1()) { 4.62 + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 4.63 + UseCountTrailingZerosInstruction = UseBMI1Instructions; 4.64 + } 4.65 + } else if (UseCountTrailingZerosInstruction) { 4.66 + warning("tzcnt instruction is not available on this CPU"); 4.67 + FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 4.68 + } 4.69 + 4.70 // Use population count instruction if available. 4.71 if (supports_popcnt()) { 4.72 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
5.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Mon Jan 27 13:14:53 2014 +0100 5.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Wed Mar 12 11:24:26 2014 -0700 5.3 @@ -141,7 +141,8 @@ 5.4 struct { 5.5 uint32_t LahfSahf : 1, 5.6 CmpLegacy : 1, 5.7 - : 4, 5.8 + : 3, 5.9 + lzcnt_intel : 1, 5.10 lzcnt : 1, 5.11 sse4a : 1, 5.12 misalignsse : 1, 5.13 @@ -251,7 +252,9 @@ 5.14 CPU_AVX2 = (1 << 18), 5.15 CPU_AES = (1 << 19), 5.16 CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions 5.17 - CPU_CLMUL = (1 << 21) // carryless multiply for CRC 5.18 + CPU_CLMUL = (1 << 21), // carryless multiply for CRC 5.19 + CPU_BMI1 = (1 << 22), 5.20 + CPU_BMI2 = (1 << 23) 5.21 } cpuFeatureFlags; 5.22 5.23 enum { 5.24 @@ -423,6 +426,8 @@ 5.25 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 5.26 result |= CPU_AVX2; 5.27 } 5.28 + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) 5.29 + result |= CPU_BMI1; 5.30 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) 5.31 result |= CPU_TSC; 5.32 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) 5.33 @@ -444,6 +449,13 @@ 5.34 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 5.35 result |= CPU_SSE4A; 5.36 } 5.37 + // Intel features. 5.38 + if(is_intel()) { 5.39 + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) 5.40 + result |= CPU_BMI2; 5.41 + if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0) 5.42 + result |= CPU_LZCNT; 5.43 + } 5.44 5.45 return result; 5.46 } 5.47 @@ -560,7 +572,8 @@ 5.48 static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } 5.49 static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } 5.50 static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; } 5.51 - 5.52 + static bool supports_bmi1() { return (_cpuFeatures & CPU_BMI1) != 0; } 5.53 + static bool supports_bmi2() { return (_cpuFeatures & CPU_BMI2) != 0; } 5.54 // Intel features 5.55 static bool is_intel_family_core() { return is_intel() && 5.56 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/src/cpu/x86/vm/x86_32.ad	Mon Jan 27 13:14:53 2014 +0100
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Mar 12 11:24:26 2014 -0700
@@ -5155,6 +5155,19 @@
 %}

 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosI src));
+  effect(KILL cr);
+
+  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
+  ins_encode %{
+    __ tzcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);

@@ -5174,6 +5187,30 @@
 %}

 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosL src));
+  effect(TEMP dst, KILL cr);
+
+  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
+            "JNC done\n\t"
+            "TZCNT $dst, $src.hi\n\t"
+            "ADD $dst, 32\n"
+            "done:" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    Label done;
+    __ tzcntl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
+    __ addl(Rdst, BitsPerInt);
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosL src));
   effect(TEMP dst, KILL cr);

@@ -8017,6 +8054,123 @@
   ins_pipe( ialu_mem_imm );
 %}

+// BMI1 instructions
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "ANDNL $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "ANDNL $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSIL $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSIL $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSMSKL $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSMSKL $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "BLSRL $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "BLSRL $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Or Instructions
 // Or Register with Register
 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
@@ -8639,6 +8793,210 @@
   ins_pipe( ialu_reg_long_mem );
 %}

+// BMI1 instructions
+instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
+            "ANDNL $dst.hi, $src1.hi, $src2.hi"
+         %}
+
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    __ andnl(Rdst, Rsrc1, Rsrc2);
+    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
+  %}
+  ins_pipe(ialu_reg_reg_long);
+%}
+
+instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
+            "ANDNL $dst.hi, $src1.hi, $src2+4"
+         %}
+
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
+
+    __ andnl(Rdst, Rsrc1, $src2$$Address);
+    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL $dst.hi, 0\n\t"
+            "BLSIL $dst.lo, $src.lo\n\t"
+            "JNZ done\n\t"
+            "BLSIL $dst.hi, $src.hi\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsil(Rdst, Rsrc);
+    __ jccb(Assembler::notZero, done);
+    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
+  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL $dst.hi, 0\n\t"
+            "BLSIL $dst.lo, $src\n\t"
+            "JNZ done\n\t"
+            "BLSIL $dst.hi, $src+4\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsil(Rdst, $src$$Address);
+    __ jccb(Assembler::notZero, done);
+    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL $dst.hi, 0\n\t"
+            "BLSMSKL $dst.lo, $src.lo\n\t"
+            "JNC done\n\t"
+            "BLSMSKL $dst.hi, $src.hi\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsmskl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL $dst.hi, 0\n\t"
+            "BLSMSKL $dst.lo, $src\n\t"
+            "JNC done\n\t"
+            "BLSMSKL $dst.hi, $src+4\n"
+            "done:"
+         %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+
+    __ movl(HIGH_FROM_LOW(Rdst), 0);
+    __ blsmskl(Rdst, $src$$Address);
+    __ jccb(Assembler::carryClear, done);
+    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  format %{ "MOVL $dst.hi, $src.hi\n\t"
+            "BLSRL $dst.lo, $src.lo\n\t"
+            "JNC done\n\t"
+            "BLSRL $dst.hi, $src.hi\n"
+            "done:"
+  %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Register Rsrc = $src$$Register;
+    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ blsrl(Rdst, Rsrc);
+    __ jccb(Assembler::carryClear, done);
+    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
+%{
+  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr, TEMP dst);
+
+  ins_cost(125);
+  format %{ "MOVL $dst.hi, $src+4\n\t"
+            "BLSRL $dst.lo, $src\n\t"
+            "JNC done\n\t"
+            "BLSRL $dst.hi, $src+4\n"
+            "done:"
+  %}
+
+  ins_encode %{
+    Label done;
+    Register Rdst = $dst$$Register;
+    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
+    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ blsrl(Rdst, $src$$Address);
+    __ jccb(Assembler::carryClear, done);
+    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
+    __ bind(done);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Or Long Register with Register
 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (OrL dst src));
--- a/src/cpu/x86/vm/x86_64.ad	Mon Jan 27 13:14:53 2014 +0100
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Mar 12 11:24:26 2014 -0700
@@ -6014,6 +6014,19 @@
 %}

 instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosI src));
+  effect(KILL cr);
+
+  format %{ "tzcntl $dst, $src\t# count trailing zeros (int)" %}
+  ins_encode %{
+    __ tzcntl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);

@@ -6033,6 +6046,19 @@
 %}

 instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
+  predicate(UseCountTrailingZerosInstruction);
+  match(Set dst (CountTrailingZerosL src));
+  effect(KILL cr);
+
+  format %{ "tzcntq $dst, $src\t# count trailing zeros (long)" %}
+  ins_encode %{
+    __ tzcntq($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
+  predicate(!UseCountTrailingZerosInstruction);
   match(Set dst (CountTrailingZerosL src));
   effect(KILL cr);

@@ -8612,6 +8638,122 @@
   ins_pipe(ialu_mem_imm);
 %}

+// BMI1 instructions
+instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "andnl $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndI (XorI src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "andnl $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero src) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsil $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
+  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsil $dst, $src" %}
+
+  ins_encode %{
+    __ blsil($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsmskl $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (XorI (AddI src minus_1) src));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsmskl $dst, $src" %}
+
+  ins_encode %{
+    __ blsmskl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI src minus_1) src) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "blsrl $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Register);
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
+%{
+  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "blsrl $dst, $src" %}
+
+  ins_encode %{
+    __ blsrl($dst$$Register, $src$$Address);
+  %}
+
+  ins_pipe(ialu_reg_mem);
+%}
+
 // Or Instructions
 // Or Register with Register
 instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
@@ -8843,6 +8985,122 @@
   ins_pipe(ialu_mem_imm);
 %}

+// BMI1 instructions
+instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  ins_cost(125);
+  format %{ "andnq $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
+  %}
+  ins_pipe(ialu_reg_mem);
+%}
+
+instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
+  match(Set dst (AndL (XorL src1 minus_1) src2));
+  predicate(UseBMI1Instructions);
+  effect(KILL cr);
+
+  format %{ "andnq $dst, $src1, $src2" %}
+
+  ins_encode %{
+    __ andnq($dst$$Register, $src1$$Register,
$src2$$Register); 7.194 + %} 7.195 + ins_pipe(ialu_reg_mem); 7.196 +%} 7.197 + 7.198 +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{ 7.199 + match(Set dst (AndL (SubL imm_zero src) src)); 7.200 + predicate(UseBMI1Instructions); 7.201 + effect(KILL cr); 7.202 + 7.203 + format %{ "blsiq $dst, $src" %} 7.204 + 7.205 + ins_encode %{ 7.206 + __ blsiq($dst$$Register, $src$$Register); 7.207 + %} 7.208 + ins_pipe(ialu_reg); 7.209 +%} 7.210 + 7.211 +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{ 7.212 + match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 7.213 + predicate(UseBMI1Instructions); 7.214 + effect(KILL cr); 7.215 + 7.216 + ins_cost(125); 7.217 + format %{ "blsiq $dst, $src" %} 7.218 + 7.219 + ins_encode %{ 7.220 + __ blsiq($dst$$Register, $src$$Address); 7.221 + %} 7.222 + ins_pipe(ialu_reg_mem); 7.223 +%} 7.224 + 7.225 +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) 7.226 +%{ 7.227 + match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); 7.228 + predicate(UseBMI1Instructions); 7.229 + effect(KILL cr); 7.230 + 7.231 + ins_cost(125); 7.232 + format %{ "blsmskq $dst, $src" %} 7.233 + 7.234 + ins_encode %{ 7.235 + __ blsmskq($dst$$Register, $src$$Address); 7.236 + %} 7.237 + ins_pipe(ialu_reg_mem); 7.238 +%} 7.239 + 7.240 +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) 7.241 +%{ 7.242 + match(Set dst (XorL (AddL src minus_1) src)); 7.243 + predicate(UseBMI1Instructions); 7.244 + effect(KILL cr); 7.245 + 7.246 + format %{ "blsmskq $dst, $src" %} 7.247 + 7.248 + ins_encode %{ 7.249 + __ blsmskq($dst$$Register, $src$$Register); 7.250 + %} 7.251 + 7.252 + ins_pipe(ialu_reg); 7.253 +%} 7.254 + 7.255 +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr) 7.256 +%{ 7.257 + match(Set dst (AndL (AddL src minus_1) src) ); 7.258 + predicate(UseBMI1Instructions); 7.259 + effect(KILL cr); 7.260 + 7.261 + 
format %{ "blsrq $dst, $src" %} 7.262 + 7.263 + ins_encode %{ 7.264 + __ blsrq($dst$$Register, $src$$Register); 7.265 + %} 7.266 + 7.267 + ins_pipe(ialu_reg); 7.268 +%} 7.269 + 7.270 +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr) 7.271 +%{ 7.272 + match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); 7.273 + predicate(UseBMI1Instructions); 7.274 + effect(KILL cr); 7.275 + 7.276 + ins_cost(125); 7.277 + format %{ "blsrq $dst, $src" %} 7.278 + 7.279 + ins_encode %{ 7.280 + __ blsrq($dst$$Register, $src$$Address); 7.281 + %} 7.282 + 7.283 + ins_pipe(ialu_reg); 7.284 +%} 7.285 + 7.286 // Or Instructions 7.287 // Or Register with Register 7.288 instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
8.1 --- a/src/share/vm/adlc/formssel.cpp Mon Jan 27 13:14:53 2014 +0100
8.2 +++ b/src/share/vm/adlc/formssel.cpp Wed Mar 12 11:24:26 2014 -0700
8.3 @@ -649,6 +649,7 @@
8.4    int USE_of_memory = 0;
8.5    int DEF_of_memory = 0;
8.6    const char* last_memory_DEF = NULL; // to test DEF/USE pairing in asserts
8.7 +  const char* last_memory_USE = NULL; // name of the previous memory USE, to spot back-to-back repeats
8.8    Component *unique = NULL;
8.9    Component *comp = NULL;
8.10   ComponentList &components = (ComponentList &)_components;
8.11 @@ -670,7 +671,12 @@
8.12            assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name");
8.13            last_memory_DEF = NULL;
8.14          }
8.15 -        USE_of_memory++;
8.16 +        // A memory operand that is USEd twice in a row under the same name (as in the
8.17 +        // BMI1 rules) counts as a single USE; the NULL test short-circuits before strcmp.
8.18 +        if (last_memory_USE == NULL || strcmp(comp->_name, last_memory_USE) != 0) {
8.19 +          USE_of_memory++;
8.20 +        }
8.21 +        last_memory_USE = comp->_name;
8.22 +
8.23          if (DEF_of_memory == 0) // defs take precedence
8.24            unique = comp;
8.25        } else {
9.1 --- a/src/share/vm/opto/matcher.cpp Mon Jan 27 13:14:53 2014 +0100
9.2 +++ b/src/share/vm/opto/matcher.cpp Wed Mar 12 11:24:26 2014 -0700
9.3 @@ -1908,6 +1908,105 @@
9.4    return OptoReg::as_OptoReg(regs.first());
9.5  }
9.6
9.7 +// BMI1 pattern detection: identifies sub-graphs in which one 'load' node
9.8 +// is input to two different nodes, in a shape that can be matched by a
9.9 +// single BMI1 instruction with a memory operand (blsi, blsr, blsmsk).
9.10 +// Example: b = -a[i] & a[i] can be matched to "blsi r32, m32".
9.11 +// Its graph is (AndI (SubI Con0 LoadI*) LoadI*), where LoadI*
9.12 +// refers to one and the same node.
9.13 +#ifdef X86
9.14 +// Matches the generic fused-operation shape (op1 (op2 Con{ConType} mop) mop), where both 'mop' edges are the same node.
9.15 +// This is a temporary solution until we make DAGs expressible in ADL.
9.16 +template<typename ConType>
9.17 +class FusedPatternMatcher {
9.18 +  Node* _op1_node; // candidate root of the pattern (op1)
9.19 +  Node* _mop_node; // the memory operation that must feed both op1 and op2
9.20 +  int _con_op;     // opcode required of op2's constant input
9.21 +
9.22 +  static int match_next(Node* n, int next_op, int next_op_idx) { // returns the input index (1 or 2) of n whose opcode is next_op, else -1
9.23 +    if (n->in(1) == NULL || n->in(2) == NULL) {
9.24 +      return -1;
9.25 +    }
9.26 +
9.27 +    if (next_op_idx == -1) { // n is commutative, try rotations
9.28 +      if (n->in(1)->Opcode() == next_op) {
9.29 +        return 1;
9.30 +      } else if (n->in(2)->Opcode() == next_op) {
9.31 +        return 2;
9.32 +      }
9.33 +    } else {
9.34 +      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
9.35 +      if (n->in(next_op_idx)->Opcode() == next_op) {
9.36 +        return next_op_idx;
9.37 +      }
9.38 +    }
9.39 +    return -1;
9.40 +  }
9.41 +public:
9.42 +  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
9.43 +    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
9.44 +
9.45 +  bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
9.46 +             int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
9.47 +             typename ConType::NativeType con_value) { // value the constant input must have
9.48 +    if (_op1_node->Opcode() != op1) {
9.49 +      return false;
9.50 +    }
9.51 +    if (_mop_node->outcnt() > 2) { // the memory operation has more than two users: reject
9.52 +      return false;
9.53 +    }
9.54 +    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
9.55 +    if (op1_op2_idx == -1) {
9.56 +      return false;
9.57 +    }
9.58 +    // The memory operation must be op1's other input edge
9.59 +    int op1_mop_idx = (op1_op2_idx & 1) + 1; // maps 1 -> 2 and 2 -> 1
9.60 +
9.61 +    // Check that op1's other input really is the mop node we were given
9.62 +    if (_op1_node->in(op1_mop_idx) == _mop_node) {
9.63 +      Node *op2_node = _op1_node->in(op1_op2_idx);
9.64 +      if (op2_node->outcnt() > 1) { // op2 has a user besides op1: reject
9.65 +        return false;
9.66 +      }
9.67 +      assert(op2_node->Opcode() == op2, "Should be");
9.68 +      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
9.69 +      if (op2_con_idx == -1) {
9.70 +        return false;
9.71 +      }
9.72 +      // The memory operation must be op2's other input edge
9.73 +      int op2_mop_idx = (op2_con_idx & 1) + 1; // maps 1 -> 2 and 2 -> 1
9.74 +      // Check that op2 reads the very same memory operation node as op1
9.75 +      if (op2_node->in(op2_mop_idx) == _mop_node) {
9.76 +        // Now check the constant's value
9.77 +        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
9.78 +        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
9.79 +          return true;
9.80 +        }
9.81 +      }
9.82 +    }
9.83 +    return false;
9.84 +  }
9.85 +};
9.86 +
9.87 +
9.88 +bool Matcher::is_bmi_pattern(Node *n, Node *m) { // true if n is the root and load m the shared operand of a BMI1 shape
9.89 +  if (n != NULL && m != NULL) {
9.90 +    if (m->Opcode() == Op_LoadI) {
9.91 +      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
9.92 +      return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||   // (AndI (SubI 0 m) m)
9.93 +             bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || // (AndI (AddI m -1) m)
9.94 +             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);   // (XorI (AddI m -1) m)
9.95 +    } else if (m->Opcode() == Op_LoadL) {
9.96 +      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
9.97 +      return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||   // (AndL (SubL 0 m) m)
9.98 +             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || // (AndL (AddL m -1) m)
9.99 +             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);   // (XorL (AddL m -1) m)
9.100 +    }
9.101 +  }
9.102 +  return false;
9.103 +}
9.104 +#endif // X86
9.105 +
9.106  // A method-klass-holder may be passed in the inline_cache_reg
9.107  // and then expanded into the inline_cache_reg and a method_oop register
9.108  // defined in ad_<arch>.cpp
9.109 @@ -2063,6 +2162,14 @@
9.110              set_shared(m->in(AddPNode::Base)->in(1));
9.111            }
9.112
9.113 +          // If 'n' and 'm' are part of a graph for a BMI instruction, clone this node.
9.114 +#ifdef X86
9.115 +          if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
9.116 +            mstack.push(m, Visit);
9.117 +            continue;
9.118 +          }
9.119 +#endif
9.120 +
9.121            // Clone addressing expressions as they are "free" in memory access instructions
9.122            if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
9.123              // Some inputs for address expression are not put on stack
10.1 --- a/src/share/vm/opto/matcher.hpp Mon Jan 27 13:14:53 2014 +0100
10.2 +++ b/src/share/vm/opto/matcher.hpp Wed Mar 12 11:24:26 2014 -0700
10.3 @@ -79,6 +79,9 @@
10.4
10.5    // Find shared Nodes, or Nodes that otherwise are Matcher roots
10.6    void find_shared( Node *n );
10.7 +#ifdef X86
10.8 +  bool is_bmi_pattern(Node *n, Node *m); // x86 only: true if 'n' and its load input 'm' form a BMI1 fused pattern
10.9 +#endif
10.10
10.11   // Debug and profile information for nodes in old space:
10.12   GrowableArray<Node_Notes*>* _old_node_note_array;
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
11.2 +++ b/test/compiler/codegen/BMI1.java Wed Mar 12 11:24:26 2014 -0700
11.3 @@ -0,0 +1,301 @@
11.4 +/*
11.5 + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
11.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
11.7 + *
11.8 + * This code is free software; you can redistribute it and/or modify it
11.9 + * under the terms of the GNU General Public License version 2 only, as
11.10 + * published by the Free Software Foundation.
11.11 + *
11.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
11.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11.15 + * version 2 for more details (a copy is included in the LICENSE file that
11.16 + * accompanied this code).
11.17 + *
11.18 + * You should have received a copy of the GNU General Public License version
11.19 + * 2 along with this work; if not, write to the Free Software Foundation,
11.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
11.21 + *
11.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
11.23 + * or visit www.oracle.com if you need additional information or have any
11.24 + * questions.
11.25 + */
11.26 +
11.27 +/*
11.28 + * @test
11.29 + * @bug 8031321
11.30 + * @summary Support BMI1 instructions on x86/x64
11.31 + * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=compileonly,BMITests.* BMI1
11.32 + *
11.33 + */
11.34 +
11.35 +class MemI { // holds an int in a field so that a test method reads its operand through a field load
11.36 +    public int x;
11.37 +    public MemI(int x) { this.x = x; }
11.38 +}
11.39 +
11.40 +class MemL { // long counterpart of MemI
11.41 +    public long x;
11.42 +    public MemL(long x) { this.x = x; }
11.43 +}
11.44 +
11.45 +class BMITests { // one small method per expression shape; the @run line limits compilation to this class
11.46 +    static int andnl(int src1, int src2) {
11.47 +        return ~src1 & src2;
11.48 +    }
11.49 +    static long andnq(long src1, long src2) {
11.50 +        return ~src1 & src2;
11.51 +    }
11.52 +    static int andnl(int src1, MemI src2) {
11.53 +        return ~src1 & src2.x;
11.54 +    }
11.55 +    static long andnq(long src1, MemL src2) {
11.56 +        return ~src1 & src2.x;
11.57 +    }
11.58 +    static int blsil(int src1) {
11.59 +        return src1 & -src1; // keeps only the lowest set bit
11.60 +    }
11.61 +    static long blsiq(long src1) {
11.62 +        return src1 & -src1;
11.63 +    }
11.64 +    static int blsil(MemI src1) {
11.65 +        return src1.x & -src1.x;
11.66 +    }
11.67 +    static long blsiq(MemL src1) {
11.68 +        return src1.x & -src1.x;
11.69 +    }
11.70 +    static int blsmskl(int src1) {
11.71 +        return (src1 - 1) ^ src1; // sets every bit up to and including the lowest set bit
11.72 +    }
11.73 +    static long blsmskq(long src1) {
11.74 +        return (src1 - 1) ^ src1;
11.75 +    }
11.76 +    static int blsmskl(MemI src1) {
11.77 +        return (src1.x - 1) ^ src1.x;
11.78 +    }
11.79 +    static long blsmskq(MemL src1) {
11.80 +        return (src1.x - 1) ^ src1.x;
11.81 +    }
11.82 +    static int blsrl(int src1) {
11.83 +        return (src1 - 1) & src1; // clears the lowest set bit
11.84 +    }
11.85 +    static long blsrq(long src1) {
11.86 +        return (src1 - 1) & src1;
11.87 +    }
11.88 +    static int blsrl(MemI src1) {
11.89 +        return (src1.x - 1) & src1.x;
11.90 +    }
11.91 +    static long blsrq(MemL src1) {
11.92 +        return (src1.x - 1) & src1.x;
11.93 +    }
11.94 +    static int lzcntl(int src1) {
11.95 +        return Integer.numberOfLeadingZeros(src1);
11.96 +    }
11.97 +    static int lzcntq(long src1) {
11.98 +        return Long.numberOfLeadingZeros(src1);
11.99 +    }
11.100 +    static int tzcntl(int src1) {
11.101 +        return Integer.numberOfTrailingZeros(src1);
11.102 +    }
11.103 +    static int tzcntq(long src1) {
11.104 +        return Long.numberOfTrailingZeros(src1);
11.105 +    }
11.106 +}
11.107 +
11.108 +public class BMI1 {
11.109 +    private final static int ITERATIONS = 1000000; // calls per test method; presumably enough to get each one JIT-compiled
11.110 +
11.111 +    public static void main(String[] args) {
11.112 +        int ix = 0x01234567;
11.113 +        int iy = 0x89abcdef;
11.114 +        MemI imy = new MemI(iy);
11.115 +        long lx = 0x0123456701234567L;
11.116 +        long ly = 0x89abcdef89abcdefL;
11.117 +        MemL lmy = new MemL(ly);
11.118 +
11.119 +        { // match(Set dst (AndI (XorI src1 minus_1) src2))
11.120 +            int z = BMITests.andnl(ix, iy); // reference value from the first call; every later call must reproduce it
11.121 +            for (int i = 0; i < ITERATIONS; i++) {
11.122 +                int ii = BMITests.andnl(ix, iy);
11.123 +                if (ii != z) {
11.124 +                    throw new Error("andnl with register failed");
11.125 +                }
11.126 +            }
11.127 +        }
11.128 +        { // match(Set dst (AndL (XorL src1 minus_1) src2))
11.129 +            long z = BMITests.andnq(lx, ly);
11.130 +            for (int i = 0; i < ITERATIONS; i++) {
11.131 +                long ll = BMITests.andnq(lx, ly);
11.132 +                if (ll != z) {
11.133 +                    throw new Error("andnq with register failed");
11.134 +                }
11.135 +            }
11.136 +        }
11.137 +        { // match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)))
11.138 +            int z = BMITests.andnl(ix, imy);
11.139 +            for (int i = 0; i < ITERATIONS; i++) {
11.140 +                int ii = BMITests.andnl(ix, imy);
11.141 +                if (ii != z) {
11.142 +                    throw new Error("andnl with memory failed");
11.143 +                }
11.144 +            }
11.145 +        }
11.146 +        { // match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)))
11.147 +            long z = BMITests.andnq(lx, lmy);
11.148 +            for (int i = 0; i < ITERATIONS; i++) {
11.149 +                long ll = BMITests.andnq(lx, lmy);
11.150 +                if (ll != z) {
11.151 +                    throw new Error("andnq with memory failed");
11.152 +                }
11.153 +            }
11.154 +        }
11.155 +        { // match(Set dst (AndI (SubI imm_zero src) src))
11.156 +            int z = BMITests.blsil(ix);
11.157 +            for (int i = 0; i < ITERATIONS; i++) {
11.158 +                int ii = BMITests.blsil(ix);
11.159 +                if (ii != z) {
11.160 +                    throw new Error("blsil with register failed");
11.161 +                }
11.162 +            }
11.163 +        }
11.164 +        { // match(Set dst (AndL (SubL imm_zero src) src))
11.165 +            long z = BMITests.blsiq(lx);
11.166 +            for (int i = 0; i < ITERATIONS; i++) {
11.167 +                long ll = BMITests.blsiq(lx);
11.168 +                if (ll != z) {
11.169 +                    throw new Error("blsiq with register failed");
11.170 +                }
11.171 +            }
11.172 +        }
11.173 +        { // match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ))
11.174 +            int z = BMITests.blsil(imy);
11.175 +            for (int i = 0; i < ITERATIONS; i++) {
11.176 +                int ii = BMITests.blsil(imy);
11.177 +                if (ii != z) {
11.178 +                    throw new Error("blsil with memory failed");
11.179 +                }
11.180 +            }
11.181 +        }
11.182 +        { // match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ))
11.183 +            long z = BMITests.blsiq(lmy);
11.184 +            for (int i = 0; i < ITERATIONS; i++) {
11.185 +                long ll = BMITests.blsiq(lmy);
11.186 +                if (ll != z) {
11.187 +                    throw new Error("blsiq with memory failed");
11.188 +                }
11.189 +            }
11.190 +        }
11.191 +
11.192 +        { // match(Set dst (XorI (AddI src minus_1) src))
11.193 +            int z = BMITests.blsmskl(ix);
11.194 +            for (int i = 0; i < ITERATIONS; i++) {
11.195 +                int ii = BMITests.blsmskl(ix);
11.196 +                if (ii != z) {
11.197 +                    throw new Error("blsmskl with register failed");
11.198 +                }
11.199 +            }
11.200 +        }
11.201 +        { // match(Set dst (XorL (AddL src minus_1) src))
11.202 +            long z = BMITests.blsmskq(lx);
11.203 +            for (int i = 0; i < ITERATIONS; i++) {
11.204 +                long ll = BMITests.blsmskq(lx);
11.205 +                if (ll != z) {
11.206 +                    throw new Error("blsmskq with register failed");
11.207 +                }
11.208 +            }
11.209 +        }
11.210 +        { // match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) )
11.211 +            int z = BMITests.blsmskl(imy);
11.212 +            for (int i = 0; i < ITERATIONS; i++) {
11.213 +                int ii = BMITests.blsmskl(imy);
11.214 +                if (ii != z) {
11.215 +                    throw new Error("blsmskl with memory failed");
11.216 +                }
11.217 +            }
11.218 +        }
11.219 +        { // match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) )
11.220 +            long z = BMITests.blsmskq(lmy);
11.221 +            for (int i = 0; i < ITERATIONS; i++) {
11.222 +                long ll = BMITests.blsmskq(lmy);
11.223 +                if (ll != z) {
11.224 +                    throw new Error("blsmskq with memory failed");
11.225 +                }
11.226 +            }
11.227 +        }
11.228 +
11.229 +        { // match(Set dst (AndI (AddI src minus_1) src) )
11.230 +            int z = BMITests.blsrl(ix);
11.231 +            for (int i = 0; i < ITERATIONS; i++) {
11.232 +                int ii = BMITests.blsrl(ix);
11.233 +                if (ii != z) {
11.234 +                    throw new Error("blsrl with register failed");
11.235 +                }
11.236 +            }
11.237 +        }
11.238 +        { // match(Set dst (AndL (AddL src minus_1) src) )
11.239 +            long z = BMITests.blsrq(lx);
11.240 +            for (int i = 0; i < ITERATIONS; i++) {
11.241 +                long ll = BMITests.blsrq(lx);
11.242 +                if (ll != z) {
11.243 +                    throw new Error("blsrq with register failed");
11.244 +                }
11.245 +            }
11.246 +        }
11.247 +        { // match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) )
11.248 +            int z = BMITests.blsrl(imy);
11.249 +            for (int i = 0; i < ITERATIONS; i++) {
11.250 +                int ii = BMITests.blsrl(imy);
11.251 +                if (ii != z) {
11.252 +                    throw new Error("blsrl with memory failed");
11.253 +                }
11.254 +            }
11.255 +        }
11.256 +        { // match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) )
11.257 +            long z = BMITests.blsrq(lmy);
11.258 +            for (int i = 0; i < ITERATIONS; i++) {
11.259 +                long ll = BMITests.blsrq(lmy);
11.260 +                if (ll != z) {
11.261 +                    throw new Error("blsrq with memory failed");
11.262 +                }
11.263 +            }
11.264 +        }
11.265 +
11.266 +        { // Integer.numberOfLeadingZeros
11.267 +            int z = BMITests.lzcntl(ix);
11.268 +            for (int i = 0; i < ITERATIONS; i++) {
11.269 +                int ii = BMITests.lzcntl(ix);
11.270 +                if (ii != z) {
11.271 +                    throw new Error("lzcntl failed");
11.272 +                }
11.273 +            }
11.274 +        }
11.275 +        { // Long.numberOfLeadingZeros
11.276 +            int z = BMITests.lzcntq(lx);
11.277 +            for (int i = 0; i < ITERATIONS; i++) {
11.278 +                int ii = BMITests.lzcntq(lx);
11.279 +                if (ii != z) {
11.280 +                    throw new Error("lzcntq failed");
11.281 +                }
11.282 +            }
11.283 +        }
11.284 +
11.285 +        { // Integer.numberOfTrailingZeros
11.286 +            int z = BMITests.tzcntl(ix);
11.287 +            for (int i = 0; i < ITERATIONS; i++) {
11.288 +                int ii = BMITests.tzcntl(ix);
11.289 +                if (ii != z) {
11.290 +                    throw new Error("tzcntl failed");
11.291 +                }
11.292 +            }
11.293 +        }
11.294 +        { // Long.numberOfTrailingZeros
11.295 +            int z = BMITests.tzcntq(lx);
11.296 +            for (int i = 0; i < ITERATIONS; i++) {
11.297 +                int ii = BMITests.tzcntq(lx);
11.298 +                if (ii != z) {
11.299 +                    throw new Error("tzcntq failed");
11.300 +                }
11.301 +            }
11.302 +        }
11.303 +    }
11.304 +}