Merge

Mon, 17 Mar 2014 10:48:53 -0700

author
iveresov
date
Mon, 17 Mar 2014 10:48:53 -0700
changeset 6379
f58fd4f52c07
parent 6377
b8413a9cbb84
parent 6378
8a8ff6b577ed
child 6381
8ef3428f54b6

Merge

     1.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Tue Feb 25 18:16:24 2014 +0100
     1.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Mar 17 10:48:53 2014 -0700
     1.3 @@ -1089,6 +1089,21 @@
     1.4    emit_arith(0x23, 0xC0, dst, src);
     1.5  }
     1.6  
     1.7 +void Assembler::andnl(Register dst, Register src1, Register src2) {  // Register-register andn, vex_w == false variant; src1 goes in the VEX nds slot.
     1.8 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
     1.9 +  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);  // emits the VEX prefix (0F 38 map); result is OR-ed into the last byte
    1.10 +  emit_int8((unsigned char)0xF2);  // opcode byte
    1.11 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits (presumably a register-direct ModRM byte)
    1.12 +}
    1.13 +
    1.14 +void Assembler::andnl(Register dst, Register src1, Address src2) {  // Register-memory andn, vex_w == false variant; src2 is the memory operand.
    1.15 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
    1.16 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.17 +  vex_prefix_0F38(dst, src1, src2);  // VEX prefix in the 0F 38 opcode map
    1.18 +  emit_int8((unsigned char)0xF2);  // opcode byte
    1.19 +  emit_operand(dst, src2);  // register + memory operand encoding
    1.20 +}
    1.21 +
    1.22  void Assembler::bsfl(Register dst, Register src) {
    1.23    int encode = prefix_and_encode(dst->encoding(), src->encoding());
    1.24    emit_int8(0x0F);
    1.25 @@ -1110,6 +1125,51 @@
    1.26    emit_int8((unsigned char)(0xC8 | encode));
    1.27  }
    1.28  
    1.29 +void Assembler::blsil(Register dst, Register src) {  // Register-register blsi, vex_w == false variant.
    1.30 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.31 +  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);  // rbx fills the helper's first (reg) slot, presumably as the /3 opcode extension - TODO confirm; dst travels in the nds slot
    1.32 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
    1.33 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
    1.34 +}
    1.35 +
    1.36 +void Assembler::blsil(Register dst, Address src) {  // Register-memory blsi, vex_w == false variant.
    1.37 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
    1.38 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.39 +  vex_prefix_0F38(rbx, dst, src);  // rbx in the reg slot (presumably the /3 opcode extension); dst in the nds slot
    1.40 +  emit_int8((unsigned char)0xF3);  // opcode byte
    1.41 +  emit_operand(rbx, src);  // same rbx placeholder is used for the operand encoding
    1.42 +}
    1.43 +
    1.44 +void Assembler::blsmskl(Register dst, Register src) {  // Register-register blsmsk, vex_w == false variant.
    1.45 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.46 +  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);  // rdx fills the reg slot, presumably as the /2 opcode extension - TODO confirm; dst travels in the nds slot
    1.47 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
    1.48 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
    1.49 +}
    1.50 +
    1.51 +void Assembler::blsmskl(Register dst, Address src) {  // Register-memory blsmsk, vex_w == false variant.
    1.52 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
    1.53 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.54 +  vex_prefix_0F38(rdx, dst, src);  // rdx in the reg slot (presumably the /2 opcode extension); dst in the nds slot
    1.55 +  emit_int8((unsigned char)0xF3);  // opcode byte
    1.56 +  emit_operand(rdx, src);  // same rdx placeholder is used for the operand encoding
    1.57 +}
    1.58 +
    1.59 +void Assembler::blsrl(Register dst, Register src) {  // Register-register blsr, vex_w == false variant.
    1.60 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.61 +  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);  // rcx fills the reg slot, presumably as the /1 opcode extension - TODO confirm; dst travels in the nds slot
    1.62 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
    1.63 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
    1.64 +}
    1.65 +
    1.66 +void Assembler::blsrl(Register dst, Address src) {  // Register-memory blsr, vex_w == false variant.
    1.67 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
    1.68 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
    1.69 +  vex_prefix_0F38(rcx, dst, src);  // rcx in the reg slot (presumably the /1 opcode extension); dst in the nds slot
    1.70 +  emit_int8((unsigned char)0xF3);  // opcode byte
    1.71 +  emit_operand(rcx, src);  // same rcx placeholder is used for the operand encoding
    1.72 +}
    1.73 +
    1.74  void Assembler::call(Label& L, relocInfo::relocType rtype) {
    1.75    // suspect disp32 is always good
    1.76    int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);
    1.77 @@ -2878,6 +2938,24 @@
    1.78    emit_operand(dst, src);
    1.79  }
    1.80  
    1.81 +void Assembler::tzcntl(Register dst, Register src) {  // Register-register tzcnt using prefix_and_encode (non-q form): F3, optional prefix, 0F BC, final byte.
    1.82 +  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");  // tzcnt availability is keyed off the BMI1 feature bit here
    1.83 +  emit_int8((unsigned char)0xF3);  // must precede whatever prefix_and_encode() emits
    1.84 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
    1.85 +  emit_int8(0x0F);
    1.86 +  emit_int8((unsigned char)0xBC);
    1.87 +  emit_int8((unsigned char)(0xC0 | encode));  // cast the whole OR-ed value, as tzcntq and the other encoders do
    1.88 +}
    1.89 +
    1.90 +void Assembler::tzcntq(Register dst, Register src) {  // Register-register tzcnt using prefixq_and_encode (q form): F3, prefix, 0F BC, final byte.
    1.91 +  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");  // tzcnt availability is keyed off the BMI1 feature bit here
    1.92 +  emit_int8((unsigned char)0xF3);  // must precede whatever prefixq_and_encode() emits
    1.93 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
    1.94 +  emit_int8(0x0F);
    1.95 +  emit_int8((unsigned char)0xBC);
    1.96 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
    1.97 +}
    1.98 +
    1.99  void Assembler::ucomisd(XMMRegister dst, Address src) {
   1.100    NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   1.101    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
   1.102 @@ -4837,6 +4915,21 @@
   1.103    emit_arith(0x23, 0xC0, dst, src);
   1.104  }
   1.105  
   1.106 +void Assembler::andnq(Register dst, Register src1, Register src2) {  // Register-register andn, vex_w == true (_q) variant; src1 goes in the VEX nds slot.
   1.107 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.108 +  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);  // emits the VEX prefix (0F 38 map, vex_w set); result is OR-ed into the last byte
   1.109 +  emit_int8((unsigned char)0xF2);  // opcode byte
   1.110 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
   1.111 +}
   1.112 +
   1.113 +void Assembler::andnq(Register dst, Register src1, Address src2) {  // Register-memory andn, vex_w == true (_q) variant; src2 is the memory operand.
   1.114 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
   1.115 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.116 +  vex_prefix_0F38_q(dst, src1, src2);  // VEX prefix in the 0F 38 opcode map with vex_w set
   1.117 +  emit_int8((unsigned char)0xF2);  // opcode byte
   1.118 +  emit_operand(dst, src2);  // register + memory operand encoding
   1.119 +}
   1.120 +
   1.121  void Assembler::bsfq(Register dst, Register src) {
   1.122    int encode = prefixq_and_encode(dst->encoding(), src->encoding());
   1.123    emit_int8(0x0F);
   1.124 @@ -4858,6 +4951,51 @@
   1.125    emit_int8((unsigned char)(0xC8 | encode));
   1.126  }
   1.127  
   1.128 +void Assembler::blsiq(Register dst, Register src) {  // Register-register blsi, vex_w == true (_q) variant.
   1.129 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.130 +  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);  // rbx fills the reg slot, presumably as the /3 opcode extension - TODO confirm; dst travels in the nds slot
   1.131 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
   1.132 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
   1.133 +}
   1.134 +
   1.135 +void Assembler::blsiq(Register dst, Address src) {  // Register-memory blsi, vex_w == true (_q) variant.
   1.136 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
   1.137 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.138 +  vex_prefix_0F38_q(rbx, dst, src);  // rbx in the reg slot (presumably the /3 opcode extension); dst in the nds slot
   1.139 +  emit_int8((unsigned char)0xF3);  // opcode byte
   1.140 +  emit_operand(rbx, src);  // same rbx placeholder is used for the operand encoding
   1.141 +}
   1.142 +
   1.143 +void Assembler::blsmskq(Register dst, Register src) {  // Register-register blsmsk, vex_w == true (_q) variant.
   1.144 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.145 +  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);  // rdx fills the reg slot, presumably as the /2 opcode extension - TODO confirm; dst travels in the nds slot
   1.146 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
   1.147 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
   1.148 +}
   1.149 +
   1.150 +void Assembler::blsmskq(Register dst, Address src) {  // Register-memory blsmsk, vex_w == true (_q) variant.
   1.151 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
   1.152 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.153 +  vex_prefix_0F38_q(rdx, dst, src);  // rdx in the reg slot (presumably the /2 opcode extension); dst in the nds slot
   1.154 +  emit_int8((unsigned char)0xF3);  // opcode byte
   1.155 +  emit_operand(rdx, src);  // same rdx placeholder is used for the operand encoding
   1.156 +}
   1.157 +
   1.158 +void Assembler::blsrq(Register dst, Register src) {  // Register-register blsr, vex_w == true (_q) variant.
   1.159 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.160 +  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);  // rcx fills the reg slot, presumably as the /1 opcode extension - TODO confirm; dst travels in the nds slot
   1.161 +  emit_int8((unsigned char)0xF3);  // opcode byte shared by blsi/blsmsk/blsr in this file
   1.162 +  emit_int8((unsigned char)(0xC0 | encode));  // 0xC0 | register bits returned by the prefix helper
   1.163 +}
   1.164 +
   1.165 +void Assembler::blsrq(Register dst, Address src) {  // Register-memory blsr, vex_w == true (_q) variant.
   1.166 +  InstructionMark im(this);  // scoped mark covering the bytes emitted below
   1.167 +  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");  // only valid when the CPU reports BMI1
   1.168 +  vex_prefix_0F38_q(rcx, dst, src);  // rcx in the reg slot (presumably the /1 opcode extension); dst in the nds slot
   1.169 +  emit_int8((unsigned char)0xF3);  // opcode byte
   1.170 +  emit_operand(rcx, src);  // same rcx placeholder is used for the operand encoding
   1.171 +}
   1.172 +
   1.173  void Assembler::cdqq() {
   1.174    prefix(REX_W);
   1.175    emit_int8((unsigned char)0x99);
     2.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Tue Feb 25 18:16:24 2014 +0100
     2.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon Mar 17 10:48:53 2014 -0700
     2.3 @@ -590,10 +590,35 @@
     2.4      vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
     2.5    }
     2.6  
     2.7 +  void vex_prefix_0F38(Register dst, Register nds, Address src) {  // VEX prefix for a general-register instruction with a memory operand, 0F 38 opcode map.
     2.8 +    bool vex_w = false;  // the _q sibling passes true instead
     2.9 +    bool vector256 = false;
    2.10 +    vex_prefix(src, nds->encoding(), dst->encoding(),  // note the argument order: address first, then nds, then dst
    2.11 +               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
    2.12 +  }
    2.13 +
    2.14 +  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {  // Same as vex_prefix_0F38 but with vex_w set.
    2.15 +    bool vex_w = true;  // only difference from the non-q helper
    2.16 +    bool vector256 = false;
    2.17 +    vex_prefix(src, nds->encoding(), dst->encoding(),  // note the argument order: address first, then nds, then dst
    2.18 +               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
    2.19 +  }
    2.20    int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
    2.21                               VexSimdPrefix pre, VexOpcode opc,
    2.22                               bool vex_w, bool vector256);
    2.23  
    2.24 +  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {  // Register-only form: forwards the three encodings to vex_prefix_and_encode and returns its result.
    2.25 +    bool vex_w = false;  // the _q sibling passes true instead
    2.26 +    bool vector256 = false;
    2.27 +    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),  // callers OR the returned value into 0xC0
    2.28 +                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
    2.29 +  }
    2.30 +  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {  // Same as vex_prefix_0F38_and_encode but with vex_w set.
    2.31 +    bool vex_w = true;  // only difference from the non-q helper
    2.32 +    bool vector256 = false;
    2.33 +    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),  // callers OR the returned value into 0xC0
    2.34 +                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
    2.35 +  }
    2.36    int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
    2.37                               VexSimdPrefix pre, bool vector256 = false,
    2.38                               VexOpcode opc = VEX_OPCODE_0F) {
    2.39 @@ -897,6 +922,27 @@
    2.40    void andq(Register dst, Address src);
    2.41    void andq(Register dst, Register src);
    2.42  
    2.43 +  // BMI1 instructions (andn, blsi, blsmsk, blsr); the definitions assert VM_Version::supports_bmi1(). "l" = vex_w clear, "q" = vex_w set.
    2.44 +  void andnl(Register dst, Register src1, Register src2);  // three-operand: src1 is passed in the VEX nds slot
    2.45 +  void andnl(Register dst, Register src1, Address src2);
    2.46 +  void andnq(Register dst, Register src1, Register src2);
    2.47 +  void andnq(Register dst, Register src1, Address src2);
    2.48 +
    2.49 +  void blsil(Register dst, Register src);  // definitions pass rbx as a fixed reg-slot value
    2.50 +  void blsil(Register dst, Address src);
    2.51 +  void blsiq(Register dst, Register src);
    2.52 +  void blsiq(Register dst, Address src);
    2.53 +
    2.54 +  void blsmskl(Register dst, Register src);  // definitions pass rdx as a fixed reg-slot value
    2.55 +  void blsmskl(Register dst, Address src);
    2.56 +  void blsmskq(Register dst, Register src);
    2.57 +  void blsmskq(Register dst, Address src);
    2.58 +
    2.59 +  void blsrl(Register dst, Register src);  // definitions pass rcx as a fixed reg-slot value
    2.60 +  void blsrl(Register dst, Address src);
    2.61 +  void blsrq(Register dst, Register src);
    2.62 +  void blsrq(Register dst, Address src);
    2.63 +
    2.64    void bsfl(Register dst, Register src);
    2.65    void bsrl(Register dst, Register src);
    2.66  
    2.67 @@ -1574,6 +1620,9 @@
    2.68    void testq(Register dst, int32_t imm32);
    2.69    void testq(Register dst, Register src);
    2.70  
    2.71 +  // BMI - count trailing zeros (register-register forms only; the definitions assert VM_Version::supports_bmi1())
    2.72 +  void tzcntl(Register dst, Register src);  // uses prefix_and_encode
    2.73 +  void tzcntq(Register dst, Register src);  // uses prefixq_and_encode
    2.74  
    2.75    // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
    2.76    void ucomisd(XMMRegister dst, Address src);
     3.1 --- a/src/cpu/x86/vm/globals_x86.hpp	Tue Feb 25 18:16:24 2014 +0100
     3.2 +++ b/src/cpu/x86/vm/globals_x86.hpp	Mon Mar 17 10:48:53 2014 -0700
     3.3 @@ -134,5 +134,11 @@
     3.4                                                                              \
     3.5    product(bool, UseCountLeadingZerosInstruction, false,                     \
     3.6            "Use count leading zeros instruction")                            \
     3.7 +                                                                            \
     3.8 +  product(bool, UseCountTrailingZerosInstruction, false,                    \
     3.9 +          "Use count trailing zeros instruction")                           \
    3.10 +                                                                            \
    3.11 +  product(bool, UseBMI1Instructions, false,                                 \
    3.12 +          "Use BMI instructions")
    3.13  
    3.14  #endif // CPU_X86_VM_GLOBALS_X86_HPP
     4.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Tue Feb 25 18:16:24 2014 +0100
     4.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Mon Mar 17 10:48:53 2014 -0700
     4.3 @@ -429,7 +429,7 @@
     4.4    }
     4.5  
     4.6    char buf[256];
     4.7 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.8 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
     4.9                 cores_per_cpu(), threads_per_core(),
    4.10                 cpu_family(), _model, _stepping,
    4.11                 (supports_cmov() ? ", cmov" : ""),
    4.12 @@ -455,7 +455,9 @@
    4.13                 (supports_ht() ? ", ht": ""),
    4.14                 (supports_tsc() ? ", tsc": ""),
    4.15                 (supports_tscinv_bit() ? ", tscinvbit": ""),
    4.16 -               (supports_tscinv() ? ", tscinv": ""));
    4.17 +               (supports_tscinv() ? ", tscinv": ""),
    4.18 +               (supports_bmi1() ? ", bmi1" : ""),
    4.19 +               (supports_bmi2() ? ", bmi2" : ""));
    4.20    _features_str = strdup(buf);
    4.21  
    4.22    // UseSSE is set to the smaller of what hardware supports and what
    4.23 @@ -600,13 +602,6 @@
    4.24        }
    4.25      }
    4.26  
    4.27 -    // Use count leading zeros count instruction if available.
    4.28 -    if (supports_lzcnt()) {
    4.29 -      if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
    4.30 -        UseCountLeadingZerosInstruction = true;
    4.31 -      }
    4.32 -    }
    4.33 -
    4.34      // some defaults for AMD family 15h
    4.35      if ( cpu_family() == 0x15 ) {
    4.36        // On family 15h processors default is no sw prefetch
    4.37 @@ -692,6 +687,35 @@
    4.38      }
    4.39  #endif // COMPILER2
    4.40  
    4.41 +  // Use the count leading zeros (lzcnt) instruction if available.
    4.42 +  if (supports_lzcnt()) {
    4.43 +    if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {  // leave an explicit user setting untouched
    4.44 +      UseCountLeadingZerosInstruction = true;
    4.45 +    }
    4.46 +  } else if (UseCountLeadingZerosInstruction) {  // flag is on but the CPU lacks lzcnt
    4.47 +    warning("lzcnt instruction is not available on this CPU");
    4.48 +    FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);  // force the flag off so later code does not emit lzcnt
    4.49 +  }
    4.50 +
    4.51 +  if (supports_bmi1()) {  // enable BMI1 code generation by default when the CPU reports it
    4.52 +    if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {  // leave an explicit user setting untouched
    4.53 +      UseBMI1Instructions = true;
    4.54 +    }
    4.55 +  } else if (UseBMI1Instructions) {  // flag is on but the CPU lacks BMI1
    4.56 +    warning("BMI1 instructions are not available on this CPU");
    4.57 +    FLAG_SET_DEFAULT(UseBMI1Instructions, false);  // force the flag off
    4.58 +  }
    4.59 +
    4.60 +  // Use count trailing zeros instruction if available (availability is tied to the BMI1 feature bit)
    4.61 +  if (supports_bmi1()) {
    4.62 +    if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {  // leave an explicit user setting untouched
    4.63 +      UseCountTrailingZerosInstruction = UseBMI1Instructions;  // default follows UseBMI1Instructions, which was settled just above
    4.64 +    }
    4.65 +  } else if (UseCountTrailingZerosInstruction) {  // flag is on but the CPU lacks BMI1
    4.66 +    warning("tzcnt instruction is not available on this CPU");
    4.67 +    FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);  // force the flag off
    4.68 +  }
    4.69 +
    4.70    // Use population count instruction if available.
    4.71    if (supports_popcnt()) {
    4.72      if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
     5.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Tue Feb 25 18:16:24 2014 +0100
     5.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Mon Mar 17 10:48:53 2014 -0700
     5.3 @@ -141,7 +141,8 @@
     5.4      struct {
     5.5        uint32_t LahfSahf     : 1,
     5.6                 CmpLegacy    : 1,
     5.7 -                            : 4,
     5.8 +                            : 3,
     5.9 +               lzcnt_intel  : 1,
    5.10                 lzcnt        : 1,
    5.11                 sse4a        : 1,
    5.12                 misalignsse  : 1,
    5.13 @@ -251,7 +252,9 @@
    5.14      CPU_AVX2   = (1 << 18),
    5.15      CPU_AES    = (1 << 19),
    5.16      CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
    5.17 -    CPU_CLMUL  = (1 << 21) // carryless multiply for CRC
    5.18 +    CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
    5.19 +    CPU_BMI1   = (1 << 22),
    5.20 +    CPU_BMI2   = (1 << 23)
    5.21    } cpuFeatureFlags;
    5.22  
    5.23    enum {
    5.24 @@ -423,6 +426,8 @@
    5.25        if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
    5.26          result |= CPU_AVX2;
    5.27      }
    5.28 +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
    5.29 +      result |= CPU_BMI1;
    5.30      if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
    5.31        result |= CPU_TSC;
    5.32      if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
    5.33 @@ -444,6 +449,13 @@
    5.34        if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
    5.35          result |= CPU_SSE4A;
    5.36      }
    5.37 +    // Intel features: BMI2 and the Intel-specific lzcnt bit are only consulted when is_intel() is true.
    5.38 +    if(is_intel()) {
    5.39 +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)  // BMI2 bit from the sef_cpuid7_ebx bitfield
    5.40 +        result |= CPU_BMI2;
    5.41 +      if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)  // separate bitfield from the existing 'lzcnt' one; maps to the same CPU_LZCNT flag
    5.42 +        result |= CPU_LZCNT;
    5.43 +    }
    5.44  
    5.45      return result;
    5.46    }
    5.47 @@ -560,7 +572,8 @@
    5.48    static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
    5.49    static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
    5.50    static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }
    5.51 -
    5.52 +  static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }  // true when the CPU_BMI1 bit is set in _cpuFeatures
    5.53 +  static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }  // true when the CPU_BMI2 bit is set in _cpuFeatures
    5.54    // Intel features
    5.55    static bool is_intel_family_core() { return is_intel() &&
    5.56                                         extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
     6.1 --- a/src/cpu/x86/vm/x86_32.ad	Tue Feb 25 18:16:24 2014 +0100
     6.2 +++ b/src/cpu/x86/vm/x86_32.ad	Mon Mar 17 10:48:53 2014 -0700
     6.3 @@ -5155,6 +5155,19 @@
     6.4  %}
     6.5  
     6.6  instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
     6.7 +  predicate(UseCountTrailingZerosInstruction);  // tzcnt-based rule; the _bsf rule that follows carries the negated predicate
     6.8 +  match(Set dst (CountTrailingZerosI src));
     6.9 +  effect(KILL cr);  // flags are declared clobbered
    6.10 +
    6.11 +  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
    6.12 +  ins_encode %{
    6.13 +    __ tzcntl($dst$$Register, $src$$Register);  // single instruction, no fix-up code
    6.14 +  %}
    6.15 +  ins_pipe(ialu_reg);
    6.16 +%}
    6.17 +
    6.18 +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
    6.19 +  predicate(!UseCountTrailingZerosInstruction);
    6.20    match(Set dst (CountTrailingZerosI src));
    6.21    effect(KILL cr);
    6.22  
    6.23 @@ -5174,6 +5187,30 @@
    6.24  %}
    6.25  
    6.26  instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
    6.27 +  predicate(UseCountTrailingZerosInstruction);  // tzcnt-based rule; the _bsf rule that follows carries the negated predicate
    6.28 +  match(Set dst (CountTrailingZerosL src));
    6.29 +  effect(TEMP dst, KILL cr);  // dst is written before src.hi is read, hence TEMP
    6.30 +
    6.31 +  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
    6.32 +            "JNC    done\n\t"
    6.33 +            "TZCNT  $dst, $src.hi\n\t"
    6.34 +            "ADD    $dst, 32\n"
    6.35 +            "done:" %}
    6.36 +  ins_encode %{
    6.37 +    Register Rdst = $dst$$Register;
    6.38 +    Register Rsrc = $src$$Register;  // low half of the long; HIGH_FROM_LOW() yields the high half
    6.39 +    Label done;
    6.40 +    __ tzcntl(Rdst, Rsrc);  // count in the low word first
    6.41 +    __ jccb(Assembler::carryClear, done);  // NOTE(review): relies on tzcnt setting CF only for a zero source - confirm against the ISA manual
    6.42 +    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));  // low word had no set bit: count in the high word
    6.43 +    __ addl(Rdst, BitsPerInt);  // and add the width of the low word
    6.44 +    __ bind(done);
    6.45 +  %}
    6.46 +  ins_pipe(ialu_reg);
    6.47 +%}
    6.48 +
    6.49 +instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
    6.50 +  predicate(!UseCountTrailingZerosInstruction);
    6.51    match(Set dst (CountTrailingZerosL src));
    6.52    effect(TEMP dst, KILL cr);
    6.53  
    6.54 @@ -8017,6 +8054,123 @@
    6.55    ins_pipe( ialu_mem_imm );
    6.56  %}
    6.57  
    6.58 +// BMI1 instructions
    6.59 +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
    6.60 +  match(Set dst (AndI (XorI src1 minus_1) src2));  // (src1 ^ minus_1) & src2; immI_M1 presumably matches the constant -1, making this ~src1 & src2
    6.61 +  predicate(UseBMI1Instructions);
    6.62 +  effect(KILL cr);  // flags are declared clobbered
    6.63 +
    6.64 +  format %{ "ANDNL  $dst, $src1, $src2" %}
    6.65 +
    6.66 +  ins_encode %{
    6.67 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);  // src1 is the operand that was XOR-ed in the match tree
    6.68 +  %}
    6.69 +  ins_pipe(ialu_reg);
    6.70 +%}
    6.71 +
    6.72 +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
    6.73 +  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));  // same shape as the register rule, with src2 loaded from memory
    6.74 +  predicate(UseBMI1Instructions);
    6.75 +  effect(KILL cr);  // flags are declared clobbered
    6.76 +
    6.77 +  ins_cost(125);  // explicit cost for the memory form
    6.78 +  format %{ "ANDNL  $dst, $src1, $src2" %}
    6.79 +
    6.80 +  ins_encode %{
    6.81 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);  // load is folded into the instruction
    6.82 +  %}
    6.83 +  ins_pipe(ialu_reg_mem);
    6.84 +%}
    6.85 +
    6.86 +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
    6.87 +  match(Set dst (AndI (SubI imm_zero src) src));  // (imm_zero - src) & src; immI0 presumably matches the constant 0
    6.88 +  predicate(UseBMI1Instructions);
    6.89 +  effect(KILL cr);  // flags are declared clobbered
    6.90 +
    6.91 +  format %{ "BLSIL  $dst, $src" %}
    6.92 +
    6.93 +  ins_encode %{
    6.94 +    __ blsil($dst$$Register, $src$$Register);  // whole sub+and tree collapses to one instruction
    6.95 +  %}
    6.96 +  ins_pipe(ialu_reg);
    6.97 +%}
    6.98 +
    6.99 +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
   6.100 +  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));  // both LoadI leaves name the same memory operand
   6.101 +  predicate(UseBMI1Instructions);
   6.102 +  effect(KILL cr);  // flags are declared clobbered
   6.103 +
   6.104 +  ins_cost(125);  // explicit cost for the memory form
   6.105 +  format %{ "BLSIL  $dst, $src" %}
   6.106 +
   6.107 +  ins_encode %{
   6.108 +    __ blsil($dst$$Register, $src$$Address);  // load is folded into the instruction
   6.109 +  %}
   6.110 +  ins_pipe(ialu_reg_mem);
   6.111 +%}
   6.112 +
   6.113 +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
   6.114 +%{
   6.115 +  match(Set dst (XorI (AddI src minus_1) src));  // (src + minus_1) ^ src; immI_M1 presumably matches the constant -1
   6.116 +  predicate(UseBMI1Instructions);
   6.117 +  effect(KILL cr);  // flags are declared clobbered
   6.118 +
   6.119 +  format %{ "BLSMSKL $dst, $src" %}
   6.120 +
   6.121 +  ins_encode %{
   6.122 +    __ blsmskl($dst$$Register, $src$$Register);  // whole add+xor tree collapses to one instruction
   6.123 +  %}
   6.124 +
   6.125 +  ins_pipe(ialu_reg);
   6.126 +%}
   6.127 +
   6.128 +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
   6.129 +%{
   6.130 +  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));  // both LoadI leaves name the same memory operand
   6.131 +  predicate(UseBMI1Instructions);
   6.132 +  effect(KILL cr);  // flags are declared clobbered
   6.133 +
   6.134 +  ins_cost(125);  // explicit cost for the memory form
   6.135 +  format %{ "BLSMSKL $dst, $src" %}
   6.136 +
   6.137 +  ins_encode %{
   6.138 +    __ blsmskl($dst$$Register, $src$$Address);  // load is folded into the instruction
   6.139 +  %}
   6.140 +
   6.141 +  ins_pipe(ialu_reg_mem);
   6.142 +%}
   6.143 +
   6.144 +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
   6.145 +%{
   6.146 +  match(Set dst (AndI (AddI src minus_1) src) );  // (src + minus_1) & src; immI_M1 presumably matches the constant -1
   6.147 +  predicate(UseBMI1Instructions);
   6.148 +  effect(KILL cr);  // flags are declared clobbered
   6.149 +
   6.150 +  format %{ "BLSRL  $dst, $src" %}
   6.151 +
   6.152 +  ins_encode %{
   6.153 +    __ blsrl($dst$$Register, $src$$Register);  // whole add+and tree collapses to one instruction
   6.154 +  %}
   6.155 +
   6.156 +  ins_pipe(ialu_reg);
   6.157 +%}
   6.158 +
   6.159 +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
   6.160 +%{
   6.161 +  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));  // both LoadI leaves name the same memory operand
   6.162 +  predicate(UseBMI1Instructions);
   6.163 +  effect(KILL cr);  // flags are declared clobbered
   6.164 +
   6.165 +  ins_cost(125);  // explicit cost for the memory form
   6.166 +  format %{ "BLSRL  $dst, $src" %}
   6.167 +
   6.168 +  ins_encode %{
   6.169 +    __ blsrl($dst$$Register, $src$$Address);  // load is folded into the instruction
   6.170 +  %}
   6.171 +
   6.172 +  ins_pipe(ialu_reg_mem);
   6.173 +%}
   6.174 +
   6.175  // Or Instructions
   6.176  // Or Register with Register
   6.177  instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   6.178 @@ -8639,6 +8793,210 @@
   6.179    ins_pipe( ialu_reg_long_mem );
   6.180  %}
   6.181  
   6.182 +// BMI1 instructions
   6.183 +instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
   6.184 +  match(Set dst (AndL (XorL src1 minus_1) src2));  // long (src1 ^ minus_1) & src2, done as two independent 32-bit halves
   6.185 +  predicate(UseBMI1Instructions);
   6.186 +  effect(KILL cr, TEMP dst);  // dst.lo is written before the high halves of the sources are read
   6.187 +
   6.188 +  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
   6.189 +            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
   6.190 +         %}
   6.191 +
   6.192 +  ins_encode %{
   6.193 +    Register Rdst = $dst$$Register;
   6.194 +    Register Rsrc1 = $src1$$Register;
   6.195 +    Register Rsrc2 = $src2$$Register;
   6.196 +    __ andnl(Rdst, Rsrc1, Rsrc2);  // low halves
   6.197 +    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));  // high halves; no carry between halves for a bitwise op
   6.198 +  %}
   6.199 +  ins_pipe(ialu_reg_reg_long);
   6.200 +%}
   6.201 +
   6.202 +instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
   6.203 +  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));  // same shape as the register rule, with src2 loaded from memory
   6.204 +  predicate(UseBMI1Instructions);
   6.205 +  effect(KILL cr, TEMP dst);  // dst.lo is written before src1.hi is read
   6.206 +
   6.207 +  ins_cost(125);  // explicit cost for the memory form
   6.208 +  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
   6.209 +            "ANDNL  $dst.hi, $src1.hi, $src2+4"
   6.210 +         %}
   6.211 +
   6.212 +  ins_encode %{
   6.213 +    Register Rdst = $dst$$Register;
   6.214 +    Register Rsrc1 = $src1$$Register;
   6.215 +    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);  // high word: same base/index/scale, displacement + 4, no relocation
   6.216 +
   6.217 +    __ andnl(Rdst, Rsrc1, $src2$$Address);  // low halves
   6.218 +    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);  // high halves (the long is read as two separate 32-bit loads)
   6.219 +  %}
   6.220 +  ins_pipe(ialu_reg_mem);
   6.221 +%}
   6.222 +
   6.223 +instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
   6.224 +  match(Set dst (AndL (SubL imm_zero src) src));  // long (imm_zero - src) & src, synthesised from 32-bit blsi
   6.225 +  predicate(UseBMI1Instructions);
   6.226 +  effect(KILL cr, TEMP dst);  // dst is written before src.hi is read
   6.227 +
   6.228 +  format %{ "MOVL   $dst.hi, 0\n\t"
   6.229 +            "BLSIL  $dst.lo, $src.lo\n\t"
   6.230 +            "JNZ    done\n\t"
   6.231 +            "BLSIL  $dst.hi, $src.hi\n"
   6.232 +            "done:"
   6.233 +         %}
   6.234 +
   6.235 +  ins_encode %{
   6.236 +    Label done;
   6.237 +    Register Rdst = $dst$$Register;
   6.238 +    Register Rsrc = $src$$Register;
   6.239 +    __ movl(HIGH_FROM_LOW(Rdst), 0);  // high result stays 0 when the branch below is taken
   6.240 +    __ blsil(Rdst, Rsrc);  // low half
   6.241 +    __ jccb(Assembler::notZero, done);  // non-zero low result: finished (NOTE(review): relies on blsi setting ZF from its result - confirm)
   6.242 +    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));  // low result was zero: compute the high half
   6.243 +    __ bind(done);
   6.244 +  %}
   6.245 +  ins_pipe(ialu_reg);
   6.246 +%}
   6.247 +
   6.248 +instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
   6.249 +  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));  // both LoadL leaves name the same memory operand
   6.250 +  predicate(UseBMI1Instructions);
   6.251 +  effect(KILL cr, TEMP dst);  // dst is written before the address registers are used for the high word
   6.252 +
   6.253 +  ins_cost(125);  // explicit cost for the memory form
   6.254 +  format %{ "MOVL   $dst.hi, 0\n\t"
   6.255 +            "BLSIL  $dst.lo, $src\n\t"
   6.256 +            "JNZ    done\n\t"
   6.257 +            "BLSIL  $dst.hi, $src+4\n"
   6.258 +            "done:"
   6.259 +         %}
   6.260 +
   6.261 +  ins_encode %{
   6.262 +    Label done;
   6.263 +    Register Rdst = $dst$$Register;
   6.264 +    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);  // high word: same base/index/scale, displacement + 4, no relocation
   6.265 +
   6.266 +    __ movl(HIGH_FROM_LOW(Rdst), 0);  // high result stays 0 when the branch below is taken
   6.267 +    __ blsil(Rdst, $src$$Address);  // low half
   6.268 +    __ jccb(Assembler::notZero, done);  // non-zero low result: finished (NOTE(review): relies on blsi setting ZF from its result - confirm)
   6.269 +    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);  // low result was zero: compute the high half
   6.270 +    __ bind(done);
   6.271 +  %}
   6.272 +  ins_pipe(ialu_reg_mem);
   6.273 +%}
   6.274 +
   6.275 +instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
   6.276 +%{
   6.277 +  match(Set dst (XorL (AddL src minus_1) src));  // long (src + minus_1) ^ src, synthesised from 32-bit blsmsk
   6.278 +  predicate(UseBMI1Instructions);
   6.279 +  effect(KILL cr, TEMP dst);  // dst is written before src.hi is read
   6.280 +
   6.281 +  format %{ "MOVL    $dst.hi, 0\n\t"
   6.282 +            "BLSMSKL $dst.lo, $src.lo\n\t"
   6.283 +            "JNC     done\n\t"
   6.284 +            "BLSMSKL $dst.hi, $src.hi\n"
   6.285 +            "done:"
   6.286 +         %}
   6.287 +
   6.288 +  ins_encode %{
   6.289 +    Label done;
   6.290 +    Register Rdst = $dst$$Register;
   6.291 +    Register Rsrc = $src$$Register;
   6.292 +    __ movl(HIGH_FROM_LOW(Rdst), 0);  // high result stays 0 when the branch below is taken
   6.293 +    __ blsmskl(Rdst, Rsrc);  // low half
   6.294 +    __ jccb(Assembler::carryClear, done);  // NOTE(review): relies on blsmsk setting CF only for a zero source - confirm against the ISA manual
   6.295 +    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));  // carry set: the mask continues into the high half
   6.296 +    __ bind(done);
   6.297 +  %}
   6.298 +
   6.299 +  ins_pipe(ialu_reg);
   6.300 +%}
   6.301 +
   6.302 +instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
   6.303 +%{
   6.304 +  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));  // both LoadL leaves name the same memory operand
   6.305 +  predicate(UseBMI1Instructions);
   6.306 +  effect(KILL cr, TEMP dst);  // dst is written before the address registers are used for the high word
   6.307 +
   6.308 +  ins_cost(125);  // explicit cost for the memory form
   6.309 +  format %{ "MOVL    $dst.hi, 0\n\t"
   6.310 +            "BLSMSKL $dst.lo, $src\n\t"
   6.311 +            "JNC     done\n\t"
   6.312 +            "BLSMSKL $dst.hi, $src+4\n"
   6.313 +            "done:"
   6.314 +         %}
   6.315 +
   6.316 +  ins_encode %{
   6.317 +    Label done;
   6.318 +    Register Rdst = $dst$$Register;
   6.319 +    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);  // high word: same base/index/scale, displacement + 4, no relocation
   6.320 +
   6.321 +    __ movl(HIGH_FROM_LOW(Rdst), 0);  // high result stays 0 when the branch below is taken
   6.322 +    __ blsmskl(Rdst, $src$$Address);  // low half
   6.323 +    __ jccb(Assembler::carryClear, done);  // NOTE(review): relies on blsmsk setting CF only for a zero source - confirm against the ISA manual
   6.324 +    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);  // carry set: the mask continues into the high half
   6.325 +    __ bind(done);
   6.326 +  %}
   6.327 +
   6.328 +  ins_pipe(ialu_reg_mem);
   6.329 +%}
   6.330 +
   6.331 +instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
   6.332 +%{
   6.333 +  match(Set dst (AndL (AddL src minus_1) src) );  // long (src + minus_1) & src, synthesised from 32-bit blsr
   6.334 +  predicate(UseBMI1Instructions);
   6.335 +  effect(KILL cr, TEMP dst);  // dst is written before src.hi is read again
   6.336 +
   6.337 +  format %{ "MOVL   $dst.hi, $src.hi\n\t"
   6.338 +            "BLSRL  $dst.lo, $src.lo\n\t"
   6.339 +            "JNC    done\n\t"
   6.340 +            "BLSRL  $dst.hi, $src.hi\n"
   6.341 +            "done:"
   6.342 +  %}
   6.343 +
   6.344 +  ins_encode %{
   6.345 +    Label done;
   6.346 +    Register Rdst = $dst$$Register;
   6.347 +    Register Rsrc = $src$$Register;
   6.348 +    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));  // high half is copied unchanged unless the branch below falls through
   6.349 +    __ blsrl(Rdst, Rsrc);  // low half
   6.350 +    __ jccb(Assembler::carryClear, done);  // NOTE(review): relies on blsr setting CF only for a zero source - confirm against the ISA manual
   6.351 +    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));  // carry set: the bit to clear lives in the high half
   6.352 +    __ bind(done);
   6.353 +  %}
   6.354 +
   6.355 +  ins_pipe(ialu_reg);
   6.356 +%}
   6.357 +
   6.358 +instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
   6.359 +%{ // Long ((src + -1) & src) on a memory operand, emitted as a 32-bit BLSRL on each half.
   6.360 +  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
   6.361 +  predicate(UseBMI1Instructions);
   6.362 +  effect(KILL cr, TEMP dst);
   6.363 +
   6.364 +  ins_cost(125);
   6.365 +  format %{ "MOVL   $dst.hi, $src+4\n\t"
   6.366 +            "BLSRL  $dst.lo, $src\n\t"
   6.367 +            "JNC    done\n\t"
   6.368 +            "BLSRL  $dst.hi, $src+4\n"
   6.369 +            "done:"
   6.370 +  %}
   6.371 +
   6.372 +  ins_encode %{
   6.373 +    Label done;
   6.374 +    Register Rdst = $dst$$Register;
   6.375 +    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); // same address, displacement + 4
   6.376 +    __ movl(HIGH_FROM_LOW(Rdst), src_hi); // high half defaults to the value loaded from src_hi
   6.377 +    __ blsrl(Rdst, $src$$Address);
   6.378 +    __ jccb(Assembler::carryClear, done); // CF clear: keep the loaded high half (NOTE(review): CF semantics per Intel SDM - confirm)
   6.379 +    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
   6.380 +    __ bind(done);
   6.381 +  %}
   6.382 +
   6.383 +  ins_pipe(ialu_reg_mem);
   6.384 +%}
   6.385 +
   6.386  // Or Long Register with Register
   6.387  instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
   6.388    match(Set dst (OrL dst src));
     7.1 --- a/src/cpu/x86/vm/x86_64.ad	Tue Feb 25 18:16:24 2014 +0100
     7.2 +++ b/src/cpu/x86/vm/x86_64.ad	Mon Mar 17 10:48:53 2014 -0700
     7.3 @@ -6014,6 +6014,19 @@
     7.4  %}
     7.5  
     7.6  instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
     7.7 +  predicate(UseCountTrailingZerosInstruction);
     7.8 +  match(Set dst (CountTrailingZerosI src));
     7.9 +  effect(KILL cr);
    7.10 +  // Int trailing-zero count via tzcntl; countTrailingZerosI_bsf carries the negated predicate.
    7.11 +  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
    7.12 +  ins_encode %{
    7.13 +    __ tzcntl($dst$$Register, $src$$Register);
    7.14 +  %}
    7.15 +  ins_pipe(ialu_reg);
    7.16 +%}
    7.17 +
    7.18 +instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
    7.19 +  predicate(!UseCountTrailingZerosInstruction);
    7.20    match(Set dst (CountTrailingZerosI src));
    7.21    effect(KILL cr);
    7.22  
    7.23 @@ -6033,6 +6046,19 @@
    7.24  %}
    7.25  
    7.26  instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
    7.27 +  predicate(UseCountTrailingZerosInstruction);
    7.28 +  match(Set dst (CountTrailingZerosL src));
    7.29 +  effect(KILL cr);
    7.30 +  // Long trailing-zero count via tzcntq; countTrailingZerosL_bsf carries the negated predicate.
    7.31 +  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
    7.32 +  ins_encode %{
    7.33 +    __ tzcntq($dst$$Register, $src$$Register);
    7.34 +  %}
    7.35 +  ins_pipe(ialu_reg);
    7.36 +%}
    7.37 +
    7.38 +instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
    7.39 +  predicate(!UseCountTrailingZerosInstruction);
    7.40    match(Set dst (CountTrailingZerosL src));
    7.41    effect(KILL cr);
    7.42  
    7.43 @@ -8612,6 +8638,122 @@
    7.44    ins_pipe(ialu_mem_imm);
    7.45  %}
    7.46  
    7.47 +// BMI1 instructions
    7.48 +instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
    7.49 +  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
    7.50 +  predicate(UseBMI1Instructions);
    7.51 +  effect(KILL cr);
    7.52 +  // Int (src1 ^ -1) & load(src2) folded into one andnl with a memory second source.
    7.53 +  ins_cost(125);
    7.54 +  format %{ "andnl  $dst, $src1, $src2" %}
    7.55 +
    7.56 +  ins_encode %{
    7.57 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
    7.58 +  %}
    7.59 +  ins_pipe(ialu_reg_mem);
    7.60 +%}
    7.61 +
    7.62 +instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
    7.63 +  match(Set dst (AndI (XorI src1 minus_1) src2));
    7.64 +  predicate(UseBMI1Instructions);
    7.65 +  effect(KILL cr);
    7.66 +  // Int (src1 ^ -1) & src2 folded into one three-operand andnl on registers.
    7.67 +  format %{ "andnl  $dst, $src1, $src2" %}
    7.68 +
    7.69 +  ins_encode %{
    7.70 +    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
    7.71 +  %}
    7.72 +  ins_pipe(ialu_reg);
    7.73 +%}
    7.74 +
    7.75 +instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
    7.76 +  match(Set dst (AndI (SubI imm_zero src) src));
    7.77 +  predicate(UseBMI1Instructions);
    7.78 +  effect(KILL cr);
    7.79 +  // Int (0 - src) & src folded into one blsil on registers.
    7.80 +  format %{ "blsil  $dst, $src" %}
    7.81 +
    7.82 +  ins_encode %{
    7.83 +    __ blsil($dst$$Register, $src$$Register);
    7.84 +  %}
    7.85 +  ins_pipe(ialu_reg);
    7.86 +%}
    7.87 +
    7.88 +instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
    7.89 +  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
    7.90 +  predicate(UseBMI1Instructions);
    7.91 +  effect(KILL cr);
    7.92 +  // Int (0 - load) & load, where both LoadI operands name the same memory operand src.
    7.93 +  ins_cost(125);
    7.94 +  format %{ "blsil  $dst, $src" %}
    7.95 +
    7.96 +  ins_encode %{
    7.97 +    __ blsil($dst$$Register, $src$$Address);
    7.98 +  %}
    7.99 +  ins_pipe(ialu_reg_mem);
   7.100 +%}
   7.101 +
   7.102 +instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
   7.103 +%{ // Int (load + -1) ^ load, where both LoadI operands name the same memory operand src.
   7.104 +  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
   7.105 +  predicate(UseBMI1Instructions);
   7.106 +  effect(KILL cr);
   7.107 +
   7.108 +  ins_cost(125);
   7.109 +  format %{ "blsmskl $dst, $src" %}
   7.110 +
   7.111 +  ins_encode %{
   7.112 +    __ blsmskl($dst$$Register, $src$$Address);
   7.113 +  %}
   7.114 +  ins_pipe(ialu_reg_mem);
   7.115 +%}
   7.116 +
   7.117 +instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
   7.118 +%{ // Int (src + -1) ^ src folded into one blsmskl on registers.
   7.119 +  match(Set dst (XorI (AddI src minus_1) src));
   7.120 +  predicate(UseBMI1Instructions);
   7.121 +  effect(KILL cr);
   7.122 +
   7.123 +  format %{ "blsmskl $dst, $src" %}
   7.124 +
   7.125 +  ins_encode %{
   7.126 +    __ blsmskl($dst$$Register, $src$$Register);
   7.127 +  %}
   7.128 +
   7.129 +  ins_pipe(ialu_reg);
   7.130 +%}
   7.131 +
   7.132 +instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
   7.133 +%{ // Int (src + -1) & src folded into one blsrl on registers.
   7.134 +  match(Set dst (AndI (AddI src minus_1) src) );
   7.135 +  predicate(UseBMI1Instructions);
   7.136 +  effect(KILL cr);
   7.137 +
   7.138 +  format %{ "blsrl  $dst, $src" %}
   7.139 +
   7.140 +  ins_encode %{
   7.141 +    __ blsrl($dst$$Register, $src$$Register);
   7.142 +  %}
   7.143 +  // Register-only form: use the register ALU pipeline class, like the other reg-reg BMI1 instructs.
   7.144 +  ins_pipe(ialu_reg);
   7.145 +%}
   7.146 +
   7.147 +instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
   7.148 +%{ // Int (load + -1) & load, where both LoadI operands name the same memory operand src.
   7.149 +  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
   7.150 +  predicate(UseBMI1Instructions);
   7.151 +  effect(KILL cr);
   7.152 +
   7.153 +  ins_cost(125);
   7.154 +  format %{ "blsrl  $dst, $src" %}
   7.155 +
   7.156 +  ins_encode %{
   7.157 +    __ blsrl($dst$$Register, $src$$Address);
   7.158 +  %}
   7.159 +  // Memory-source form: use the reg-mem ALU pipeline class, like the other memory BMI1 instructs.
   7.160 +  ins_pipe(ialu_reg_mem);
   7.161 +%}
   7.162 +
   7.163  // Or Instructions
   7.164  // Or Register with Register
   7.165  instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
   7.166 @@ -8843,6 +8985,122 @@
   7.167    ins_pipe(ialu_mem_imm);
   7.168  %}
   7.169  
   7.170 +// BMI1 instructions
   7.171 +instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
   7.172 +  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
   7.173 +  predicate(UseBMI1Instructions);
   7.174 +  effect(KILL cr);
   7.175 +  // Long (src1 ^ -1) & load(src2) folded into one andnq with a memory second source.
   7.176 +  ins_cost(125);
   7.177 +  format %{ "andnq  $dst, $src1, $src2" %}
   7.178 +
   7.179 +  ins_encode %{
   7.180 +    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
   7.181 +  %}
   7.182 +  ins_pipe(ialu_reg_mem);
   7.183 +%}
   7.184 +
   7.185 +instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
   7.186 +  match(Set dst (AndL (XorL src1 minus_1) src2));
   7.187 +  predicate(UseBMI1Instructions);
   7.188 +  effect(KILL cr);
   7.189 +  // Long (src1 ^ -1) & src2 folded into one three-operand andnq on registers.
   7.190 +  format %{ "andnq  $dst, $src1, $src2" %}
   7.191 +
   7.192 +  ins_encode %{
   7.193 +    __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
   7.194 +  %}
   7.195 +  ins_pipe(ialu_reg); // register-only form: register ALU pipeline class, matching andnI_rReg_rReg_rReg
   7.196 +%}
   7.197 +
   7.198 +instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
   7.199 +  match(Set dst (AndL (SubL imm_zero src) src));
   7.200 +  predicate(UseBMI1Instructions);
   7.201 +  effect(KILL cr);
   7.202 +  // Long (0 - src) & src folded into one blsiq on registers.
   7.203 +  format %{ "blsiq  $dst, $src" %}
   7.204 +
   7.205 +  ins_encode %{
   7.206 +    __ blsiq($dst$$Register, $src$$Register);
   7.207 +  %}
   7.208 +  ins_pipe(ialu_reg);
   7.209 +%}
   7.210 +
   7.211 +instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
   7.212 +  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
   7.213 +  predicate(UseBMI1Instructions);
   7.214 +  effect(KILL cr);
   7.215 +  // Long (0 - load) & load, where both LoadL operands name the same memory operand src.
   7.216 +  ins_cost(125);
   7.217 +  format %{ "blsiq  $dst, $src" %}
   7.218 +
   7.219 +  ins_encode %{
   7.220 +    __ blsiq($dst$$Register, $src$$Address);
   7.221 +  %}
   7.222 +  ins_pipe(ialu_reg_mem);
   7.223 +%}
   7.224 +
   7.225 +instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
   7.226 +%{ // Long (load + -1) ^ load, where both LoadL operands name the same memory operand src.
   7.227 +  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
   7.228 +  predicate(UseBMI1Instructions);
   7.229 +  effect(KILL cr);
   7.230 +
   7.231 +  ins_cost(125);
   7.232 +  format %{ "blsmskq $dst, $src" %}
   7.233 +
   7.234 +  ins_encode %{
   7.235 +    __ blsmskq($dst$$Register, $src$$Address);
   7.236 +  %}
   7.237 +  ins_pipe(ialu_reg_mem);
   7.238 +%}
   7.239 +
   7.240 +instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
   7.241 +%{ // Long (src + -1) ^ src folded into one blsmskq on registers.
   7.242 +  match(Set dst (XorL (AddL src minus_1) src));
   7.243 +  predicate(UseBMI1Instructions);
   7.244 +  effect(KILL cr);
   7.245 +
   7.246 +  format %{ "blsmskq $dst, $src" %}
   7.247 +
   7.248 +  ins_encode %{
   7.249 +    __ blsmskq($dst$$Register, $src$$Register);
   7.250 +  %}
   7.251 +
   7.252 +  ins_pipe(ialu_reg);
   7.253 +%}
   7.254 +
   7.255 +instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
   7.256 +%{ // Long (src + -1) & src folded into one blsrq on registers.
   7.257 +  match(Set dst (AndL (AddL src minus_1) src) );
   7.258 +  predicate(UseBMI1Instructions);
   7.259 +  effect(KILL cr);
   7.260 +
   7.261 +  format %{ "blsrq  $dst, $src" %}
   7.262 +
   7.263 +  ins_encode %{
   7.264 +    __ blsrq($dst$$Register, $src$$Register);
   7.265 +  %}
   7.266 +
   7.267 +  ins_pipe(ialu_reg);
   7.268 +%}
   7.269 +
   7.270 +instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
   7.271 +%{ // Long (load + -1) & load, where both LoadL operands name the same memory operand src.
   7.272 +  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
   7.273 +  predicate(UseBMI1Instructions);
   7.274 +  effect(KILL cr);
   7.275 +
   7.276 +  ins_cost(125);
   7.277 +  format %{ "blsrq  $dst, $src" %}
   7.278 +
   7.279 +  ins_encode %{
   7.280 +    __ blsrq($dst$$Register, $src$$Address);
   7.281 +  %}
   7.282 +  // Memory-source form: use the reg-mem ALU pipeline class, like the other memory BMI1 instructs.
   7.283 +  ins_pipe(ialu_reg_mem);
   7.284 +%}
   7.285 +
   7.286  // Or Instructions
   7.287  // Or Register with Register
   7.288  instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
     8.1 --- a/src/share/vm/adlc/formssel.cpp	Tue Feb 25 18:16:24 2014 +0100
     8.2 +++ b/src/share/vm/adlc/formssel.cpp	Mon Mar 17 10:48:53 2014 -0700
     8.3 @@ -649,6 +649,7 @@
     8.4    int USE_of_memory  = 0;
     8.5    int DEF_of_memory  = 0;
     8.6    const char*    last_memory_DEF = NULL; // to test DEF/USE pairing in asserts
     8.7 +  const char*    last_memory_USE = NULL;
     8.8    Component     *unique          = NULL;
     8.9    Component     *comp            = NULL;
    8.10    ComponentList &components      = (ComponentList &)_components;
    8.11 @@ -670,7 +671,16 @@
    8.12            assert(0 == strcmp(last_memory_DEF, comp->_name), "every memory DEF is followed by a USE of the same name");
    8.13            last_memory_DEF = NULL;
    8.14          }
    8.15 -        USE_of_memory++;
    8.16 +        // Handles same memory being used multiple times in the case of BMI1 instructions.
    8.17 +        if (last_memory_USE != NULL) {
    8.18 +          if (strcmp(comp->_name, last_memory_USE) != 0) {
    8.19 +            USE_of_memory++;
    8.20 +          }
    8.21 +        } else {
    8.22 +          USE_of_memory++;
    8.23 +        }
    8.24 +        last_memory_USE = comp->_name;
    8.25 +
    8.26          if (DEF_of_memory == 0)  // defs take precedence
    8.27            unique = comp;
    8.28        } else {
     9.1 --- a/src/share/vm/opto/matcher.cpp	Tue Feb 25 18:16:24 2014 +0100
     9.2 +++ b/src/share/vm/opto/matcher.cpp	Mon Mar 17 10:48:53 2014 -0700
     9.3 @@ -1908,6 +1908,105 @@
     9.4    return OptoReg::as_OptoReg(regs.first());
     9.5  }
     9.6  
     9.7 +// This function identifies sub-graphs in which a 'load' node is
     9.8 +// input to two different nodes, and such that it can be matched
     9.9 +// with BMI instructions like blsi, blsr, etc.
    9.10 +// Example : for int a[], b = -a[i] & a[i] can be matched to blsi r32, m32.
    9.11 +// The graph is (AndI (SubI Con0 LoadI*) LoadI*), where LoadI*
    9.12 +// refers to the same node.
    9.13 +#ifdef X86
    9.14 +// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
    9.15 +// This is a temporary solution until we make DAGs expressible in ADL.
    9.16 +template<typename ConType>
    9.17 +class FusedPatternMatcher {
    9.18 +  Node* _op1_node; // candidate root of the pattern (op1)
    9.19 +  Node* _mop_node; // memory operation expected on both op1 and op2
    9.20 +  int _con_op;     // opcode the constant input of op2 must have
    9.21 +  // Returns the input index (1 or 2) of n whose opcode is next_op, or -1 when none qualifies.
    9.22 +  static int match_next(Node* n, int next_op, int next_op_idx) {
    9.23 +    if (n->in(1) == NULL || n->in(2) == NULL) {
    9.24 +      return -1;
    9.25 +    }
    9.26 +
    9.27 +    if (next_op_idx == -1) { // n is commutative, try rotations
    9.28 +      if (n->in(1)->Opcode() == next_op) {
    9.29 +        return 1;
    9.30 +      } else if (n->in(2)->Opcode() == next_op) {
    9.31 +        return 2;
    9.32 +      }
    9.33 +    } else {
    9.34 +      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
    9.35 +      if (n->in(next_op_idx)->Opcode() == next_op) {
    9.36 +        return next_op_idx;
    9.37 +      }
    9.38 +    }
    9.39 +    return -1;
    9.40 +  }
    9.41 +public:
    9.42 +  FusedPatternMatcher(Node* op1_node, Node *mop_node, int con_op) :
    9.43 +    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
    9.44 +  // Returns true when _op1_node is (op1 (op2 con mop) mop) with con equal to con_value.
    9.45 +  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
    9.46 +             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
    9.47 +             typename ConType::NativeType con_value) {
    9.48 +    if (_op1_node->Opcode() != op1) {
    9.49 +      return false;
    9.50 +    }
    9.51 +    if (_mop_node->outcnt() > 2) { // reject a memory op with more than two outputs
    9.52 +      return false;
    9.53 +    }
    9.54 +    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    9.55 +    if (op1_op2_idx == -1) {
    9.56 +      return false;
    9.57 +    }
    9.58 +    // Memory operation must be the other edge
    9.59 +    int op1_mop_idx = (op1_op2_idx & 1) + 1; // maps 1 -> 2 and 2 -> 1
    9.60 +
    9.61 +    // Check that the mop node is really what we want
    9.62 +    if (_op1_node->in(op1_mop_idx) == _mop_node) {
    9.63 +      Node *op2_node = _op1_node->in(op1_op2_idx);
    9.64 +      if (op2_node->outcnt() > 1) { // reject an op2 that has more than one output
    9.65 +        return false;
    9.66 +      }
    9.67 +      assert(op2_node->Opcode() == op2, "Should be");
    9.68 +      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
    9.69 +      if (op2_con_idx == -1) {
    9.70 +        return false;
    9.71 +      }
    9.72 +      // Memory operation must be the other edge
    9.73 +      int op2_mop_idx = (op2_con_idx & 1) + 1; // maps 1 -> 2 and 2 -> 1
    9.74 +      // Check that the memory operation is the same node
    9.75 +      if (op2_node->in(op2_mop_idx) == _mop_node) {
    9.76 +        // Now check the constant
    9.77 +        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
    9.78 +        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
    9.79 +          return true;
    9.80 +        }
    9.81 +      }
    9.82 +    }
    9.83 +    return false;
    9.84 +  }
    9.85 +};
    9.86 +
    9.87 +
    9.88 +bool Matcher::is_bmi_pattern(Node *n, Node *m) { // true when n, with LoadI/LoadL m, forms one of the three fused shapes below
    9.89 +  if (n != NULL && m != NULL) {
    9.90 +    if (m->Opcode() == Op_LoadI) {
    9.91 +      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
    9.92 +      return bmii.match(Op_AndI, -1, Op_SubI,  1,  0)  || // (0 - m) & m; constant must be input 1 of SubI
    9.93 +             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  || // (m + -1) & m
    9.94 +             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);    // (m + -1) ^ m
    9.95 +    } else if (m->Opcode() == Op_LoadL) {
    9.96 +      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
    9.97 +      return bmil.match(Op_AndL, -1, Op_SubL,  1,  0) || // (0 - m) & m; constant must be input 1 of SubL
    9.98 +             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || // (m + -1) & m
    9.99 +             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);   // (m + -1) ^ m
   9.100 +    }
   9.101 +  }
   9.102 +  return false;
   9.103 +}
   9.104 +#endif // X86
   9.105 +
   9.106  // A method-klass-holder may be passed in the inline_cache_reg
   9.107  // and then expanded into the inline_cache_reg and a method_oop register
   9.108  //   defined in ad_<arch>.cpp
   9.109 @@ -2063,6 +2162,14 @@
   9.110            set_shared(m->in(AddPNode::Base)->in(1));
   9.111          }
   9.112  
   9.113 +        // if 'n' and 'm' are part of a graph for BMI instruction, clone this node.
   9.114 +#ifdef X86
   9.115 +        if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
   9.116 +          mstack.push(m, Visit);
   9.117 +          continue;
   9.118 +        }
   9.119 +#endif
   9.120 +
   9.121          // Clone addressing expressions as they are "free" in memory access instructions
   9.122          if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
   9.123            // Some inputs for address expression are not put on stack
    10.1 --- a/src/share/vm/opto/matcher.hpp	Tue Feb 25 18:16:24 2014 +0100
    10.2 +++ b/src/share/vm/opto/matcher.hpp	Mon Mar 17 10:48:53 2014 -0700
    10.3 @@ -79,6 +79,9 @@
    10.4  
    10.5    // Find shared Nodes, or Nodes that otherwise are Matcher roots
    10.6    void find_shared( Node *n );
    10.7 +#ifdef X86
    10.8 +  bool is_bmi_pattern(Node *n, Node *m);
    10.9 +#endif
   10.10  
   10.11    // Debug and profile information for nodes in old space:
   10.12    GrowableArray<Node_Notes*>* _old_node_note_array;
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/test/compiler/codegen/BMI1.java	Mon Mar 17 10:48:53 2014 -0700
    11.3 @@ -0,0 +1,301 @@
    11.4 +/*
    11.5 + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
    11.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    11.7 + *
    11.8 + * This code is free software; you can redistribute it and/or modify it
    11.9 + * under the terms of the GNU General Public License version 2 only, as
   11.10 + * published by the Free Software Foundation.
   11.11 + *
   11.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   11.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   11.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   11.15 + * version 2 for more details (a copy is included in the LICENSE file that
   11.16 + * accompanied this code).
   11.17 + *
   11.18 + * You should have received a copy of the GNU General Public License version
   11.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   11.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   11.21 + *
   11.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
   11.23 + * or visit www.oracle.com if you need additional information or have any
   11.24 + * questions.
   11.25 + */
   11.26 +
   11.27 +/*
   11.28 + * @test
   11.29 + * @bug 8031321
   11.30 + * @summary Support BMI1 instructions on x86/x64
   11.31 + * @run main/othervm -Xbatch -XX:-TieredCompilation -XX:CompileCommand=compileonly,BMITests.* BMI1
   11.32 + *
   11.33 + */
   11.34 +
   11.35 +class MemI { // Holder for one int field, used by the BMITests overloads that read src.x.
   11.36 +  public int x;
   11.37 +  public MemI(int x) { this.x = x; }
   11.38 +}
   11.39 +
   11.40 +class MemL { // Holder for one long field, used by the BMITests overloads that read src.x.
   11.41 +  public long x;
   11.42 +  public MemL(long x) { this.x = x; }
   11.43 +}
   11.44 +
   11.45 +class BMITests { // Static one-expression kernels called from BMI1.main; int/long and field-holder variants.
   11.46 +  static int andnl(int src1, int src2) {
   11.47 +    return ~src1 & src2;
   11.48 +  }
   11.49 +  static long andnq(long src1, long src2) {
   11.50 +    return ~src1 & src2;
   11.51 +  }
   11.52 +  static int andnl(int src1, MemI src2) {
   11.53 +    return ~src1 & src2.x;
   11.54 +  }
   11.55 +  static long andnq(long src1, MemL src2) {
   11.56 +    return ~src1 & src2.x;
   11.57 +  }
   11.58 +  static int blsil(int src1) {
   11.59 +    return src1 & -src1;
   11.60 +  }
   11.61 +  static long blsiq(long src1) {
   11.62 +    return src1 & -src1;
   11.63 +  }
   11.64 +  static int blsil(MemI src1) {
   11.65 +    return src1.x & -src1.x;
   11.66 +  }
   11.67 +  static long blsiq(MemL src1) {
   11.68 +    return src1.x & -src1.x;
   11.69 +  }
   11.70 +  static int blsmskl(int src1) {
   11.71 +    return (src1 - 1) ^ src1;
   11.72 +  }
   11.73 +  static long blsmskq(long src1) {
   11.74 +    return (src1 - 1) ^ src1;
   11.75 +  }
   11.76 +  static int blsmskl(MemI src1) {
   11.77 +    return (src1.x - 1) ^ src1.x;
   11.78 +  }
   11.79 +  static long blsmskq(MemL src1) {
   11.80 +    return (src1.x - 1) ^ src1.x;
   11.81 +  }
   11.82 +  static int blsrl(int src1) {
   11.83 +    return (src1 - 1) & src1;
   11.84 +  }
   11.85 +  static long blsrq(long src1) {
   11.86 +    return (src1 - 1) & src1;
   11.87 +  }
   11.88 +  static int blsrl(MemI src1) {
   11.89 +    return (src1.x - 1) & src1.x;
   11.90 +  }
   11.91 +  static long blsrq(MemL src1) {
   11.92 +    return (src1.x - 1) & src1.x;
   11.93 +  }
   11.94 +  static int lzcntl(int src1) {
   11.95 +    return Integer.numberOfLeadingZeros(src1);
   11.96 +  }
   11.97 +  static int lzcntq(long src1) {
   11.98 +    return Long.numberOfLeadingZeros(src1);
   11.99 +  }
  11.100 +  static int tzcntl(int src1) {
  11.101 +    return Integer.numberOfTrailingZeros(src1);
  11.102 +  }
  11.103 +  static int tzcntq(long src1) {
  11.104 +    return Long.numberOfTrailingZeros(src1);
  11.105 +  }
  11.106 +}
  11.107 +
  11.108 +public class BMI1 { // Driver: calls each BMITests method repeatedly and compares every result with the first call's result.
  11.109 +  private final static int ITERATIONS = 1000000; // repetitions per BMITests method
  11.110 +  // Throws Error naming the failing operation as soon as a repeated call disagrees with the first result.
  11.111 +  public static void main(String[] args) {
  11.112 +    int ix = 0x01234567;
  11.113 +    int iy = 0x89abcdef;
  11.114 +    MemI imy = new MemI(iy);
  11.115 +    long lx = 0x0123456701234567L;
  11.116 +    long ly = 0x89abcdef89abcdefL;
  11.117 +    MemL lmy = new MemL(ly);
  11.118 +
  11.119 +    { // match(Set dst (AndI (XorI src1 minus_1) src2))
  11.120 +      int z = BMITests.andnl(ix, iy);
  11.121 +      for (int i = 0; i < ITERATIONS; i++) {
  11.122 +        int ii = BMITests.andnl(ix, iy);
  11.123 +        if (ii != z) {
  11.124 +          throw new Error("andnl with register failed");
  11.125 +        }
  11.126 +      }
  11.127 +    }
  11.128 +    { // match(Set dst (AndL (XorL src1 minus_1) src2))
  11.129 +      long z = BMITests.andnq(lx, ly);
  11.130 +      for (int i = 0; i < ITERATIONS; i++) {
  11.131 +        long ll = BMITests.andnq(lx, ly);
  11.132 +        if (ll != z) {
  11.133 +          throw new Error("andnq with register failed");
  11.134 +        }
  11.135 +      }
  11.136 +    }
  11.137 +    { // match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)))
  11.138 +      int z = BMITests.andnl(ix, imy);
  11.139 +      for (int i = 0; i < ITERATIONS; i++) {
  11.140 +        int ii = BMITests.andnl(ix, imy);
  11.141 +        if (ii != z) {
  11.142 +          throw new Error("andnl with memory failed");
  11.143 +        }
  11.144 +      }
  11.145 +    }
  11.146 +    { // match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)))
  11.147 +      long z = BMITests.andnq(lx, lmy);
  11.148 +      for (int i = 0; i < ITERATIONS; i++) {
  11.149 +        long ll = BMITests.andnq(lx, lmy);
  11.150 +        if (ll != z) {
  11.151 +          throw new Error("andnq with memory failed");
  11.152 +        }
  11.153 +      }
  11.154 +    }
  11.155 +    { // match(Set dst (AndI (SubI imm_zero src) src))
  11.156 +      int z = BMITests.blsil(ix);
  11.157 +      for (int i = 0; i < ITERATIONS; i++) {
  11.158 +        int ii = BMITests.blsil(ix);
  11.159 +        if (ii != z) {
  11.160 +          throw new Error("blsil with register failed");
  11.161 +        }
  11.162 +      }
  11.163 +    }
  11.164 +    { // match(Set dst (AndL (SubL imm_zero src) src))
  11.165 +      long z = BMITests.blsiq(lx);
  11.166 +      for (int i = 0; i < ITERATIONS; i++) {
  11.167 +        long ll = BMITests.blsiq(lx);
  11.168 +        if (ll != z) {
  11.169 +          throw new Error("blsiq with register failed");
  11.170 +        }
  11.171 +      }
  11.172 +    }
  11.173 +    { // match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ))
  11.174 +      int z = BMITests.blsil(imy);
  11.175 +      for (int i = 0; i < ITERATIONS; i++) {
  11.176 +        int ii = BMITests.blsil(imy);
  11.177 +        if (ii != z) {
  11.178 +          throw new Error("blsil with memory failed");
  11.179 +        }
  11.180 +      }
  11.181 +    }
  11.182 +    { // match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ))
  11.183 +      long z = BMITests.blsiq(lmy);
  11.184 +      for (int i = 0; i < ITERATIONS; i++) {
  11.185 +        long ll = BMITests.blsiq(lmy);
  11.186 +        if (ll != z) {
  11.187 +          throw new Error("blsiq with memory failed");
  11.188 +        }
  11.189 +      }
  11.190 +    }
  11.191 +
  11.192 +    { // match(Set dst (XorI (AddI src minus_1) src))
  11.193 +      int z = BMITests.blsmskl(ix);
  11.194 +      for (int i = 0; i < ITERATIONS; i++) {
  11.195 +        int ii = BMITests.blsmskl(ix);
  11.196 +        if (ii != z) {
  11.197 +          throw new Error("blsmskl with register failed");
  11.198 +        }
  11.199 +      }
  11.200 +    }
  11.201 +    { // match(Set dst (XorL (AddL src minus_1) src))
  11.202 +      long z = BMITests.blsmskq(lx);
  11.203 +      for (int i = 0; i < ITERATIONS; i++) {
  11.204 +        long ll = BMITests.blsmskq(lx);
  11.205 +        if (ll != z) {
  11.206 +          throw new Error("blsmskq with register failed");
  11.207 +        }
  11.208 +      }
  11.209 +    }
  11.210 +    { // match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) )
  11.211 +      int z = BMITests.blsmskl(imy);
  11.212 +      for (int i = 0; i < ITERATIONS; i++) {
  11.213 +        int ii = BMITests.blsmskl(imy);
  11.214 +        if (ii != z) {
  11.215 +          throw new Error("blsmskl with memory failed");
  11.216 +        }
  11.217 +      }
  11.218 +    }
  11.219 +    {  // match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) )
  11.220 +      long z = BMITests.blsmskq(lmy);
  11.221 +      for (int i = 0; i < ITERATIONS; i++) {
  11.222 +        long ll = BMITests.blsmskq(lmy);
  11.223 +        if (ll != z) {
  11.224 +          throw new Error("blsmskq with memory failed");
  11.225 +        }
  11.226 +      }
  11.227 +    }
  11.228 +
  11.229 +    { //  match(Set dst (AndI (AddI src minus_1) src) )
  11.230 +      int z = BMITests.blsrl(ix);
  11.231 +      for (int i = 0; i < ITERATIONS; i++) {
  11.232 +        int ii = BMITests.blsrl(ix);
  11.233 +        if (ii != z) {
  11.234 +          throw new Error("blsrl with register failed");
  11.235 +        }
  11.236 +      }
  11.237 +    }
  11.238 +    { // match(Set dst (AndL (AddL src minus_1) src) )
  11.239 +      long z = BMITests.blsrq(lx);
  11.240 +      for (int i = 0; i < ITERATIONS; i++) {
  11.241 +        long ll = BMITests.blsrq(lx);
  11.242 +        if (ll != z) {
  11.243 +          throw new Error("blsrq with register failed");
  11.244 +        }
  11.245 +      }
  11.246 +    }
  11.247 +    { // match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) )
  11.248 +      int z = BMITests.blsrl(imy);
  11.249 +      for (int i = 0; i < ITERATIONS; i++) {
  11.250 +        int ii = BMITests.blsrl(imy);
  11.251 +        if (ii != z) {
  11.252 +          throw new Error("blsrl with memory failed");
  11.253 +        }
  11.254 +      }
  11.255 +    }
  11.256 +    { // match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) )
  11.257 +      long z = BMITests.blsrq(lmy);
  11.258 +      for (int i = 0; i < ITERATIONS; i++) {
  11.259 +        long ll = BMITests.blsrq(lmy);
  11.260 +        if (ll != z) {
  11.261 +          throw new Error("blsrq with memory failed");
  11.262 +        }
  11.263 +      }
  11.264 +    }
  11.265 +
  11.266 +    {
  11.267 +      int z = BMITests.lzcntl(ix);
  11.268 +      for (int i = 0; i < ITERATIONS; i++) {
  11.269 +        int ii = BMITests.lzcntl(ix);
  11.270 +        if (ii != z) {
  11.271 +          throw new Error("lzcntl failed");
  11.272 +        }
  11.273 +      }
  11.274 +    }
  11.275 +    {
  11.276 +      int z = BMITests.lzcntq(lx);
  11.277 +      for (int i = 0; i < ITERATIONS; i++) {
  11.278 +        int ii = BMITests.lzcntq(lx);
  11.279 +        if (ii != z) {
  11.280 +          throw new Error("lzcntq failed");
  11.281 +        }
  11.282 +      }
  11.283 +    }
  11.284 +
  11.285 +    {
  11.286 +      int z = BMITests.tzcntl(ix);
  11.287 +      for (int i = 0; i < ITERATIONS; i++) {
  11.288 +        int ii = BMITests.tzcntl(ix);
  11.289 +        if (ii != z) {
  11.290 +          throw new Error("tzcntl failed");
  11.291 +        }
  11.292 +      }
  11.293 +    }
  11.294 +    {
  11.295 +      int z = BMITests.tzcntq(lx);
  11.296 +      for (int i = 0; i < ITERATIONS; i++) {
  11.297 +        int ii = BMITests.tzcntq(lx);
  11.298 +        if (ii != z) {
  11.299 +          throw new Error("tzcntq failed");
  11.300 +        }
  11.301 +      }
  11.302 +    }
  11.303 +  }
  11.304 +}

mercurial