src/cpu/x86/vm/vm_version_x86.hpp

changeset 1020
22e09c0f4b47
child 1078
c771b7f43bbf
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Mon Feb 23 12:02:30 2009 -0800
     1.3 @@ -0,0 +1,459 @@
     1.4 +/*
     1.5 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.24 + * have any questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +class VM_Version : public Abstract_VM_Version {
    1.29 +public:
    1.30 +  // cpuid result register layouts.  These are all unions of a uint32_t
    1.31 +  // (in case anyone wants access to the register as a whole) and a bitfield.
    1.32 +
    // EAX as returned by cpuid function 1.  'value' exposes the register as
    // a whole; 'bits' names its fields.  Fields are declared starting from
    // the low end of the register (assuming the usual x86 LSB-first
    // bit-field allocation); unnamed fields are padding.
    union StdCpuid1Eax {
      uint32_t value;
      struct {
        uint32_t stepping   : 4;
        uint32_t model      : 4;
        uint32_t family     : 4;
        uint32_t proc_type  : 2;
        uint32_t            : 2;
        uint32_t ext_model  : 4;
        uint32_t ext_family : 8;
        uint32_t            : 4;
      } bits;
    };
    1.46 +
    // EBX as returned by cpuid function 1: four byte-wide fields, declared
    // from the low byte upward.  Only threads_per_cpu is read by the
    // accessors in this class.
    union StdCpuid1Ebx {
      uint32_t value;
      struct {
        uint32_t brand_id        : 8;
        uint32_t clflush_size    : 8;
        uint32_t threads_per_cpu : 8;
        uint32_t apic_id         : 8;
      } bits;
    };
    1.56 +
    // ECX as returned by cpuid function 1 (feature flags).  Every named
    // field is a single bit; unnamed fields skip over bits this class does
    // not look at.
    union StdCpuid1Ecx {
      uint32_t value;
      struct {
        uint32_t sse3      : 1;
        uint32_t           : 2;
        uint32_t monitor   : 1;
        uint32_t           : 1;
        uint32_t vmx       : 1;
        uint32_t           : 1;
        uint32_t est       : 1;
        uint32_t           : 1;
        uint32_t ssse3     : 1;
        uint32_t cid       : 1;
        uint32_t           : 2;
        uint32_t cmpxchg16 : 1;
        uint32_t           : 4;
        uint32_t dca       : 1;
        uint32_t sse4_1    : 1;
        uint32_t sse4_2    : 1;
        uint32_t           : 11;
      } bits;
    };
    1.79 +
    // EDX as returned by cpuid function 1 (feature flags).  Every named
    // field is a single bit; unnamed fields are padding over bits that are
    // not consulted here.
    union StdCpuid1Edx {
      uint32_t value;
      struct {
        uint32_t          : 4;
        uint32_t tsc      : 1;
        uint32_t          : 3;
        uint32_t cmpxchg8 : 1;
        uint32_t          : 6;
        uint32_t cmov     : 1;
        uint32_t          : 7;
        uint32_t mmx      : 1;
        uint32_t fxsr     : 1;
        uint32_t sse      : 1;
        uint32_t sse2     : 1;
        uint32_t          : 1;
        uint32_t ht       : 1;
        uint32_t          : 3;
      } bits;
    };
    1.99 +
   // EAX as returned by cpuid function 4 (deterministic cache parameters).
   // cores_per_cpu occupies the top six bits; cores_per_cpu() adds one to
   // it before use.
   union DcpCpuid4Eax {
     uint32_t value;
     struct {
       uint32_t cache_type    : 5;
       uint32_t               : 21;
       uint32_t cores_per_cpu : 6;
     } bits;
   };
   1.108 +
   // EBX as returned by cpuid function 4 (deterministic cache parameters).
   // L1_data_cache_line_size() adds one to L1_line_size before use.
   union DcpCpuid4Ebx {
     uint32_t value;
     struct {
       uint32_t L1_line_size  : 12;
       uint32_t partitions    : 10;
       uint32_t associativity : 10;
     } bits;
   };
   1.117 +
   // ECX as returned by cpuid function 0x80000001 (extended feature flags).
   //
   // Bit positions follow the AMD CPUID specification (with the usual x86
   // LSB-first bit-field allocation):
   //   bit 0  LahfSahf
   //   bit 1  CmpLegacy
   //   bits 2-4 not consulted here
   //   bit 5  abm
   //   bit 6  sse4a
   //   bit 7  misalignsse
   //   bit 8  prefetchw
   // The gap after CmpLegacy must therefore be exactly three bits wide; a
   // four-bit gap shifts every following flag up by one, so that 'sse4a'
   // would read the MisAlignSse bit instead.
   union ExtCpuid1Ecx {
     uint32_t value;
     struct {
       uint32_t LahfSahf     : 1,
                CmpLegacy    : 1,
                             : 3,
                abm          : 1,
                sse4a        : 1,
                misalignsse  : 1,
                prefetchw    : 1,
                             : 23;
     } bits;
   };
   1.131 +
   // EDX as returned by cpuid function 0x80000001 (extended feature
   // flags).  The low 22 bits are skipped; every named field is a single
   // bit.
   union ExtCpuid1Edx {
     uint32_t value;
     struct {
       uint32_t           : 22;
       uint32_t mmx_amd   : 1;
       uint32_t mmx       : 1;
       uint32_t fxsr      : 1;
       uint32_t           : 4;
       uint32_t long_mode : 1;
       uint32_t tdnow2    : 1;
       uint32_t tdnow     : 1;
     } bits;
   };
   1.145 +
   // Shared layout for ECX and EDX as returned by cpuid function
   // 0x80000005: four byte-wide L1 cache descriptors, declared from the low
   // byte upward.
   union ExtCpuid5Ex {
     uint32_t value;
     struct {
       uint32_t L1_line_size : 8;
       uint32_t L1_tag_lines : 8;
       uint32_t L1_assoc     : 8;
       uint32_t L1_size      : 8;
     } bits;
   };
   1.155 +
   // ECX as returned by cpuid function 0x80000008.  Only the low byte is
   // named; cores_per_cpu() adds one to it before use.
   union ExtCpuid8Ecx {
     uint32_t value;
     struct {
       uint32_t cores_per_cpu : 8;
       uint32_t               : 24;
     } bits;
   };
   1.163 +
   1.164 +protected:
   // Cached processor identification.  _cpu is what cpu_family() returns;
   // _model and _stepping presumably hold the matching model and stepping
   // numbers (they are not read anywhere in this class body).
   static int _cpu;
   static int _model;
   static int _stepping;

   // Features returned by the "cpuid" instruction, as a mask of the CPU_*
   // bits below; 0 if this instruction is not available.
   static int _cpuFeatures;

   static const char* _features_str;

   // Bit assignments for _cpuFeatures, one bit per feature.
   enum {
     // The next bits are from cpuid 1 (EDX).
     CPU_CX8    = 0x0001,
     CPU_CMOV   = 0x0002,
     CPU_FXSR   = 0x0004,
     CPU_HT     = 0x0008,
     CPU_MMX    = 0x0010,
     // 3DNow comes from cpuid 0x80000001 (EDX).
     CPU_3DNOW  = 0x0020,
     CPU_SSE    = 0x0040,
     CPU_SSE2   = 0x0080,
     // SSE3 comes from cpuid 1 (ECX).
     CPU_SSE3   = 0x0100,
     CPU_SSSE3  = 0x0200,
     CPU_SSE4A  = 0x0400,
     CPU_SSE4_1 = 0x0800,
     CPU_SSE4_2 = 0x1000
   } cpuFeatureFlags;
   1.187 +
   1.188 +  // cpuid information block.  All info derived from executing cpuid with
   1.189 +  // various function numbers is stored here.  Intel and AMD info is
   1.190 +  // merged in this block: accessor methods disentangle it.
   1.191 +  //
   1.192 +  // The info block is laid out in subblocks of 4 dwords corresponding to
   1.193 +  // eax, ebx, ecx and edx, whether or not they contain anything useful.
   1.194 +  struct CpuidInfo {
   1.195 +    // cpuid function 0
   1.196 +    uint32_t std_max_function;
   1.197 +    uint32_t std_vendor_name_0;
   1.198 +    uint32_t std_vendor_name_1;
   1.199 +    uint32_t std_vendor_name_2;
   1.200 +
   1.201 +    // cpuid function 1
   1.202 +    StdCpuid1Eax std_cpuid1_eax;
   1.203 +    StdCpuid1Ebx std_cpuid1_ebx;
   1.204 +    StdCpuid1Ecx std_cpuid1_ecx;
   1.205 +    StdCpuid1Edx std_cpuid1_edx;
   1.206 +
   1.207 +    // cpuid function 4 (deterministic cache parameters)
   1.208 +    DcpCpuid4Eax dcp_cpuid4_eax;
   1.209 +    DcpCpuid4Ebx dcp_cpuid4_ebx;
   1.210 +    uint32_t     dcp_cpuid4_ecx; // unused currently
   1.211 +    uint32_t     dcp_cpuid4_edx; // unused currently
   1.212 +
   1.213 +    // cpuid function 0x80000000 // example, unused
   1.214 +    uint32_t ext_max_function;
   1.215 +    uint32_t ext_vendor_name_0;
   1.216 +    uint32_t ext_vendor_name_1;
   1.217 +    uint32_t ext_vendor_name_2;
   1.218 +
   1.219 +    // cpuid function 0x80000001
   1.220 +    uint32_t     ext_cpuid1_eax; // reserved
   1.221 +    uint32_t     ext_cpuid1_ebx; // reserved
   1.222 +    ExtCpuid1Ecx ext_cpuid1_ecx;
   1.223 +    ExtCpuid1Edx ext_cpuid1_edx;
   1.224 +
   1.225 +    // cpuid functions 0x80000002 thru 0x80000004: example, unused
   1.226 +    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
   1.227 +    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
   1.228 +    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
   1.229 +
   1.230 +    // cpuid function 0x80000005 //AMD L1, Intel reserved
   1.231 +    uint32_t     ext_cpuid5_eax; // unused currently
   1.232 +    uint32_t     ext_cpuid5_ebx; // reserved
   1.233 +    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
   1.234 +    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
   1.235 +
   1.236 +    // cpuid function 0x80000008
   1.237 +    uint32_t     ext_cpuid8_eax; // unused currently
   1.238 +    uint32_t     ext_cpuid8_ebx; // reserved
   1.239 +    ExtCpuid8Ecx ext_cpuid8_ecx;
   1.240 +    uint32_t     ext_cpuid8_edx; // reserved
   1.241 +  };
   1.242 +
   1.243 +  // The actual cpuid info block
   1.244 +  static CpuidInfo _cpuid_info;
   1.245 +
   1.246 +  // Extractors and predicates
   1.247 +  static uint32_t extended_cpu_family() {
   1.248 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
   1.249 +    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
   1.250 +    return result;
   1.251 +  }
   1.252 +  static uint32_t extended_cpu_model() {
   1.253 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
   1.254 +    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
   1.255 +    return result;
   1.256 +  }
   1.257 +  static uint32_t cpu_stepping() {
   1.258 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
   1.259 +    return result;
   1.260 +  }
   1.261 +  static uint logical_processor_count() {
   1.262 +    uint result = threads_per_core();
   1.263 +    return result;
   1.264 +  }
   1.265 +  static uint32_t feature_flags() {
   1.266 +    uint32_t result = 0;
   1.267 +    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
   1.268 +      result |= CPU_CX8;
   1.269 +    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
   1.270 +      result |= CPU_CMOV;
   1.271 +    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
   1.272 +        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
   1.273 +      result |= CPU_FXSR;
   1.274 +    // HT flag is set for multi-core processors also.
   1.275 +    if (threads_per_core() > 1)
   1.276 +      result |= CPU_HT;
   1.277 +    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
   1.278 +        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
   1.279 +      result |= CPU_MMX;
   1.280 +    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
   1.281 +      result |= CPU_3DNOW;
   1.282 +    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
   1.283 +      result |= CPU_SSE;
   1.284 +    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
   1.285 +      result |= CPU_SSE2;
   1.286 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
   1.287 +      result |= CPU_SSE3;
   1.288 +    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
   1.289 +      result |= CPU_SSSE3;
   1.290 +    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
   1.291 +      result |= CPU_SSE4A;
   1.292 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
   1.293 +      result |= CPU_SSE4_1;
   1.294 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
   1.295 +      result |= CPU_SSE4_2;
   1.296 +    return result;
   1.297 +  }
   1.298 +
   1.299 +  static void get_processor_features();  // declaration only; no body appears in this class
   1.300 +
   1.301 +public:
   1.302 +  // Offsets for cpuid asm stub
   1.303 +  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
   1.304 +  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
   1.305 +  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
   1.306 +  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   1.307 +  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   1.308 +  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
   1.309 +
   1.310 +  // Initialization entry point (declaration only; no body appears in this class)
   1.311 +  static void initialize();
   1.312 +
   1.313 +  // Asserts
   1.314 +  static void assert_is_initialized() {
   1.315 +    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
   1.316 +  }
   1.317 +
   1.318 +  //
   1.319 +  // Processor family:
   1.320 +  //       3   -  386
   1.321 +  //       4   -  486
   1.322 +  //       5   -  Pentium
   1.323 +  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
   1.324 +  //              Pentium M, Core Solo, Core Duo, Core2 Duo
   1.325 +  //    family 6 model:   9,        13,       14,        15
   1.326 +  //    0x0f   -  Pentium 4, Opteron
   1.327 +  //
   1.328 +  // Note: The cpu family should be used to select between
   1.329 +  //       instruction sequences which are valid on all Intel
   1.330 +  //       processors.  Use the feature test functions below to
   1.331 +  //       determine whether a particular instruction is supported.
   1.332 +  //
   1.333 +  static int  cpu_family()        { return _cpu;}
   1.334 +  static bool is_P6()             { return cpu_family() >= 6; }
   1.335 +
   1.336 +  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
   1.337 +  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
   1.338 +
   1.339 +  static uint cores_per_cpu()  {
   1.340 +    uint result = 1;
   1.341 +    if (is_intel()) {
   1.342 +      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
   1.343 +    } else if (is_amd()) {
   1.344 +      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
   1.345 +    }
   1.346 +    return result;
   1.347 +  }
   1.348 +
   1.349 +  static uint threads_per_core()  {
   1.350 +    uint result = 1;
   1.351 +    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
   1.352 +      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
   1.353 +               cores_per_cpu();
   1.354 +    }
   1.355 +    return result;
   1.356 +  }
   1.357 +
   1.358 +  static intx L1_data_cache_line_size()  {
   1.359 +    intx result = 0;
   1.360 +    if (is_intel()) {
   1.361 +      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
   1.362 +    } else if (is_amd()) {
   1.363 +      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
   1.364 +    }
   1.365 +    if (result < 32) // not defined ?
   1.366 +      result = 32;   // 32 bytes by default on x86 and other x64
   1.367 +    return result;
   1.368 +  }
   1.369 +
   1.370 +  //
   1.371 +  // Feature identification
   1.372 +  //
   1.373 +  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
   1.374 +  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
   1.375 +  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
   1.376 +  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
   1.377 +  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
   1.378 +  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
   1.379 +  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
   1.380 +  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   1.381 +  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   1.382 +  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   1.383 +  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   1.384 +  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   1.385 +  //
   1.386 +  // AMD features
   1.387 +  //
   1.388 +  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
   1.389 +  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
   1.390 +  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
   1.391 +  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
   1.392 +
   1.393 +  static bool supports_compare_and_exchange() { return true; }
   1.394 +
   1.395 +  static const char* cpu_features()           { return _features_str; }
   1.396 +
   1.397 +  static intx allocate_prefetch_distance() {
   1.398 +    // This method should be called before allocate_prefetch_style().
   1.399 +    //
   1.400 +    // Hardware prefetching (distance/size in bytes):
   1.401 +    // Pentium 3 -  64 /  32
   1.402 +    // Pentium 4 - 256 / 128
   1.403 +    // Athlon    -  64 /  32 ????
   1.404 +    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
   1.405 +    // Core      - 128 /  64
   1.406 +    //
   1.407 +    // Software prefetching (distance in bytes / instruction with best score):
   1.408 +    // Pentium 3 - 128 / prefetchnta
   1.409 +    // Pentium 4 - 512 / prefetchnta
   1.410 +    // Athlon    - 128 / prefetchnta
   1.411 +    // Opteron   - 256 / prefetchnta
   1.412 +    // Core      - 256 / prefetchnta
   1.413 +    // It will be used only when AllocatePrefetchStyle > 0
   1.414 +
   1.415 +    intx count = AllocatePrefetchDistance;
   1.416 +    if (count < 0) {   // default ?
   1.417 +      if (is_amd()) {  // AMD
   1.418 +        if (supports_sse2())
   1.419 +          count = 256; // Opteron
   1.420 +        else
   1.421 +          count = 128; // Athlon
   1.422 +      } else {         // Intel
   1.423 +        if (supports_sse2())
   1.424 +          if (cpu_family() == 6) {
   1.425 +            count = 256; // Pentium M, Core, Core2
   1.426 +          } else {
   1.427 +            count = 512; // Pentium 4
   1.428 +          }
   1.429 +        else
   1.430 +          count = 128; // Pentium 3 (and all other old CPUs)
   1.431 +      }
   1.432 +    }
   1.433 +    return count;
   1.434 +  }
   1.435 +  static intx allocate_prefetch_style() {
   1.436 +    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
   1.437 +    // Return 0 if AllocatePrefetchDistance was not defined.
   1.438 +    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
   1.439 +  }
   1.440 +
   1.441 +  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
   1.442 +  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
   1.443 +  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
   1.444 +  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
   1.445 +
   1.446 +  // gc copy/scan is disabled if prefetchw isn't supported, because
   1.447 +  // Prefetch::write emits an inlined prefetchw on Linux.
   1.448 +  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
   1.449 +  // The used prefetcht0 instruction works for both amd64 and em64t.
   1.450 +  static intx prefetch_copy_interval_in_bytes() {
   1.451 +    intx interval = PrefetchCopyIntervalInBytes;
   1.452 +    return interval >= 0 ? interval : 576;
   1.453 +  }
   1.454 +  static intx prefetch_scan_interval_in_bytes() {
   1.455 +    intx interval = PrefetchScanIntervalInBytes;
   1.456 +    return interval >= 0 ? interval : 576;
   1.457 +  }
   1.458 +  static intx prefetch_fields_ahead() {
   1.459 +    intx count = PrefetchFieldsAhead;
   1.460 +    return count >= 0 ? count : 1;
   1.461 +  }
   1.462 +};

mercurial