src/cpu/x86/vm/vm_version_x86.hpp

changeset 0
f90c822e73f8
child 6876
710a3c8b516e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Wed Apr 27 01:25:04 2016 +0800
     1.3 @@ -0,0 +1,737 @@
     1.4 +/*
     1.5 + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.
    1.11 + *
    1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.15 + * version 2 for more details (a copy is included in the LICENSE file that
    1.16 + * accompanied this code).
    1.17 + *
    1.18 + * You should have received a copy of the GNU General Public License version
    1.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.21 + *
    1.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.23 + * or visit www.oracle.com if you need additional information or have any
    1.24 + * questions.
    1.25 + *
    1.26 + */
    1.27 +
    1.28 +#ifndef CPU_X86_VM_VM_VERSION_X86_HPP
    1.29 +#define CPU_X86_VM_VM_VERSION_X86_HPP
    1.30 +
    1.31 +#include "runtime/globals_extension.hpp"
    1.32 +#include "runtime/vm_version.hpp"
    1.33 +
    1.34 +class VM_Version : public Abstract_VM_Version {
    1.35 +public:
    1.36 +  // cpuid result register layouts.  These are all unions of a uint32_t
    1.37 +  // (in case anyone wants access to the register as a whole) and a bitfield.
    1.38 +
    1.39 +  union StdCpuid1Eax { // EAX of cpuid function 1: stepping/model/family fields
    1.40 +    uint32_t value;
    1.41 +    struct {
    1.42 +      uint32_t stepping   : 4,
    1.43 +               model      : 4,
    1.44 +               family     : 4,
    1.45 +               proc_type  : 2,
    1.46 +                          : 2,
    1.47 +               ext_model  : 4,
    1.48 +               ext_family : 8,
    1.49 +                          : 4;
    1.50 +    } bits;
    1.51 +  };
    1.52 +
    1.53 +  union StdCpuid1Ebx { // EBX of cpuid function 1; threads_per_cpu is read by threads_per_core()
    1.54 +    uint32_t value;
    1.55 +    struct {
    1.56 +      uint32_t brand_id         : 8,
    1.57 +               clflush_size     : 8,
    1.58 +               threads_per_cpu  : 8,
    1.59 +               apic_id          : 8;
    1.60 +    } bits;
    1.61 +  };
    1.62 +
    1.63 +  union StdCpuid1Ecx { // ECX of cpuid function 1: feature bits, several are tested in feature_flags()
    1.64 +    uint32_t value;
    1.65 +    struct {
    1.66 +      uint32_t sse3     : 1,
    1.67 +               clmul    : 1,
    1.68 +                        : 1,
    1.69 +               monitor  : 1,
    1.70 +                        : 1,
    1.71 +               vmx      : 1,
    1.72 +                        : 1,
    1.73 +               est      : 1,
    1.74 +                        : 1,
    1.75 +               ssse3    : 1,
    1.76 +               cid      : 1,
    1.77 +                        : 2,
    1.78 +               cmpxchg16: 1,
    1.79 +                        : 4,
    1.80 +               dca      : 1,
    1.81 +               sse4_1   : 1,
    1.82 +               sse4_2   : 1,
    1.83 +                        : 2,
    1.84 +               popcnt   : 1,
    1.85 +                        : 1,
    1.86 +               aes      : 1,
    1.87 +                        : 1,
    1.88 +               osxsave  : 1,
    1.89 +               avx      : 1,
    1.90 +                        : 3;
    1.91 +    } bits;
    1.92 +  };
    1.93 +
    1.94 +  union StdCpuid1Edx { // EDX of cpuid function 1: feature bits, several are tested in feature_flags()
    1.95 +    uint32_t value;
    1.96 +    struct {
    1.97 +      uint32_t          : 4,
    1.98 +               tsc      : 1,
    1.99 +                        : 3,
   1.100 +               cmpxchg8 : 1,
   1.101 +                        : 6,
   1.102 +               cmov     : 1,
   1.103 +                        : 3,
   1.104 +               clflush  : 1,
   1.105 +                        : 3,
   1.106 +               mmx      : 1,
   1.107 +               fxsr     : 1,
   1.108 +               sse      : 1,
   1.109 +               sse2     : 1,
   1.110 +                        : 1,
   1.111 +               ht       : 1,
   1.112 +                        : 3;
   1.113 +    } bits;
   1.114 +  };
   1.115 +
   1.116 +  union DcpCpuid4Eax { // EAX of cpuid function 4 (deterministic cache parameters)
   1.117 +    uint32_t value;
   1.118 +    struct {
   1.119 +      uint32_t cache_type    : 5,
   1.120 +                             : 21,
   1.121 +               cores_per_cpu : 6;
   1.122 +    } bits;
   1.123 +  };
   1.124 +
   1.125 +  union DcpCpuid4Ebx { // EBX of cpuid function 4; L1_line_size is read by prefetch_data_size()
   1.126 +    uint32_t value;
   1.127 +    struct {
   1.128 +      uint32_t L1_line_size  : 12,
   1.129 +               partitions    : 10,
   1.130 +               associativity : 10;
   1.131 +    } bits;
   1.132 +  };
   1.133 +
   1.134 +  union TplCpuidBEbx { // EBX of cpuid function 0xB (processor topology); stored once per ecx sub-level
   1.135 +    uint32_t value;
   1.136 +    struct {
   1.137 +      uint32_t logical_cpus : 16,
   1.138 +                            : 16;
   1.139 +    } bits;
   1.140 +  };
   1.141 +
   1.142 +  union ExtCpuid1Ecx { // ECX of cpuid function 0x80000001
   1.143 +    uint32_t value;
   1.144 +    struct {
   1.145 +      uint32_t LahfSahf     : 1,
   1.146 +               CmpLegacy    : 1,
   1.147 +                            : 3,
   1.148 +               lzcnt_intel  : 1,
   1.149 +               lzcnt        : 1,
   1.150 +               sse4a        : 1,
   1.151 +               misalignsse  : 1,
   1.152 +               prefetchw    : 1,
   1.153 +                            : 22;
   1.154 +    } bits;
   1.155 +  };
   1.156 +
   1.157 +  union ExtCpuid1Edx { // EDX of cpuid function 0x80000001
   1.158 +    uint32_t value;
   1.159 +    struct {
   1.160 +      uint32_t           : 22,
   1.161 +               mmx_amd   : 1,
   1.162 +               mmx       : 1,
   1.163 +               fxsr      : 1,
   1.164 +                         : 4,
   1.165 +               long_mode : 1,
   1.166 +               tdnow2    : 1,
   1.167 +               tdnow     : 1;
   1.168 +    } bits;
   1.169 +  };
   1.170 +
   1.171 +  union ExtCpuid5Ex { // ECX and EDX of cpuid function 0x80000005 (AMD L1 data/instruction cache info)
   1.172 +    uint32_t value;
   1.173 +    struct {
   1.174 +      uint32_t L1_line_size : 8,
   1.175 +               L1_tag_lines : 8,
   1.176 +               L1_assoc     : 8,
   1.177 +               L1_size      : 8;
   1.178 +    } bits;
   1.179 +  };
   1.180 +
   1.181 +  union ExtCpuid7Edx { // EDX of cpuid function 0x80000007 (tsc invariance bit)
   1.182 +    uint32_t value;
   1.183 +    struct {
   1.184 +      uint32_t               : 8,
   1.185 +              tsc_invariance : 1,
   1.186 +                             : 23;
   1.187 +    } bits;
   1.188 +  };
   1.189 +
   1.190 +  union ExtCpuid8Ecx { // ECX of cpuid function 0x80000008; read by cores_per_cpu() on AMD
   1.191 +    uint32_t value;
   1.192 +    struct {
   1.193 +      uint32_t cores_per_cpu : 8,
   1.194 +                             : 24;
   1.195 +    } bits;
   1.196 +  };
   1.197 +
   1.198 +  union SefCpuid7Eax { // EAX of cpuid function 7 (structured extended features); no bitfield view defined
   1.199 +    uint32_t value;
   1.200 +  };
   1.201 +
   1.202 +  union SefCpuid7Ebx { // EBX of cpuid function 7 (structured extended features)
   1.203 +    uint32_t value;
   1.204 +    struct {
   1.205 +      uint32_t fsgsbase : 1,
   1.206 +                        : 2,
   1.207 +                   bmi1 : 1,
   1.208 +                        : 1,
   1.209 +                   avx2 : 1,
   1.210 +                        : 2,
   1.211 +                   bmi2 : 1,
   1.212 +                   erms : 1,
   1.213 +                        : 1,
   1.214 +                   rtm  : 1,
   1.215 +                        : 20;
   1.216 +    } bits;
   1.217 +  };
   1.218 +
   1.219 +  union XemXcr0Eax { // EAX half of the extended control register XCR0
   1.220 +    uint32_t value;
   1.221 +    struct {
   1.222 +      uint32_t x87 : 1,
   1.223 +               sse : 1,
   1.224 +               ymm : 1,
   1.225 +                   : 29;
   1.226 +    } bits;
   1.227 +  };
   1.228 +
   1.229 +protected:
   1.230 +  static int _cpu;
   1.231 +  static int _model;
   1.232 +  static int _stepping;
   1.233 +  static int _cpuFeatures;     // bitmask of the CPU_* flags below, derived from the "cpuid" instruction;
   1.234 +                               // 0 if this instruction is not available
   1.235 +  static const char* _features_str;
   1.236 +
   1.237 +  static address   _cpuinfo_segv_addr; // address of instruction which causes SEGV
   1.238 +  static address   _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
   1.239 +
   1.240 +  enum { // single-bit values combined in _cpuFeatures and tested by the supports_*() predicates
   1.241 +    CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
   1.242 +    CPU_CMOV   = (1 << 1),
   1.243 +    CPU_FXSR   = (1 << 2),
   1.244 +    CPU_HT     = (1 << 3),
   1.245 +    CPU_MMX    = (1 << 4),
   1.246 +    CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
   1.247 +                                    // may not necessarily support other 3dnow instructions
   1.248 +    CPU_SSE    = (1 << 6),
   1.249 +    CPU_SSE2   = (1 << 7),
   1.250 +    CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
   1.251 +    CPU_SSSE3  = (1 << 9),
   1.252 +    CPU_SSE4A  = (1 << 10),
   1.253 +    CPU_SSE4_1 = (1 << 11),
   1.254 +    CPU_SSE4_2 = (1 << 12),
   1.255 +    CPU_POPCNT = (1 << 13),
   1.256 +    CPU_LZCNT  = (1 << 14),
   1.257 +    CPU_TSC    = (1 << 15),
   1.258 +    CPU_TSCINV = (1 << 16),
   1.259 +    CPU_AVX    = (1 << 17),
   1.260 +    CPU_AVX2   = (1 << 18),
   1.261 +    CPU_AES    = (1 << 19),
   1.262 +    CPU_ERMS   = (1 << 20), // enhanced 'rep movsb/stosb' instructions
   1.263 +    CPU_CLMUL  = (1 << 21), // carryless multiply for CRC
   1.264 +    CPU_BMI1   = (1 << 22),
   1.265 +    CPU_BMI2   = (1 << 23),
   1.266 +    CPU_RTM    = (1 << 24)  // Restricted Transactional Memory instructions
   1.267 +  } cpuFeatureFlags;
   1.268 +
   1.269 +  enum { // values compared against extended_cpu_family() and extended_cpu_model()
   1.270 +    // AMD
   1.271 +    CPU_FAMILY_AMD_11H       = 0x11,
   1.272 +    // Intel
   1.273 +    CPU_FAMILY_INTEL_CORE    = 6,
   1.274 +    CPU_MODEL_NEHALEM        = 0x1e,
   1.275 +    CPU_MODEL_NEHALEM_EP     = 0x1a,
   1.276 +    CPU_MODEL_NEHALEM_EX     = 0x2e,
   1.277 +    CPU_MODEL_WESTMERE       = 0x25,
   1.278 +    CPU_MODEL_WESTMERE_EP    = 0x2c,
   1.279 +    CPU_MODEL_WESTMERE_EX    = 0x2f,
   1.280 +    CPU_MODEL_SANDYBRIDGE    = 0x2a,
   1.281 +    CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
   1.282 +    CPU_MODEL_IVYBRIDGE_EP   = 0x3a
   1.283 +  } cpuExtendedFamily;
   1.284 +
   1.285 +  // cpuid information block.  All info derived from executing cpuid with
   1.286 +  // various function numbers is stored here.  Intel and AMD info is
   1.287 +  // merged in this block: accessor methods disentangle it.
   1.288 +  //
   1.289 +  // The info block is laid out in subblocks of 4 dwords corresponding to
   1.290 +  // eax, ebx, ecx and edx, whether or not they contain anything useful.
   1.291 +  struct CpuidInfo {
   1.292 +    // cpuid function 0
   1.293 +    uint32_t std_max_function;
   1.294 +    uint32_t std_vendor_name_0;
   1.295 +    uint32_t std_vendor_name_1;
   1.296 +    uint32_t std_vendor_name_2;
   1.297 +
   1.298 +    // cpuid function 1
   1.299 +    StdCpuid1Eax std_cpuid1_eax;
   1.300 +    StdCpuid1Ebx std_cpuid1_ebx;
   1.301 +    StdCpuid1Ecx std_cpuid1_ecx;
   1.302 +    StdCpuid1Edx std_cpuid1_edx;
   1.303 +
   1.304 +    // cpuid function 4 (deterministic cache parameters)
   1.305 +    DcpCpuid4Eax dcp_cpuid4_eax;
   1.306 +    DcpCpuid4Ebx dcp_cpuid4_ebx;
   1.307 +    uint32_t     dcp_cpuid4_ecx; // unused currently
   1.308 +    uint32_t     dcp_cpuid4_edx; // unused currently
   1.309 +
   1.310 +    // cpuid function 7 (structured extended features)
   1.311 +    SefCpuid7Eax sef_cpuid7_eax;
   1.312 +    SefCpuid7Ebx sef_cpuid7_ebx;
   1.313 +    uint32_t     sef_cpuid7_ecx; // unused currently
   1.314 +    uint32_t     sef_cpuid7_edx; // unused currently
   1.315 +
   1.316 +    // cpuid function 0xB (processor topology)
   1.317 +    // ecx = 0
   1.318 +    uint32_t     tpl_cpuidB0_eax;
   1.319 +    TplCpuidBEbx tpl_cpuidB0_ebx;
   1.320 +    uint32_t     tpl_cpuidB0_ecx; // unused currently
   1.321 +    uint32_t     tpl_cpuidB0_edx; // unused currently
   1.322 +
   1.323 +    // ecx = 1
   1.324 +    uint32_t     tpl_cpuidB1_eax;
   1.325 +    TplCpuidBEbx tpl_cpuidB1_ebx;
   1.326 +    uint32_t     tpl_cpuidB1_ecx; // unused currently
   1.327 +    uint32_t     tpl_cpuidB1_edx; // unused currently
   1.328 +
   1.329 +    // ecx = 2
   1.330 +    uint32_t     tpl_cpuidB2_eax;
   1.331 +    TplCpuidBEbx tpl_cpuidB2_ebx;
   1.332 +    uint32_t     tpl_cpuidB2_ecx; // unused currently
   1.333 +    uint32_t     tpl_cpuidB2_edx; // unused currently
   1.334 +
   1.335 +    // cpuid function 0x80000000 // example, unused
   1.336 +    uint32_t ext_max_function;
   1.337 +    uint32_t ext_vendor_name_0;
   1.338 +    uint32_t ext_vendor_name_1;
   1.339 +    uint32_t ext_vendor_name_2;
   1.340 +
   1.341 +    // cpuid function 0x80000001
   1.342 +    uint32_t     ext_cpuid1_eax; // reserved
   1.343 +    uint32_t     ext_cpuid1_ebx; // reserved
   1.344 +    ExtCpuid1Ecx ext_cpuid1_ecx;
   1.345 +    ExtCpuid1Edx ext_cpuid1_edx;
   1.346 +
   1.347 +    // cpuid functions 0x80000002 thru 0x80000004: example, unused
   1.348 +    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
   1.349 +    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
   1.350 +    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
   1.351 +
   1.352 +    // cpuid function 0x80000005 // AMD L1, Intel reserved
   1.353 +    uint32_t     ext_cpuid5_eax; // unused currently
   1.354 +    uint32_t     ext_cpuid5_ebx; // reserved
   1.355 +    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
   1.356 +    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
   1.357 +
   1.358 +    // cpuid function 0x80000007
   1.359 +    uint32_t     ext_cpuid7_eax; // reserved
   1.360 +    uint32_t     ext_cpuid7_ebx; // reserved
   1.361 +    uint32_t     ext_cpuid7_ecx; // reserved
   1.362 +    ExtCpuid7Edx ext_cpuid7_edx; // tscinv
   1.363 +
   1.364 +    // cpuid function 0x80000008
   1.365 +    uint32_t     ext_cpuid8_eax; // unused currently
   1.366 +    uint32_t     ext_cpuid8_ebx; // reserved
   1.367 +    ExtCpuid8Ecx ext_cpuid8_ecx;
   1.368 +    uint32_t     ext_cpuid8_edx; // reserved
   1.369 +
   1.370 +    // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
   1.371 +    XemXcr0Eax   xem_xcr0_eax;
   1.372 +    uint32_t     xem_xcr0_edx; // reserved
   1.373 +
   1.374 +    // Space to save ymm registers after signal handling; compared with ymm_test_value()
   1.375 +    int          ymm_save[8*4]; // ymm0, ymm7, ymm8, ymm15: 8 ints (32 bytes) per register
   1.376 +  };
   1.377 +
   1.378 +  // The actual cpuid info block
   1.379 +  static CpuidInfo _cpuid_info;
   1.380 +
   1.381 +  // Extractors and predicates
   1.382 +  // Family id: base family plus extended family, both from cpuid function 1 EAX.
   1.383 +  static uint32_t extended_cpu_family() {
   1.384 +    return _cpuid_info.std_cpuid1_eax.bits.family +
   1.385 +           _cpuid_info.std_cpuid1_eax.bits.ext_family;
   1.386 +  }
   1.387 +
   1.388 +  // Model id: ext_model supplies bits 7:4, model supplies the low four bits.
   1.389 +  static uint32_t extended_cpu_model() {
   1.390 +    const uint32_t high_nibble = _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
   1.391 +    return high_nibble | _cpuid_info.std_cpuid1_eax.bits.model;
   1.392 +  }
   1.393 +
   1.394 +  // Stepping field of cpuid function 1 EAX.
   1.395 +  static uint32_t cpu_stepping() {
   1.396 +    return _cpuid_info.std_cpuid1_eax.bits.stepping;
   1.397 +  }
   1.398 +
   1.399 +  // Currently just forwards the number of threads per core.
   1.400 +  static uint logical_processor_count() {
   1.401 +    return threads_per_core();
   1.402 +  }
   1.403 +
   1.404 +  static uint32_t feature_flags() {
   1.405 +    // Fold the raw cpuid and XCR0 bits into a single mask of CPU_* flags.
   1.406 +    const StdCpuid1Ecx& std_ecx = _cpuid_info.std_cpuid1_ecx;
   1.407 +    const StdCpuid1Edx& std_edx = _cpuid_info.std_cpuid1_edx;
   1.408 +    const SefCpuid7Ebx& sef_ebx = _cpuid_info.sef_cpuid7_ebx;
   1.409 +    const ExtCpuid1Ecx& ext_ecx = _cpuid_info.ext_cpuid1_ecx;
   1.410 +    const ExtCpuid1Edx& ext_edx = _cpuid_info.ext_cpuid1_edx;
   1.411 +    const XemXcr0Eax&   xcr0    = _cpuid_info.xem_xcr0_eax;
   1.412 +    uint32_t flags = 0;
   1.413 +
   1.414 +    // Bits from cpuid function 1, EDX (plus the derived HT flag).
   1.415 +    if (std_edx.bits.cmpxchg8 != 0) { flags |= CPU_CX8; }
   1.416 +    if (std_edx.bits.cmov != 0)     { flags |= CPU_CMOV; }
   1.417 +    // On AMD the fxsr bit of the extended leaf is accepted as well.
   1.418 +    if (std_edx.bits.fxsr != 0 || (is_amd() && ext_edx.bits.fxsr != 0)) {
   1.419 +      flags |= CPU_FXSR;
   1.420 +    }
   1.421 +    // HT flag is set for multi-core processors also.
   1.422 +    if (threads_per_core() > 1)     { flags |= CPU_HT; }
   1.423 +    // On AMD the mmx bit of the extended leaf is accepted as well.
   1.424 +    if (std_edx.bits.mmx != 0 || (is_amd() && ext_edx.bits.mmx != 0)) {
   1.425 +      flags |= CPU_MMX;
   1.426 +    }
   1.427 +    if (std_edx.bits.sse != 0)      { flags |= CPU_SSE; }
   1.428 +    if (std_edx.bits.sse2 != 0)     { flags |= CPU_SSE2; }
   1.429 +    // Bits from cpuid function 1, ECX.
   1.430 +    if (std_ecx.bits.sse3 != 0)     { flags |= CPU_SSE3; }
   1.431 +    if (std_ecx.bits.ssse3 != 0)    { flags |= CPU_SSSE3; }
   1.432 +    if (std_ecx.bits.sse4_1 != 0)   { flags |= CPU_SSE4_1; }
   1.433 +    if (std_ecx.bits.sse4_2 != 0)   { flags |= CPU_SSE4_2; }
   1.434 +    if (std_ecx.bits.popcnt != 0)   { flags |= CPU_POPCNT; }
   1.435 +
   1.436 +    // AVX needs the cpuid bit, OSXSAVE, and both the sse and ymm bits of
   1.437 +    // XCR0; AVX2 is only reported together with AVX.
   1.438 +    const bool avx_usable = std_ecx.bits.avx != 0 &&
   1.439 +                            std_ecx.bits.osxsave != 0 &&
   1.440 +                            xcr0.bits.sse != 0 &&
   1.441 +                            xcr0.bits.ymm != 0;
   1.442 +    if (avx_usable) {
   1.443 +      flags |= CPU_AVX;
   1.444 +      if (sef_ebx.bits.avx2 != 0)   { flags |= CPU_AVX2; }
   1.445 +    }
   1.446 +    if (sef_ebx.bits.bmi1 != 0)     { flags |= CPU_BMI1; }
   1.447 +    if (std_edx.bits.tsc != 0)      { flags |= CPU_TSC; }
   1.448 +    if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) {
   1.449 +      flags |= CPU_TSCINV;
   1.450 +    }
   1.451 +    if (std_ecx.bits.aes != 0)      { flags |= CPU_AES; }
   1.452 +    if (sef_ebx.bits.erms != 0)     { flags |= CPU_ERMS; }
   1.453 +    if (std_ecx.bits.clmul != 0)    { flags |= CPU_CLMUL; }
   1.454 +    if (sef_ebx.bits.rtm != 0)      { flags |= CPU_RTM; }
   1.455 +
   1.456 +    // AMD features.
   1.457 +    if (is_amd()) {
   1.458 +      // Either the 3dnow bit or the prefetchw bit enables 3dnow prefetch.
   1.459 +      if (ext_edx.bits.tdnow != 0 || ext_ecx.bits.prefetchw != 0) {
   1.460 +        flags |= CPU_3DNOW_PREFETCH;
   1.461 +      }
   1.462 +      if (ext_ecx.bits.lzcnt != 0)  { flags |= CPU_LZCNT; }
   1.463 +      if (ext_ecx.bits.sse4a != 0)  { flags |= CPU_SSE4A; }
   1.464 +    }
   1.465 +    // Intel features.
   1.466 +    if (is_intel()) {
   1.467 +      // BMI2 is only reported for Intel; it comes from the same cpuid 7 EBX
   1.468 +      // register that supplies BMI1 above.
   1.469 +      if (sef_ebx.bits.bmi2 != 0)   { flags |= CPU_BMI2; }
   1.470 +      // Intel's lzcnt bit sits at a different position of the same register.
   1.471 +      if (ext_ecx.bits.lzcnt_intel != 0) { flags |= CPU_LZCNT; }
   1.472 +    }
   1.473 +
   1.474 +    return flags;
   1.475 +  }
   1.476 +
   1.477 +  static bool os_supports_avx_vectors() {
   1.478 +    // Without AVX there is nothing to verify.
   1.479 +    if (!supports_avx()) return false;
   1.480 +    // Every word of ymm_save must still hold the test pattern; this verifies
   1.481 +    // that the OS saves/restores all bits of the AVX registers during signal
   1.482 +    // processing.
   1.483 +    const int saved_regs = 2 LP64_ONLY(+2);
   1.484 +    const int word_count = 8 * saved_regs; // 32 bytes per ymm register
   1.485 +    int idx = 0;
   1.486 +    while (idx < word_count && _cpuid_info.ymm_save[idx] == ymm_test_value()) {
   1.487 +      idx++;
   1.488 +    }
   1.489 +    return idx == word_count;
   1.490 +  }
   1.491 +
   1.492 +  static void get_processor_features();
   1.493 +
   1.494 +public:
   1.495 +  // Byte offsets of CpuidInfo fields (first register of each sub-block), for the cpuid asm stub
   1.496 +  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
   1.497 +  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
   1.498 +  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
   1.499 +  static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
   1.500 +  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   1.501 +  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   1.502 +  static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   1.503 +  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
   1.504 +  static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   1.505 +  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   1.506 +  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
   1.507 +  static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
   1.508 +  static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
   1.509 +
   1.510 +  // The value expected in every ymm_save word when the ymm registers are checked after signal handling
   1.511 +  static int ymm_test_value()    { return 0xCAFEBABE; }
   1.512 +
   1.513 +  static void get_cpu_info_wrapper();
   1.514 +  static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; } // pc of the instruction which causes SEGV
   1.515 +  static bool  is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
   1.516 +  static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; } // pc of the instruction after it
   1.517 +  static address  cpuinfo_cont_addr()           { return _cpuinfo_cont_addr; }
   1.518 +  // Direct overrides of the _cpuFeatures mask: clear it, or set exactly SSE | SSE2 | AVX.
   1.519 +  static void clean_cpuFeatures()   { _cpuFeatures = 0; }
   1.520 +  static void set_avx_cpuFeatures() { _cpuFeatures = (CPU_SSE | CPU_SSE2 | CPU_AVX); }
   1.521 +
   1.522 +
   1.523 +  // Initialization
   1.524 +  static void initialize();
   1.525 +
   1.526 +  // Override Abstract_VM_Version implementation
   1.527 +  static bool use_biased_locking();
   1.528 +
   1.529 +  // Asserts
   1.530 +  static void assert_is_initialized() { // a nonzero cpuid 1 family field is taken as the sign of initialization
   1.531 +    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
   1.532 +  }
   1.533 +
   1.534 +  //
   1.535 +  // Processor family:
   1.536 +  //       3   -  386
   1.537 +  //       4   -  486
   1.538 +  //       5   -  Pentium
   1.539 +  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
   1.540 +  //              Pentium M, Core Solo, Core Duo, Core2 Duo
   1.541 +  //    family 6 model:   9,        13,       14,        15
   1.542 +  //    0x0f   -  Pentium 4, Opteron
   1.543 +  //
   1.544 +  // Note: The cpu family should be used to select between
   1.545 +  //       instruction sequences which are valid on all Intel
   1.546 +  //       processors.  Use the feature test functions below to
   1.547 +  //       determine whether a particular instruction is supported.
   1.548 +  //
   1.549 +  static int  cpu_family()        { return _cpu;}
   1.550 +  static bool is_P6()             { return cpu_family() >= 6; } // family 6 or above
   1.551 +  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA': the bytes "Auth" read as a little-endian dword
   1.552 +  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG': the bytes "Genu" read as a little-endian dword
   1.553 +
   1.554 +  static bool supports_processor_topology() {
   1.555 +    // A max cpuid >= 0xB alone is not sufficient: some cpus report it without
   1.556 +    // supporting processor topology, and eax[4:0] | ebx[0:15] == 0 then
   1.557 +    // indicates an invalid topology level.
   1.558 +    if (_cpuid_info.std_max_function < 0xB) return false;
   1.559 +    return ((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0; }
   1.560 +
   1.561 +  // Cores per package, never 0: cpuid 0xB may report 0 logical cpus at a level.
   1.562 +  static uint cores_per_cpu()  {
   1.563 +    uint result = 0;
   1.564 +    if (is_intel()) {
   1.565 +      uint smt = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
   1.566 +      if (supports_processor_topology() && smt != 0) // never divide by 0
   1.567 +        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / smt;
   1.568 +      if (result == 0) // no usable cpuid 0xB data: fall back to cpuid 4
   1.569 +        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
   1.570 +    } else if (is_amd()) {
   1.571 +      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
   1.572 +    }
   1.573 +    return (result == 0) ? 1 : result; // neither Intel nor AMD: assume 1
   1.574 +  }
   1.575 +
   1.576 +  static uint threads_per_core()  { // threads per core; never returns 0
   1.577 +    uint result = 1;
   1.578 +    if (is_intel() && supports_processor_topology()) {
   1.579 +      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
   1.580 +    } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
   1.581 +      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
   1.582 +               cores_per_cpu();
   1.583 +    }
   1.584 +    return (result == 0) ? 1 : result; // cpuid data may yield 0 (0 reported, or fewer threads than cores)
   1.585 +  }
   1.586 +
   1.587 +  // L1 line size taken from cpuid (vendor specific leaf), at least 32.
   1.588 +  static intx prefetch_data_size()  {
   1.589 +    intx line_size = 0;
   1.590 +    if (is_intel()) {
   1.591 +      line_size = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
   1.592 +    } else if (is_amd()) {
   1.593 +      line_size = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
   1.594 +    }
   1.595 +    // not defined ? 32 bytes by default on x86 and other x64
   1.596 +    return (line_size < 32) ? 32 : line_size;
   1.597 +  }
   1.598 +
   1.599 +  // Feature identification: each predicate below tests one CPU_* bit of the
   1.600 +  // _cpuFeatures mask. supports_cpuid() is true when any feature bit is set
   1.601 +  // (the mask is 0 if the cpuid instruction is not available).
   1.602 +  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
   1.603 +  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
   1.604 +  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
   1.605 +  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
   1.606 +  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
   1.607 +  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
   1.608 +  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
   1.609 +  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   1.610 +  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   1.611 +  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   1.612 +  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   1.613 +  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   1.614 +  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
   1.615 +  static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
   1.616 +  static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
   1.617 +  static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
   1.618 +  static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
   1.619 +  static bool supports_erms()     { return (_cpuFeatures & CPU_ERMS) != 0; }
   1.620 +  static bool supports_clmul()    { return (_cpuFeatures & CPU_CLMUL) != 0; }
   1.621 +  static bool supports_rtm()      { return (_cpuFeatures & CPU_RTM) != 0; }
   1.622 +  static bool supports_bmi1()     { return (_cpuFeatures & CPU_BMI1) != 0; }
   1.623 +  static bool supports_bmi2()     { return (_cpuFeatures & CPU_BMI2) != 0; }
   1.624 +  // Intel features
   1.625 +  static bool is_intel_family_core() { // Intel cpu whose extended family equals CPU_FAMILY_INTEL_CORE
   1.626 +    return is_intel() && extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
   1.627 +
   1.628 +  static bool is_intel_tsc_synched_at_init()  {
   1.629 +    if (!is_intel_family_core()) return false;
   1.630 +    // <= 2-socket invariant tsc support. EX versions are usually used
   1.631 +    // in > 2-socket systems and likely don't synchronize tscs at
   1.632 +    // initialization.
   1.633 +    // Code that uses tsc values must be prepared for them to arbitrarily
   1.634 +    // jump forward or backward.
   1.635 +    switch (extended_cpu_model()) {
   1.636 +      case CPU_MODEL_NEHALEM_EP:
   1.637 +      case CPU_MODEL_WESTMERE_EP:
   1.638 +      case CPU_MODEL_SANDYBRIDGE_EP:
   1.639 +      case CPU_MODEL_IVYBRIDGE_EP:
   1.640 +        return true;
   1.641 +      default:
   1.642 +        return false;
   1.643 +    }
   1.644 +  }
   1.645 +
   1.646 +  // AMD features (note: feature_flags() also sets CPU_LZCNT for Intel via lzcnt_intel)
   1.647 +  static bool supports_3dnow_prefetch()    { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; }
   1.648 +  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
   1.649 +  static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
   1.650 +  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
   1.651 +
   1.652 +  static bool is_amd_Barcelona()  { // AMD cpu whose extended family equals CPU_FAMILY_AMD_11H
   1.653 +    return is_amd() && extended_cpu_family() == CPU_FAMILY_AMD_11H; }
   1.654 +
   1.655 +  // Intel and AMD newer cores support fast timestamps well
   1.656 +  static bool supports_tscinv_bit() { // raw CPU_TSCINV bit, no vendor/model filtering
   1.657 +    return 0 != (_cpuFeatures & CPU_TSCINV);
   1.658 +  }
   1.659 +  static bool supports_tscinv() {
   1.660 +    if (!supports_tscinv_bit()) return false;           // the cpuid bit is required
   1.661 +    if (is_amd() && !is_amd_Barcelona()) return true;   // any AMD except Barcelona
   1.662 +    return is_intel_tsc_synched_at_init();              // otherwise only listed Intel models
   1.663 +  }
   1.664 +
   1.665 +  // Intel Core and newer cpus have fast IDIV instruction (excluding Atom; presumably that is the model 0x1C test below).
   1.666 +  static bool has_fast_idiv()     { return is_intel() && cpu_family() == 6 &&
   1.667 +                                           supports_sse3() && _model != 0x1C; }
   1.668 +
   1.669 +  static bool supports_compare_and_exchange() { return true; } // unconditionally true here
   1.670 +
   1.671 +  static const char* cpu_features()           { return _features_str; } // the stored features string
   1.672 +
   1.673 +  static intx allocate_prefetch_distance() {
   1.674 +    // This method should be called before allocate_prefetch_style().
   1.675 +    //
   1.676 +    // Hardware prefetching (distance/size in bytes):
   1.677 +    // Pentium 3 -  64 /  32
   1.678 +    // Pentium 4 - 256 / 128
   1.679 +    // Athlon    -  64 /  32 ????
   1.680 +    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
   1.681 +    // Core      - 128 /  64
   1.682 +    //
   1.683 +    // Software prefetching (distance in bytes / instruction with best score):
   1.684 +    // Pentium 3 - 128 / prefetchnta
   1.685 +    // Pentium 4 - 512 / prefetchnta
   1.686 +    // Athlon    - 128 / prefetchnta
   1.687 +    // Opteron   - 256 / prefetchnta
   1.688 +    // Core      - 256 / prefetchnta
   1.689 +    // It will be used only when AllocatePrefetchStyle > 0
   1.690 +
   1.691 +    const intx requested = AllocatePrefetchDistance;
   1.692 +    if (requested >= 0) {
   1.693 +      return requested;  // explicitly configured, use as is
   1.694 +    }
   1.695 +
   1.696 +    // Negative means "default": pick a distance from the vendor, SSE2
   1.697 +    // support and cpu family, following the software prefetching table above.
   1.698 +    const bool amd = is_amd();
   1.699 +    if (!supports_sse2()) {
   1.700 +      return 128;        // Athlon, Pentium 3 (and all other old CPUs)
   1.701 +    }
   1.702 +    if (amd) {
   1.703 +      return 256;        // Opteron
   1.704 +    }
   1.705 +    // Not AMD (treated as Intel) with SSE2: decide by cpu family.
   1.706 +    if (cpu_family() == 6) {
   1.707 +      return 256;        // Pentium M, Core, Core2
   1.708 +    }
   1.709 +    return 512;          // Pentium 4
   1.710 +  }
   1.711 +  static intx allocate_prefetch_style() {
   1.712 +    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be non-negative");
   1.713 +    // Prefetching is off (style 0) unless AllocatePrefetchDistance is positive.
   1.714 +    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
   1.715 +  }
   1.716 +
   1.717 +  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
   1.718 +  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
   1.719 +  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
   1.720 +  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
   1.721 +
   1.722 +  // gc copy/scan is disabled if prefetchw isn't supported, because
   1.723 +  // Prefetch::write emits an inlined prefetchw on Linux.
   1.724 +  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
   1.725 +  // The used prefetcht0 instruction works for both amd64 and em64t.
   1.726 +  static intx prefetch_copy_interval_in_bytes() { // negative flag value selects the 576 default
   1.727 +    const intx flag_value = PrefetchCopyIntervalInBytes;
   1.728 +    return (flag_value < 0) ? 576 : flag_value;
   1.729 +  }
   1.730 +  static intx prefetch_scan_interval_in_bytes() { // negative flag value selects the 576 default
   1.731 +    const intx flag_value = PrefetchScanIntervalInBytes;
   1.732 +    return (flag_value < 0) ? 576 : flag_value;
   1.733 +  }
   1.734 +  static intx prefetch_fields_ahead() { // negative flag value selects the default of 1
   1.735 +    const intx flag_value = PrefetchFieldsAhead;
   1.736 +    return (flag_value < 0) ? 1 : flag_value;
   1.737 +  }
   1.738 +};
   1.739 +
   1.740 +#endif // CPU_X86_VM_VM_VERSION_X86_HPP

mercurial