Wed, 24 Oct 2012 14:33:22 -0700
7184394: add intrinsics to use AES instructions
Summary: Use new x86 AES instructions for AESCrypt.
Reviewed-by: twisti, kvn, roland
Contributed-by: tom.deneau@amd.com
twisti@1020 | 1 | /* |
mikael@4153 | 2 | * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. |
twisti@1020 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
twisti@1020 | 4 | * |
twisti@1020 | 5 | * This code is free software; you can redistribute it and/or modify it |
twisti@1020 | 6 | * under the terms of the GNU General Public License version 2 only, as |
twisti@1020 | 7 | * published by the Free Software Foundation. |
twisti@1020 | 8 | * |
twisti@1020 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
twisti@1020 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
twisti@1020 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
twisti@1020 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
twisti@1020 | 13 | * accompanied this code). |
twisti@1020 | 14 | * |
twisti@1020 | 15 | * You should have received a copy of the GNU General Public License version |
twisti@1020 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
twisti@1020 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
twisti@1020 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
twisti@1020 | 22 | * |
twisti@1020 | 23 | */ |
twisti@1020 | 24 | |
stefank@2314 | 25 | #ifndef CPU_X86_VM_VM_VERSION_X86_HPP |
stefank@2314 | 26 | #define CPU_X86_VM_VM_VERSION_X86_HPP |
stefank@2314 | 27 | |
stefank@2314 | 28 | #include "runtime/globals_extension.hpp" |
stefank@2314 | 29 | #include "runtime/vm_version.hpp" |
stefank@2314 | 30 | |
twisti@1020 | 31 | class VM_Version : public Abstract_VM_Version { |
twisti@1020 | 32 | public: |
twisti@1020 | 33 | // cpuid result register layouts. These are all unions of a uint32_t |
twisti@1020 | 34 | // (in case anyone wants access to the register as a whole) and a bitfield. |
twisti@1020 | 35 | |
twisti@1020 | 36 | union StdCpuid1Eax { /* EAX of cpuid function 1; read by extended_cpu_family(), extended_cpu_model() and cpu_stepping() */ |
twisti@1020 | 37 | uint32_t value; |
twisti@1020 | 38 | struct { |
twisti@1020 | 39 | uint32_t stepping : 4, |
twisti@1020 | 40 | model : 4, |
twisti@1020 | 41 | family : 4, |
twisti@1020 | 42 | proc_type : 2, |
twisti@1020 | 43 | : 2, |
twisti@1020 | 44 | ext_model : 4, |
twisti@1020 | 45 | ext_family : 8, |
twisti@1020 | 46 | : 4; |
twisti@1020 | 47 | } bits; |
twisti@1020 | 48 | }; |
twisti@1020 | 49 | |
twisti@1020 | 50 | union StdCpuid1Ebx { // EBX of cpuid function 1; only threads_per_cpu is read in this header (by threads_per_core()) |
twisti@1020 | 51 | uint32_t value; |
twisti@1020 | 52 | struct { |
twisti@1020 | 53 | uint32_t brand_id : 8, |
twisti@1020 | 54 | clflush_size : 8, |
twisti@1020 | 55 | threads_per_cpu : 8, |
twisti@1020 | 56 | apic_id : 8; |
twisti@1020 | 57 | } bits; |
twisti@1020 | 58 | }; |
twisti@1020 | 59 | |
twisti@1020 | 60 | union StdCpuid1Ecx { /* ECX of cpuid function 1; feature_flags() reads sse3, ssse3, sse4_1, sse4_2, popcnt, aes, osxsave and avx */ |
twisti@1020 | 61 | uint32_t value; |
twisti@1020 | 62 | struct { |
twisti@1020 | 63 | uint32_t sse3 : 1, |
twisti@1020 | 64 | : 2, |
twisti@1020 | 65 | monitor : 1, |
twisti@1020 | 66 | : 1, |
twisti@1020 | 67 | vmx : 1, |
twisti@1020 | 68 | : 1, |
twisti@1020 | 69 | est : 1, |
twisti@1020 | 70 | : 1, |
twisti@1020 | 71 | ssse3 : 1, |
twisti@1020 | 72 | cid : 1, |
twisti@1020 | 73 | : 2, |
twisti@1020 | 74 | cmpxchg16: 1, |
twisti@1020 | 75 | : 4, |
twisti@1020 | 76 | dca : 1, |
twisti@1020 | 77 | sse4_1 : 1, |
twisti@1020 | 78 | sse4_2 : 1, |
twisti@1078 | 79 | : 2, |
twisti@1078 | 80 | popcnt : 1, |
kvn@4205 | 81 | : 1, |
kvn@4205 | 82 | aes : 1, |
kvn@4205 | 83 | : 1, |
kvn@3388 | 84 | osxsave : 1, |
kvn@3388 | 85 | avx : 1, |
kvn@3388 | 86 | : 3; |
twisti@1020 | 87 | } bits; |
twisti@1020 | 88 | }; |
twisti@1020 | 89 | |
twisti@1020 | 90 | union StdCpuid1Edx { /* EDX of cpuid function 1; feature_flags() reads tsc, cmpxchg8, cmov, mmx, fxsr, sse and sse2, threads_per_core() reads ht */ |
twisti@1020 | 91 | uint32_t value; |
twisti@1020 | 92 | struct { |
twisti@1020 | 93 | uint32_t : 4, |
twisti@1020 | 94 | tsc : 1, |
twisti@1020 | 95 | : 3, |
twisti@1020 | 96 | cmpxchg8 : 1, |
twisti@1020 | 97 | : 6, |
twisti@1020 | 98 | cmov : 1, |
kvn@2984 | 99 | : 3, |
kvn@2984 | 100 | clflush : 1, |
kvn@2984 | 101 | : 3, |
twisti@1020 | 102 | mmx : 1, |
twisti@1020 | 103 | fxsr : 1, |
twisti@1020 | 104 | sse : 1, |
twisti@1020 | 105 | sse2 : 1, |
twisti@1020 | 106 | : 1, |
twisti@1020 | 107 | ht : 1, |
twisti@1020 | 108 | : 3; |
twisti@1020 | 109 | } bits; |
twisti@1020 | 110 | }; |
twisti@1020 | 111 | |
twisti@1020 | 112 | union DcpCpuid4Eax { /* EAX of cpuid function 4; cores_per_cpu() adds 1 to the cores_per_cpu field before using it */ |
twisti@1020 | 113 | uint32_t value; |
twisti@1020 | 114 | struct { |
twisti@1020 | 115 | uint32_t cache_type : 5, |
twisti@1020 | 116 | : 21, |
twisti@1020 | 117 | cores_per_cpu : 6; |
twisti@1020 | 118 | } bits; |
twisti@1020 | 119 | }; |
twisti@1020 | 120 | |
twisti@1020 | 121 | union DcpCpuid4Ebx { /* EBX of cpuid function 4; prefetch_data_size() adds 1 to the L1_line_size field before using it */ |
twisti@1020 | 122 | uint32_t value; |
twisti@1020 | 123 | struct { |
twisti@1020 | 124 | uint32_t L1_line_size : 12, |
twisti@1020 | 125 | partitions : 10, |
twisti@1020 | 126 | associativity : 10; |
twisti@1020 | 127 | } bits; |
twisti@1020 | 128 | }; |
twisti@1020 | 129 | |
kvn@1977 | 130 | union TplCpuidBEbx { /* EBX of cpuid function 0xB; CpuidInfo stores one copy each for ecx = 0, 1 and 2 */ |
kvn@1977 | 131 | uint32_t value; |
kvn@1977 | 132 | struct { |
kvn@1977 | 133 | uint32_t logical_cpus : 16, |
kvn@1977 | 134 | : 16; |
kvn@1977 | 135 | } bits; |
kvn@1977 | 136 | }; |
kvn@1977 | 137 | |
twisti@1020 | 138 | union ExtCpuid1Ecx { /* ECX of cpuid function 0x80000001; feature_flags() reads lzcnt, sse4a and prefetchw (AMD only) */ |
twisti@1020 | 139 | uint32_t value; |
twisti@1020 | 140 | struct { |
twisti@1020 | 141 | uint32_t LahfSahf : 1, |
twisti@1020 | 142 | CmpLegacy : 1, |
twisti@1020 | 143 | : 3, /* bits 2-4 are not decoded; three pad bits put lzcnt on bit 5, sse4a on 6, misalignsse on 7, prefetchw on 8 */ |
twisti@1210 | 144 | lzcnt : 1, |
twisti@1020 | 145 | sse4a : 1, |
twisti@1020 | 146 | misalignsse : 1, |
twisti@1020 | 147 | prefetchw : 1, |
twisti@1020 | 148 | : 23; /* remaining bits 9-31, keeps the bitfield at 32 bits */ |
twisti@1020 | 149 | } bits; |
twisti@1020 | 150 | }; |
twisti@1020 | 151 | |
twisti@1020 | 152 | union ExtCpuid1Edx { /* EDX of cpuid function 0x80000001; every read of these bits in this header is guarded by is_amd() */ |
twisti@1020 | 153 | uint32_t value; |
twisti@1020 | 154 | struct { |
twisti@1020 | 155 | uint32_t : 22, |
twisti@1020 | 156 | mmx_amd : 1, |
twisti@1020 | 157 | mmx : 1, |
twisti@1020 | 158 | fxsr : 1, |
twisti@1020 | 159 | : 4, |
twisti@1020 | 160 | long_mode : 1, |
twisti@1020 | 161 | tdnow2 : 1, |
twisti@1020 | 162 | tdnow : 1; |
twisti@1020 | 163 | } bits; |
twisti@1020 | 164 | }; |
twisti@1020 | 165 | |
twisti@1020 | 166 | union ExtCpuid5Ex { /* shared layout for ECX (L1 data cache) and EDX (L1 instruction cache) of cpuid function 0x80000005 */ |
twisti@1020 | 167 | uint32_t value; |
twisti@1020 | 168 | struct { |
twisti@1020 | 169 | uint32_t L1_line_size : 8, |
twisti@1020 | 170 | L1_tag_lines : 8, |
twisti@1020 | 171 | L1_assoc : 8, |
twisti@1020 | 172 | L1_size : 8; |
twisti@1020 | 173 | } bits; |
twisti@1020 | 174 | }; |
twisti@1020 | 175 | |
kvn@3400 | 176 | union ExtCpuid7Edx { /* EDX of cpuid function 0x80000007; tsc_invariance makes feature_flags() set CPU_TSCINV */ |
kvn@3400 | 177 | uint32_t value; |
kvn@3400 | 178 | struct { |
kvn@3400 | 179 | uint32_t : 8, |
kvn@3400 | 180 | tsc_invariance : 1, |
kvn@3400 | 181 | : 23; |
kvn@3400 | 182 | } bits; |
kvn@3400 | 183 | }; |
kvn@3400 | 184 | |
twisti@1020 | 185 | union ExtCpuid8Ecx { /* ECX of cpuid function 0x80000008; on AMD cores_per_cpu() adds 1 to the cores_per_cpu field */ |
twisti@1020 | 186 | uint32_t value; |
twisti@1020 | 187 | struct { |
twisti@1020 | 188 | uint32_t cores_per_cpu : 8, |
twisti@1020 | 189 | : 24; |
twisti@1020 | 190 | } bits; |
twisti@1020 | 191 | }; |
twisti@1020 | 192 | |
kvn@3388 | 193 | union SefCpuid7Eax { /* EAX of cpuid function 7; kept as a raw value, no bitfields are defined */ |
kvn@3388 | 194 | uint32_t value; |
kvn@3388 | 195 | }; |
kvn@3388 | 196 | |
kvn@3388 | 197 | union SefCpuid7Ebx { /* EBX of cpuid function 7 (structured extended features); feature_flags() reads only avx2 */ |
kvn@3388 | 198 | uint32_t value; |
kvn@3388 | 199 | struct { |
kvn@3388 | 200 | uint32_t fsgsbase : 1, |
kvn@3388 | 201 | : 2, |
kvn@3388 | 202 | bmi1 : 1, |
kvn@3388 | 203 | : 1, |
kvn@3388 | 204 | avx2 : 1, |
kvn@3388 | 205 | : 2, |
kvn@3388 | 206 | bmi2 : 1, |
kvn@3388 | 207 | : 23; |
kvn@3388 | 208 | } bits; |
kvn@3388 | 209 | }; |
kvn@3388 | 210 | |
kvn@3388 | 211 | union XemXcr0Eax { /* EAX half of XCR0; feature_flags() requires both sse and ymm to be set before reporting CPU_AVX */ |
kvn@3388 | 212 | uint32_t value; |
kvn@3388 | 213 | struct { |
kvn@3388 | 214 | uint32_t x87 : 1, |
kvn@3388 | 215 | sse : 1, |
kvn@3388 | 216 | ymm : 1, |
kvn@3388 | 217 | : 29; |
kvn@3388 | 218 | } bits; |
kvn@3388 | 219 | }; |
kvn@3388 | 220 | |
twisti@1020 | 221 | protected: |
phh@3378 | 222 | static int _cpu; /* returned by cpu_family() */ |
phh@3378 | 223 | static int _model; /* compared against 0x1C in has_fast_idiv() */ |
phh@3378 | 224 | static int _stepping; |
phh@3378 | 225 | static int _cpuFeatures; // features returned by the "cpuid" instruction |
phh@3378 | 226 | // 0 if this instruction is not available |
phh@3378 | 227 | static const char* _features_str; /* returned by cpu_features() */ |
twisti@1020 | 228 | |
phh@3378 | 229 | enum { /* feature bit masks: combined by feature_flags() and tested against _cpuFeatures by the supports_*() predicates */ |
phh@3378 | 230 | CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) |
phh@3378 | 231 | CPU_CMOV = (1 << 1), |
phh@3378 | 232 | CPU_FXSR = (1 << 2), |
phh@3378 | 233 | CPU_HT = (1 << 3), |
phh@3378 | 234 | CPU_MMX = (1 << 4), |
phh@3378 | 235 | CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions |
phh@3378 | 236 | // may not necessarily support other 3dnow instructions |
phh@3378 | 237 | CPU_SSE = (1 << 6), |
phh@3378 | 238 | CPU_SSE2 = (1 << 7), |
phh@3378 | 239 | CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) |
phh@3378 | 240 | CPU_SSSE3 = (1 << 9), |
phh@3378 | 241 | CPU_SSE4A = (1 << 10), |
phh@3378 | 242 | CPU_SSE4_1 = (1 << 11), |
phh@3378 | 243 | CPU_SSE4_2 = (1 << 12), |
phh@3378 | 244 | CPU_POPCNT = (1 << 13), |
phh@3378 | 245 | CPU_LZCNT = (1 << 14), |
phh@3378 | 246 | CPU_TSC = (1 << 15), |
kvn@3400 | 247 | CPU_TSCINV = (1 << 16), |
kvn@3400 | 248 | CPU_AVX = (1 << 17), |
kvn@4205 | 249 | CPU_AVX2 = (1 << 18), |
kvn@4205 | 250 | CPU_AES = (1 << 19) |
phh@3378 | 251 | } cpuFeatureFlags; |
phh@3378 | 252 | |
phh@3378 | 253 | enum { /* CPU_FAMILY_* values are compared with extended_cpu_family(), CPU_MODEL_* values with extended_cpu_model() */ |
phh@3378 | 254 | // AMD |
phh@3560 | 255 | CPU_FAMILY_AMD_11H = 0x11, |
phh@3378 | 256 | // Intel |
phh@3378 | 257 | CPU_FAMILY_INTEL_CORE = 6, |
phh@3560 | 258 | CPU_MODEL_NEHALEM = 0x1e, |
phh@3560 | 259 | CPU_MODEL_NEHALEM_EP = 0x1a, |
phh@3560 | 260 | CPU_MODEL_NEHALEM_EX = 0x2e, |
phh@3560 | 261 | CPU_MODEL_WESTMERE = 0x25, |
phh@3560 | 262 | CPU_MODEL_WESTMERE_EP = 0x2c, |
phh@3560 | 263 | CPU_MODEL_WESTMERE_EX = 0x2f, |
phh@3560 | 264 | CPU_MODEL_SANDYBRIDGE = 0x2a, |
phh@3560 | 265 | CPU_MODEL_SANDYBRIDGE_EP = 0x2d, |
phh@3560 | 266 | CPU_MODEL_IVYBRIDGE_EP = 0x3a |
phh@3378 | 267 | } cpuExtendedFamily; |
twisti@1020 | 268 | |
twisti@1020 | 269 | // cpuid information block. All info derived from executing cpuid with |
twisti@1020 | 270 | // various function numbers is stored here. Intel and AMD info is |
twisti@1020 | 271 | // merged in this block: accessor methods disentangle it. |
twisti@1020 | 272 | // |
twisti@1020 | 273 | // The info block is laid out in subblocks of 4 dwords corresponding to |
twisti@1020 | 274 | // eax, ebx, ecx and edx, whether or not they contain anything useful. |
twisti@1020 | 275 | struct CpuidInfo { /* the *_offset() accessors of VM_Version return the byte offset of the first dword of each subblock */ |
twisti@1020 | 276 | // cpuid function 0 |
twisti@1020 | 277 | uint32_t std_max_function; |
twisti@1020 | 278 | uint32_t std_vendor_name_0; |
twisti@1020 | 279 | uint32_t std_vendor_name_1; |
twisti@1020 | 280 | uint32_t std_vendor_name_2; |
twisti@1020 | 281 | |
twisti@1020 | 282 | // cpuid function 1 |
twisti@1020 | 283 | StdCpuid1Eax std_cpuid1_eax; |
twisti@1020 | 284 | StdCpuid1Ebx std_cpuid1_ebx; |
twisti@1020 | 285 | StdCpuid1Ecx std_cpuid1_ecx; |
twisti@1020 | 286 | StdCpuid1Edx std_cpuid1_edx; |
twisti@1020 | 287 | |
twisti@1020 | 288 | // cpuid function 4 (deterministic cache parameters) |
twisti@1020 | 289 | DcpCpuid4Eax dcp_cpuid4_eax; |
twisti@1020 | 290 | DcpCpuid4Ebx dcp_cpuid4_ebx; |
twisti@1020 | 291 | uint32_t dcp_cpuid4_ecx; // unused currently |
twisti@1020 | 292 | uint32_t dcp_cpuid4_edx; // unused currently |
twisti@1020 | 293 | |
kvn@3388 | 294 | // cpuid function 7 (structured extended features) |
kvn@3388 | 295 | SefCpuid7Eax sef_cpuid7_eax; |
kvn@3388 | 296 | SefCpuid7Ebx sef_cpuid7_ebx; |
kvn@3388 | 297 | uint32_t sef_cpuid7_ecx; // unused currently |
kvn@3388 | 298 | uint32_t sef_cpuid7_edx; // unused currently |
kvn@3388 | 299 | |
kvn@1977 | 300 | // cpuid function 0xB (processor topology) |
kvn@1977 | 301 | // ecx = 0 |
kvn@1977 | 302 | uint32_t tpl_cpuidB0_eax; |
kvn@1977 | 303 | TplCpuidBEbx tpl_cpuidB0_ebx; |
kvn@1977 | 304 | uint32_t tpl_cpuidB0_ecx; // unused currently |
kvn@1977 | 305 | uint32_t tpl_cpuidB0_edx; // unused currently |
kvn@1977 | 306 | |
kvn@1977 | 307 | // ecx = 1 |
kvn@1977 | 308 | uint32_t tpl_cpuidB1_eax; |
kvn@1977 | 309 | TplCpuidBEbx tpl_cpuidB1_ebx; |
kvn@1977 | 310 | uint32_t tpl_cpuidB1_ecx; // unused currently |
kvn@1977 | 311 | uint32_t tpl_cpuidB1_edx; // unused currently |
kvn@1977 | 312 | |
kvn@1977 | 313 | // ecx = 2 |
kvn@1977 | 314 | uint32_t tpl_cpuidB2_eax; |
kvn@1977 | 315 | TplCpuidBEbx tpl_cpuidB2_ebx; |
kvn@1977 | 316 | uint32_t tpl_cpuidB2_ecx; // unused currently |
kvn@1977 | 317 | uint32_t tpl_cpuidB2_edx; // unused currently |
kvn@1977 | 318 | |
twisti@1020 | 319 | // cpuid function 0x80000000 // example, unused |
twisti@1020 | 320 | uint32_t ext_max_function; |
twisti@1020 | 321 | uint32_t ext_vendor_name_0; |
twisti@1020 | 322 | uint32_t ext_vendor_name_1; |
twisti@1020 | 323 | uint32_t ext_vendor_name_2; |
twisti@1020 | 324 | |
twisti@1020 | 325 | // cpuid function 0x80000001 |
twisti@1020 | 326 | uint32_t ext_cpuid1_eax; // reserved |
twisti@1020 | 327 | uint32_t ext_cpuid1_ebx; // reserved |
twisti@1020 | 328 | ExtCpuid1Ecx ext_cpuid1_ecx; |
twisti@1020 | 329 | ExtCpuid1Edx ext_cpuid1_edx; |
twisti@1020 | 330 | |
twisti@1020 | 331 | // cpuid functions 0x80000002 thru 0x80000004: example, unused |
twisti@1020 | 332 | uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; |
twisti@1020 | 333 | uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; |
twisti@1020 | 334 | uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; |
twisti@1020 | 335 | |
phh@3560 | 336 | // cpuid function 0x80000005 // AMD L1, Intel reserved |
twisti@1020 | 337 | uint32_t ext_cpuid5_eax; // unused currently |
twisti@1020 | 338 | uint32_t ext_cpuid5_ebx; // reserved |
twisti@1020 | 339 | ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) |
twisti@1020 | 340 | ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) |
twisti@1020 | 341 | |
phh@3378 | 342 | // cpuid function 0x80000007 |
phh@3378 | 343 | uint32_t ext_cpuid7_eax; // reserved |
phh@3378 | 344 | uint32_t ext_cpuid7_ebx; // reserved |
phh@3378 | 345 | uint32_t ext_cpuid7_ecx; // reserved |
phh@3378 | 346 | ExtCpuid7Edx ext_cpuid7_edx; // tscinv |
phh@3378 | 347 | |
twisti@1020 | 348 | // cpuid function 0x80000008 |
twisti@1020 | 349 | uint32_t ext_cpuid8_eax; // unused currently |
twisti@1020 | 350 | uint32_t ext_cpuid8_ebx; // reserved |
twisti@1020 | 351 | ExtCpuid8Ecx ext_cpuid8_ecx; |
twisti@1020 | 352 | uint32_t ext_cpuid8_edx; // reserved |
kvn@3388 | 353 | |
kvn@3388 | 354 | // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) |
kvn@3388 | 355 | XemXcr0Eax xem_xcr0_eax; |
kvn@3388 | 356 | uint32_t xem_xcr0_edx; // reserved |
twisti@1020 | 357 | }; |
twisti@1020 | 358 | |
twisti@1020 | 359 | // The actual cpuid info block: the raw data read by the extractors and predicates of this class |
twisti@1020 | 360 | static CpuidInfo _cpuid_info; |
twisti@1020 | 361 | |
twisti@1020 | 362 | // Extractors and predicates |
twisti@1020 | 363 | static uint32_t extended_cpu_family() { /* sum of the family and ext_family fields of cpuid 1 EAX */ |
twisti@1020 | 364 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; |
twisti@1020 | 365 | result += _cpuid_info.std_cpuid1_eax.bits.ext_family; |
twisti@1020 | 366 | return result; |
twisti@1020 | 367 | } |
phh@3378 | 368 | |
twisti@1020 | 369 | static uint32_t extended_cpu_model() { /* model in the low nibble, ext_model shifted into the next nibble (cpuid 1 EAX) */ |
twisti@1020 | 370 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; |
twisti@1020 | 371 | result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; |
twisti@1020 | 372 | return result; |
twisti@1020 | 373 | } |
phh@3378 | 374 | |
twisti@1020 | 375 | static uint32_t cpu_stepping() { /* stepping field of cpuid 1 EAX, unmodified */ |
twisti@1020 | 376 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; |
twisti@1020 | 377 | return result; |
twisti@1020 | 378 | } |
phh@3378 | 379 | |
twisti@1020 | 380 | static uint logical_processor_count() { /* simply forwards threads_per_core() */ |
twisti@1020 | 381 | uint result = threads_per_core(); |
twisti@1020 | 382 | return result; |
twisti@1020 | 383 | } |
phh@3378 | 384 | |
twisti@1020 | 385 | static uint32_t feature_flags() { /* builds a mask of CPU_* bits from the raw cpuid / XCR0 data held in _cpuid_info */ |
twisti@1020 | 386 | uint32_t result = 0; |
twisti@1020 | 387 | if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) |
twisti@1020 | 388 | result |= CPU_CX8; |
twisti@1020 | 389 | if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) |
twisti@1020 | 390 | result |= CPU_CMOV; |
twisti@2144 | 391 | if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() && |
twisti@2144 | 392 | _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) |
twisti@1020 | 393 | result |= CPU_FXSR; |
twisti@1020 | 394 | // HT flag is set for multi-core processors also. |
twisti@1020 | 395 | if (threads_per_core() > 1) |
twisti@1020 | 396 | result |= CPU_HT; |
twisti@2144 | 397 | if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() && |
twisti@2144 | 398 | _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) |
twisti@1020 | 399 | result |= CPU_MMX; |
twisti@1020 | 400 | if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) |
twisti@1020 | 401 | result |= CPU_SSE; |
twisti@1020 | 402 | if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) |
twisti@1020 | 403 | result |= CPU_SSE2; |
twisti@1020 | 404 | if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) |
twisti@1020 | 405 | result |= CPU_SSE3; |
twisti@1020 | 406 | if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) |
twisti@1020 | 407 | result |= CPU_SSSE3; |
twisti@1020 | 408 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) |
twisti@1020 | 409 | result |= CPU_SSE4_1; |
twisti@1020 | 410 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) |
twisti@1020 | 411 | result |= CPU_SSE4_2; |
twisti@1078 | 412 | if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
twisti@1078 | 413 | result |= CPU_POPCNT; |
kvn@3388 | 414 | if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && |
kvn@3388 | 415 | _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && |
kvn@3388 | 416 | _cpuid_info.xem_xcr0_eax.bits.sse != 0 && |
kvn@3388 | 417 | _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { /* AVX2 is only considered once all four AVX conditions hold */ |
kvn@3388 | 418 | result |= CPU_AVX; |
kvn@3388 | 419 | if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) |
kvn@3388 | 420 | result |= CPU_AVX2; |
kvn@3388 | 421 | } |
phh@3378 | 422 | if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) |
phh@3378 | 423 | result |= CPU_TSC; |
phh@3378 | 424 | if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) |
phh@3378 | 425 | result |= CPU_TSCINV; |
kvn@4205 | 426 | if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) |
kvn@4205 | 427 | result |= CPU_AES; |
twisti@1210 | 428 | |
twisti@1210 | 429 | // AMD features. |
twisti@1210 | 430 | if (is_amd()) { |
kvn@2761 | 431 | if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
kvn@2761 | 432 | (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) |
kvn@2761 | 433 | result |= CPU_3DNOW_PREFETCH; |
twisti@1210 | 434 | if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) |
twisti@1210 | 435 | result |= CPU_LZCNT; |
twisti@1210 | 436 | if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) |
twisti@1210 | 437 | result |= CPU_SSE4A; |
twisti@1210 | 438 | } |
twisti@1210 | 439 | |
twisti@1020 | 440 | return result; |
twisti@1020 | 441 | } |
twisti@1020 | 442 | |
twisti@1020 | 443 | static void get_processor_features(); |
twisti@1020 | 444 | |
twisti@1020 | 445 | public: |
twisti@1020 | 446 | // Offsets for cpuid asm stub: each accessor returns the byte offset of the first dword of one CpuidInfo subblock |
twisti@1020 | 447 | static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } |
twisti@1020 | 448 | static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } |
twisti@1020 | 449 | static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } |
kvn@3388 | 450 | static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } |
twisti@1020 | 451 | static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
twisti@1020 | 452 | static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } |
phh@3378 | 453 | static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); } |
twisti@1020 | 454 | static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
kvn@1977 | 455 | static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
kvn@1977 | 456 | static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } |
kvn@1977 | 457 | static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } |
kvn@3388 | 458 | static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } |
twisti@1020 | 459 | |
twisti@1020 | 460 | // Initialization |
twisti@1020 | 461 | static void initialize(); |
twisti@1020 | 462 | |
twisti@1020 | 463 | // Asserts: a zero family field in cpuid 1 EAX is taken to mean the info block has not been filled in yet |
twisti@1020 | 464 | static void assert_is_initialized() { |
twisti@1020 | 465 | assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); |
twisti@1020 | 466 | } |
twisti@1020 | 467 | |
twisti@1020 | 468 | // |
twisti@1020 | 469 | // Processor family: |
twisti@1020 | 470 | // 3 - 386 |
twisti@1020 | 471 | // 4 - 486 |
twisti@1020 | 472 | // 5 - Pentium |
twisti@1020 | 473 | // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, |
twisti@1020 | 474 | // Pentium M, Core Solo, Core Duo, Core2 Duo |
twisti@1020 | 475 | // family 6 model: 9, 13, 14, 15 |
twisti@1020 | 476 | // 0x0f - Pentium 4, Opteron |
twisti@1020 | 477 | // |
twisti@1020 | 478 | // Note: The cpu family should be used to select between |
twisti@1020 | 479 | // instruction sequences which are valid on all Intel |
twisti@1020 | 480 | // processors. Use the feature test functions below to |
twisti@1020 | 481 | // determine whether a particular instruction is supported. |
twisti@1020 | 482 | // |
twisti@1020 | 483 | static int cpu_family() { return _cpu;} |
twisti@1020 | 484 | static bool is_P6() { return cpu_family() >= 6; } |
twisti@1020 | 485 | static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA': the bytes 'A','u','t','h' as a little-endian dword; only this first vendor dword is compared |
twisti@1020 | 486 | static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG': the bytes 'G','e','n','u' as a little-endian dword; only this first vendor dword is compared |
twisti@1020 | 487 | |
kvn@2002 | 488 | static bool supports_processor_topology() { /* true when cpuid 0xB exists and its ecx = 0 level reports non-zero data */ |
kvn@2002 | 489 | return (_cpuid_info.std_max_function >= 0xB) && |
kvn@2002 | 490 | // (eax[4:0] | ebx[15:0]) == 0 indicates invalid topology level. |
kvn@2002 | 491 | // Some cpus have max cpuid >= 0xB but do not support processor topology. |
kvn@2002 | 492 | (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); |
kvn@2002 | 493 | } |
kvn@2002 | 494 | |
twisti@1020 | 495 | static uint cores_per_cpu() { /* Intel: cpuid 0xB (level-1 / level-0 logical cpus) or cpuid 4; AMD: cpuid 0x80000008; anything else: 1 */ |
twisti@1020 | 496 | uint result = 1; |
twisti@1020 | 497 | if (is_intel()) { |
kvn@2002 | 498 | // supports_processor_topology() can be true while the level-0 count is 0, so never divide by it unchecked. |
kvn@1977 | 499 | uint smt = supports_processor_topology() ? _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus : 0; |
kvn@1977 | 500 | result = (smt != 0) ? _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / smt : 0; |
kvn@1977 | 501 | if (result == 0) { // no usable topology data (or level-1 < level-0): fall back to cpuid function 4 |
kvn@1977 | 502 | result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); |
kvn@1977 | 503 | } |
twisti@1020 | 504 | } else if (is_amd()) { |
twisti@1020 | 505 | result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); |
twisti@1020 | 506 | } |
twisti@1020 | 507 | return result; |
twisti@1020 | 508 | } |
twisti@1020 | 509 | |
twisti@1020 | 510 | static uint threads_per_core() { /* Intel with topology support: cpuid 0xB level-0 count; otherwise threads_per_cpu / cores_per_cpu() when the ht bit is set; else 1 */ |
twisti@1020 | 511 | uint result = 1; |
kvn@2002 | 512 | if (is_intel() && supports_processor_topology()) { |
kvn@1977 | 513 | result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
kvn@1977 | 514 | } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { |
twisti@1020 | 515 | result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
twisti@1020 | 516 | cores_per_cpu(); |
twisti@1020 | 517 | } |
twisti@1020 | 518 | return (result == 0) ? 1 : result; // a zero logical_cpus field or the integer division above can yield 0; report at least 1 |
twisti@1020 | 519 | } |
twisti@1020 | 520 | |
kvn@3052 | 521 | static intx prefetch_data_size() { /* L1 line size from cpuid 4 (Intel, field + 1) or cpuid 0x80000005 ECX (AMD), never less than 32 */ |
twisti@1020 | 522 | intx result = 0; |
twisti@1020 | 523 | if (is_intel()) { |
twisti@1020 | 524 | result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); |
twisti@1020 | 525 | } else if (is_amd()) { |
twisti@1020 | 526 | result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; |
twisti@1020 | 527 | } |
twisti@1020 | 528 | if (result < 32) // not defined ? |
twisti@1020 | 529 | result = 32; // 32 bytes by default on x86 and x64 |
twisti@1020 | 530 | return result; |
twisti@1020 | 531 | } |
twisti@1020 | 532 | |
twisti@1020 | 533 | // |
twisti@1020 | 534 | // Feature identification: supports_cpuid() checks for any bit, every other predicate tests one CPU_* bit of _cpuFeatures |
twisti@1020 | 535 | // |
twisti@1020 | 536 | static bool supports_cpuid() { return _cpuFeatures != 0; } |
twisti@1020 | 537 | static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } |
twisti@1020 | 538 | static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } |
twisti@1020 | 539 | static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } |
twisti@1020 | 540 | static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } |
twisti@1020 | 541 | static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } |
twisti@1020 | 542 | static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } |
twisti@1020 | 543 | static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } |
twisti@1020 | 544 | static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } |
twisti@1020 | 545 | static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } |
twisti@1020 | 546 | static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } |
twisti@1020 | 547 | static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } |
twisti@1078 | 548 | static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
kvn@3388 | 549 | static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } |
kvn@3388 | 550 | static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } |
phh@3378 | 551 | static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } |
kvn@4205 | 552 | static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } |
phh@3378 | 553 | |
phh@3378 | 554 | // Intel features |
phh@3378 | 555 | static bool is_intel_family_core() { /* Intel vendor and extended family equal to CPU_FAMILY_INTEL_CORE (6) */ return is_intel() && |
phh@3378 | 556 | extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } |
phh@3378 | 557 | |
phh@3378 | 558 | static bool is_intel_tsc_synched_at_init() { /* true only for is_intel_family_core() parts whose extended model is one of the four *_EP constants */ |
phh@3378 | 559 | if (is_intel_family_core()) { |
phh@3378 | 560 | uint32_t ext_model = extended_cpu_model(); |
phh@3560 | 561 | if (ext_model == CPU_MODEL_NEHALEM_EP || |
phh@3560 | 562 | ext_model == CPU_MODEL_WESTMERE_EP || |
phh@3560 | 563 | ext_model == CPU_MODEL_SANDYBRIDGE_EP || |
phh@3560 | 564 | ext_model == CPU_MODEL_IVYBRIDGE_EP) { |
phh@3560 | 565 | // <= 2-socket invariant tsc support. EX versions are usually used |
phh@3560 | 566 | // in > 2-socket systems and likely don't synchronize tscs at |
phh@3560 | 567 | // initialization. |
phh@3560 | 568 | // Code that uses tsc values must be prepared for them to arbitrarily |
phh@3560 | 569 | // jump forward or backward. |
phh@3378 | 570 | return true; |
phh@3378 | 571 | } |
phh@3378 | 572 | } |
phh@3378 | 573 | return false; |
phh@3378 | 574 | } |
phh@3378 | 575 | |
twisti@1020 | 576 | // AMD features (supports_mmx_ext() reads the raw cpuid bit under is_amd(); the others test _cpuFeatures) |
kvn@2761 | 577 | static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
twisti@1020 | 578 | static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
twisti@1210 | 579 | static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
twisti@1020 | 580 | static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
twisti@1020 | 581 | |
phh@3378 | 582 | static bool is_amd_Barcelona() { /* NOTE(review): matches extended family 0x11 (CPU_FAMILY_AMD_11H); confirm this is the family meant by "Barcelona" */ return is_amd() && |
phh@3378 | 583 | extended_cpu_family() == CPU_FAMILY_AMD_11H; } |
phh@3378 | 584 | |
phh@3378 | 585 | // Newer Intel and AMD cores support fast timestamps well. supports_tscinv_bit() is the raw CPU_TSCINV bit; supports_tscinv() also requires AMD other than is_amd_Barcelona(), or an Intel part accepted by is_intel_tsc_synched_at_init(). |
phh@3378 | 586 | static bool supports_tscinv_bit() { |
phh@3378 | 587 | return (_cpuFeatures & CPU_TSCINV) != 0; |
phh@3378 | 588 | } |
phh@3378 | 589 | static bool supports_tscinv() { |
phh@3378 | 590 | return supports_tscinv_bit() && |
phh@3378 | 591 | ( (is_amd() && !is_amd_Barcelona()) || |
phh@3378 | 592 | is_intel_tsc_synched_at_init() ); |
phh@3378 | 593 | } |
phh@3378 | 594 | |
kvn@2269 | 595 | // Intel Core and newer cpus have fast IDIV instruction (excluding Atom, filtered out here as _model 0x1C). |
kvn@2269 | 596 | static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
kvn@2269 | 597 | supports_sse3() && _model != 0x1C; } |
kvn@2269 | 598 | |
twisti@1020 | 599 | static bool supports_compare_and_exchange() { /* unconditionally true in this x86 header */ return true; } |
twisti@1020 | 600 | |
twisti@1020 | 601 | static const char* cpu_features() { /* returns the _features_str member as-is */ return _features_str; } |
twisti@1020 | 602 | |
twisti@1020 | 603 | static intx allocate_prefetch_distance() { /* returns AllocatePrefetchDistance, or a per-CPU default when that flag is negative */ |
twisti@1020 | 604 | // This method should be called before allocate_prefetch_style(). |
twisti@1020 | 605 | // |
twisti@1020 | 606 | // Hardware prefetching (distance/size in bytes): |
twisti@1020 | 607 | // Pentium 3 - 64 / 32 |
twisti@1020 | 608 | // Pentium 4 - 256 / 128 |
twisti@1020 | 609 | // Athlon - 64 / 32 ???? |
twisti@1020 | 610 | // Opteron - 128 / 64 only when 2 sequential cache lines accessed |
twisti@1020 | 611 | // Core - 128 / 64 |
twisti@1020 | 612 | // |
twisti@1020 | 613 | // Software prefetching (distance in bytes / instruction with best score): |
twisti@1020 | 614 | // Pentium 3 - 128 / prefetchnta |
twisti@1020 | 615 | // Pentium 4 - 512 / prefetchnta |
twisti@1020 | 616 | // Athlon - 128 / prefetchnta |
twisti@1020 | 617 | // Opteron - 256 / prefetchnta |
twisti@1020 | 618 | // Core - 256 / prefetchnta |
twisti@1020 | 619 | // It will be used only when AllocatePrefetchStyle > 0 |
twisti@1020 | 620 | |
twisti@1020 | 621 | intx count = AllocatePrefetchDistance; |
twisti@1020 | 622 | if (count < 0) { // default ? |
twisti@1020 | 623 | if (is_amd()) { // AMD |
twisti@1020 | 624 | if (supports_sse2()) |
twisti@1020 | 625 | count = 256; // Opteron |
twisti@1020 | 626 | else |
twisti@1020 | 627 | count = 128; // Athlon |
twisti@1020 | 628 | } else { // Intel |
twisti@1020 | 629 | if (supports_sse2()) |
twisti@1020 | 630 | if (cpu_family() == 6) { |
twisti@1020 | 631 | count = 256; // Pentium M, Core, Core2 |
twisti@1020 | 632 | } else { |
twisti@1020 | 633 | count = 512; // Pentium 4 |
twisti@1020 | 634 | } |
twisti@1020 | 635 | else |
twisti@1020 | 636 | count = 128; // Pentium 3 (and all other old CPUs) |
twisti@1020 | 637 | } |
twisti@1020 | 638 | } |
twisti@1020 | 639 | return count; |
twisti@1020 | 640 | } |
twisti@1020 | 641 | static intx allocate_prefetch_style() { /* AllocatePrefetchStyle, or 0 when AllocatePrefetchDistance is not greater than 0 */ |
twisti@1020 | 642 | assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); |
twisti@1020 | 643 | // Return 0 if AllocatePrefetchDistance was not defined. |
twisti@1020 | 644 | return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; |
twisti@1020 | 645 | } |
twisti@1020 | 646 | |
twisti@1020 | 647 | // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from |
twisti@1020 | 648 | // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. |
twisti@1020 | 649 | // Tested intervals from 128 to 2048 in increments of 64 == one cache line. |
twisti@1020 | 650 | // 256 bytes (4 dcache lines) was the nearest runner-up to 576. |
twisti@1020 | 651 | |
twisti@1020 | 652 | // gc copy/scan is disabled if prefetchw isn't supported, because |
twisti@1020 | 653 | // Prefetch::write emits an inlined prefetchw on Linux. |
twisti@1020 | 654 | // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. |
twisti@1020 | 655 | // The used prefetcht0 instruction works for both amd64 and em64t. |
twisti@1020 | 656 | static intx prefetch_copy_interval_in_bytes() { /* PrefetchCopyIntervalInBytes, or 576 when that flag is negative */ |
twisti@1020 | 657 | intx interval = PrefetchCopyIntervalInBytes; |
twisti@1020 | 658 | return interval >= 0 ? interval : 576; |
twisti@1020 | 659 | } |
twisti@1020 | 660 | static intx prefetch_scan_interval_in_bytes() { /* PrefetchScanIntervalInBytes, or 576 when that flag is negative */ |
twisti@1020 | 661 | intx interval = PrefetchScanIntervalInBytes; |
twisti@1020 | 662 | return interval >= 0 ? interval : 576; |
twisti@1020 | 663 | } |
twisti@1020 | 664 | static intx prefetch_fields_ahead() { /* PrefetchFieldsAhead, or 1 when that flag is negative */ |
twisti@1020 | 665 | intx count = PrefetchFieldsAhead; |
twisti@1020 | 666 | return count >= 0 ? count : 1; |
twisti@1020 | 667 | } |
twisti@1020 | 668 | }; |
stefank@2314 | 669 | |
stefank@2314 | 670 | #endif // CPU_X86_VM_VM_VERSION_X86_HPP |