Tue, 02 Jul 2013 20:42:12 -0400
7088419: Use x86 Hardware CRC32 Instruction with java.util.zip.CRC32
Summary: add intrinsics using new instruction to interpreter, C1, C2, for suitable x86; add test
Reviewed-by: kvn, twisti
twisti@1020 | 1 | /* |
drchase@5353 | 2 | * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
twisti@1020 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
twisti@1020 | 4 | * |
twisti@1020 | 5 | * This code is free software; you can redistribute it and/or modify it |
twisti@1020 | 6 | * under the terms of the GNU General Public License version 2 only, as |
twisti@1020 | 7 | * published by the Free Software Foundation. |
twisti@1020 | 8 | * |
twisti@1020 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
twisti@1020 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
twisti@1020 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
twisti@1020 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
twisti@1020 | 13 | * accompanied this code). |
twisti@1020 | 14 | * |
twisti@1020 | 15 | * You should have received a copy of the GNU General Public License version |
twisti@1020 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
twisti@1020 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
twisti@1020 | 18 | * |
trims@1907 | 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
trims@1907 | 20 | * or visit www.oracle.com if you need additional information or have any |
trims@1907 | 21 | * questions. |
twisti@1020 | 22 | * |
twisti@1020 | 23 | */ |
twisti@1020 | 24 | |
stefank@2314 | 25 | #ifndef CPU_X86_VM_VM_VERSION_X86_HPP |
stefank@2314 | 26 | #define CPU_X86_VM_VM_VERSION_X86_HPP |
stefank@2314 | 27 | |
stefank@2314 | 28 | #include "runtime/globals_extension.hpp" |
stefank@2314 | 29 | #include "runtime/vm_version.hpp" |
stefank@2314 | 30 | |
twisti@1020 | 31 | class VM_Version : public Abstract_VM_Version { |
twisti@1020 | 32 | public: |
twisti@1020 | 33 | // cpuid result register layouts. These are all unions of a uint32_t |
twisti@1020 | 34 | // (in case anyone wants access to the register as a whole) and a bitfield. |
twisti@1020 | 35 | |
twisti@1020 | 36 | union StdCpuid1Eax { |
twisti@1020 | 37 | uint32_t value; |
twisti@1020 | 38 | struct { |
twisti@1020 | 39 | uint32_t stepping : 4, |
twisti@1020 | 40 | model : 4, |
twisti@1020 | 41 | family : 4, |
twisti@1020 | 42 | proc_type : 2, |
twisti@1020 | 43 | : 2, |
twisti@1020 | 44 | ext_model : 4, |
twisti@1020 | 45 | ext_family : 8, |
twisti@1020 | 46 | : 4; |
twisti@1020 | 47 | } bits; |
twisti@1020 | 48 | }; |
twisti@1020 | 49 | |
twisti@1020 | 50 | union StdCpuid1Ebx { // example, unused |
twisti@1020 | 51 | uint32_t value; |
twisti@1020 | 52 | struct { |
twisti@1020 | 53 | uint32_t brand_id : 8, |
twisti@1020 | 54 | clflush_size : 8, |
twisti@1020 | 55 | threads_per_cpu : 8, |
twisti@1020 | 56 | apic_id : 8; |
twisti@1020 | 57 | } bits; |
twisti@1020 | 58 | }; |
twisti@1020 | 59 | |
twisti@1020 | 60 | union StdCpuid1Ecx { |
twisti@1020 | 61 | uint32_t value; |
twisti@1020 | 62 | struct { |
twisti@1020 | 63 | uint32_t sse3 : 1, |
drchase@5353 | 64 | clmul : 1, |
drchase@5353 | 65 | : 1, |
twisti@1020 | 66 | monitor : 1, |
twisti@1020 | 67 | : 1, |
twisti@1020 | 68 | vmx : 1, |
twisti@1020 | 69 | : 1, |
twisti@1020 | 70 | est : 1, |
twisti@1020 | 71 | : 1, |
twisti@1020 | 72 | ssse3 : 1, |
twisti@1020 | 73 | cid : 1, |
twisti@1020 | 74 | : 2, |
twisti@1020 | 75 | cmpxchg16: 1, |
twisti@1020 | 76 | : 4, |
twisti@1020 | 77 | dca : 1, |
twisti@1020 | 78 | sse4_1 : 1, |
twisti@1020 | 79 | sse4_2 : 1, |
twisti@1078 | 80 | : 2, |
twisti@1078 | 81 | popcnt : 1, |
kvn@4205 | 82 | : 1, |
kvn@4205 | 83 | aes : 1, |
kvn@4205 | 84 | : 1, |
kvn@3388 | 85 | osxsave : 1, |
kvn@3388 | 86 | avx : 1, |
kvn@3388 | 87 | : 3; |
twisti@1020 | 88 | } bits; |
twisti@1020 | 89 | }; |
twisti@1020 | 90 | |
twisti@1020 | 91 | union StdCpuid1Edx { |
twisti@1020 | 92 | uint32_t value; |
twisti@1020 | 93 | struct { |
twisti@1020 | 94 | uint32_t : 4, |
twisti@1020 | 95 | tsc : 1, |
twisti@1020 | 96 | : 3, |
twisti@1020 | 97 | cmpxchg8 : 1, |
twisti@1020 | 98 | : 6, |
twisti@1020 | 99 | cmov : 1, |
kvn@2984 | 100 | : 3, |
kvn@2984 | 101 | clflush : 1, |
kvn@2984 | 102 | : 3, |
twisti@1020 | 103 | mmx : 1, |
twisti@1020 | 104 | fxsr : 1, |
twisti@1020 | 105 | sse : 1, |
twisti@1020 | 106 | sse2 : 1, |
twisti@1020 | 107 | : 1, |
twisti@1020 | 108 | ht : 1, |
twisti@1020 | 109 | : 3; |
twisti@1020 | 110 | } bits; |
twisti@1020 | 111 | }; |
twisti@1020 | 112 | |
twisti@1020 | 113 | union DcpCpuid4Eax { |
twisti@1020 | 114 | uint32_t value; |
twisti@1020 | 115 | struct { |
twisti@1020 | 116 | uint32_t cache_type : 5, |
twisti@1020 | 117 | : 21, |
twisti@1020 | 118 | cores_per_cpu : 6; |
twisti@1020 | 119 | } bits; |
twisti@1020 | 120 | }; |
twisti@1020 | 121 | |
twisti@1020 | 122 | union DcpCpuid4Ebx { |
twisti@1020 | 123 | uint32_t value; |
twisti@1020 | 124 | struct { |
twisti@1020 | 125 | uint32_t L1_line_size : 12, |
twisti@1020 | 126 | partitions : 10, |
twisti@1020 | 127 | associativity : 10; |
twisti@1020 | 128 | } bits; |
twisti@1020 | 129 | }; |
twisti@1020 | 130 | |
kvn@1977 | 131 | union TplCpuidBEbx { |
kvn@1977 | 132 | uint32_t value; |
kvn@1977 | 133 | struct { |
kvn@1977 | 134 | uint32_t logical_cpus : 16, |
kvn@1977 | 135 | : 16; |
kvn@1977 | 136 | } bits; |
kvn@1977 | 137 | }; |
kvn@1977 | 138 | |
twisti@1020 | 139 | union ExtCpuid1Ecx { |
twisti@1020 | 140 | uint32_t value; |
twisti@1020 | 141 | struct { |
twisti@1020 | 142 | uint32_t LahfSahf : 1, |
twisti@1020 | 143 | CmpLegacy : 1, |
twisti@1020 | 144 | : 4, |
twisti@1210 | 145 | lzcnt : 1, |
twisti@1020 | 146 | sse4a : 1, |
twisti@1020 | 147 | misalignsse : 1, |
twisti@1020 | 148 | prefetchw : 1, |
twisti@1020 | 149 | : 22; |
twisti@1020 | 150 | } bits; |
twisti@1020 | 151 | }; |
twisti@1020 | 152 | |
twisti@1020 | 153 | union ExtCpuid1Edx { |
twisti@1020 | 154 | uint32_t value; |
twisti@1020 | 155 | struct { |
twisti@1020 | 156 | uint32_t : 22, |
twisti@1020 | 157 | mmx_amd : 1, |
twisti@1020 | 158 | mmx : 1, |
twisti@1020 | 159 | fxsr : 1, |
twisti@1020 | 160 | : 4, |
twisti@1020 | 161 | long_mode : 1, |
twisti@1020 | 162 | tdnow2 : 1, |
twisti@1020 | 163 | tdnow : 1; |
twisti@1020 | 164 | } bits; |
twisti@1020 | 165 | }; |
twisti@1020 | 166 | |
twisti@1020 | 167 | union ExtCpuid5Ex { |
twisti@1020 | 168 | uint32_t value; |
twisti@1020 | 169 | struct { |
twisti@1020 | 170 | uint32_t L1_line_size : 8, |
twisti@1020 | 171 | L1_tag_lines : 8, |
twisti@1020 | 172 | L1_assoc : 8, |
twisti@1020 | 173 | L1_size : 8; |
twisti@1020 | 174 | } bits; |
twisti@1020 | 175 | }; |
twisti@1020 | 176 | |
kvn@3400 | 177 | union ExtCpuid7Edx { |
kvn@3400 | 178 | uint32_t value; |
kvn@3400 | 179 | struct { |
kvn@3400 | 180 | uint32_t : 8, |
kvn@3400 | 181 | tsc_invariance : 1, |
kvn@3400 | 182 | : 23; |
kvn@3400 | 183 | } bits; |
kvn@3400 | 184 | }; |
kvn@3400 | 185 | |
twisti@1020 | 186 | union ExtCpuid8Ecx { |
twisti@1020 | 187 | uint32_t value; |
twisti@1020 | 188 | struct { |
twisti@1020 | 189 | uint32_t cores_per_cpu : 8, |
twisti@1020 | 190 | : 24; |
twisti@1020 | 191 | } bits; |
twisti@1020 | 192 | }; |
twisti@1020 | 193 | |
kvn@3388 | 194 | union SefCpuid7Eax { |
kvn@3388 | 195 | uint32_t value; |
kvn@3388 | 196 | }; |
kvn@3388 | 197 | |
kvn@3388 | 198 | union SefCpuid7Ebx { |
kvn@3388 | 199 | uint32_t value; |
kvn@3388 | 200 | struct { |
kvn@3388 | 201 | uint32_t fsgsbase : 1, |
kvn@3388 | 202 | : 2, |
kvn@3388 | 203 | bmi1 : 1, |
kvn@3388 | 204 | : 1, |
kvn@3388 | 205 | avx2 : 1, |
kvn@3388 | 206 | : 2, |
kvn@3388 | 207 | bmi2 : 1, |
kvn@4410 | 208 | erms : 1, |
kvn@4410 | 209 | : 22; |
kvn@3388 | 210 | } bits; |
kvn@3388 | 211 | }; |
kvn@3388 | 212 | |
kvn@3388 | 213 | union XemXcr0Eax { |
kvn@3388 | 214 | uint32_t value; |
kvn@3388 | 215 | struct { |
kvn@3388 | 216 | uint32_t x87 : 1, |
kvn@3388 | 217 | sse : 1, |
kvn@3388 | 218 | ymm : 1, |
kvn@3388 | 219 | : 29; |
kvn@3388 | 220 | } bits; |
kvn@3388 | 221 | }; |
kvn@3388 | 222 | |
twisti@1020 | 223 | protected: |
phh@3378 | 224 | static int _cpu; |
phh@3378 | 225 | static int _model; |
phh@3378 | 226 | static int _stepping; |
phh@3378 | 227 | static int _cpuFeatures; // features returned by the "cpuid" instruction |
phh@3378 | 228 | // 0 if this instruction is not available |
phh@3378 | 229 | static const char* _features_str; |
twisti@1020 | 230 | |
phh@3378 | 231 | enum { |
phh@3378 | 232 | CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) |
phh@3378 | 233 | CPU_CMOV = (1 << 1), |
phh@3378 | 234 | CPU_FXSR = (1 << 2), |
phh@3378 | 235 | CPU_HT = (1 << 3), |
phh@3378 | 236 | CPU_MMX = (1 << 4), |
phh@3378 | 237 | CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions |
phh@3378 | 238 | // may not necessarily support other 3dnow instructions |
phh@3378 | 239 | CPU_SSE = (1 << 6), |
phh@3378 | 240 | CPU_SSE2 = (1 << 7), |
phh@3378 | 241 | CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) |
phh@3378 | 242 | CPU_SSSE3 = (1 << 9), |
phh@3378 | 243 | CPU_SSE4A = (1 << 10), |
phh@3378 | 244 | CPU_SSE4_1 = (1 << 11), |
phh@3378 | 245 | CPU_SSE4_2 = (1 << 12), |
phh@3378 | 246 | CPU_POPCNT = (1 << 13), |
phh@3378 | 247 | CPU_LZCNT = (1 << 14), |
phh@3378 | 248 | CPU_TSC = (1 << 15), |
kvn@3400 | 249 | CPU_TSCINV = (1 << 16), |
kvn@3400 | 250 | CPU_AVX = (1 << 17), |
kvn@4205 | 251 | CPU_AVX2 = (1 << 18), |
kvn@4410 | 252 | CPU_AES = (1 << 19), |
drchase@5353 | 253 | CPU_ERMS = (1 << 20), // enhanced 'rep movsb/stosb' instructions |
drchase@5353 | 254 | CPU_CLMUL = (1 << 21) // carryless multiply for CRC |
phh@3378 | 255 | } cpuFeatureFlags; |
phh@3378 | 256 | |
phh@3378 | 257 | enum { |
phh@3378 | 258 | // AMD |
phh@3560 | 259 | CPU_FAMILY_AMD_11H = 0x11, |
phh@3378 | 260 | // Intel |
phh@3378 | 261 | CPU_FAMILY_INTEL_CORE = 6, |
phh@3560 | 262 | CPU_MODEL_NEHALEM = 0x1e, |
phh@3560 | 263 | CPU_MODEL_NEHALEM_EP = 0x1a, |
phh@3560 | 264 | CPU_MODEL_NEHALEM_EX = 0x2e, |
phh@3560 | 265 | CPU_MODEL_WESTMERE = 0x25, |
phh@3560 | 266 | CPU_MODEL_WESTMERE_EP = 0x2c, |
phh@3560 | 267 | CPU_MODEL_WESTMERE_EX = 0x2f, |
phh@3560 | 268 | CPU_MODEL_SANDYBRIDGE = 0x2a, |
phh@3560 | 269 | CPU_MODEL_SANDYBRIDGE_EP = 0x2d, |
phh@3560 | 270 | CPU_MODEL_IVYBRIDGE_EP = 0x3a |
phh@3378 | 271 | } cpuExtendedFamily; |
twisti@1020 | 272 | |
twisti@1020 | 273 | // cpuid information block. All info derived from executing cpuid with |
twisti@1020 | 274 | // various function numbers is stored here. Intel and AMD info is |
twisti@1020 | 275 | // merged in this block: accessor methods disentangle it. |
twisti@1020 | 276 | // |
twisti@1020 | 277 | // The info block is laid out in subblocks of 4 dwords corresponding to |
twisti@1020 | 278 | // eax, ebx, ecx and edx, whether or not they contain anything useful. |
twisti@1020 | 279 | struct CpuidInfo { |
twisti@1020 | 280 | // cpuid function 0 |
twisti@1020 | 281 | uint32_t std_max_function; |
twisti@1020 | 282 | uint32_t std_vendor_name_0; |
twisti@1020 | 283 | uint32_t std_vendor_name_1; |
twisti@1020 | 284 | uint32_t std_vendor_name_2; |
twisti@1020 | 285 | |
twisti@1020 | 286 | // cpuid function 1 |
twisti@1020 | 287 | StdCpuid1Eax std_cpuid1_eax; |
twisti@1020 | 288 | StdCpuid1Ebx std_cpuid1_ebx; |
twisti@1020 | 289 | StdCpuid1Ecx std_cpuid1_ecx; |
twisti@1020 | 290 | StdCpuid1Edx std_cpuid1_edx; |
twisti@1020 | 291 | |
twisti@1020 | 292 | // cpuid function 4 (deterministic cache parameters) |
twisti@1020 | 293 | DcpCpuid4Eax dcp_cpuid4_eax; |
twisti@1020 | 294 | DcpCpuid4Ebx dcp_cpuid4_ebx; |
twisti@1020 | 295 | uint32_t dcp_cpuid4_ecx; // unused currently |
twisti@1020 | 296 | uint32_t dcp_cpuid4_edx; // unused currently |
twisti@1020 | 297 | |
kvn@3388 | 298 | // cpuid function 7 (structured extended features) |
kvn@3388 | 299 | SefCpuid7Eax sef_cpuid7_eax; |
kvn@3388 | 300 | SefCpuid7Ebx sef_cpuid7_ebx; |
kvn@3388 | 301 | uint32_t sef_cpuid7_ecx; // unused currently |
kvn@3388 | 302 | uint32_t sef_cpuid7_edx; // unused currently |
kvn@3388 | 303 | |
kvn@1977 | 304 | // cpuid function 0xB (processor topology) |
kvn@1977 | 305 | // ecx = 0 |
kvn@1977 | 306 | uint32_t tpl_cpuidB0_eax; |
kvn@1977 | 307 | TplCpuidBEbx tpl_cpuidB0_ebx; |
kvn@1977 | 308 | uint32_t tpl_cpuidB0_ecx; // unused currently |
kvn@1977 | 309 | uint32_t tpl_cpuidB0_edx; // unused currently |
kvn@1977 | 310 | |
kvn@1977 | 311 | // ecx = 1 |
kvn@1977 | 312 | uint32_t tpl_cpuidB1_eax; |
kvn@1977 | 313 | TplCpuidBEbx tpl_cpuidB1_ebx; |
kvn@1977 | 314 | uint32_t tpl_cpuidB1_ecx; // unused currently |
kvn@1977 | 315 | uint32_t tpl_cpuidB1_edx; // unused currently |
kvn@1977 | 316 | |
kvn@1977 | 317 | // ecx = 2 |
kvn@1977 | 318 | uint32_t tpl_cpuidB2_eax; |
kvn@1977 | 319 | TplCpuidBEbx tpl_cpuidB2_ebx; |
kvn@1977 | 320 | uint32_t tpl_cpuidB2_ecx; // unused currently |
kvn@1977 | 321 | uint32_t tpl_cpuidB2_edx; // unused currently |
kvn@1977 | 322 | |
twisti@1020 | 323 | // cpuid function 0x80000000 // example, unused |
twisti@1020 | 324 | uint32_t ext_max_function; |
twisti@1020 | 325 | uint32_t ext_vendor_name_0; |
twisti@1020 | 326 | uint32_t ext_vendor_name_1; |
twisti@1020 | 327 | uint32_t ext_vendor_name_2; |
twisti@1020 | 328 | |
twisti@1020 | 329 | // cpuid function 0x80000001 |
twisti@1020 | 330 | uint32_t ext_cpuid1_eax; // reserved |
twisti@1020 | 331 | uint32_t ext_cpuid1_ebx; // reserved |
twisti@1020 | 332 | ExtCpuid1Ecx ext_cpuid1_ecx; |
twisti@1020 | 333 | ExtCpuid1Edx ext_cpuid1_edx; |
twisti@1020 | 334 | |
twisti@1020 | 335 | // cpuid functions 0x80000002 thru 0x80000004: example, unused |
twisti@1020 | 336 | uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; |
twisti@1020 | 337 | uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; |
twisti@1020 | 338 | uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; |
twisti@1020 | 339 | |
phh@3560 | 340 | // cpuid function 0x80000005 // AMD L1, Intel reserved |
twisti@1020 | 341 | uint32_t ext_cpuid5_eax; // unused currently |
twisti@1020 | 342 | uint32_t ext_cpuid5_ebx; // reserved |
twisti@1020 | 343 | ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) |
twisti@1020 | 344 | ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) |
twisti@1020 | 345 | |
phh@3378 | 346 | // cpuid function 0x80000007 |
phh@3378 | 347 | uint32_t ext_cpuid7_eax; // reserved |
phh@3378 | 348 | uint32_t ext_cpuid7_ebx; // reserved |
phh@3378 | 349 | uint32_t ext_cpuid7_ecx; // reserved |
phh@3378 | 350 | ExtCpuid7Edx ext_cpuid7_edx; // tscinv |
phh@3378 | 351 | |
twisti@1020 | 352 | // cpuid function 0x80000008 |
twisti@1020 | 353 | uint32_t ext_cpuid8_eax; // unused currently |
twisti@1020 | 354 | uint32_t ext_cpuid8_ebx; // reserved |
twisti@1020 | 355 | ExtCpuid8Ecx ext_cpuid8_ecx; |
twisti@1020 | 356 | uint32_t ext_cpuid8_edx; // reserved |
kvn@3388 | 357 | |
kvn@3388 | 358 | // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) |
kvn@3388 | 359 | XemXcr0Eax xem_xcr0_eax; |
kvn@3388 | 360 | uint32_t xem_xcr0_edx; // reserved |
twisti@1020 | 361 | }; |
twisti@1020 | 362 | |
twisti@1020 | 363 | // The actual cpuid info block |
twisti@1020 | 364 | static CpuidInfo _cpuid_info; |
twisti@1020 | 365 | |
twisti@1020 | 366 | // Extractors and predicates |
twisti@1020 | 367 | static uint32_t extended_cpu_family() { |
twisti@1020 | 368 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; |
twisti@1020 | 369 | result += _cpuid_info.std_cpuid1_eax.bits.ext_family; |
twisti@1020 | 370 | return result; |
twisti@1020 | 371 | } |
phh@3378 | 372 | |
twisti@1020 | 373 | static uint32_t extended_cpu_model() { |
twisti@1020 | 374 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; |
twisti@1020 | 375 | result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; |
twisti@1020 | 376 | return result; |
twisti@1020 | 377 | } |
phh@3378 | 378 | |
twisti@1020 | 379 | static uint32_t cpu_stepping() { |
twisti@1020 | 380 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; |
twisti@1020 | 381 | return result; |
twisti@1020 | 382 | } |
phh@3378 | 383 | |
twisti@1020 | 384 | static uint logical_processor_count() { |
twisti@1020 | 385 | uint result = threads_per_core(); |
twisti@1020 | 386 | return result; |
twisti@1020 | 387 | } |
phh@3378 | 388 | |
twisti@1020 | 389 | static uint32_t feature_flags() { |
twisti@1020 | 390 | uint32_t result = 0; |
twisti@1020 | 391 | if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) |
twisti@1020 | 392 | result |= CPU_CX8; |
twisti@1020 | 393 | if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) |
twisti@1020 | 394 | result |= CPU_CMOV; |
twisti@2144 | 395 | if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd() && |
twisti@2144 | 396 | _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) |
twisti@1020 | 397 | result |= CPU_FXSR; |
twisti@1020 | 398 | // HT flag is set for multi-core processors also. |
twisti@1020 | 399 | if (threads_per_core() > 1) |
twisti@1020 | 400 | result |= CPU_HT; |
twisti@2144 | 401 | if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd() && |
twisti@2144 | 402 | _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) |
twisti@1020 | 403 | result |= CPU_MMX; |
twisti@1020 | 404 | if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) |
twisti@1020 | 405 | result |= CPU_SSE; |
twisti@1020 | 406 | if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) |
twisti@1020 | 407 | result |= CPU_SSE2; |
twisti@1020 | 408 | if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) |
twisti@1020 | 409 | result |= CPU_SSE3; |
twisti@1020 | 410 | if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) |
twisti@1020 | 411 | result |= CPU_SSSE3; |
twisti@1020 | 412 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) |
twisti@1020 | 413 | result |= CPU_SSE4_1; |
twisti@1020 | 414 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) |
twisti@1020 | 415 | result |= CPU_SSE4_2; |
twisti@1078 | 416 | if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) |
twisti@1078 | 417 | result |= CPU_POPCNT; |
kvn@3388 | 418 | if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && |
kvn@3388 | 419 | _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && |
kvn@3388 | 420 | _cpuid_info.xem_xcr0_eax.bits.sse != 0 && |
kvn@3388 | 421 | _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { |
kvn@3388 | 422 | result |= CPU_AVX; |
kvn@3388 | 423 | if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) |
kvn@3388 | 424 | result |= CPU_AVX2; |
kvn@3388 | 425 | } |
phh@3378 | 426 | if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) |
phh@3378 | 427 | result |= CPU_TSC; |
phh@3378 | 428 | if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) |
phh@3378 | 429 | result |= CPU_TSCINV; |
kvn@4205 | 430 | if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) |
kvn@4205 | 431 | result |= CPU_AES; |
kvn@4410 | 432 | if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) |
kvn@4410 | 433 | result |= CPU_ERMS; |
drchase@5353 | 434 | if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) |
drchase@5353 | 435 | result |= CPU_CLMUL; |
twisti@1210 | 436 | |
twisti@1210 | 437 | // AMD features. |
twisti@1210 | 438 | if (is_amd()) { |
kvn@2761 | 439 | if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || |
kvn@2761 | 440 | (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) |
kvn@2761 | 441 | result |= CPU_3DNOW_PREFETCH; |
twisti@1210 | 442 | if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) |
twisti@1210 | 443 | result |= CPU_LZCNT; |
twisti@1210 | 444 | if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) |
twisti@1210 | 445 | result |= CPU_SSE4A; |
twisti@1210 | 446 | } |
twisti@1210 | 447 | |
twisti@1020 | 448 | return result; |
twisti@1020 | 449 | } |
twisti@1020 | 450 | |
twisti@1020 | 451 | static void get_processor_features(); |
twisti@1020 | 452 | |
twisti@1020 | 453 | public: |
twisti@1020 | 454 | // Offsets for cpuid asm stub |
twisti@1020 | 455 | static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } |
twisti@1020 | 456 | static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } |
twisti@1020 | 457 | static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } |
kvn@3388 | 458 | static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } |
twisti@1020 | 459 | static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
twisti@1020 | 460 | static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } |
phh@3378 | 461 | static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); } |
twisti@1020 | 462 | static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
kvn@1977 | 463 | static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } |
kvn@1977 | 464 | static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } |
kvn@1977 | 465 | static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } |
kvn@3388 | 466 | static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } |
twisti@1020 | 467 | |
twisti@1020 | 468 | // Initialization |
twisti@1020 | 469 | static void initialize(); |
twisti@1020 | 470 | |
twisti@1020 | 471 | // Asserts |
twisti@1020 | 472 | static void assert_is_initialized() { |
twisti@1020 | 473 | assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); |
twisti@1020 | 474 | } |
twisti@1020 | 475 | |
twisti@1020 | 476 | // |
twisti@1020 | 477 | // Processor family: |
twisti@1020 | 478 | // 3 - 386 |
twisti@1020 | 479 | // 4 - 486 |
twisti@1020 | 480 | // 5 - Pentium |
twisti@1020 | 481 | // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, |
twisti@1020 | 482 | // Pentium M, Core Solo, Core Duo, Core2 Duo |
twisti@1020 | 483 | // family 6 model: 9, 13, 14, 15 |
twisti@1020 | 484 | // 0x0f - Pentium 4, Opteron |
twisti@1020 | 485 | // |
twisti@1020 | 486 | // Note: The cpu family should be used to select between |
twisti@1020 | 487 | // instruction sequences which are valid on all Intel |
twisti@1020 | 488 | // processors. Use the feature test functions below to |
twisti@1020 | 489 | // determine whether a particular instruction is supported. |
twisti@1020 | 490 | // |
twisti@1020 | 491 | static int cpu_family() { return _cpu;} |
twisti@1020 | 492 | static bool is_P6() { return cpu_family() >= 6; } |
twisti@1020 | 493 | static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' |
twisti@1020 | 494 | static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' |
twisti@1020 | 495 | |
kvn@2002 | 496 | static bool supports_processor_topology() { |
kvn@2002 | 497 | return (_cpuid_info.std_max_function >= 0xB) && |
kvn@2002 | 498 | // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level. |
kvn@2002 | 499 | // Some cpus have max cpuid >= 0xB but do not support processor topology. |
kvn@4410 | 500 | (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0); |
kvn@2002 | 501 | } |
kvn@2002 | 502 | |
twisti@1020 | 503 | static uint cores_per_cpu() { |
twisti@1020 | 504 | uint result = 1; |
twisti@1020 | 505 | if (is_intel()) { |
kvn@2002 | 506 | if (supports_processor_topology()) { |
kvn@1977 | 507 | result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / |
kvn@1977 | 508 | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
kvn@1977 | 509 | } else { |
kvn@1977 | 510 | result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); |
kvn@1977 | 511 | } |
twisti@1020 | 512 | } else if (is_amd()) { |
twisti@1020 | 513 | result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); |
twisti@1020 | 514 | } |
twisti@1020 | 515 | return result; |
twisti@1020 | 516 | } |
twisti@1020 | 517 | |
twisti@1020 | 518 | static uint threads_per_core() { |
twisti@1020 | 519 | uint result = 1; |
kvn@2002 | 520 | if (is_intel() && supports_processor_topology()) { |
kvn@1977 | 521 | result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; |
kvn@1977 | 522 | } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { |
twisti@1020 | 523 | result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
twisti@1020 | 524 | cores_per_cpu(); |
twisti@1020 | 525 | } |
twisti@1020 | 526 | return result; |
twisti@1020 | 527 | } |
twisti@1020 | 528 | |
kvn@3052 | 529 | static intx prefetch_data_size() { |
twisti@1020 | 530 | intx result = 0; |
twisti@1020 | 531 | if (is_intel()) { |
twisti@1020 | 532 | result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); |
twisti@1020 | 533 | } else if (is_amd()) { |
twisti@1020 | 534 | result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; |
twisti@1020 | 535 | } |
twisti@1020 | 536 | if (result < 32) // not defined ? |
twisti@1020 | 537 | result = 32; // 32 bytes by default on x86 and other x64 |
twisti@1020 | 538 | return result; |
twisti@1020 | 539 | } |
twisti@1020 | 540 | |
twisti@1020 | 541 | // |
twisti@1020 | 542 | // Feature identification |
twisti@1020 | 543 | // |
twisti@1020 | 544 | static bool supports_cpuid() { return _cpuFeatures != 0; } |
twisti@1020 | 545 | static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } |
twisti@1020 | 546 | static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } |
twisti@1020 | 547 | static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } |
twisti@1020 | 548 | static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } |
twisti@1020 | 549 | static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } |
twisti@1020 | 550 | static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } |
twisti@1020 | 551 | static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } |
twisti@1020 | 552 | static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } |
twisti@1020 | 553 | static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } |
twisti@1020 | 554 | static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } |
twisti@1020 | 555 | static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } |
twisti@1078 | 556 | static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } |
kvn@3388 | 557 | static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } |
kvn@3388 | 558 | static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } |
phh@3378 | 559 | static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 0; } |
kvn@4205 | 560 | static bool supports_aes() { return (_cpuFeatures & CPU_AES) != 0; } |
kvn@4410 | 561 | static bool supports_erms() { return (_cpuFeatures & CPU_ERMS) != 0; } |
drchase@5353 | 562 | static bool supports_clmul() { return (_cpuFeatures & CPU_CLMUL) != 0; } |
phh@3378 | 563 | |
phh@3378 | 564 | // Intel features |
phh@3378 | 565 | static bool is_intel_family_core() { return is_intel() && |
phh@3378 | 566 | extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } |
phh@3378 | 567 | |
phh@3378 | 568 | static bool is_intel_tsc_synched_at_init() { |
phh@3378 | 569 | if (is_intel_family_core()) { |
phh@3378 | 570 | uint32_t ext_model = extended_cpu_model(); |
phh@3560 | 571 | if (ext_model == CPU_MODEL_NEHALEM_EP || |
phh@3560 | 572 | ext_model == CPU_MODEL_WESTMERE_EP || |
phh@3560 | 573 | ext_model == CPU_MODEL_SANDYBRIDGE_EP || |
phh@3560 | 574 | ext_model == CPU_MODEL_IVYBRIDGE_EP) { |
phh@3560 | 575 | // <= 2-socket invariant tsc support. EX versions are usually used |
phh@3560 | 576 | // in > 2-socket systems and likely don't synchronize tscs at |
phh@3560 | 577 | // initialization. |
phh@3560 | 578 | // Code that uses tsc values must be prepared for them to arbitrarily |
phh@3560 | 579 | // jump forward or backward. |
phh@3378 | 580 | return true; |
phh@3378 | 581 | } |
phh@3378 | 582 | } |
phh@3378 | 583 | return false; |
phh@3378 | 584 | } |
phh@3378 | 585 | |
twisti@1020 | 586 | // AMD features |
kvn@2761 | 587 | static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } |
twisti@1020 | 588 | static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
twisti@1210 | 589 | static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } |
twisti@1020 | 590 | static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
twisti@1020 | 591 | |
phh@3378 | 592 | static bool is_amd_Barcelona() { return is_amd() && |
phh@3378 | 593 | extended_cpu_family() == CPU_FAMILY_AMD_11H; } |
phh@3378 | 594 | |
phh@3378 | 595 | // Intel and AMD newer cores support fast timestamps well |
phh@3378 | 596 | static bool supports_tscinv_bit() { |
phh@3378 | 597 | return (_cpuFeatures & CPU_TSCINV) != 0; |
phh@3378 | 598 | } |
phh@3378 | 599 | static bool supports_tscinv() { |
phh@3378 | 600 | return supports_tscinv_bit() && |
phh@3378 | 601 | ( (is_amd() && !is_amd_Barcelona()) || |
phh@3378 | 602 | is_intel_tsc_synched_at_init() ); |
phh@3378 | 603 | } |
phh@3378 | 604 | |
kvn@2269 | 605 | // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). |
kvn@2269 | 606 | static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && |
kvn@2269 | 607 | supports_sse3() && _model != 0x1C; } |
kvn@2269 | 608 | |
twisti@1020 | 609 | static bool supports_compare_and_exchange() { return true; } |
twisti@1020 | 610 | |
twisti@1020 | 611 | static const char* cpu_features() { return _features_str; } |
twisti@1020 | 612 | |
twisti@1020 | 613 | static intx allocate_prefetch_distance() { |
twisti@1020 | 614 | // This method should be called before allocate_prefetch_style(). |
twisti@1020 | 615 | // |
twisti@1020 | 616 | // Hardware prefetching (distance/size in bytes): |
twisti@1020 | 617 | // Pentium 3 - 64 / 32 |
twisti@1020 | 618 | // Pentium 4 - 256 / 128 |
twisti@1020 | 619 | // Athlon - 64 / 32 ???? |
twisti@1020 | 620 | // Opteron - 128 / 64 only when 2 sequential cache lines accessed |
twisti@1020 | 621 | // Core - 128 / 64 |
twisti@1020 | 622 | // |
twisti@1020 | 623 | // Software prefetching (distance in bytes / instruction with best score): |
twisti@1020 | 624 | // Pentium 3 - 128 / prefetchnta |
twisti@1020 | 625 | // Pentium 4 - 512 / prefetchnta |
twisti@1020 | 626 | // Athlon - 128 / prefetchnta |
twisti@1020 | 627 | // Opteron - 256 / prefetchnta |
twisti@1020 | 628 | // Core - 256 / prefetchnta |
twisti@1020 | 629 | // It will be used only when AllocatePrefetchStyle > 0 |
twisti@1020 | 630 | |
twisti@1020 | 631 | intx count = AllocatePrefetchDistance; |
twisti@1020 | 632 | if (count < 0) { // default ? |
twisti@1020 | 633 | if (is_amd()) { // AMD |
twisti@1020 | 634 | if (supports_sse2()) |
twisti@1020 | 635 | count = 256; // Opteron |
twisti@1020 | 636 | else |
twisti@1020 | 637 | count = 128; // Athlon |
twisti@1020 | 638 | } else { // Intel |
twisti@1020 | 639 | if (supports_sse2()) |
twisti@1020 | 640 | if (cpu_family() == 6) { |
twisti@1020 | 641 | count = 256; // Pentium M, Core, Core2 |
twisti@1020 | 642 | } else { |
twisti@1020 | 643 | count = 512; // Pentium 4 |
twisti@1020 | 644 | } |
twisti@1020 | 645 | else |
twisti@1020 | 646 | count = 128; // Pentium 3 (and all other old CPUs) |
twisti@1020 | 647 | } |
twisti@1020 | 648 | } |
twisti@1020 | 649 | return count; |
twisti@1020 | 650 | } |
twisti@1020 | 651 | static intx allocate_prefetch_style() { |
twisti@1020 | 652 | assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); |
twisti@1020 | 653 | // Return 0 if AllocatePrefetchDistance was not defined. |
twisti@1020 | 654 | return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; |
twisti@1020 | 655 | } |
twisti@1020 | 656 | |
twisti@1020 | 657 | // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from |
twisti@1020 | 658 | // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. |
twisti@1020 | 659 | // Tested intervals from 128 to 2048 in increments of 64 == one cache line. |
twisti@1020 | 660 | // 256 bytes (4 dcache lines) was the nearest runner-up to 576. |
twisti@1020 | 661 | |
twisti@1020 | 662 | // gc copy/scan is disabled if prefetchw isn't supported, because |
twisti@1020 | 663 | // Prefetch::write emits an inlined prefetchw on Linux. |
twisti@1020 | 664 | // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. |
twisti@1020 | 665 | // The used prefetcht0 instruction works for both amd64 and em64t. |
twisti@1020 | 666 | static intx prefetch_copy_interval_in_bytes() { |
twisti@1020 | 667 | intx interval = PrefetchCopyIntervalInBytes; |
twisti@1020 | 668 | return interval >= 0 ? interval : 576; |
twisti@1020 | 669 | } |
twisti@1020 | 670 | static intx prefetch_scan_interval_in_bytes() { |
twisti@1020 | 671 | intx interval = PrefetchScanIntervalInBytes; |
twisti@1020 | 672 | return interval >= 0 ? interval : 576; |
twisti@1020 | 673 | } |
twisti@1020 | 674 | static intx prefetch_fields_ahead() { |
twisti@1020 | 675 | intx count = PrefetchFieldsAhead; |
twisti@1020 | 676 | return count >= 0 ? count : 1; |
twisti@1020 | 677 | } |
twisti@1020 | 678 | }; |
stefank@2314 | 679 | |
stefank@2314 | 680 | #endif // CPU_X86_VM_VM_VERSION_X86_HPP |