1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Mon Feb 23 12:02:30 2009 -0800 1.3 @@ -0,0 +1,459 @@ 1.4 +/* 1.5 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +class VM_Version : public Abstract_VM_Version { 1.29 +public: 1.30 + // cpuid result register layouts. These are all unions of a uint32_t 1.31 + // (in case anyone wants access to the register as a whole) and a bitfield. 1.32 + 1.33 + union StdCpuid1Eax { 1.34 + uint32_t value; 1.35 + struct { 1.36 + uint32_t stepping : 4, 1.37 + model : 4, 1.38 + family : 4, 1.39 + proc_type : 2, 1.40 + : 2, 1.41 + ext_model : 4, 1.42 + ext_family : 8, 1.43 + : 4; 1.44 + } bits; 1.45 + }; 1.46 + 1.47 + union StdCpuid1Ebx { // example, unused 1.48 + uint32_t value; 1.49 + struct { 1.50 + uint32_t brand_id : 8, 1.51 + clflush_size : 8, 1.52 + threads_per_cpu : 8, 1.53 + apic_id : 8; 1.54 + } bits; 1.55 + }; 1.56 + 1.57 + union StdCpuid1Ecx { 1.58 + uint32_t value; 1.59 + struct { 1.60 + uint32_t sse3 : 1, 1.61 + : 2, 1.62 + monitor : 1, 1.63 + : 1, 1.64 + vmx : 1, 1.65 + : 1, 1.66 + est : 1, 1.67 + : 1, 1.68 + ssse3 : 1, 1.69 + cid : 1, 1.70 + : 2, 1.71 + cmpxchg16: 1, 1.72 + : 4, 1.73 + dca : 1, 1.74 + sse4_1 : 1, 1.75 + sse4_2 : 1, 1.76 + : 11; 1.77 + } bits; 1.78 + }; 1.79 + 1.80 + union StdCpuid1Edx { 1.81 + uint32_t value; 1.82 + struct { 1.83 + uint32_t : 4, 1.84 + tsc : 1, 1.85 + : 3, 1.86 + cmpxchg8 : 1, 1.87 + : 6, 1.88 + cmov : 1, 1.89 + : 7, 1.90 + mmx : 1, 1.91 + fxsr : 1, 1.92 + sse : 1, 1.93 + sse2 : 1, 1.94 + : 1, 1.95 + ht : 1, 1.96 + : 3; 1.97 + } bits; 1.98 + }; 1.99 + 1.100 + union DcpCpuid4Eax { 1.101 + uint32_t value; 1.102 + struct { 1.103 + uint32_t cache_type : 5, 1.104 + : 21, 1.105 + cores_per_cpu : 6; 1.106 + } bits; 1.107 + }; 1.108 + 1.109 + union DcpCpuid4Ebx { 1.110 + uint32_t value; 1.111 + struct { 1.112 + uint32_t L1_line_size : 12, 1.113 + partitions : 10, 1.114 + associativity : 10; 1.115 + } bits; 1.116 + }; 1.117 + 1.118 + union ExtCpuid1Ecx { 1.119 + uint32_t value; 1.120 + struct { 1.121 + uint32_t LahfSahf : 1, 1.122 + CmpLegacy : 1, 1.123 + : 4, 1.124 + abm : 1, 1.125 + sse4a : 1, 1.126 + misalignsse : 1, 1.127 + prefetchw : 1, 1.128 + : 22; 1.129 + } bits; 1.130 + }; 1.131 + 1.132 + union ExtCpuid1Edx { 1.133 + uint32_t value; 1.134 + struct { 1.135 + uint32_t : 22, 1.136 + mmx_amd : 1, 1.137 + mmx : 1, 1.138 + fxsr : 1, 1.139 + : 4, 1.140 + long_mode : 1, 1.141 + tdnow2 : 1, 1.142 + tdnow : 1; 1.143 + } bits; 1.144 + }; 1.145 + 1.146 + union ExtCpuid5Ex { 1.147 + uint32_t value; 1.148 + struct { 1.149 + uint32_t L1_line_size : 8, 1.150 + L1_tag_lines : 8, 1.151 + L1_assoc : 8, 1.152 + L1_size : 8; 1.153 + } bits; 1.154 + }; 1.155 + 1.156 + union ExtCpuid8Ecx { 1.157 + uint32_t value; 1.158 + struct { 1.159 + uint32_t cores_per_cpu : 8, 1.160 + : 24; 1.161 + } bits; 1.162 + }; 1.163 + 1.164 +protected: 1.165 + static int _cpu; 1.166 + static int _model; 1.167 + static int _stepping; 1.168 + static int _cpuFeatures; // features returned by the "cpuid" instruction 1.169 + // 0 if this instruction is not available 1.170 + static const char* _features_str; 1.171 + 1.172 + enum { 1.173 + CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 1.174 + CPU_CMOV = (1 << 1), 1.175 + CPU_FXSR = (1 << 2), 1.176 + CPU_HT = (1 << 3), 1.177 + CPU_MMX = (1 << 4), 1.178 + CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) 1.179 + CPU_SSE = (1 << 6), 1.180 + CPU_SSE2 = (1 << 7), 1.181 + CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) 1.182 + CPU_SSSE3 = (1 << 9), 1.183 + CPU_SSE4A = (1 << 10), 1.184 + CPU_SSE4_1 = (1 << 11), 1.185 + CPU_SSE4_2 = (1 << 12) 1.186 + } cpuFeatureFlags; 1.187 + 1.188 + // cpuid information block. All info derived from executing cpuid with 1.189 + // various function numbers is stored here. Intel and AMD info is 1.190 + // merged in this block: accessor methods disentangle it. 1.191 + // 1.192 + // The info block is laid out in subblocks of 4 dwords corresponding to 1.193 + // eax, ebx, ecx and edx, whether or not they contain anything useful. 1.194 + struct CpuidInfo { 1.195 + // cpuid function 0 1.196 + uint32_t std_max_function; 1.197 + uint32_t std_vendor_name_0; 1.198 + uint32_t std_vendor_name_1; 1.199 + uint32_t std_vendor_name_2; 1.200 + 1.201 + // cpuid function 1 1.202 + StdCpuid1Eax std_cpuid1_eax; 1.203 + StdCpuid1Ebx std_cpuid1_ebx; 1.204 + StdCpuid1Ecx std_cpuid1_ecx; 1.205 + StdCpuid1Edx std_cpuid1_edx; 1.206 + 1.207 + // cpuid function 4 (deterministic cache parameters) 1.208 + DcpCpuid4Eax dcp_cpuid4_eax; 1.209 + DcpCpuid4Ebx dcp_cpuid4_ebx; 1.210 + uint32_t dcp_cpuid4_ecx; // unused currently 1.211 + uint32_t dcp_cpuid4_edx; // unused currently 1.212 + 1.213 + // cpuid function 0x80000000 // example, unused 1.214 + uint32_t ext_max_function; 1.215 + uint32_t ext_vendor_name_0; 1.216 + uint32_t ext_vendor_name_1; 1.217 + uint32_t ext_vendor_name_2; 1.218 + 1.219 + // cpuid function 0x80000001 1.220 + uint32_t ext_cpuid1_eax; // reserved 1.221 + uint32_t ext_cpuid1_ebx; // reserved 1.222 + ExtCpuid1Ecx ext_cpuid1_ecx; 1.223 + ExtCpuid1Edx ext_cpuid1_edx; 1.224 + 1.225 + // cpuid functions 0x80000002 thru 0x80000004: example, unused 1.226 + uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 1.227 + uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 1.228 + uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 1.229 + 1.230 + // cpuid function 0x80000005 //AMD L1, Intel reserved 1.231 + uint32_t ext_cpuid5_eax; // unused currently 1.232 + uint32_t ext_cpuid5_ebx; // reserved 1.233 + ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) 1.234 + ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) 1.235 + 1.236 + // cpuid function 0x80000008 1.237 + uint32_t ext_cpuid8_eax; // unused currently 1.238 + uint32_t ext_cpuid8_ebx; // reserved 1.239 + ExtCpuid8Ecx ext_cpuid8_ecx; 1.240 + uint32_t ext_cpuid8_edx; // reserved 1.241 + }; 1.242 + 1.243 + // The actual cpuid info block 1.244 + static CpuidInfo _cpuid_info; 1.245 + 1.246 + // Extractors and predicates 1.247 + static uint32_t extended_cpu_family() { 1.248 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; 1.249 + result += _cpuid_info.std_cpuid1_eax.bits.ext_family; 1.250 + return result; 1.251 + } 1.252 + static uint32_t extended_cpu_model() { 1.253 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; 1.254 + result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; 1.255 + return result; 1.256 + } 1.257 + static uint32_t cpu_stepping() { 1.258 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; 1.259 + return result; 1.260 + } 1.261 + static uint logical_processor_count() { 1.262 + uint result = threads_per_core(); 1.263 + return result; 1.264 + } 1.265 + static uint32_t feature_flags() { 1.266 + uint32_t result = 0; 1.267 + if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 1.268 + result |= CPU_CX8; 1.269 + if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 1.270 + result |= CPU_CMOV; 1.271 + if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && 1.272 + _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) 1.273 + result |= CPU_FXSR; 1.274 + // HT flag is set for multi-core processors also. 1.275 + if (threads_per_core() > 1) 1.276 + result |= CPU_HT; 1.277 + if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && 1.278 + _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) 1.279 + result |= CPU_MMX; 1.280 + if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 1.281 + result |= CPU_3DNOW; 1.282 + if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 1.283 + result |= CPU_SSE; 1.284 + if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 1.285 + result |= CPU_SSE2; 1.286 + if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 1.287 + result |= CPU_SSE3; 1.288 + if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 1.289 + result |= CPU_SSSE3; 1.290 + if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 1.291 + result |= CPU_SSE4A; 1.292 + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 1.293 + result |= CPU_SSE4_1; 1.294 + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 1.295 + result |= CPU_SSE4_2; 1.296 + return result; 1.297 + } 1.298 + 1.299 + static void get_processor_features(); 1.300 + 1.301 +public: 1.302 + // Offsets for cpuid asm stub 1.303 + static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 1.304 + static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } 1.305 + static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } 1.306 + static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 1.307 + static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 1.308 + static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 1.309 + 1.310 + // Initialization 1.311 + static void initialize(); 1.312 + 1.313 + // Asserts 1.314 + static void assert_is_initialized() { 1.315 + assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); 1.316 + } 1.317 + 1.318 + // 1.319 + // Processor family: 1.320 + // 3 - 386 1.321 + // 4 - 486 1.322 + // 5 - Pentium 1.323 + // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 1.324 + // Pentium M, Core Solo, Core Duo, Core2 Duo 1.325 + // family 6 model: 9, 13, 14, 15 1.326 + // 0x0f - Pentium 4, Opteron 1.327 + // 1.328 + // Note: The cpu family should be used to select between 1.329 + // instruction sequences which are valid on all Intel 1.330 + // processors. Use the feature test functions below to 1.331 + // determine whether a particular instruction is supported. 1.332 + // 1.333 + static int cpu_family() { return _cpu;} 1.334 + static bool is_P6() { return cpu_family() >= 6; } 1.335 + 1.336 + static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 1.337 + static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 1.338 + 1.339 + static uint cores_per_cpu() { 1.340 + uint result = 1; 1.341 + if (is_intel()) { 1.342 + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 1.343 + } else if (is_amd()) { 1.344 + result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 1.345 + } 1.346 + return result; 1.347 + } 1.348 + 1.349 + static uint threads_per_core() { 1.350 + uint result = 1; 1.351 + if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 1.352 + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 1.353 + cores_per_cpu(); 1.354 + } 1.355 + return result; 1.356 + } 1.357 + 1.358 + static intx L1_data_cache_line_size() { 1.359 + intx result = 0; 1.360 + if (is_intel()) { 1.361 + result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 1.362 + } else if (is_amd()) { 1.363 + result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 1.364 + } 1.365 + if (result < 32) // not defined ? 1.366 + result = 32; // 32 bytes by default on x86 and other x64 1.367 + return result; 1.368 + } 1.369 + 1.370 + // 1.371 + // Feature identification 1.372 + // 1.373 + static bool supports_cpuid() { return _cpuFeatures != 0; } 1.374 + static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 1.375 + static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 1.376 + static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 1.377 + static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 1.378 + static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 1.379 + static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 1.380 + static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 1.381 + static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 1.382 + static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 1.383 + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 1.384 + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 1.385 + // 1.386 + // AMD features 1.387 + // 1.388 + static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 1.389 + static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } 1.390 + static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } 1.391 + static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 1.392 + 1.393 + static bool supports_compare_and_exchange() { return true; } 1.394 + 1.395 + static const char* cpu_features() { return _features_str; } 1.396 + 1.397 + static intx allocate_prefetch_distance() { 1.398 + // This method should be called before allocate_prefetch_style(). 1.399 + // 1.400 + // Hardware prefetching (distance/size in bytes): 1.401 + // Pentium 3 - 64 / 32 1.402 + // Pentium 4 - 256 / 128 1.403 + // Athlon - 64 / 32 ???? 1.404 + // Opteron - 128 / 64 only when 2 sequential cache lines accessed 1.405 + // Core - 128 / 64 1.406 + // 1.407 + // Software prefetching (distance in bytes / instruction with best score): 1.408 + // Pentium 3 - 128 / prefetchnta 1.409 + // Pentium 4 - 512 / prefetchnta 1.410 + // Athlon - 128 / prefetchnta 1.411 + // Opteron - 256 / prefetchnta 1.412 + // Core - 256 / prefetchnta 1.413 + // It will be used only when AllocatePrefetchStyle > 0 1.414 + 1.415 + intx count = AllocatePrefetchDistance; 1.416 + if (count < 0) { // default ? 1.417 + if (is_amd()) { // AMD 1.418 + if (supports_sse2()) 1.419 + count = 256; // Opteron 1.420 + else 1.421 + count = 128; // Athlon 1.422 + } else { // Intel 1.423 + if (supports_sse2()) 1.424 + if (cpu_family() == 6) { 1.425 + count = 256; // Pentium M, Core, Core2 1.426 + } else { 1.427 + count = 512; // Pentium 4 1.428 + } 1.429 + else 1.430 + count = 128; // Pentium 3 (and all other old CPUs) 1.431 + } 1.432 + } 1.433 + return count; 1.434 + } 1.435 + static intx allocate_prefetch_style() { 1.436 + assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 1.437 + // Return 0 if AllocatePrefetchDistance was not defined. 1.438 + return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; 1.439 + } 1.440 + 1.441 + // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1.442 + // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1.443 + // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1.444 + // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1.445 + 1.446 + // gc copy/scan is disabled if prefetchw isn't supported, because 1.447 + // Prefetch::write emits an inlined prefetchw on Linux. 1.448 + // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1.449 + // The used prefetcht0 instruction works for both amd64 and em64t. 1.450 + static intx prefetch_copy_interval_in_bytes() { 1.451 + intx interval = PrefetchCopyIntervalInBytes; 1.452 + return interval >= 0 ? interval : 576; 1.453 + } 1.454 + static intx prefetch_scan_interval_in_bytes() { 1.455 + intx interval = PrefetchScanIntervalInBytes; 1.456 + return interval >= 0 ? interval : 576; 1.457 + } 1.458 + static intx prefetch_fields_ahead() { 1.459 + intx count = PrefetchFieldsAhead; 1.460 + return count >= 0 ? count : 1; 1.461 + } 1.462 +};