1.1 --- a/src/cpu/x86/vm/vm_version_x86_64.hpp Wed Feb 18 18:20:02 2009 -0800 1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 @@ -1,449 +0,0 @@ 1.4 -/* 1.5 - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. 1.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 - * 1.8 - * This code is free software; you can redistribute it and/or modify it 1.9 - * under the terms of the GNU General Public License version 2 only, as 1.10 - * published by the Free Software Foundation. 1.11 - * 1.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 - * version 2 for more details (a copy is included in the LICENSE file that 1.16 - * accompanied this code). 1.17 - * 1.18 - * You should have received a copy of the GNU General Public License version 1.19 - * 2 along with this work; if not, write to the Free Software Foundation, 1.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 - * 1.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 - * have any questions. 1.25 - * 1.26 - */ 1.27 - 1.28 -class VM_Version : public Abstract_VM_Version { 1.29 -public: 1.30 - // cpuid result register layouts. These are all unions of a uint32_t 1.31 - // (in case anyone wants access to the register as a whole) and a bitfield. 1.32 - 1.33 - union StdCpuid1Eax { 1.34 - uint32_t value; 1.35 - struct { 1.36 - uint32_t stepping : 4, 1.37 - model : 4, 1.38 - family : 4, 1.39 - proc_type : 2, 1.40 - : 2, 1.41 - ext_model : 4, 1.42 - ext_family : 8, 1.43 - : 4; 1.44 - } bits; 1.45 - }; 1.46 - 1.47 - union StdCpuid1Ebx { // example, unused 1.48 - uint32_t value; 1.49 - struct { 1.50 - uint32_t brand_id : 8, 1.51 - clflush_size : 8, 1.52 - threads_per_cpu : 8, 1.53 - apic_id : 8; 1.54 - } bits; 1.55 - }; 1.56 - 1.57 - union StdCpuid1Ecx { 1.58 - uint32_t value; 1.59 - struct { 1.60 - uint32_t sse3 : 1, 1.61 - : 2, 1.62 - monitor : 1, 1.63 - : 1, 1.64 - vmx : 1, 1.65 - : 1, 1.66 - est : 1, 1.67 - : 1, 1.68 - ssse3 : 1, 1.69 - cid : 1, 1.70 - : 2, 1.71 - cmpxchg16: 1, 1.72 - : 4, 1.73 - dca : 1, 1.74 - sse4_1 : 1, 1.75 - sse4_2 : 1, 1.76 - : 11; 1.77 - } bits; 1.78 - }; 1.79 - 1.80 - union StdCpuid1Edx { 1.81 - uint32_t value; 1.82 - struct { 1.83 - uint32_t : 4, 1.84 - tsc : 1, 1.85 - : 3, 1.86 - cmpxchg8 : 1, 1.87 - : 6, 1.88 - cmov : 1, 1.89 - : 7, 1.90 - mmx : 1, 1.91 - fxsr : 1, 1.92 - sse : 1, 1.93 - sse2 : 1, 1.94 - : 1, 1.95 - ht : 1, 1.96 - : 3; 1.97 - } bits; 1.98 - }; 1.99 - 1.100 - union DcpCpuid4Eax { 1.101 - uint32_t value; 1.102 - struct { 1.103 - uint32_t cache_type : 5, 1.104 - : 21, 1.105 - cores_per_cpu : 6; 1.106 - } bits; 1.107 - }; 1.108 - 1.109 - union DcpCpuid4Ebx { 1.110 - uint32_t value; 1.111 - struct { 1.112 - uint32_t L1_line_size : 12, 1.113 - partitions : 10, 1.114 - associativity : 10; 1.115 - } bits; 1.116 - }; 1.117 - 1.118 - union ExtCpuid1Edx { 1.119 - uint32_t value; 1.120 - struct { 1.121 - uint32_t : 22, 1.122 - mmx_amd : 1, 1.123 - mmx : 1, 1.124 - fxsr : 1, 1.125 - : 4, 1.126 - long_mode : 1, 1.127 - tdnow2 : 1, 1.128 - tdnow : 1; 1.129 - } bits; 1.130 - }; 1.131 - 1.132 - union ExtCpuid1Ecx { 1.133 - uint32_t value; 1.134 - struct { 1.135 - uint32_t LahfSahf : 1, 1.136 - CmpLegacy : 1, 1.137 - : 4, 1.138 - abm : 1, 1.139 - sse4a : 1, 1.140 - misalignsse : 1, 1.141 - prefetchw : 1, 1.142 - : 22; 1.143 - } bits; 1.144 - }; 1.145 - 1.146 - union ExtCpuid5Ex { 1.147 - uint32_t value; 1.148 - struct { 1.149 - uint32_t L1_line_size : 8, 1.150 - L1_tag_lines : 8, 1.151 - L1_assoc : 8, 1.152 - L1_size : 8; 1.153 - } bits; 1.154 - }; 1.155 - 1.156 - union ExtCpuid8Ecx { 1.157 - uint32_t value; 1.158 - struct { 1.159 - uint32_t cores_per_cpu : 8, 1.160 - : 24; 1.161 - } bits; 1.162 - }; 1.163 - 1.164 -protected: 1.165 - static int _cpu; 1.166 - static int _model; 1.167 - static int _stepping; 1.168 - static int _cpuFeatures; // features returned by the "cpuid" instruction 1.169 - // 0 if this instruction is not available 1.170 - static const char* _features_str; 1.171 - 1.172 - enum { 1.173 - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 1.174 - CPU_CMOV = (1 << 1), 1.175 - CPU_FXSR = (1 << 2), 1.176 - CPU_HT = (1 << 3), 1.177 - CPU_MMX = (1 << 4), 1.178 - CPU_3DNOW= (1 << 5), 1.179 - CPU_SSE = (1 << 6), 1.180 - CPU_SSE2 = (1 << 7), 1.181 - CPU_SSE3 = (1 << 8), 1.182 - CPU_SSSE3= (1 << 9), 1.183 - CPU_SSE4A= (1 <<10), 1.184 - CPU_SSE4_1 = (1 << 11), 1.185 - CPU_SSE4_2 = (1 << 12) 1.186 - } cpuFeatureFlags; 1.187 - 1.188 - // cpuid information block. All info derived from executing cpuid with 1.189 - // various function numbers is stored here. Intel and AMD info is 1.190 - // merged in this block: accessor methods disentangle it. 1.191 - // 1.192 - // The info block is laid out in subblocks of 4 dwords corresponding to 1.193 - // eax, ebx, ecx and edx, whether or not they contain anything useful. 1.194 - struct CpuidInfo { 1.195 - // cpuid function 0 1.196 - uint32_t std_max_function; 1.197 - uint32_t std_vendor_name_0; 1.198 - uint32_t std_vendor_name_1; 1.199 - uint32_t std_vendor_name_2; 1.200 - 1.201 - // cpuid function 1 1.202 - StdCpuid1Eax std_cpuid1_eax; 1.203 - StdCpuid1Ebx std_cpuid1_ebx; 1.204 - StdCpuid1Ecx std_cpuid1_ecx; 1.205 - StdCpuid1Edx std_cpuid1_edx; 1.206 - 1.207 - // cpuid function 4 (deterministic cache parameters) 1.208 - DcpCpuid4Eax dcp_cpuid4_eax; 1.209 - DcpCpuid4Ebx dcp_cpuid4_ebx; 1.210 - uint32_t dcp_cpuid4_ecx; // unused currently 1.211 - uint32_t dcp_cpuid4_edx; // unused currently 1.212 - 1.213 - // cpuid function 0x80000000 // example, unused 1.214 - uint32_t ext_max_function; 1.215 - uint32_t ext_vendor_name_0; 1.216 - uint32_t ext_vendor_name_1; 1.217 - uint32_t ext_vendor_name_2; 1.218 - 1.219 - // cpuid function 0x80000001 1.220 - uint32_t ext_cpuid1_eax; // reserved 1.221 - uint32_t ext_cpuid1_ebx; // reserved 1.222 - ExtCpuid1Ecx ext_cpuid1_ecx; 1.223 - ExtCpuid1Edx ext_cpuid1_edx; 1.224 - 1.225 - // cpuid functions 0x80000002 thru 0x80000004: example, unused 1.226 - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 1.227 - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 1.228 - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 1.229 - 1.230 - // cpuid function 0x80000005 //AMD L1, Intel reserved 1.231 - uint32_t ext_cpuid5_eax; // unused currently 1.232 - uint32_t ext_cpuid5_ebx; // reserved 1.233 - ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) 1.234 - ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) 1.235 - 1.236 - // cpuid function 0x80000008 1.237 - uint32_t ext_cpuid8_eax; // unused currently 1.238 - uint32_t ext_cpuid8_ebx; // reserved 1.239 - ExtCpuid8Ecx ext_cpuid8_ecx; 1.240 - uint32_t ext_cpuid8_edx; // reserved 1.241 - }; 1.242 - 1.243 - // The actual cpuid info block 1.244 - static CpuidInfo _cpuid_info; 1.245 - 1.246 - // Extractors and predicates 1.247 - static uint32_t extended_cpu_family() { 1.248 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; 1.249 - result += _cpuid_info.std_cpuid1_eax.bits.ext_family; 1.250 - return result; 1.251 - } 1.252 - static uint32_t extended_cpu_model() { 1.253 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; 1.254 - result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; 1.255 - return result; 1.256 - } 1.257 - static uint32_t cpu_stepping() { 1.258 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; 1.259 - return result; 1.260 - } 1.261 - static uint logical_processor_count() { 1.262 - uint result = threads_per_core(); 1.263 - return result; 1.264 - } 1.265 - static uint32_t feature_flags() { 1.266 - uint32_t result = 0; 1.267 - if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 1.268 - result |= CPU_CX8; 1.269 - if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 1.270 - result |= CPU_CMOV; 1.271 - if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && 1.272 - _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) 1.273 - result |= CPU_FXSR; 1.274 - // HT flag is set for multi-core processors also. 1.275 - if (threads_per_core() > 1) 1.276 - result |= CPU_HT; 1.277 - if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && 1.278 - _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) 1.279 - result |= CPU_MMX; 1.280 - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 1.281 - result |= CPU_3DNOW; 1.282 - if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 1.283 - result |= CPU_SSE; 1.284 - if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 1.285 - result |= CPU_SSE2; 1.286 - if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 1.287 - result |= CPU_SSE3; 1.288 - if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 1.289 - result |= CPU_SSSE3; 1.290 - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 1.291 - result |= CPU_SSE4A; 1.292 - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 1.293 - result |= CPU_SSE4_1; 1.294 - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 1.295 - result |= CPU_SSE4_2; 1.296 - return result; 1.297 - } 1.298 - 1.299 - static void get_processor_features(); 1.300 - 1.301 -public: 1.302 - // Offsets for cpuid asm stub 1.303 - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 1.304 - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } 1.305 - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } 1.306 - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 1.307 - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 1.308 - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 1.309 - 1.310 - // Initialization 1.311 - static void initialize(); 1.312 - 1.313 - // Asserts 1.314 - static void assert_is_initialized() { 1.315 - assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); 1.316 - } 1.317 - 1.318 - // 1.319 - // Processor family: 1.320 - // 3 - 386 1.321 - // 4 - 486 1.322 - // 5 - Pentium 1.323 - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 1.324 - // Pentium M, Core Solo, Core Duo, Core2 Duo 1.325 - // family 6 model: 9, 13, 14, 15 1.326 - // 0x0f - Pentium 4, Opteron 1.327 - // 1.328 - // Note: The cpu family should be used to select between 1.329 - // instruction sequences which are valid on all Intel 1.330 - // processors. Use the feature test functions below to 1.331 - // determine whether a particular instruction is supported. 1.332 - // 1.333 - static int cpu_family() { return _cpu;} 1.334 - static bool is_P6() { return cpu_family() >= 6; } 1.335 - 1.336 - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 1.337 - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 1.338 - 1.339 - static uint cores_per_cpu() { 1.340 - uint result = 1; 1.341 - if (is_intel()) { 1.342 - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 1.343 - } else if (is_amd()) { 1.344 - result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 1.345 - } 1.346 - return result; 1.347 - } 1.348 - 1.349 - static uint threads_per_core() { 1.350 - uint result = 1; 1.351 - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 1.352 - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 1.353 - cores_per_cpu(); 1.354 - } 1.355 - return result; 1.356 - } 1.357 - 1.358 - static intx L1_data_cache_line_size() { 1.359 - intx result = 0; 1.360 - if (is_intel()) { 1.361 - result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 1.362 - } else if (is_amd()) { 1.363 - result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 1.364 - } 1.365 - if (result < 32) // not defined ? 1.366 - result = 32; // 32 bytes by default for other x64 1.367 - return result; 1.368 - } 1.369 - 1.370 - // 1.371 - // Feature identification 1.372 - // 1.373 - static bool supports_cpuid() { return _cpuFeatures != 0; } 1.374 - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 1.375 - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 1.376 - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 1.377 - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 1.378 - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 1.379 - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 1.380 - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 1.381 - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 1.382 - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 1.383 - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 1.384 - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 1.385 - // 1.386 - // AMD features 1.387 - // 1.388 - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 1.389 - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } 1.390 - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } 1.391 - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 1.392 - 1.393 - static bool supports_compare_and_exchange() { return true; } 1.394 - 1.395 - static const char* cpu_features() { return _features_str; } 1.396 - 1.397 - static intx allocate_prefetch_distance() { 1.398 - // This method should be called before allocate_prefetch_style(). 1.399 - // 1.400 - // Hardware prefetching (distance/size in bytes): 1.401 - // Pentium 4 - 256 / 128 1.402 - // Opteron - 128 / 64 only when 2 sequential cache lines accessed 1.403 - // Core - 128 / 64 1.404 - // 1.405 - // Software prefetching (distance in bytes / instruction with best score): 1.406 - // Pentium 4 - 512 / prefetchnta 1.407 - // Opteron - 256 / prefetchnta 1.408 - // Core - 256 / prefetchnta 1.409 - // It will be used only when AllocatePrefetchStyle > 0 1.410 - 1.411 - intx count = AllocatePrefetchDistance; 1.412 - if (count < 0) { // default ? 1.413 - if (is_amd()) { // AMD 1.414 - count = 256; // Opteron 1.415 - } else { // Intel 1.416 - if (cpu_family() == 6) { 1.417 - count = 256;// Pentium M, Core, Core2 1.418 - } else { 1.419 - count = 512;// Pentium 4 1.420 - } 1.421 - } 1.422 - } 1.423 - return count; 1.424 - } 1.425 - static intx allocate_prefetch_style() { 1.426 - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 1.427 - // Return 0 if AllocatePrefetchDistance was not defined. 1.428 - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; 1.429 - } 1.430 - 1.431 - // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1.432 - // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1.433 - // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1.434 - // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1.435 - 1.436 - // gc copy/scan is disabled if prefetchw isn't supported, because 1.437 - // Prefetch::write emits an inlined prefetchw on Linux. 1.438 - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1.439 - // The used prefetcht0 instruction works for both amd64 and em64t. 1.440 - static intx prefetch_copy_interval_in_bytes() { 1.441 - intx interval = PrefetchCopyIntervalInBytes; 1.442 - return interval >= 0 ? interval : 576; 1.443 - } 1.444 - static intx prefetch_scan_interval_in_bytes() { 1.445 - intx interval = PrefetchScanIntervalInBytes; 1.446 - return interval >= 0 ? interval : 576; 1.447 - } 1.448 - static intx prefetch_fields_ahead() { 1.449 - intx count = PrefetchFieldsAhead; 1.450 - return count >= 0 ? count : 1; 1.451 - } 1.452 -};