Tue, 10 Mar 2009 08:52:16 -0700
Merge
twisti@1020 | 1 | /* |
twisti@1020 | 2 | * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. |
twisti@1020 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
twisti@1020 | 4 | * |
twisti@1020 | 5 | * This code is free software; you can redistribute it and/or modify it |
twisti@1020 | 6 | * under the terms of the GNU General Public License version 2 only, as |
twisti@1020 | 7 | * published by the Free Software Foundation. |
twisti@1020 | 8 | * |
twisti@1020 | 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
twisti@1020 | 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
twisti@1020 | 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
twisti@1020 | 12 | * version 2 for more details (a copy is included in the LICENSE file that |
twisti@1020 | 13 | * accompanied this code). |
twisti@1020 | 14 | * |
twisti@1020 | 15 | * You should have received a copy of the GNU General Public License version |
twisti@1020 | 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
twisti@1020 | 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
twisti@1020 | 18 | * |
twisti@1020 | 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
twisti@1020 | 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
twisti@1020 | 21 | * have any questions. |
twisti@1020 | 22 | * |
twisti@1020 | 23 | */ |
twisti@1020 | 24 | |
twisti@1020 | 25 | class VM_Version : public Abstract_VM_Version { |
twisti@1020 | 26 | public: |
twisti@1020 | 27 | // cpuid result register layouts. These are all unions of a uint32_t |
twisti@1020 | 28 | // (in case anyone wants access to the register as a whole) and a bitfield. |
twisti@1020 | 29 | |
// EAX as returned by cpuid function 1, viewable either as the raw 32-bit
// register or as its decoded fields.  Fields are declared in ascending bit
// order; the bit numbers in the comments assume the compiler allocates
// bitfields starting at the least significant bit (implementation-defined,
// TODO confirm for any new toolchain).
union StdCpuid1Eax {
  uint32_t value;                // whole register
  struct {
    uint32_t stepping   : 4;     // bits  0-3
    uint32_t model      : 4;     // bits  4-7
    uint32_t family     : 4;     // bits  8-11
    uint32_t proc_type  : 2;     // bits 12-13
    uint32_t            : 2;     // bits 14-15, not decoded
    uint32_t ext_model  : 4;     // bits 16-19
    uint32_t ext_family : 8;     // bits 20-27
    uint32_t            : 4;     // bits 28-31, not decoded
  } bits;
};
twisti@1020 | 43 | |
// EBX as returned by cpuid function 1, split into four byte-wide fields.
// The type as a whole was originally marked "example, unused"; note however
// that threads_per_core() in this class reads bits.threads_per_cpu.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union StdCpuid1Ebx {
  uint32_t value;                   // whole register
  struct {
    uint32_t brand_id        : 8;   // bits  0-7
    uint32_t clflush_size    : 8;   // bits  8-15
    uint32_t threads_per_cpu : 8;   // bits 16-23
    uint32_t apic_id         : 8;   // bits 24-31
  } bits;
};
twisti@1020 | 53 | |
// ECX as returned by cpuid function 1: a set of single-bit feature flags.
// Unnamed members are padding for bits this class does not decode.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union StdCpuid1Ecx {
  uint32_t value;               // whole register
  struct {
    uint32_t sse3      : 1;     // bit  0
    uint32_t           : 2;     // bits 1-2
    uint32_t monitor   : 1;     // bit  3
    uint32_t           : 1;     // bit  4
    uint32_t vmx       : 1;     // bit  5
    uint32_t           : 1;     // bit  6
    uint32_t est       : 1;     // bit  7
    uint32_t           : 1;     // bit  8
    uint32_t ssse3     : 1;     // bit  9
    uint32_t cid       : 1;     // bit 10
    uint32_t           : 2;     // bits 11-12
    uint32_t cmpxchg16 : 1;     // bit 13
    uint32_t           : 4;     // bits 14-17
    uint32_t dca       : 1;     // bit 18
    uint32_t sse4_1    : 1;     // bit 19
    uint32_t sse4_2    : 1;     // bit 20
    uint32_t           : 11;    // bits 21-31
  } bits;
};
twisti@1020 | 76 | |
// EDX as returned by cpuid function 1: a set of single-bit feature flags.
// Unnamed members are padding for bits this class does not decode.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union StdCpuid1Edx {
  uint32_t value;              // whole register
  struct {
    uint32_t          : 4;     // bits 0-3
    uint32_t tsc      : 1;     // bit  4
    uint32_t          : 3;     // bits 5-7
    uint32_t cmpxchg8 : 1;     // bit  8
    uint32_t          : 6;     // bits 9-14
    uint32_t cmov     : 1;     // bit 15
    uint32_t          : 7;     // bits 16-22
    uint32_t mmx      : 1;     // bit 23
    uint32_t fxsr     : 1;     // bit 24
    uint32_t sse      : 1;     // bit 25
    uint32_t sse2     : 1;     // bit 26
    uint32_t          : 1;     // bit 27
    uint32_t ht       : 1;     // bit 28
    uint32_t          : 3;     // bits 29-31
  } bits;
};
twisti@1020 | 96 | |
// EAX as returned by cpuid function 4 (deterministic cache parameters).
// cores_per_cpu() in this class adds 1 to bits.cores_per_cpu, so the field
// holds the core count minus one.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union DcpCpuid4Eax {
  uint32_t value;                 // whole register
  struct {
    uint32_t cache_type    : 5;   // bits  0-4
    uint32_t               : 21;  // bits  5-25, not decoded
    uint32_t cores_per_cpu : 6;   // bits 26-31
  } bits;
};
twisti@1020 | 105 | |
// EBX as returned by cpuid function 4 (deterministic cache parameters).
// L1_data_cache_line_size() in this class adds 1 to bits.L1_line_size, so
// the field holds the line size minus one.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union DcpCpuid4Ebx {
  uint32_t value;                 // whole register
  struct {
    uint32_t L1_line_size  : 12;  // bits  0-11
    uint32_t partitions    : 10;  // bits 12-21
    uint32_t associativity : 10;  // bits 22-31
  } bits;
};
twisti@1020 | 114 | |
// ECX as returned by cpuid function 0x80000001 (extended feature flags).
//
// The reserved gap after CmpLegacy is three bits wide (bits 2-4), which
// places abm at bit 5, sse4a at bit 6, misalignsse at bit 7 and prefetchw at
// bit 8, matching AMD's published layout for this register.  A four-bit gap
// would shift every one of those flags up by one position, so that e.g.
// sse4a would really be reporting the MisAlignSse bit.
//
// Bit numbers assume the compiler allocates bitfields starting at the least
// significant bit (implementation-defined, TODO confirm for any new
// toolchain).
union ExtCpuid1Ecx {
  uint32_t value;                 // whole register
  struct {
    uint32_t LahfSahf    : 1,     // bit 0
             CmpLegacy   : 1,     // bit 1
                         : 3,     // bits 2-4, not decoded
             abm         : 1,     // bit 5
             sse4a       : 1,     // bit 6
             misalignsse : 1,     // bit 7
             prefetchw   : 1,     // bit 8
                         : 23;    // bits 9-31, not decoded
  } bits;
};
twisti@1020 | 128 | |
// EDX as returned by cpuid function 0x80000001 (extended feature flags).
// Unnamed members are padding for bits this class does not decode.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union ExtCpuid1Edx {
  uint32_t value;               // whole register
  struct {
    uint32_t           : 22;    // bits 0-21
    uint32_t mmx_amd   : 1;     // bit 22
    uint32_t mmx       : 1;     // bit 23
    uint32_t fxsr      : 1;     // bit 24
    uint32_t           : 4;     // bits 25-28
    uint32_t long_mode : 1;     // bit 29
    uint32_t tdnow2    : 1;     // bit 30
    uint32_t tdnow     : 1;     // bit 31
  } bits;
};
twisti@1020 | 142 | |
// Shape shared by the ECX and EDX results of cpuid function 0x80000005:
// four byte-wide L1 cache descriptors.
// Bit numbers assume least-significant-bit-first bitfield allocation.
union ExtCpuid5Ex {
  uint32_t value;                // whole register
  struct {
    uint32_t L1_line_size : 8;   // bits  0-7
    uint32_t L1_tag_lines : 8;   // bits  8-15
    uint32_t L1_assoc     : 8;   // bits 16-23
    uint32_t L1_size      : 8;   // bits 24-31
  } bits;
};
twisti@1020 | 152 | |
// ECX as returned by cpuid function 0x80000008.  Only the low byte is
// decoded; cores_per_cpu() in this class adds 1 to it, so the field holds
// the core count minus one.
union ExtCpuid8Ecx {
  uint32_t value;                 // whole register
  struct {
    uint32_t cores_per_cpu : 8;   // low byte
    uint32_t               : 24;  // remaining bits, not decoded
  } bits;
};
twisti@1020 | 160 | |
twisti@1020 | 161 | protected: |
twisti@1020 | 162 | static int _cpu; |
twisti@1020 | 163 | static int _model; |
twisti@1020 | 164 | static int _stepping; |
twisti@1020 | 165 | static int _cpuFeatures; // features returned by the "cpuid" instruction |
twisti@1020 | 166 | // 0 if this instruction is not available |
twisti@1020 | 167 | static const char* _features_str; |
twisti@1020 | 168 | |
// Bit masks combined into _cpuFeatures / feature_flags().  Each constant is
// a distinct single bit; the comments note which cpuid leaf the flag is
// derived from.
enum {
  CPU_CX8    = 0x0001,  // cpuid 1 (EDX) - this and the following EDX flags
  CPU_CMOV   = 0x0002,
  CPU_FXSR   = 0x0004,
  CPU_HT     = 0x0008,
  CPU_MMX    = 0x0010,
  CPU_3DNOW  = 0x0020,  // 3DNow comes from cpuid 0x80000001 (EDX)
  CPU_SSE    = 0x0040,
  CPU_SSE2   = 0x0080,
  CPU_SSE3   = 0x0100,  // SSE3 comes from cpuid 1 (ECX)
  CPU_SSSE3  = 0x0200,
  CPU_SSE4A  = 0x0400,
  CPU_SSE4_1 = 0x0800,
  CPU_SSE4_2 = 0x1000
} cpuFeatureFlags;
twisti@1020 | 184 | |
twisti@1020 | 185 | // cpuid information block. All info derived from executing cpuid with |
twisti@1020 | 186 | // various function numbers is stored here. Intel and AMD info is |
twisti@1020 | 187 | // merged in this block: accessor methods disentangle it. |
twisti@1020 | 188 | // |
twisti@1020 | 189 | // The info block is laid out in subblocks of 4 dwords corresponding to |
twisti@1020 | 190 | // eax, ebx, ecx and edx, whether or not they contain anything useful. |
twisti@1020 | 191 | struct CpuidInfo { |
twisti@1020 | 192 | // cpuid function 0 |
twisti@1020 | 193 | uint32_t std_max_function; |
twisti@1020 | 194 | uint32_t std_vendor_name_0; |
twisti@1020 | 195 | uint32_t std_vendor_name_1; |
twisti@1020 | 196 | uint32_t std_vendor_name_2; |
twisti@1020 | 197 | |
twisti@1020 | 198 | // cpuid function 1 |
twisti@1020 | 199 | StdCpuid1Eax std_cpuid1_eax; |
twisti@1020 | 200 | StdCpuid1Ebx std_cpuid1_ebx; |
twisti@1020 | 201 | StdCpuid1Ecx std_cpuid1_ecx; |
twisti@1020 | 202 | StdCpuid1Edx std_cpuid1_edx; |
twisti@1020 | 203 | |
twisti@1020 | 204 | // cpuid function 4 (deterministic cache parameters) |
twisti@1020 | 205 | DcpCpuid4Eax dcp_cpuid4_eax; |
twisti@1020 | 206 | DcpCpuid4Ebx dcp_cpuid4_ebx; |
twisti@1020 | 207 | uint32_t dcp_cpuid4_ecx; // unused currently |
twisti@1020 | 208 | uint32_t dcp_cpuid4_edx; // unused currently |
twisti@1020 | 209 | |
twisti@1020 | 210 | // cpuid function 0x80000000 // example, unused |
twisti@1020 | 211 | uint32_t ext_max_function; |
twisti@1020 | 212 | uint32_t ext_vendor_name_0; |
twisti@1020 | 213 | uint32_t ext_vendor_name_1; |
twisti@1020 | 214 | uint32_t ext_vendor_name_2; |
twisti@1020 | 215 | |
twisti@1020 | 216 | // cpuid function 0x80000001 |
twisti@1020 | 217 | uint32_t ext_cpuid1_eax; // reserved |
twisti@1020 | 218 | uint32_t ext_cpuid1_ebx; // reserved |
twisti@1020 | 219 | ExtCpuid1Ecx ext_cpuid1_ecx; |
twisti@1020 | 220 | ExtCpuid1Edx ext_cpuid1_edx; |
twisti@1020 | 221 | |
twisti@1020 | 222 | // cpuid functions 0x80000002 thru 0x80000004: example, unused |
twisti@1020 | 223 | uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; |
twisti@1020 | 224 | uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; |
twisti@1020 | 225 | uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; |
twisti@1020 | 226 | |
twisti@1020 | 227 | // cpuid function 0x80000005 //AMD L1, Intel reserved |
twisti@1020 | 228 | uint32_t ext_cpuid5_eax; // unused currently |
twisti@1020 | 229 | uint32_t ext_cpuid5_ebx; // reserved |
twisti@1020 | 230 | ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) |
twisti@1020 | 231 | ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) |
twisti@1020 | 232 | |
twisti@1020 | 233 | // cpuid function 0x80000008 |
twisti@1020 | 234 | uint32_t ext_cpuid8_eax; // unused currently |
twisti@1020 | 235 | uint32_t ext_cpuid8_ebx; // reserved |
twisti@1020 | 236 | ExtCpuid8Ecx ext_cpuid8_ecx; |
twisti@1020 | 237 | uint32_t ext_cpuid8_edx; // reserved |
twisti@1020 | 238 | }; |
twisti@1020 | 239 | |
twisti@1020 | 240 | // The actual cpuid info block |
twisti@1020 | 241 | static CpuidInfo _cpuid_info; |
twisti@1020 | 242 | |
twisti@1020 | 243 | // Extractors and predicates |
twisti@1020 | 244 | static uint32_t extended_cpu_family() { |
twisti@1020 | 245 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; |
twisti@1020 | 246 | result += _cpuid_info.std_cpuid1_eax.bits.ext_family; |
twisti@1020 | 247 | return result; |
twisti@1020 | 248 | } |
twisti@1020 | 249 | static uint32_t extended_cpu_model() { |
twisti@1020 | 250 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; |
twisti@1020 | 251 | result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; |
twisti@1020 | 252 | return result; |
twisti@1020 | 253 | } |
twisti@1020 | 254 | static uint32_t cpu_stepping() { |
twisti@1020 | 255 | uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; |
twisti@1020 | 256 | return result; |
twisti@1020 | 257 | } |
twisti@1020 | 258 | static uint logical_processor_count() { |
twisti@1020 | 259 | uint result = threads_per_core(); |
twisti@1020 | 260 | return result; |
twisti@1020 | 261 | } |
twisti@1020 | 262 | static uint32_t feature_flags() { |
twisti@1020 | 263 | uint32_t result = 0; |
twisti@1020 | 264 | if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) |
twisti@1020 | 265 | result |= CPU_CX8; |
twisti@1020 | 266 | if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) |
twisti@1020 | 267 | result |= CPU_CMOV; |
twisti@1020 | 268 | if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && |
twisti@1020 | 269 | _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) |
twisti@1020 | 270 | result |= CPU_FXSR; |
twisti@1020 | 271 | // HT flag is set for multi-core processors also. |
twisti@1020 | 272 | if (threads_per_core() > 1) |
twisti@1020 | 273 | result |= CPU_HT; |
twisti@1020 | 274 | if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && |
twisti@1020 | 275 | _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) |
twisti@1020 | 276 | result |= CPU_MMX; |
twisti@1020 | 277 | if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) |
twisti@1020 | 278 | result |= CPU_3DNOW; |
twisti@1020 | 279 | if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) |
twisti@1020 | 280 | result |= CPU_SSE; |
twisti@1020 | 281 | if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) |
twisti@1020 | 282 | result |= CPU_SSE2; |
twisti@1020 | 283 | if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) |
twisti@1020 | 284 | result |= CPU_SSE3; |
twisti@1020 | 285 | if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) |
twisti@1020 | 286 | result |= CPU_SSSE3; |
twisti@1020 | 287 | if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) |
twisti@1020 | 288 | result |= CPU_SSE4A; |
twisti@1020 | 289 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) |
twisti@1020 | 290 | result |= CPU_SSE4_1; |
twisti@1020 | 291 | if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) |
twisti@1020 | 292 | result |= CPU_SSE4_2; |
twisti@1020 | 293 | return result; |
twisti@1020 | 294 | } |
twisti@1020 | 295 | |
twisti@1020 | 296 | static void get_processor_features(); |
twisti@1020 | 297 | |
twisti@1020 | 298 | public: |
twisti@1020 | 299 | // Offsets for cpuid asm stub |
twisti@1020 | 300 | static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } |
twisti@1020 | 301 | static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } |
twisti@1020 | 302 | static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } |
twisti@1020 | 303 | static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } |
twisti@1020 | 304 | static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } |
twisti@1020 | 305 | static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } |
twisti@1020 | 306 | |
twisti@1020 | 307 | // Initialization |
twisti@1020 | 308 | static void initialize(); |
twisti@1020 | 309 | |
twisti@1020 | 310 | // Asserts |
twisti@1020 | 311 | static void assert_is_initialized() { |
twisti@1020 | 312 | assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); |
twisti@1020 | 313 | } |
twisti@1020 | 314 | |
twisti@1020 | 315 | // |
twisti@1020 | 316 | // Processor family: |
twisti@1020 | 317 | // 3 - 386 |
twisti@1020 | 318 | // 4 - 486 |
twisti@1020 | 319 | // 5 - Pentium |
twisti@1020 | 320 | // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, |
twisti@1020 | 321 | // Pentium M, Core Solo, Core Duo, Core2 Duo |
twisti@1020 | 322 | // family 6 model: 9, 13, 14, 15 |
twisti@1020 | 323 | // 0x0f - Pentium 4, Opteron |
twisti@1020 | 324 | // |
twisti@1020 | 325 | // Note: The cpu family should be used to select between |
twisti@1020 | 326 | // instruction sequences which are valid on all Intel |
twisti@1020 | 327 | // processors. Use the feature test functions below to |
twisti@1020 | 328 | // determine whether a particular instruction is supported. |
twisti@1020 | 329 | // |
twisti@1020 | 330 | static int cpu_family() { return _cpu;} |
twisti@1020 | 331 | static bool is_P6() { return cpu_family() >= 6; } |
twisti@1020 | 332 | |
twisti@1020 | 333 | static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' |
twisti@1020 | 334 | static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' |
twisti@1020 | 335 | |
twisti@1020 | 336 | static uint cores_per_cpu() { |
twisti@1020 | 337 | uint result = 1; |
twisti@1020 | 338 | if (is_intel()) { |
twisti@1020 | 339 | result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); |
twisti@1020 | 340 | } else if (is_amd()) { |
twisti@1020 | 341 | result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); |
twisti@1020 | 342 | } |
twisti@1020 | 343 | return result; |
twisti@1020 | 344 | } |
twisti@1020 | 345 | |
twisti@1020 | 346 | static uint threads_per_core() { |
twisti@1020 | 347 | uint result = 1; |
twisti@1020 | 348 | if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { |
twisti@1020 | 349 | result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / |
twisti@1020 | 350 | cores_per_cpu(); |
twisti@1020 | 351 | } |
twisti@1020 | 352 | return result; |
twisti@1020 | 353 | } |
twisti@1020 | 354 | |
twisti@1020 | 355 | static intx L1_data_cache_line_size() { |
twisti@1020 | 356 | intx result = 0; |
twisti@1020 | 357 | if (is_intel()) { |
twisti@1020 | 358 | result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); |
twisti@1020 | 359 | } else if (is_amd()) { |
twisti@1020 | 360 | result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; |
twisti@1020 | 361 | } |
twisti@1020 | 362 | if (result < 32) // not defined ? |
twisti@1020 | 363 | result = 32; // 32 bytes by default on x86 and other x64 |
twisti@1020 | 364 | return result; |
twisti@1020 | 365 | } |
twisti@1020 | 366 | |
twisti@1020 | 367 | // |
twisti@1020 | 368 | // Feature identification |
twisti@1020 | 369 | // |
twisti@1020 | 370 | static bool supports_cpuid() { return _cpuFeatures != 0; } |
twisti@1020 | 371 | static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } |
twisti@1020 | 372 | static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } |
twisti@1020 | 373 | static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } |
twisti@1020 | 374 | static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } |
twisti@1020 | 375 | static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } |
twisti@1020 | 376 | static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } |
twisti@1020 | 377 | static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } |
twisti@1020 | 378 | static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } |
twisti@1020 | 379 | static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } |
twisti@1020 | 380 | static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } |
twisti@1020 | 381 | static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } |
twisti@1020 | 382 | // |
twisti@1020 | 383 | // AMD features |
twisti@1020 | 384 | // |
twisti@1020 | 385 | static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } |
twisti@1020 | 386 | static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } |
twisti@1020 | 387 | static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } |
twisti@1020 | 388 | static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } |
twisti@1020 | 389 | |
twisti@1020 | 390 | static bool supports_compare_and_exchange() { return true; } |
twisti@1020 | 391 | |
twisti@1020 | 392 | static const char* cpu_features() { return _features_str; } |
twisti@1020 | 393 | |
twisti@1020 | 394 | static intx allocate_prefetch_distance() { |
twisti@1020 | 395 | // This method should be called before allocate_prefetch_style(). |
twisti@1020 | 396 | // |
twisti@1020 | 397 | // Hardware prefetching (distance/size in bytes): |
twisti@1020 | 398 | // Pentium 3 - 64 / 32 |
twisti@1020 | 399 | // Pentium 4 - 256 / 128 |
twisti@1020 | 400 | // Athlon - 64 / 32 ???? |
twisti@1020 | 401 | // Opteron - 128 / 64 only when 2 sequential cache lines accessed |
twisti@1020 | 402 | // Core - 128 / 64 |
twisti@1020 | 403 | // |
twisti@1020 | 404 | // Software prefetching (distance in bytes / instruction with best score): |
twisti@1020 | 405 | // Pentium 3 - 128 / prefetchnta |
twisti@1020 | 406 | // Pentium 4 - 512 / prefetchnta |
twisti@1020 | 407 | // Athlon - 128 / prefetchnta |
twisti@1020 | 408 | // Opteron - 256 / prefetchnta |
twisti@1020 | 409 | // Core - 256 / prefetchnta |
twisti@1020 | 410 | // It will be used only when AllocatePrefetchStyle > 0 |
twisti@1020 | 411 | |
twisti@1020 | 412 | intx count = AllocatePrefetchDistance; |
twisti@1020 | 413 | if (count < 0) { // default ? |
twisti@1020 | 414 | if (is_amd()) { // AMD |
twisti@1020 | 415 | if (supports_sse2()) |
twisti@1020 | 416 | count = 256; // Opteron |
twisti@1020 | 417 | else |
twisti@1020 | 418 | count = 128; // Athlon |
twisti@1020 | 419 | } else { // Intel |
twisti@1020 | 420 | if (supports_sse2()) |
twisti@1020 | 421 | if (cpu_family() == 6) { |
twisti@1020 | 422 | count = 256; // Pentium M, Core, Core2 |
twisti@1020 | 423 | } else { |
twisti@1020 | 424 | count = 512; // Pentium 4 |
twisti@1020 | 425 | } |
twisti@1020 | 426 | else |
twisti@1020 | 427 | count = 128; // Pentium 3 (and all other old CPUs) |
twisti@1020 | 428 | } |
twisti@1020 | 429 | } |
twisti@1020 | 430 | return count; |
twisti@1020 | 431 | } |
twisti@1020 | 432 | static intx allocate_prefetch_style() { |
twisti@1020 | 433 | assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); |
twisti@1020 | 434 | // Return 0 if AllocatePrefetchDistance was not defined. |
twisti@1020 | 435 | return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; |
twisti@1020 | 436 | } |
twisti@1020 | 437 | |
twisti@1020 | 438 | // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from |
twisti@1020 | 439 | // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. |
twisti@1020 | 440 | // Tested intervals from 128 to 2048 in increments of 64 == one cache line. |
twisti@1020 | 441 | // 256 bytes (4 dcache lines) was the nearest runner-up to 576. |
twisti@1020 | 442 | |
twisti@1020 | 443 | // gc copy/scan is disabled if prefetchw isn't supported, because |
twisti@1020 | 444 | // Prefetch::write emits an inlined prefetchw on Linux. |
twisti@1020 | 445 | // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. |
twisti@1020 | 446 | // The used prefetcht0 instruction works for both amd64 and em64t. |
twisti@1020 | 447 | static intx prefetch_copy_interval_in_bytes() { |
twisti@1020 | 448 | intx interval = PrefetchCopyIntervalInBytes; |
twisti@1020 | 449 | return interval >= 0 ? interval : 576; |
twisti@1020 | 450 | } |
twisti@1020 | 451 | static intx prefetch_scan_interval_in_bytes() { |
twisti@1020 | 452 | intx interval = PrefetchScanIntervalInBytes; |
twisti@1020 | 453 | return interval >= 0 ? interval : 576; |
twisti@1020 | 454 | } |
twisti@1020 | 455 | static intx prefetch_fields_ahead() { |
twisti@1020 | 456 | intx count = PrefetchFieldsAhead; |
twisti@1020 | 457 | return count >= 0 ? count : 1; |
twisti@1020 | 458 | } |
twisti@1020 | 459 | }; |