Mon, 23 Feb 2009 12:02:30 -0800
6808589: Merge vm_version_x86_{32,64}.{cpp,hpp}
Summary: vm_version_x86_{32,64}.{cpp,hpp} contain a great deal of duplicated code. Merging the shared parts into vm_version_x86.{cpp,hpp} improves maintainability.
Reviewed-by: kvn, never
src/cpu/x86/vm/vm_version_x86.cpp | file | annotate | diff | comparison | revisions | |
src/cpu/x86/vm/vm_version_x86.hpp | file | annotate | diff | comparison | revisions | |
src/cpu/x86/vm/vm_version_x86_32.cpp | file | annotate | diff | comparison | revisions | |
src/cpu/x86/vm/vm_version_x86_32.hpp | file | annotate | diff | comparison | revisions | |
src/cpu/x86/vm/vm_version_x86_64.cpp | file | annotate | diff | comparison | revisions | |
src/cpu/x86/vm/vm_version_x86_64.hpp | file | annotate | diff | comparison | revisions | |
src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp | file | annotate | diff | comparison | revisions | |
src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp | file | annotate | diff | comparison | revisions | |
src/share/vm/includeDB_core | file | annotate | diff | comparison | revisions |
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Mon Feb 23 12:02:30 2009 -0800 1.3 @@ -0,0 +1,514 @@ 1.4 +/* 1.5 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 
1.25 + * 1.26 + */ 1.27 + 1.28 +# include "incls/_precompiled.incl" 1.29 +# include "incls/_vm_version_x86.cpp.incl" 1.30 + 1.31 + 1.32 +int VM_Version::_cpu; 1.33 +int VM_Version::_model; 1.34 +int VM_Version::_stepping; 1.35 +int VM_Version::_cpuFeatures; 1.36 +const char* VM_Version::_features_str = ""; 1.37 +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 1.38 + 1.39 +static BufferBlob* stub_blob; 1.40 +static const int stub_size = 300; 1.41 + 1.42 +extern "C" { 1.43 + typedef void (*getPsrInfo_stub_t)(void*); 1.44 +} 1.45 +static getPsrInfo_stub_t getPsrInfo_stub = NULL; 1.46 + 1.47 + 1.48 +class VM_Version_StubGenerator: public StubCodeGenerator { 1.49 + public: 1.50 + 1.51 + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 1.52 + 1.53 + address generate_getPsrInfo() { 1.54 + // Flags to test CPU type. 1.55 + const uint32_t EFL_AC = 0x40000; 1.56 + const uint32_t EFL_ID = 0x200000; 1.57 + // Values for when we don't have a CPUID instruction. 1.58 + const int CPU_FAMILY_SHIFT = 8; 1.59 + const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 1.60 + const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 1.61 + 1.62 + Label detect_486, cpu486, detect_586, std_cpuid1; 1.63 + Label ext_cpuid1, ext_cpuid5, done; 1.64 + 1.65 + StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 1.66 +# define __ _masm-> 1.67 + 1.68 + address start = __ pc(); 1.69 + 1.70 + // 1.71 + // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 1.72 + // 1.73 + // LP64: rcx and rdx are first and second argument registers on windows 1.74 + 1.75 + __ push(rbp); 1.76 +#ifdef _LP64 1.77 + __ mov(rbp, c_rarg0); // cpuid_info address 1.78 +#else 1.79 + __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 1.80 +#endif 1.81 + __ push(rbx); 1.82 + __ push(rsi); 1.83 + __ pushf(); // preserve rbx, and flags 1.84 + __ pop(rax); 1.85 + __ push(rax); 1.86 + __ mov(rcx, rax); 1.87 + // 1.88 + // if we are unable to change the AC flag, we have a 386 1.89 + // 1.90 + __ 
xorl(rax, EFL_AC); 1.91 + __ push(rax); 1.92 + __ popf(); 1.93 + __ pushf(); 1.94 + __ pop(rax); 1.95 + __ cmpptr(rax, rcx); 1.96 + __ jccb(Assembler::notEqual, detect_486); 1.97 + 1.98 + __ movl(rax, CPU_FAMILY_386); 1.99 + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 1.100 + __ jmp(done); 1.101 + 1.102 + // 1.103 + // If we are unable to change the ID flag, we have a 486 which does 1.104 + // not support the "cpuid" instruction. 1.105 + // 1.106 + __ bind(detect_486); 1.107 + __ mov(rax, rcx); 1.108 + __ xorl(rax, EFL_ID); 1.109 + __ push(rax); 1.110 + __ popf(); 1.111 + __ pushf(); 1.112 + __ pop(rax); 1.113 + __ cmpptr(rcx, rax); 1.114 + __ jccb(Assembler::notEqual, detect_586); 1.115 + 1.116 + __ bind(cpu486); 1.117 + __ movl(rax, CPU_FAMILY_486); 1.118 + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 1.119 + __ jmp(done); 1.120 + 1.121 + // 1.122 + // At this point, we have a chip which supports the "cpuid" instruction 1.123 + // 1.124 + __ bind(detect_586); 1.125 + __ xorl(rax, rax); 1.126 + __ cpuid(); 1.127 + __ orl(rax, rax); 1.128 + __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 1.129 + // value of at least 1, we give up and 1.130 + // assume a 486 1.131 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 1.132 + __ movl(Address(rsi, 0), rax); 1.133 + __ movl(Address(rsi, 4), rbx); 1.134 + __ movl(Address(rsi, 8), rcx); 1.135 + __ movl(Address(rsi,12), rdx); 1.136 + 1.137 + __ cmpl(rax, 3); // Is cpuid(0x4) supported? 
1.138 + __ jccb(Assembler::belowEqual, std_cpuid1); 1.139 + 1.140 + // 1.141 + // cpuid(0x4) Deterministic cache params 1.142 + // 1.143 + __ movl(rax, 4); 1.144 + __ xorl(rcx, rcx); // L1 cache 1.145 + __ cpuid(); 1.146 + __ push(rax); 1.147 + __ andl(rax, 0x1f); // Determine if valid cache parameters used 1.148 + __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 1.149 + __ pop(rax); 1.150 + __ jccb(Assembler::equal, std_cpuid1); 1.151 + 1.152 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 1.153 + __ movl(Address(rsi, 0), rax); 1.154 + __ movl(Address(rsi, 4), rbx); 1.155 + __ movl(Address(rsi, 8), rcx); 1.156 + __ movl(Address(rsi,12), rdx); 1.157 + 1.158 + // 1.159 + // Standard cpuid(0x1) 1.160 + // 1.161 + __ bind(std_cpuid1); 1.162 + __ movl(rax, 1); 1.163 + __ cpuid(); 1.164 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 1.165 + __ movl(Address(rsi, 0), rax); 1.166 + __ movl(Address(rsi, 4), rbx); 1.167 + __ movl(Address(rsi, 8), rcx); 1.168 + __ movl(Address(rsi,12), rdx); 1.169 + 1.170 + __ movl(rax, 0x80000000); 1.171 + __ cpuid(); 1.172 + __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 1.173 + __ jcc(Assembler::belowEqual, done); 1.174 + __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 1.175 + __ jccb(Assembler::belowEqual, ext_cpuid1); 1.176 + __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 
1.177 + __ jccb(Assembler::belowEqual, ext_cpuid5); 1.178 + // 1.179 + // Extended cpuid(0x80000008) 1.180 + // 1.181 + __ movl(rax, 0x80000008); 1.182 + __ cpuid(); 1.183 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 1.184 + __ movl(Address(rsi, 0), rax); 1.185 + __ movl(Address(rsi, 4), rbx); 1.186 + __ movl(Address(rsi, 8), rcx); 1.187 + __ movl(Address(rsi,12), rdx); 1.188 + 1.189 + // 1.190 + // Extended cpuid(0x80000005) 1.191 + // 1.192 + __ bind(ext_cpuid5); 1.193 + __ movl(rax, 0x80000005); 1.194 + __ cpuid(); 1.195 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 1.196 + __ movl(Address(rsi, 0), rax); 1.197 + __ movl(Address(rsi, 4), rbx); 1.198 + __ movl(Address(rsi, 8), rcx); 1.199 + __ movl(Address(rsi,12), rdx); 1.200 + 1.201 + // 1.202 + // Extended cpuid(0x80000001) 1.203 + // 1.204 + __ bind(ext_cpuid1); 1.205 + __ movl(rax, 0x80000001); 1.206 + __ cpuid(); 1.207 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 1.208 + __ movl(Address(rsi, 0), rax); 1.209 + __ movl(Address(rsi, 4), rbx); 1.210 + __ movl(Address(rsi, 8), rcx); 1.211 + __ movl(Address(rsi,12), rdx); 1.212 + 1.213 + // 1.214 + // return 1.215 + // 1.216 + __ bind(done); 1.217 + __ popf(); 1.218 + __ pop(rsi); 1.219 + __ pop(rbx); 1.220 + __ pop(rbp); 1.221 + __ ret(0); 1.222 + 1.223 +# undef __ 1.224 + 1.225 + return start; 1.226 + }; 1.227 +}; 1.228 + 1.229 + 1.230 +void VM_Version::get_processor_features() { 1.231 + 1.232 + _cpu = 4; // 486 by default 1.233 + _model = 0; 1.234 + _stepping = 0; 1.235 + _cpuFeatures = 0; 1.236 + _logical_processors_per_package = 1; 1.237 + 1.238 + if (!Use486InstrsOnly) { 1.239 + // Get raw processor info 1.240 + getPsrInfo_stub(&_cpuid_info); 1.241 + assert_is_initialized(); 1.242 + _cpu = extended_cpu_family(); 1.243 + _model = extended_cpu_model(); 1.244 + _stepping = cpu_stepping(); 1.245 + 1.246 + if (cpu_family() > 4) { // it supports CPUID 1.247 + _cpuFeatures = 
feature_flags(); 1.248 + // Logical processors are only available on P4s and above, 1.249 + // and only if hyperthreading is available. 1.250 + _logical_processors_per_package = logical_processor_count(); 1.251 + } 1.252 + } 1.253 + 1.254 + _supports_cx8 = supports_cmpxchg8(); 1.255 + 1.256 +#ifdef _LP64 1.257 + // OS should support SSE for x64 and hardware should support at least SSE2. 1.258 + if (!VM_Version::supports_sse2()) { 1.259 + vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 1.260 + } 1.261 +#endif 1.262 + 1.263 + // If the OS doesn't support SSE, we can't use this feature even if the HW does 1.264 + if (!os::supports_sse()) 1.265 + _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); 1.266 + 1.267 + if (UseSSE < 4) { 1.268 + _cpuFeatures &= ~CPU_SSE4_1; 1.269 + _cpuFeatures &= ~CPU_SSE4_2; 1.270 + } 1.271 + 1.272 + if (UseSSE < 3) { 1.273 + _cpuFeatures &= ~CPU_SSE3; 1.274 + _cpuFeatures &= ~CPU_SSSE3; 1.275 + _cpuFeatures &= ~CPU_SSE4A; 1.276 + } 1.277 + 1.278 + if (UseSSE < 2) 1.279 + _cpuFeatures &= ~CPU_SSE2; 1.280 + 1.281 + if (UseSSE < 1) 1.282 + _cpuFeatures &= ~CPU_SSE; 1.283 + 1.284 + if (logical_processors_per_package() == 1) { 1.285 + // HT processor could be installed on a system which doesn't support HT. 1.286 + _cpuFeatures &= ~CPU_HT; 1.287 + } 1.288 + 1.289 + char buf[256]; 1.290 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.291 + cores_per_cpu(), threads_per_core(), 1.292 + cpu_family(), _model, _stepping, 1.293 + (supports_cmov() ? ", cmov" : ""), 1.294 + (supports_cmpxchg8() ? ", cx8" : ""), 1.295 + (supports_fxsr() ? ", fxsr" : ""), 1.296 + (supports_mmx() ? ", mmx" : ""), 1.297 + (supports_sse() ? ", sse" : ""), 1.298 + (supports_sse2() ? ", sse2" : ""), 1.299 + (supports_sse3() ? ", sse3" : ""), 1.300 + (supports_ssse3()? ", ssse3": ""), 1.301 + (supports_sse4_1() ? 
", sse4.1" : ""), 1.302 + (supports_sse4_2() ? ", sse4.2" : ""), 1.303 + (supports_mmx_ext() ? ", mmxext" : ""), 1.304 + (supports_3dnow() ? ", 3dnow" : ""), 1.305 + (supports_3dnow2() ? ", 3dnowext" : ""), 1.306 + (supports_sse4a() ? ", sse4a": ""), 1.307 + (supports_ht() ? ", ht": "")); 1.308 + _features_str = strdup(buf); 1.309 + 1.310 + // UseSSE is set to the smaller of what hardware supports and what 1.311 + // the command line requires. I.e., you cannot set UseSSE to 2 on 1.312 + // older Pentiums which do not support it. 1.313 + if( UseSSE > 4 ) UseSSE=4; 1.314 + if( UseSSE < 0 ) UseSSE=0; 1.315 + if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 1.316 + UseSSE = MIN2((intx)3,UseSSE); 1.317 + if( !supports_sse3() ) // Drop to 2 if no SSE3 support 1.318 + UseSSE = MIN2((intx)2,UseSSE); 1.319 + if( !supports_sse2() ) // Drop to 1 if no SSE2 support 1.320 + UseSSE = MIN2((intx)1,UseSSE); 1.321 + if( !supports_sse () ) // Drop to 0 if no SSE support 1.322 + UseSSE = 0; 1.323 + 1.324 + // On new cpus instructions which update whole XMM register should be used 1.325 + // to prevent partial register stall due to dependencies on high half. 1.326 + // 1.327 + // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1.328 + // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1.329 + // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1.330 + // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1.331 + 1.332 + if( is_amd() ) { // AMD cpus specific settings 1.333 + if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 1.334 + // Use it on new AMD cpus starting from Opteron. 1.335 + UseAddressNop = true; 1.336 + } 1.337 + if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { 1.338 + // Use it on new AMD cpus starting from Opteron. 
1.339 + UseNewLongLShift = true; 1.340 + } 1.341 + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.342 + if( supports_sse4a() ) { 1.343 + UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1.344 + } else { 1.345 + UseXmmLoadAndClearUpper = false; 1.346 + } 1.347 + } 1.348 + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.349 + if( supports_sse4a() ) { 1.350 + UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1.351 + } else { 1.352 + UseXmmRegToRegMoveAll = false; 1.353 + } 1.354 + } 1.355 + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 1.356 + if( supports_sse4a() ) { 1.357 + UseXmmI2F = true; 1.358 + } else { 1.359 + UseXmmI2F = false; 1.360 + } 1.361 + } 1.362 + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 1.363 + if( supports_sse4a() ) { 1.364 + UseXmmI2D = true; 1.365 + } else { 1.366 + UseXmmI2D = false; 1.367 + } 1.368 + } 1.369 + } 1.370 + 1.371 + if( is_intel() ) { // Intel cpus specific settings 1.372 + if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 1.373 + UseStoreImmI16 = false; // don't use it on Intel cpus 1.374 + } 1.375 + if( cpu_family() == 6 || cpu_family() == 15 ) { 1.376 + if( FLAG_IS_DEFAULT(UseAddressNop) ) { 1.377 + // Use it on all Intel cpus starting from PentiumPro 1.378 + UseAddressNop = true; 1.379 + } 1.380 + } 1.381 + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.382 + UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1.383 + } 1.384 + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.385 + if( supports_sse3() ) { 1.386 + UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1.387 + } else { 1.388 + UseXmmRegToRegMoveAll = false; 1.389 + } 1.390 + } 1.391 + if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 1.392 +#ifdef COMPILER2 1.393 + if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 1.394 + // For new Intel cpus do the next optimization: 1.395 + // don't align the beginning of a loop if there are enough instructions 1.396 + // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1.397 + 
// in current fetch line (OptoLoopAlignment) or the padding 1.398 + // is big (> MaxLoopPad). 1.399 + // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1.400 + // generated NOP instructions. 11 is the largest size of one 1.401 + // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1.402 + MaxLoopPad = 11; 1.403 + } 1.404 +#endif // COMPILER2 1.405 + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 1.406 + UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1.407 + } 1.408 + if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus 1.409 + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 1.410 + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1.411 + } 1.412 + } 1.413 + } 1.414 + } 1.415 + 1.416 + assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 1.417 + assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 1.418 + 1.419 + // set valid Prefetch instruction 1.420 + if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 1.421 + if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 1.422 + if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 1.423 + if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; 1.424 + 1.425 + if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 1.426 + if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 1.427 + if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 1.428 + if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; 1.429 + 1.430 + // Allocation prefetch settings 1.431 + intx cache_line_size = L1_data_cache_line_size(); 1.432 + if( cache_line_size > AllocatePrefetchStepSize ) 1.433 + AllocatePrefetchStepSize = cache_line_size; 1.434 + if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 1.435 + AllocatePrefetchLines = 3; // Optimistic value 1.436 + assert(AllocatePrefetchLines > 0, "invalid value"); 1.437 + if( AllocatePrefetchLines < 1 ) // set 
valid value in product VM 1.438 + AllocatePrefetchLines = 1; // Conservative value 1.439 + 1.440 + AllocatePrefetchDistance = allocate_prefetch_distance(); 1.441 + AllocatePrefetchStyle = allocate_prefetch_style(); 1.442 + 1.443 + if( AllocatePrefetchStyle == 2 && is_intel() && 1.444 + cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 1.445 +#ifdef _LP64 1.446 + AllocatePrefetchDistance = 384; 1.447 +#else 1.448 + AllocatePrefetchDistance = 320; 1.449 +#endif 1.450 + } 1.451 + assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 1.452 + 1.453 +#ifdef _LP64 1.454 + // Prefetch settings 1.455 + PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); 1.456 + PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); 1.457 + PrefetchFieldsAhead = prefetch_fields_ahead(); 1.458 +#endif 1.459 + 1.460 +#ifndef PRODUCT 1.461 + if (PrintMiscellaneous && Verbose) { 1.462 + tty->print_cr("Logical CPUs per core: %u", 1.463 + logical_processors_per_package()); 1.464 + tty->print_cr("UseSSE=%d",UseSSE); 1.465 + tty->print("Allocation: "); 1.466 + if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { 1.467 + tty->print_cr("no prefetching"); 1.468 + } else { 1.469 + if (UseSSE == 0 && supports_3dnow()) { 1.470 + tty->print("PREFETCHW"); 1.471 + } else if (UseSSE >= 1) { 1.472 + if (AllocatePrefetchInstr == 0) { 1.473 + tty->print("PREFETCHNTA"); 1.474 + } else if (AllocatePrefetchInstr == 1) { 1.475 + tty->print("PREFETCHT0"); 1.476 + } else if (AllocatePrefetchInstr == 2) { 1.477 + tty->print("PREFETCHT2"); 1.478 + } else if (AllocatePrefetchInstr == 3) { 1.479 + tty->print("PREFETCHW"); 1.480 + } 1.481 + } 1.482 + if (AllocatePrefetchLines > 1) { 1.483 + tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1.484 + } else { 1.485 + tty->print_cr(" %d, one line", AllocatePrefetchDistance); 1.486 + } 1.487 + } 1.488 + 1.489 + 
if (PrefetchCopyIntervalInBytes > 0) { 1.490 + tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); 1.491 + } 1.492 + if (PrefetchScanIntervalInBytes > 0) { 1.493 + tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); 1.494 + } 1.495 + if (PrefetchFieldsAhead > 0) { 1.496 + tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); 1.497 + } 1.498 + } 1.499 +#endif // !PRODUCT 1.500 +} 1.501 + 1.502 +void VM_Version::initialize() { 1.503 + ResourceMark rm; 1.504 + // Making this stub must be FIRST use of assembler 1.505 + 1.506 + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 1.507 + if (stub_blob == NULL) { 1.508 + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); 1.509 + } 1.510 + CodeBuffer c(stub_blob->instructions_begin(), 1.511 + stub_blob->instructions_size()); 1.512 + VM_Version_StubGenerator g(&c); 1.513 + getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 1.514 + g.generate_getPsrInfo()); 1.515 + 1.516 + get_processor_features(); 1.517 +}
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Mon Feb 23 12:02:30 2009 -0800 2.3 @@ -0,0 +1,459 @@ 2.4 +/* 2.5 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.7 + * 2.8 + * This code is free software; you can redistribute it and/or modify it 2.9 + * under the terms of the GNU General Public License version 2 only, as 2.10 + * published by the Free Software Foundation. 2.11 + * 2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 2.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 2.15 + * version 2 for more details (a copy is included in the LICENSE file that 2.16 + * accompanied this code). 2.17 + * 2.18 + * You should have received a copy of the GNU General Public License version 2.19 + * 2 along with this work; if not, write to the Free Software Foundation, 2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 2.21 + * 2.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 2.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 2.24 + * have any questions. 2.25 + * 2.26 + */ 2.27 + 2.28 +class VM_Version : public Abstract_VM_Version { 2.29 +public: 2.30 + // cpuid result register layouts. These are all unions of a uint32_t 2.31 + // (in case anyone wants access to the register as a whole) and a bitfield. 
2.32 + 2.33 + union StdCpuid1Eax { 2.34 + uint32_t value; 2.35 + struct { 2.36 + uint32_t stepping : 4, 2.37 + model : 4, 2.38 + family : 4, 2.39 + proc_type : 2, 2.40 + : 2, 2.41 + ext_model : 4, 2.42 + ext_family : 8, 2.43 + : 4; 2.44 + } bits; 2.45 + }; 2.46 + 2.47 + union StdCpuid1Ebx { // example, unused 2.48 + uint32_t value; 2.49 + struct { 2.50 + uint32_t brand_id : 8, 2.51 + clflush_size : 8, 2.52 + threads_per_cpu : 8, 2.53 + apic_id : 8; 2.54 + } bits; 2.55 + }; 2.56 + 2.57 + union StdCpuid1Ecx { 2.58 + uint32_t value; 2.59 + struct { 2.60 + uint32_t sse3 : 1, 2.61 + : 2, 2.62 + monitor : 1, 2.63 + : 1, 2.64 + vmx : 1, 2.65 + : 1, 2.66 + est : 1, 2.67 + : 1, 2.68 + ssse3 : 1, 2.69 + cid : 1, 2.70 + : 2, 2.71 + cmpxchg16: 1, 2.72 + : 4, 2.73 + dca : 1, 2.74 + sse4_1 : 1, 2.75 + sse4_2 : 1, 2.76 + : 11; 2.77 + } bits; 2.78 + }; 2.79 + 2.80 + union StdCpuid1Edx { 2.81 + uint32_t value; 2.82 + struct { 2.83 + uint32_t : 4, 2.84 + tsc : 1, 2.85 + : 3, 2.86 + cmpxchg8 : 1, 2.87 + : 6, 2.88 + cmov : 1, 2.89 + : 7, 2.90 + mmx : 1, 2.91 + fxsr : 1, 2.92 + sse : 1, 2.93 + sse2 : 1, 2.94 + : 1, 2.95 + ht : 1, 2.96 + : 3; 2.97 + } bits; 2.98 + }; 2.99 + 2.100 + union DcpCpuid4Eax { 2.101 + uint32_t value; 2.102 + struct { 2.103 + uint32_t cache_type : 5, 2.104 + : 21, 2.105 + cores_per_cpu : 6; 2.106 + } bits; 2.107 + }; 2.108 + 2.109 + union DcpCpuid4Ebx { 2.110 + uint32_t value; 2.111 + struct { 2.112 + uint32_t L1_line_size : 12, 2.113 + partitions : 10, 2.114 + associativity : 10; 2.115 + } bits; 2.116 + }; 2.117 + 2.118 + union ExtCpuid1Ecx { 2.119 + uint32_t value; 2.120 + struct { 2.121 + uint32_t LahfSahf : 1, 2.122 + CmpLegacy : 1, 2.123 + : 4, 2.124 + abm : 1, 2.125 + sse4a : 1, 2.126 + misalignsse : 1, 2.127 + prefetchw : 1, 2.128 + : 22; 2.129 + } bits; 2.130 + }; 2.131 + 2.132 + union ExtCpuid1Edx { 2.133 + uint32_t value; 2.134 + struct { 2.135 + uint32_t : 22, 2.136 + mmx_amd : 1, 2.137 + mmx : 1, 2.138 + fxsr : 1, 2.139 + : 4, 2.140 + long_mode : 1, 
2.141 + tdnow2 : 1, 2.142 + tdnow : 1; 2.143 + } bits; 2.144 + }; 2.145 + 2.146 + union ExtCpuid5Ex { 2.147 + uint32_t value; 2.148 + struct { 2.149 + uint32_t L1_line_size : 8, 2.150 + L1_tag_lines : 8, 2.151 + L1_assoc : 8, 2.152 + L1_size : 8; 2.153 + } bits; 2.154 + }; 2.155 + 2.156 + union ExtCpuid8Ecx { 2.157 + uint32_t value; 2.158 + struct { 2.159 + uint32_t cores_per_cpu : 8, 2.160 + : 24; 2.161 + } bits; 2.162 + }; 2.163 + 2.164 +protected: 2.165 + static int _cpu; 2.166 + static int _model; 2.167 + static int _stepping; 2.168 + static int _cpuFeatures; // features returned by the "cpuid" instruction 2.169 + // 0 if this instruction is not available 2.170 + static const char* _features_str; 2.171 + 2.172 + enum { 2.173 + CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 2.174 + CPU_CMOV = (1 << 1), 2.175 + CPU_FXSR = (1 << 2), 2.176 + CPU_HT = (1 << 3), 2.177 + CPU_MMX = (1 << 4), 2.178 + CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) 2.179 + CPU_SSE = (1 << 6), 2.180 + CPU_SSE2 = (1 << 7), 2.181 + CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) 2.182 + CPU_SSSE3 = (1 << 9), 2.183 + CPU_SSE4A = (1 << 10), 2.184 + CPU_SSE4_1 = (1 << 11), 2.185 + CPU_SSE4_2 = (1 << 12) 2.186 + } cpuFeatureFlags; 2.187 + 2.188 + // cpuid information block. All info derived from executing cpuid with 2.189 + // various function numbers is stored here. Intel and AMD info is 2.190 + // merged in this block: accessor methods disentangle it. 2.191 + // 2.192 + // The info block is laid out in subblocks of 4 dwords corresponding to 2.193 + // eax, ebx, ecx and edx, whether or not they contain anything useful. 
2.194 + struct CpuidInfo { 2.195 + // cpuid function 0 2.196 + uint32_t std_max_function; 2.197 + uint32_t std_vendor_name_0; 2.198 + uint32_t std_vendor_name_1; 2.199 + uint32_t std_vendor_name_2; 2.200 + 2.201 + // cpuid function 1 2.202 + StdCpuid1Eax std_cpuid1_eax; 2.203 + StdCpuid1Ebx std_cpuid1_ebx; 2.204 + StdCpuid1Ecx std_cpuid1_ecx; 2.205 + StdCpuid1Edx std_cpuid1_edx; 2.206 + 2.207 + // cpuid function 4 (deterministic cache parameters) 2.208 + DcpCpuid4Eax dcp_cpuid4_eax; 2.209 + DcpCpuid4Ebx dcp_cpuid4_ebx; 2.210 + uint32_t dcp_cpuid4_ecx; // unused currently 2.211 + uint32_t dcp_cpuid4_edx; // unused currently 2.212 + 2.213 + // cpuid function 0x80000000 // example, unused 2.214 + uint32_t ext_max_function; 2.215 + uint32_t ext_vendor_name_0; 2.216 + uint32_t ext_vendor_name_1; 2.217 + uint32_t ext_vendor_name_2; 2.218 + 2.219 + // cpuid function 0x80000001 2.220 + uint32_t ext_cpuid1_eax; // reserved 2.221 + uint32_t ext_cpuid1_ebx; // reserved 2.222 + ExtCpuid1Ecx ext_cpuid1_ecx; 2.223 + ExtCpuid1Edx ext_cpuid1_edx; 2.224 + 2.225 + // cpuid functions 0x80000002 thru 0x80000004: example, unused 2.226 + uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 2.227 + uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 2.228 + uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 2.229 + 2.230 + // cpuid function 0x80000005 //AMD L1, Intel reserved 2.231 + uint32_t ext_cpuid5_eax; // unused currently 2.232 + uint32_t ext_cpuid5_ebx; // reserved 2.233 + ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) 2.234 + ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) 2.235 + 2.236 + // cpuid function 0x80000008 2.237 + uint32_t ext_cpuid8_eax; // unused currently 2.238 + uint32_t ext_cpuid8_ebx; // reserved 2.239 + ExtCpuid8Ecx ext_cpuid8_ecx; 2.240 + uint32_t ext_cpuid8_edx; // reserved 2.241 + }; 2.242 + 2.243 + // The actual cpuid info block 2.244 + static CpuidInfo _cpuid_info; 2.245 + 2.246 + // Extractors and 
predicates 2.247 + static uint32_t extended_cpu_family() { 2.248 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; 2.249 + result += _cpuid_info.std_cpuid1_eax.bits.ext_family; 2.250 + return result; 2.251 + } 2.252 + static uint32_t extended_cpu_model() { 2.253 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; 2.254 + result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; 2.255 + return result; 2.256 + } 2.257 + static uint32_t cpu_stepping() { 2.258 + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; 2.259 + return result; 2.260 + } 2.261 + static uint logical_processor_count() { 2.262 + uint result = threads_per_core(); 2.263 + return result; 2.264 + } 2.265 + static uint32_t feature_flags() { 2.266 + uint32_t result = 0; 2.267 + if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2.268 + result |= CPU_CX8; 2.269 + if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2.270 + result |= CPU_CMOV; 2.271 + if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && 2.272 + _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) 2.273 + result |= CPU_FXSR; 2.274 + // HT flag is set for multi-core processors also. 
2.275 + if (threads_per_core() > 1) 2.276 + result |= CPU_HT; 2.277 + if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && 2.278 + _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) 2.279 + result |= CPU_MMX; 2.280 + if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 2.281 + result |= CPU_3DNOW; 2.282 + if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 2.283 + result |= CPU_SSE; 2.284 + if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 2.285 + result |= CPU_SSE2; 2.286 + if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 2.287 + result |= CPU_SSE3; 2.288 + if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 2.289 + result |= CPU_SSSE3; 2.290 + if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 2.291 + result |= CPU_SSE4A; 2.292 + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 2.293 + result |= CPU_SSE4_1; 2.294 + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 2.295 + result |= CPU_SSE4_2; 2.296 + return result; 2.297 + } 2.298 + 2.299 + static void get_processor_features(); 2.300 + 2.301 +public: 2.302 + // Offsets for cpuid asm stub 2.303 + static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 2.304 + static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } 2.305 + static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } 2.306 + static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 2.307 + static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 2.308 + static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 2.309 + 2.310 + // Initialization 2.311 + static void initialize(); 2.312 + 2.313 + // Asserts 2.314 + static void assert_is_initialized() { 2.315 + assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); 2.316 + } 2.317 + 2.318 + // 2.319 + // Processor family: 2.320 + // 3 - 386 2.321 + // 4 - 486 2.322 + // 5 - Pentium 
2.323 + // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 2.324 + // Pentium M, Core Solo, Core Duo, Core2 Duo 2.325 + // family 6 model: 9, 13, 14, 15 2.326 + // 0x0f - Pentium 4, Opteron 2.327 + // 2.328 + // Note: The cpu family should be used to select between 2.329 + // instruction sequences which are valid on all Intel 2.330 + // processors. Use the feature test functions below to 2.331 + // determine whether a particular instruction is supported. 2.332 + // 2.333 + static int cpu_family() { return _cpu;} 2.334 + static bool is_P6() { return cpu_family() >= 6; } 2.335 + 2.336 + static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 2.337 + static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 2.338 + 2.339 + static uint cores_per_cpu() { 2.340 + uint result = 1; 2.341 + if (is_intel()) { 2.342 + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 2.343 + } else if (is_amd()) { 2.344 + result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 2.345 + } 2.346 + return result; 2.347 + } 2.348 + 2.349 + static uint threads_per_core() { 2.350 + uint result = 1; 2.351 + if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 2.352 + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 2.353 + cores_per_cpu(); 2.354 + } 2.355 + return result; 2.356 + } 2.357 + 2.358 + static intx L1_data_cache_line_size() { 2.359 + intx result = 0; 2.360 + if (is_intel()) { 2.361 + result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 2.362 + } else if (is_amd()) { 2.363 + result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 2.364 + } 2.365 + if (result < 32) // not defined ? 
2.366 + result = 32; // 32 bytes by default on x86 and other x64 2.367 + return result; 2.368 + } 2.369 + 2.370 + // 2.371 + // Feature identification 2.372 + // 2.373 + static bool supports_cpuid() { return _cpuFeatures != 0; } 2.374 + static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 2.375 + static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 2.376 + static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 2.377 + static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 2.378 + static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 2.379 + static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 2.380 + static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 2.381 + static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 2.382 + static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 2.383 + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 2.384 + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 2.385 + // 2.386 + // AMD features 2.387 + // 2.388 + static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 2.389 + static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } 2.390 + static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } 2.391 + static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 2.392 + 2.393 + static bool supports_compare_and_exchange() { return true; } 2.394 + 2.395 + static const char* cpu_features() { return _features_str; } 2.396 + 2.397 + static intx allocate_prefetch_distance() { 2.398 + // This method should be called before allocate_prefetch_style(). 2.399 + // 2.400 + // Hardware prefetching (distance/size in bytes): 2.401 + // Pentium 3 - 64 / 32 2.402 + // Pentium 4 - 256 / 128 2.403 + // Athlon - 64 / 32 ???? 
2.404 + // Opteron - 128 / 64 only when 2 sequential cache lines accessed 2.405 + // Core - 128 / 64 2.406 + // 2.407 + // Software prefetching (distance in bytes / instruction with best score): 2.408 + // Pentium 3 - 128 / prefetchnta 2.409 + // Pentium 4 - 512 / prefetchnta 2.410 + // Athlon - 128 / prefetchnta 2.411 + // Opteron - 256 / prefetchnta 2.412 + // Core - 256 / prefetchnta 2.413 + // It will be used only when AllocatePrefetchStyle > 0 2.414 + 2.415 + intx count = AllocatePrefetchDistance; 2.416 + if (count < 0) { // default ? 2.417 + if (is_amd()) { // AMD 2.418 + if (supports_sse2()) 2.419 + count = 256; // Opteron 2.420 + else 2.421 + count = 128; // Athlon 2.422 + } else { // Intel 2.423 + if (supports_sse2()) 2.424 + if (cpu_family() == 6) { 2.425 + count = 256; // Pentium M, Core, Core2 2.426 + } else { 2.427 + count = 512; // Pentium 4 2.428 + } 2.429 + else 2.430 + count = 128; // Pentium 3 (and all other old CPUs) 2.431 + } 2.432 + } 2.433 + return count; 2.434 + } 2.435 + static intx allocate_prefetch_style() { 2.436 + assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 2.437 + // Return 0 if AllocatePrefetchDistance was not defined. 2.438 + return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; 2.439 + } 2.440 + 2.441 + // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 2.442 + // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 2.443 + // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 2.444 + // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 2.445 + 2.446 + // gc copy/scan is disabled if prefetchw isn't supported, because 2.447 + // Prefetch::write emits an inlined prefetchw on Linux. 2.448 + // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 2.449 + // The used prefetcht0 instruction works for both amd64 and em64t. 
2.450 + static intx prefetch_copy_interval_in_bytes() { 2.451 + intx interval = PrefetchCopyIntervalInBytes; 2.452 + return interval >= 0 ? interval : 576; 2.453 + } 2.454 + static intx prefetch_scan_interval_in_bytes() { 2.455 + intx interval = PrefetchScanIntervalInBytes; 2.456 + return interval >= 0 ? interval : 576; 2.457 + } 2.458 + static intx prefetch_fields_ahead() { 2.459 + intx count = PrefetchFieldsAhead; 2.460 + return count >= 0 ? count : 1; 2.461 + } 2.462 +};
3.1 --- a/src/cpu/x86/vm/vm_version_x86_32.cpp Thu Feb 19 17:38:53 2009 -0800 3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 3.3 @@ -1,472 +0,0 @@ 3.4 -/* 3.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 3.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3.7 - * 3.8 - * This code is free software; you can redistribute it and/or modify it 3.9 - * under the terms of the GNU General Public License version 2 only, as 3.10 - * published by the Free Software Foundation. 3.11 - * 3.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 3.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 3.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 3.15 - * version 2 for more details (a copy is included in the LICENSE file that 3.16 - * accompanied this code). 3.17 - * 3.18 - * You should have received a copy of the GNU General Public License version 3.19 - * 2 along with this work; if not, write to the Free Software Foundation, 3.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 3.21 - * 3.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 3.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 3.24 - * have any questions. 
3.25 - * 3.26 - */ 3.27 - 3.28 -# include "incls/_precompiled.incl" 3.29 -# include "incls/_vm_version_x86_32.cpp.incl" 3.30 - 3.31 - 3.32 -int VM_Version::_cpu; 3.33 -int VM_Version::_model; 3.34 -int VM_Version::_stepping; 3.35 -int VM_Version::_cpuFeatures; 3.36 -const char* VM_Version::_features_str = ""; 3.37 -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 3.38 - 3.39 -static BufferBlob* stub_blob; 3.40 -static const int stub_size = 300; 3.41 - 3.42 -extern "C" { 3.43 - typedef void (*getPsrInfo_stub_t)(void*); 3.44 -} 3.45 -static getPsrInfo_stub_t getPsrInfo_stub = NULL; 3.46 - 3.47 - 3.48 -class VM_Version_StubGenerator: public StubCodeGenerator { 3.49 - public: 3.50 - 3.51 - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 3.52 - 3.53 - address generate_getPsrInfo() { 3.54 - // Flags to test CPU type. 3.55 - const uint32_t EFL_AC = 0x40000; 3.56 - const uint32_t EFL_ID = 0x200000; 3.57 - // Values for when we don't have a CPUID instruction. 3.58 - const int CPU_FAMILY_SHIFT = 8; 3.59 - const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 3.60 - const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 3.61 - 3.62 - Label detect_486, cpu486, detect_586, std_cpuid1; 3.63 - Label ext_cpuid1, ext_cpuid5, done; 3.64 - 3.65 - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 3.66 -# define __ _masm-> 3.67 - 3.68 - address start = __ pc(); 3.69 - 3.70 - // 3.71 - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 3.72 - // 3.73 - __ push(rbp); 3.74 - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 3.75 - __ push(rbx); 3.76 - __ push(rsi); 3.77 - __ pushf(); // preserve rbx, and flags 3.78 - __ pop(rax); 3.79 - __ push(rax); 3.80 - __ mov(rcx, rax); 3.81 - // 3.82 - // if we are unable to change the AC flag, we have a 386 3.83 - // 3.84 - __ xorl(rax, EFL_AC); 3.85 - __ push(rax); 3.86 - __ popf(); 3.87 - __ pushf(); 3.88 - __ pop(rax); 3.89 - __ cmpptr(rax, rcx); 3.90 - __ jccb(Assembler::notEqual, detect_486); 3.91 
- 3.92 - __ movl(rax, CPU_FAMILY_386); 3.93 - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 3.94 - __ jmp(done); 3.95 - 3.96 - // 3.97 - // If we are unable to change the ID flag, we have a 486 which does 3.98 - // not support the "cpuid" instruction. 3.99 - // 3.100 - __ bind(detect_486); 3.101 - __ mov(rax, rcx); 3.102 - __ xorl(rax, EFL_ID); 3.103 - __ push(rax); 3.104 - __ popf(); 3.105 - __ pushf(); 3.106 - __ pop(rax); 3.107 - __ cmpptr(rcx, rax); 3.108 - __ jccb(Assembler::notEqual, detect_586); 3.109 - 3.110 - __ bind(cpu486); 3.111 - __ movl(rax, CPU_FAMILY_486); 3.112 - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 3.113 - __ jmp(done); 3.114 - 3.115 - // 3.116 - // at this point, we have a chip which supports the "cpuid" instruction 3.117 - // 3.118 - __ bind(detect_586); 3.119 - __ xorptr(rax, rax); 3.120 - __ cpuid(); 3.121 - __ orptr(rax, rax); 3.122 - __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 3.123 - // value of at least 1, we give up and 3.124 - // assume a 486 3.125 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 3.126 - __ movl(Address(rsi, 0), rax); 3.127 - __ movl(Address(rsi, 4), rbx); 3.128 - __ movl(Address(rsi, 8), rcx); 3.129 - __ movl(Address(rsi,12), rdx); 3.130 - 3.131 - __ cmpl(rax, 3); // Is cpuid(0x4) supported? 
3.132 - __ jccb(Assembler::belowEqual, std_cpuid1); 3.133 - 3.134 - // 3.135 - // cpuid(0x4) Deterministic cache params 3.136 - // 3.137 - __ movl(rax, 4); // and rcx already set to 0x0 3.138 - __ xorl(rcx, rcx); 3.139 - __ cpuid(); 3.140 - __ push(rax); 3.141 - __ andl(rax, 0x1f); // Determine if valid cache parameters used 3.142 - __ orl(rax, rax); // rax,[4:0] == 0 indicates invalid cache 3.143 - __ pop(rax); 3.144 - __ jccb(Assembler::equal, std_cpuid1); 3.145 - 3.146 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 3.147 - __ movl(Address(rsi, 0), rax); 3.148 - __ movl(Address(rsi, 4), rbx); 3.149 - __ movl(Address(rsi, 8), rcx); 3.150 - __ movl(Address(rsi,12), rdx); 3.151 - 3.152 - // 3.153 - // Standard cpuid(0x1) 3.154 - // 3.155 - __ bind(std_cpuid1); 3.156 - __ movl(rax, 1); 3.157 - __ cpuid(); 3.158 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 3.159 - __ movl(Address(rsi, 0), rax); 3.160 - __ movl(Address(rsi, 4), rbx); 3.161 - __ movl(Address(rsi, 8), rcx); 3.162 - __ movl(Address(rsi,12), rdx); 3.163 - 3.164 - __ movl(rax, 0x80000000); 3.165 - __ cpuid(); 3.166 - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 3.167 - __ jcc(Assembler::belowEqual, done); 3.168 - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 3.169 - __ jccb(Assembler::belowEqual, ext_cpuid1); 3.170 - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 
3.171 - __ jccb(Assembler::belowEqual, ext_cpuid5); 3.172 - // 3.173 - // Extended cpuid(0x80000008) 3.174 - // 3.175 - __ movl(rax, 0x80000008); 3.176 - __ cpuid(); 3.177 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 3.178 - __ movl(Address(rsi, 0), rax); 3.179 - __ movl(Address(rsi, 4), rbx); 3.180 - __ movl(Address(rsi, 8), rcx); 3.181 - __ movl(Address(rsi,12), rdx); 3.182 - 3.183 - // 3.184 - // Extended cpuid(0x80000005) 3.185 - // 3.186 - __ bind(ext_cpuid5); 3.187 - __ movl(rax, 0x80000005); 3.188 - __ cpuid(); 3.189 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 3.190 - __ movl(Address(rsi, 0), rax); 3.191 - __ movl(Address(rsi, 4), rbx); 3.192 - __ movl(Address(rsi, 8), rcx); 3.193 - __ movl(Address(rsi,12), rdx); 3.194 - 3.195 - // 3.196 - // Extended cpuid(0x80000001) 3.197 - // 3.198 - __ bind(ext_cpuid1); 3.199 - __ movl(rax, 0x80000001); 3.200 - __ cpuid(); 3.201 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 3.202 - __ movl(Address(rsi, 0), rax); 3.203 - __ movl(Address(rsi, 4), rbx); 3.204 - __ movl(Address(rsi, 8), rcx); 3.205 - __ movl(Address(rsi,12), rdx); 3.206 - 3.207 - // 3.208 - // return 3.209 - // 3.210 - __ bind(done); 3.211 - __ popf(); 3.212 - __ pop(rsi); 3.213 - __ pop(rbx); 3.214 - __ pop(rbp); 3.215 - __ ret(0); 3.216 - 3.217 -# undef __ 3.218 - 3.219 - return start; 3.220 - }; 3.221 -}; 3.222 - 3.223 - 3.224 -void VM_Version::get_processor_features() { 3.225 - 3.226 - _cpu = 4; // 486 by default 3.227 - _model = 0; 3.228 - _stepping = 0; 3.229 - _cpuFeatures = 0; 3.230 - _logical_processors_per_package = 1; 3.231 - if (!Use486InstrsOnly) { 3.232 - // Get raw processor info 3.233 - getPsrInfo_stub(&_cpuid_info); 3.234 - assert_is_initialized(); 3.235 - _cpu = extended_cpu_family(); 3.236 - _model = extended_cpu_model(); 3.237 - _stepping = cpu_stepping(); 3.238 - if (cpu_family() > 4) { // it supports CPUID 3.239 - _cpuFeatures = feature_flags(); 3.240 - // 
Logical processors are only available on P4s and above, 3.241 - // and only if hyperthreading is available. 3.242 - _logical_processors_per_package = logical_processor_count(); 3.243 - } 3.244 - } 3.245 - _supports_cx8 = supports_cmpxchg8(); 3.246 - // if the OS doesn't support SSE, we can't use this feature even if the HW does 3.247 - if( !os::supports_sse()) 3.248 - _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); 3.249 - if (UseSSE < 4) { 3.250 - _cpuFeatures &= ~CPU_SSE4_1; 3.251 - _cpuFeatures &= ~CPU_SSE4_2; 3.252 - } 3.253 - if (UseSSE < 3) { 3.254 - _cpuFeatures &= ~CPU_SSE3; 3.255 - _cpuFeatures &= ~CPU_SSSE3; 3.256 - _cpuFeatures &= ~CPU_SSE4A; 3.257 - } 3.258 - if (UseSSE < 2) 3.259 - _cpuFeatures &= ~CPU_SSE2; 3.260 - if (UseSSE < 1) 3.261 - _cpuFeatures &= ~CPU_SSE; 3.262 - 3.263 - if (logical_processors_per_package() == 1) { 3.264 - // HT processor could be installed on a system which doesn't support HT. 3.265 - _cpuFeatures &= ~CPU_HT; 3.266 - } 3.267 - 3.268 - char buf[256]; 3.269 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 3.270 - cores_per_cpu(), threads_per_core(), 3.271 - cpu_family(), _model, _stepping, 3.272 - (supports_cmov() ? ", cmov" : ""), 3.273 - (supports_cmpxchg8() ? ", cx8" : ""), 3.274 - (supports_fxsr() ? ", fxsr" : ""), 3.275 - (supports_mmx() ? ", mmx" : ""), 3.276 - (supports_sse() ? ", sse" : ""), 3.277 - (supports_sse2() ? ", sse2" : ""), 3.278 - (supports_sse3() ? ", sse3" : ""), 3.279 - (supports_ssse3()? ", ssse3": ""), 3.280 - (supports_sse4_1() ? ", sse4.1" : ""), 3.281 - (supports_sse4_2() ? ", sse4.2" : ""), 3.282 - (supports_mmx_ext() ? ", mmxext" : ""), 3.283 - (supports_3dnow() ? ", 3dnow" : ""), 3.284 - (supports_3dnow2() ? ", 3dnowext" : ""), 3.285 - (supports_sse4a() ? ", sse4a": ""), 3.286 - (supports_ht() ? 
", ht": "")); 3.287 - _features_str = strdup(buf); 3.288 - 3.289 - // UseSSE is set to the smaller of what hardware supports and what 3.290 - // the command line requires. I.e., you cannot set UseSSE to 2 on 3.291 - // older Pentiums which do not support it. 3.292 - if( UseSSE > 4 ) UseSSE=4; 3.293 - if( UseSSE < 0 ) UseSSE=0; 3.294 - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 3.295 - UseSSE = MIN2((intx)3,UseSSE); 3.296 - if( !supports_sse3() ) // Drop to 2 if no SSE3 support 3.297 - UseSSE = MIN2((intx)2,UseSSE); 3.298 - if( !supports_sse2() ) // Drop to 1 if no SSE2 support 3.299 - UseSSE = MIN2((intx)1,UseSSE); 3.300 - if( !supports_sse () ) // Drop to 0 if no SSE support 3.301 - UseSSE = 0; 3.302 - 3.303 - // On new cpus instructions which update whole XMM register should be used 3.304 - // to prevent partial register stall due to dependencies on high half. 3.305 - // 3.306 - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 3.307 - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 3.308 - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 3.309 - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 3.310 - 3.311 - if( is_amd() ) { // AMD cpus specific settings 3.312 - if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 3.313 - // Use it on new AMD cpus starting from Opteron. 3.314 - UseAddressNop = true; 3.315 - } 3.316 - if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { 3.317 - // Use it on new AMD cpus starting from Opteron. 
3.318 - UseNewLongLShift = true; 3.319 - } 3.320 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 3.321 - if( supports_sse4a() ) { 3.322 - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 3.323 - } else { 3.324 - UseXmmLoadAndClearUpper = false; 3.325 - } 3.326 - } 3.327 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 3.328 - if( supports_sse4a() ) { 3.329 - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 3.330 - } else { 3.331 - UseXmmRegToRegMoveAll = false; 3.332 - } 3.333 - } 3.334 - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 3.335 - if( supports_sse4a() ) { 3.336 - UseXmmI2F = true; 3.337 - } else { 3.338 - UseXmmI2F = false; 3.339 - } 3.340 - } 3.341 - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 3.342 - if( supports_sse4a() ) { 3.343 - UseXmmI2D = true; 3.344 - } else { 3.345 - UseXmmI2D = false; 3.346 - } 3.347 - } 3.348 - } 3.349 - 3.350 - if( is_intel() ) { // Intel cpus specific settings 3.351 - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 3.352 - UseStoreImmI16 = false; // don't use it on Intel cpus 3.353 - } 3.354 - if( cpu_family() == 6 || cpu_family() == 15 ) { 3.355 - if( FLAG_IS_DEFAULT(UseAddressNop) ) { 3.356 - // Use it on all Intel cpus starting from PentiumPro 3.357 - UseAddressNop = true; 3.358 - } 3.359 - } 3.360 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 3.361 - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 3.362 - } 3.363 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 3.364 - if( supports_sse3() ) { 3.365 - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 3.366 - } else { 3.367 - UseXmmRegToRegMoveAll = false; 3.368 - } 3.369 - } 3.370 - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 3.371 -#ifdef COMPILER2 3.372 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 3.373 - // For new Intel cpus do the next optimization: 3.374 - // don't align the beginning of a loop if there are enough instructions 3.375 - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 3.376 - 
// in current fetch line (OptoLoopAlignment) or the padding 3.377 - // is big (> MaxLoopPad). 3.378 - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 3.379 - // generated NOP instructions. 11 is the largest size of one 3.380 - // address NOP instruction '0F 1F' (see Assembler::nop(i)). 3.381 - MaxLoopPad = 11; 3.382 - } 3.383 -#endif // COMPILER2 3.384 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 3.385 - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 3.386 - } 3.387 - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus 3.388 - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 3.389 - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 3.390 - } 3.391 - } 3.392 - } 3.393 - } 3.394 - 3.395 - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 3.396 - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 3.397 - 3.398 - // set valid Prefetch instruction 3.399 - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 3.400 - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 3.401 - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 3.402 - if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; 3.403 - 3.404 - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 3.405 - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 3.406 - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 3.407 - if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; 3.408 - 3.409 - // Allocation prefetch settings 3.410 - intx cache_line_size = L1_data_cache_line_size(); 3.411 - if( cache_line_size > AllocatePrefetchStepSize ) 3.412 - AllocatePrefetchStepSize = cache_line_size; 3.413 - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 3.414 - AllocatePrefetchLines = 3; // Optimistic value 3.415 - assert(AllocatePrefetchLines > 0, "invalid value"); 3.416 - if( AllocatePrefetchLines < 1 ) // set 
valid value in product VM 3.417 - AllocatePrefetchLines = 1; // Conservative value 3.418 - 3.419 - AllocatePrefetchDistance = allocate_prefetch_distance(); 3.420 - AllocatePrefetchStyle = allocate_prefetch_style(); 3.421 - 3.422 - if( AllocatePrefetchStyle == 2 && is_intel() && 3.423 - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 3.424 - AllocatePrefetchDistance = 320; 3.425 - } 3.426 - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 3.427 - 3.428 -#ifndef PRODUCT 3.429 - if (PrintMiscellaneous && Verbose) { 3.430 - tty->print_cr("Logical CPUs per core: %u", 3.431 - logical_processors_per_package()); 3.432 - tty->print_cr("UseSSE=%d",UseSSE); 3.433 - tty->print("Allocation: "); 3.434 - if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { 3.435 - tty->print_cr("no prefetching"); 3.436 - } else { 3.437 - if (UseSSE == 0 && supports_3dnow()) { 3.438 - tty->print("PREFETCHW"); 3.439 - } else if (UseSSE >= 1) { 3.440 - if (AllocatePrefetchInstr == 0) { 3.441 - tty->print("PREFETCHNTA"); 3.442 - } else if (AllocatePrefetchInstr == 1) { 3.443 - tty->print("PREFETCHT0"); 3.444 - } else if (AllocatePrefetchInstr == 2) { 3.445 - tty->print("PREFETCHT2"); 3.446 - } else if (AllocatePrefetchInstr == 3) { 3.447 - tty->print("PREFETCHW"); 3.448 - } 3.449 - } 3.450 - if (AllocatePrefetchLines > 1) { 3.451 - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 3.452 - } else { 3.453 - tty->print_cr(" %d, one line", AllocatePrefetchDistance); 3.454 - } 3.455 - } 3.456 - } 3.457 -#endif // !PRODUCT 3.458 -} 3.459 - 3.460 -void VM_Version::initialize() { 3.461 - ResourceMark rm; 3.462 - // Making this stub must be FIRST use of assembler 3.463 - 3.464 - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 3.465 - if (stub_blob == NULL) { 3.466 - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); 3.467 - } 
3.468 - CodeBuffer c(stub_blob->instructions_begin(), 3.469 - stub_blob->instructions_size()); 3.470 - VM_Version_StubGenerator g(&c); 3.471 - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 3.472 - g.generate_getPsrInfo()); 3.473 - 3.474 - get_processor_features(); 3.475 -}
4.1 --- a/src/cpu/x86/vm/vm_version_x86_32.hpp Thu Feb 19 17:38:53 2009 -0800 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,439 +0,0 @@ 4.4 -/* 4.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 4.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4.7 - * 4.8 - * This code is free software; you can redistribute it and/or modify it 4.9 - * under the terms of the GNU General Public License version 2 only, as 4.10 - * published by the Free Software Foundation. 4.11 - * 4.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 4.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 4.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 4.15 - * version 2 for more details (a copy is included in the LICENSE file that 4.16 - * accompanied this code). 4.17 - * 4.18 - * You should have received a copy of the GNU General Public License version 4.19 - * 2 along with this work; if not, write to the Free Software Foundation, 4.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 4.21 - * 4.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 4.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 4.24 - * have any questions. 4.25 - * 4.26 - */ 4.27 - 4.28 -class VM_Version: public Abstract_VM_Version { 4.29 -public: 4.30 - // cpuid result register layouts. These are all unions of a uint32_t 4.31 - // (in case anyone wants access to the register as a whole) and a bitfield. 
4.32 - 4.33 - union StdCpuid1Eax { 4.34 - uint32_t value; 4.35 - struct { 4.36 - uint32_t stepping : 4, 4.37 - model : 4, 4.38 - family : 4, 4.39 - proc_type : 2, 4.40 - : 2, 4.41 - ext_model : 4, 4.42 - ext_family : 8, 4.43 - : 4; 4.44 - } bits; 4.45 - }; 4.46 - 4.47 - union StdCpuid1Ebx { // example, unused 4.48 - uint32_t value; 4.49 - struct { 4.50 - uint32_t brand_id : 8, 4.51 - clflush_size : 8, 4.52 - threads_per_cpu : 8, 4.53 - apic_id : 8; 4.54 - } bits; 4.55 - }; 4.56 - 4.57 - union StdCpuid1Ecx { 4.58 - uint32_t value; 4.59 - struct { 4.60 - uint32_t sse3 : 1, 4.61 - : 2, 4.62 - monitor : 1, 4.63 - : 1, 4.64 - vmx : 1, 4.65 - : 1, 4.66 - est : 1, 4.67 - : 1, 4.68 - ssse3 : 1, 4.69 - cid : 1, 4.70 - : 2, 4.71 - cmpxchg16: 1, 4.72 - : 4, 4.73 - dca : 1, 4.74 - sse4_1 : 1, 4.75 - sse4_2 : 1, 4.76 - : 11; 4.77 - } bits; 4.78 - }; 4.79 - 4.80 - union StdCpuid1Edx { 4.81 - uint32_t value; 4.82 - struct { 4.83 - uint32_t : 4, 4.84 - tsc : 1, 4.85 - : 3, 4.86 - cmpxchg8 : 1, 4.87 - : 6, 4.88 - cmov : 1, 4.89 - : 7, 4.90 - mmx : 1, 4.91 - fxsr : 1, 4.92 - sse : 1, 4.93 - sse2 : 1, 4.94 - : 1, 4.95 - ht : 1, 4.96 - : 3; 4.97 - } bits; 4.98 - }; 4.99 - 4.100 - union DcpCpuid4Eax { 4.101 - uint32_t value; 4.102 - struct { 4.103 - uint32_t cache_type : 5, 4.104 - : 21, 4.105 - cores_per_cpu : 6; 4.106 - } bits; 4.107 - }; 4.108 - 4.109 - union DcpCpuid4Ebx { 4.110 - uint32_t value; 4.111 - struct { 4.112 - uint32_t L1_line_size : 12, 4.113 - partitions : 10, 4.114 - associativity : 10; 4.115 - } bits; 4.116 - }; 4.117 - 4.118 - union ExtCpuid1Ecx { 4.119 - uint32_t value; 4.120 - struct { 4.121 - uint32_t LahfSahf : 1, 4.122 - CmpLegacy : 1, 4.123 - : 4, 4.124 - abm : 1, 4.125 - sse4a : 1, 4.126 - misalignsse : 1, 4.127 - prefetchw : 1, 4.128 - : 22; 4.129 - } bits; 4.130 - }; 4.131 - 4.132 - union ExtCpuid1Edx { 4.133 - uint32_t value; 4.134 - struct { 4.135 - uint32_t : 22, 4.136 - mmx_amd : 1, 4.137 - mmx : 1, 4.138 - fxsr : 1, 4.139 - : 4, 4.140 - long_mode : 1, 
4.141 - tdnow2 : 1, 4.142 - tdnow : 1; 4.143 - } bits; 4.144 - }; 4.145 - 4.146 - union ExtCpuid5Ex { 4.147 - uint32_t value; 4.148 - struct { 4.149 - uint32_t L1_line_size : 8, 4.150 - L1_tag_lines : 8, 4.151 - L1_assoc : 8, 4.152 - L1_size : 8; 4.153 - } bits; 4.154 - }; 4.155 - 4.156 - union ExtCpuid8Ecx { 4.157 - uint32_t value; 4.158 - struct { 4.159 - uint32_t cores_per_cpu : 8, 4.160 - : 24; 4.161 - } bits; 4.162 - }; 4.163 - 4.164 -protected: 4.165 - static int _cpu; 4.166 - static int _model; 4.167 - static int _stepping; 4.168 - static int _cpuFeatures; // features returned by the "cpuid" instruction 4.169 - // 0 if this instruction is not available 4.170 - static const char* _features_str; 4.171 - 4.172 - enum { 4.173 - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 4.174 - CPU_CMOV = (1 << 1), 4.175 - CPU_FXSR = (1 << 2), 4.176 - CPU_HT = (1 << 3), 4.177 - CPU_MMX = (1 << 4), 4.178 - CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) 4.179 - CPU_SSE = (1 << 6), 4.180 - CPU_SSE2 = (1 << 7), 4.181 - CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX) 4.182 - CPU_SSSE3= (1 << 9), 4.183 - CPU_SSE4A= (1 <<10), 4.184 - CPU_SSE4_1 = (1 << 11), 4.185 - CPU_SSE4_2 = (1 << 12) 4.186 - } cpuFeatureFlags; 4.187 - 4.188 - // cpuid information block. All info derived from executing cpuid with 4.189 - // various function numbers is stored here. Intel and AMD info is 4.190 - // merged in this block: accessor methods disentangle it. 4.191 - // 4.192 - // The info block is laid out in subblocks of 4 dwords corresponding to 4.193 - // rax, rbx, rcx and rdx, whether or not they contain anything useful. 
4.194 - struct CpuidInfo { 4.195 - // cpuid function 0 4.196 - uint32_t std_max_function; 4.197 - uint32_t std_vendor_name_0; 4.198 - uint32_t std_vendor_name_1; 4.199 - uint32_t std_vendor_name_2; 4.200 - 4.201 - // cpuid function 1 4.202 - StdCpuid1Eax std_cpuid1_rax; 4.203 - StdCpuid1Ebx std_cpuid1_rbx; 4.204 - StdCpuid1Ecx std_cpuid1_rcx; 4.205 - StdCpuid1Edx std_cpuid1_rdx; 4.206 - 4.207 - // cpuid function 4 (deterministic cache parameters) 4.208 - DcpCpuid4Eax dcp_cpuid4_rax; 4.209 - DcpCpuid4Ebx dcp_cpuid4_rbx; 4.210 - uint32_t dcp_cpuid4_rcx; // unused currently 4.211 - uint32_t dcp_cpuid4_rdx; // unused currently 4.212 - 4.213 - // cpuid function 0x80000000 // example, unused 4.214 - uint32_t ext_max_function; 4.215 - uint32_t ext_vendor_name_0; 4.216 - uint32_t ext_vendor_name_1; 4.217 - uint32_t ext_vendor_name_2; 4.218 - 4.219 - // cpuid function 0x80000001 4.220 - uint32_t ext_cpuid1_rax; // reserved 4.221 - uint32_t ext_cpuid1_rbx; // reserved 4.222 - ExtCpuid1Ecx ext_cpuid1_rcx; 4.223 - ExtCpuid1Edx ext_cpuid1_rdx; 4.224 - 4.225 - // cpuid functions 0x80000002 thru 0x80000004: example, unused 4.226 - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 4.227 - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 4.228 - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 4.229 - 4.230 - // cpuid function 0x80000005 //AMD L1, Intel reserved 4.231 - uint32_t ext_cpuid5_rax; // unused currently 4.232 - uint32_t ext_cpuid5_rbx; // reserved 4.233 - ExtCpuid5Ex ext_cpuid5_rcx; // L1 data cache info (AMD) 4.234 - ExtCpuid5Ex ext_cpuid5_rdx; // L1 instruction cache info (AMD) 4.235 - 4.236 - // cpuid function 0x80000008 4.237 - uint32_t ext_cpuid8_rax; // unused currently 4.238 - uint32_t ext_cpuid8_rbx; // reserved 4.239 - ExtCpuid8Ecx ext_cpuid8_rcx; 4.240 - uint32_t ext_cpuid8_rdx; // reserved 4.241 - }; 4.242 - 4.243 - // The actual cpuid info block 4.244 - static CpuidInfo _cpuid_info; 4.245 - 4.246 - // Extractors and 
predicates 4.247 - static uint32_t extended_cpu_family() { 4.248 - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family; 4.249 - result += _cpuid_info.std_cpuid1_rax.bits.ext_family; 4.250 - return result; 4.251 - } 4.252 - static uint32_t extended_cpu_model() { 4.253 - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model; 4.254 - result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; 4.255 - return result; 4.256 - } 4.257 - static uint32_t cpu_stepping() { 4.258 - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping; 4.259 - return result; 4.260 - } 4.261 - static uint logical_processor_count() { 4.262 - uint result = threads_per_core(); 4.263 - return result; 4.264 - } 4.265 - static uint32_t feature_flags() { 4.266 - uint32_t result = 0; 4.267 - if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0) 4.268 - result |= CPU_CX8; 4.269 - if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0) 4.270 - result |= CPU_CMOV; 4.271 - if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && 4.272 - _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0) 4.273 - result |= CPU_FXSR; 4.274 - // HT flag is set for multi-core processors also. 
4.275 - if (threads_per_core() > 1) 4.276 - result |= CPU_HT; 4.277 - if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && 4.278 - _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0) 4.279 - result |= CPU_MMX; 4.280 - if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0) 4.281 - result |= CPU_3DNOW; 4.282 - if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0) 4.283 - result |= CPU_SSE; 4.284 - if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0) 4.285 - result |= CPU_SSE2; 4.286 - if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0) 4.287 - result |= CPU_SSE3; 4.288 - if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0) 4.289 - result |= CPU_SSSE3; 4.290 - if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) 4.291 - result |= CPU_SSE4A; 4.292 - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0) 4.293 - result |= CPU_SSE4_1; 4.294 - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0) 4.295 - result |= CPU_SSE4_2; 4.296 - return result; 4.297 - } 4.298 - 4.299 - static void get_processor_features(); 4.300 - 4.301 -public: 4.302 - // Offsets for cpuid asm stub 4.303 - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 4.304 - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); } 4.305 - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); } 4.306 - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); } 4.307 - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); } 4.308 - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); } 4.309 - 4.310 - // Initialization 4.311 - static void initialize(); 4.312 - 4.313 - // Asserts 4.314 - static void assert_is_initialized() { 4.315 - assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized"); 4.316 - } 4.317 - 4.318 - // 4.319 - // Processor family: 4.320 - // 3 - 386 4.321 - // 4 - 486 4.322 - // 5 - Pentium 
4.323 - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 4.324 - // Pentium M, Core Solo, Core Duo, Core2 Duo 4.325 - // family 6 model: 9, 13, 14, 15 4.326 - // 0x0f - Pentium 4, Opteron 4.327 - // 4.328 - // Note: The cpu family should be used to select between 4.329 - // instruction sequences which are valid on all Intel 4.330 - // processors. Use the feature test functions below to 4.331 - // determine whether a particular instruction is supported. 4.332 - // 4.333 - static int cpu_family() { return _cpu;} 4.334 - static bool is_P6() { return cpu_family() >= 6; } 4.335 - 4.336 - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 4.337 - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 4.338 - 4.339 - static uint cores_per_cpu() { 4.340 - uint result = 1; 4.341 - if (is_intel()) { 4.342 - result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1); 4.343 - } else if (is_amd()) { 4.344 - result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1); 4.345 - } 4.346 - return result; 4.347 - } 4.348 - 4.349 - static uint threads_per_core() { 4.350 - uint result = 1; 4.351 - if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) { 4.352 - result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / 4.353 - cores_per_cpu(); 4.354 - } 4.355 - return result; 4.356 - } 4.357 - 4.358 - static intx L1_data_cache_line_size() { 4.359 - intx result = 0; 4.360 - if (is_intel()) { 4.361 - result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1); 4.362 - } else if (is_amd()) { 4.363 - result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size; 4.364 - } 4.365 - if (result < 32) // not defined ? 
4.366 - result = 32; // 32 bytes by default on x86 4.367 - return result; 4.368 - } 4.369 - 4.370 - // 4.371 - // Feature identification 4.372 - // 4.373 - static bool supports_cpuid() { return _cpuFeatures != 0; } 4.374 - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 4.375 - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 4.376 - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 4.377 - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 4.378 - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 4.379 - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 4.380 - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 4.381 - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 4.382 - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 4.383 - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 4.384 - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 4.385 - // 4.386 - // AMD features 4.387 - // 4.388 - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 4.389 - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; } 4.390 - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; } 4.391 - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 4.392 - 4.393 - static bool supports_compare_and_exchange() { return true; } 4.394 - 4.395 - static const char* cpu_features() { return _features_str; } 4.396 - 4.397 - static intx allocate_prefetch_distance() { 4.398 - // This method should be called before allocate_prefetch_style(). 4.399 - // 4.400 - // Hardware prefetching (distance/size in bytes): 4.401 - // Pentium 3 - 64 / 32 4.402 - // Pentium 4 - 256 / 128 4.403 - // Athlon - 64 / 32 ???? 
4.404 - // Opteron - 128 / 64 only when 2 sequential cache lines accessed 4.405 - // Core - 128 / 64 4.406 - // 4.407 - // Software prefetching (distance in bytes / instruction with best score): 4.408 - // Pentium 3 - 128 / prefetchnta 4.409 - // Pentium 4 - 512 / prefetchnta 4.410 - // Athlon - 128 / prefetchnta 4.411 - // Opteron - 256 / prefetchnta 4.412 - // Core - 256 / prefetchnta 4.413 - // It will be used only when AllocatePrefetchStyle > 0 4.414 - 4.415 - intx count = AllocatePrefetchDistance; 4.416 - if (count < 0) { // default ? 4.417 - if (is_amd()) { // AMD 4.418 - if (supports_sse2()) 4.419 - count = 256; // Opteron 4.420 - else 4.421 - count = 128; // Athlon 4.422 - } else { // Intel 4.423 - if (supports_sse2()) 4.424 - if (cpu_family() == 6) { 4.425 - count = 256; // Pentium M, Core, Core2 4.426 - } else { 4.427 - count = 512; // Pentium 4 4.428 - } 4.429 - else 4.430 - count = 128; // Pentium 3 (and all other old CPUs) 4.431 - } 4.432 - } 4.433 - return count; 4.434 - } 4.435 - static intx allocate_prefetch_style() { 4.436 - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 4.437 - // Return 0 if AllocatePrefetchDistance was not defined or 4.438 - // prefetch instruction is not supported. 4.439 - return (AllocatePrefetchDistance > 0 && 4.440 - (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0; 4.441 - } 4.442 -};
5.1 --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Thu Feb 19 17:38:53 2009 -0800 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,419 +0,0 @@ 5.4 -/* 5.5 - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. 5.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5.7 - * 5.8 - * This code is free software; you can redistribute it and/or modify it 5.9 - * under the terms of the GNU General Public License version 2 only, as 5.10 - * published by the Free Software Foundation. 5.11 - * 5.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 5.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 5.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 5.15 - * version 2 for more details (a copy is included in the LICENSE file that 5.16 - * accompanied this code). 5.17 - * 5.18 - * You should have received a copy of the GNU General Public License version 5.19 - * 2 along with this work; if not, write to the Free Software Foundation, 5.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 5.21 - * 5.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 5.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 5.24 - * have any questions. 
5.25 - * 5.26 - */ 5.27 - 5.28 -# include "incls/_precompiled.incl" 5.29 -# include "incls/_vm_version_x86_64.cpp.incl" 5.30 - 5.31 -int VM_Version::_cpu; 5.32 -int VM_Version::_model; 5.33 -int VM_Version::_stepping; 5.34 -int VM_Version::_cpuFeatures; 5.35 -const char* VM_Version::_features_str = ""; 5.36 -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 5.37 - 5.38 -static BufferBlob* stub_blob; 5.39 -static const int stub_size = 300; 5.40 - 5.41 -extern "C" { 5.42 - typedef void (*getPsrInfo_stub_t)(void*); 5.43 -} 5.44 -static getPsrInfo_stub_t getPsrInfo_stub = NULL; 5.45 - 5.46 - 5.47 -class VM_Version_StubGenerator: public StubCodeGenerator { 5.48 - public: 5.49 - 5.50 - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 5.51 - 5.52 - address generate_getPsrInfo() { 5.53 - 5.54 - Label std_cpuid1, ext_cpuid1, ext_cpuid5, done; 5.55 - 5.56 - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 5.57 -# define __ _masm-> 5.58 - 5.59 - address start = __ pc(); 5.60 - 5.61 - // 5.62 - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 5.63 - // 5.64 - // rcx and rdx are first and second argument registers on windows 5.65 - 5.66 - __ push(rbp); 5.67 - __ mov(rbp, c_rarg0); // cpuid_info address 5.68 - __ push(rbx); 5.69 - __ push(rsi); 5.70 - 5.71 - // 5.72 - // we have a chip which supports the "cpuid" instruction 5.73 - // 5.74 - __ xorl(rax, rax); 5.75 - __ cpuid(); 5.76 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 5.77 - __ movl(Address(rsi, 0), rax); 5.78 - __ movl(Address(rsi, 4), rbx); 5.79 - __ movl(Address(rsi, 8), rcx); 5.80 - __ movl(Address(rsi,12), rdx); 5.81 - 5.82 - __ cmpl(rax, 3); // Is cpuid(0x4) supported? 
5.83 - __ jccb(Assembler::belowEqual, std_cpuid1); 5.84 - 5.85 - // 5.86 - // cpuid(0x4) Deterministic cache params 5.87 - // 5.88 - __ movl(rax, 4); 5.89 - __ xorl(rcx, rcx); // L1 cache 5.90 - __ cpuid(); 5.91 - __ push(rax); 5.92 - __ andl(rax, 0x1f); // Determine if valid cache parameters used 5.93 - __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 5.94 - __ pop(rax); 5.95 - __ jccb(Assembler::equal, std_cpuid1); 5.96 - 5.97 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 5.98 - __ movl(Address(rsi, 0), rax); 5.99 - __ movl(Address(rsi, 4), rbx); 5.100 - __ movl(Address(rsi, 8), rcx); 5.101 - __ movl(Address(rsi,12), rdx); 5.102 - 5.103 - // 5.104 - // Standard cpuid(0x1) 5.105 - // 5.106 - __ bind(std_cpuid1); 5.107 - __ movl(rax, 1); 5.108 - __ cpuid(); 5.109 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 5.110 - __ movl(Address(rsi, 0), rax); 5.111 - __ movl(Address(rsi, 4), rbx); 5.112 - __ movl(Address(rsi, 8), rcx); 5.113 - __ movl(Address(rsi,12), rdx); 5.114 - 5.115 - __ movl(rax, 0x80000000); 5.116 - __ cpuid(); 5.117 - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 5.118 - __ jcc(Assembler::belowEqual, done); 5.119 - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 5.120 - __ jccb(Assembler::belowEqual, ext_cpuid1); 5.121 - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 
5.122 - __ jccb(Assembler::belowEqual, ext_cpuid5); 5.123 - // 5.124 - // Extended cpuid(0x80000008) 5.125 - // 5.126 - __ movl(rax, 0x80000008); 5.127 - __ cpuid(); 5.128 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 5.129 - __ movl(Address(rsi, 0), rax); 5.130 - __ movl(Address(rsi, 4), rbx); 5.131 - __ movl(Address(rsi, 8), rcx); 5.132 - __ movl(Address(rsi,12), rdx); 5.133 - 5.134 - // 5.135 - // Extended cpuid(0x80000005) 5.136 - // 5.137 - __ bind(ext_cpuid5); 5.138 - __ movl(rax, 0x80000005); 5.139 - __ cpuid(); 5.140 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 5.141 - __ movl(Address(rsi, 0), rax); 5.142 - __ movl(Address(rsi, 4), rbx); 5.143 - __ movl(Address(rsi, 8), rcx); 5.144 - __ movl(Address(rsi,12), rdx); 5.145 - 5.146 - // 5.147 - // Extended cpuid(0x80000001) 5.148 - // 5.149 - __ bind(ext_cpuid1); 5.150 - __ movl(rax, 0x80000001); 5.151 - __ cpuid(); 5.152 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 5.153 - __ movl(Address(rsi, 0), rax); 5.154 - __ movl(Address(rsi, 4), rbx); 5.155 - __ movl(Address(rsi, 8), rcx); 5.156 - __ movl(Address(rsi,12), rdx); 5.157 - 5.158 - // 5.159 - // return 5.160 - // 5.161 - __ bind(done); 5.162 - __ pop(rsi); 5.163 - __ pop(rbx); 5.164 - __ pop(rbp); 5.165 - __ ret(0); 5.166 - 5.167 -# undef __ 5.168 - 5.169 - return start; 5.170 - }; 5.171 -}; 5.172 - 5.173 - 5.174 -void VM_Version::get_processor_features() { 5.175 - 5.176 - _logical_processors_per_package = 1; 5.177 - // Get raw processor info 5.178 - getPsrInfo_stub(&_cpuid_info); 5.179 - assert_is_initialized(); 5.180 - _cpu = extended_cpu_family(); 5.181 - _model = extended_cpu_model(); 5.182 - _stepping = cpu_stepping(); 5.183 - _cpuFeatures = feature_flags(); 5.184 - // Logical processors are only available on P4s and above, 5.185 - // and only if hyperthreading is available. 
5.186 - _logical_processors_per_package = logical_processor_count(); 5.187 - _supports_cx8 = supports_cmpxchg8(); 5.188 - // OS should support SSE for x64 and hardware should support at least SSE2. 5.189 - if (!VM_Version::supports_sse2()) { 5.190 - vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 5.191 - } 5.192 - if (UseSSE < 4) { 5.193 - _cpuFeatures &= ~CPU_SSE4_1; 5.194 - _cpuFeatures &= ~CPU_SSE4_2; 5.195 - } 5.196 - if (UseSSE < 3) { 5.197 - _cpuFeatures &= ~CPU_SSE3; 5.198 - _cpuFeatures &= ~CPU_SSSE3; 5.199 - _cpuFeatures &= ~CPU_SSE4A; 5.200 - } 5.201 - if (UseSSE < 2) 5.202 - _cpuFeatures &= ~CPU_SSE2; 5.203 - if (UseSSE < 1) 5.204 - _cpuFeatures &= ~CPU_SSE; 5.205 - 5.206 - if (logical_processors_per_package() == 1) { 5.207 - // HT processor could be installed on a system which doesn't support HT. 5.208 - _cpuFeatures &= ~CPU_HT; 5.209 - } 5.210 - 5.211 - char buf[256]; 5.212 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 5.213 - cores_per_cpu(), threads_per_core(), 5.214 - cpu_family(), _model, _stepping, 5.215 - (supports_cmov() ? ", cmov" : ""), 5.216 - (supports_cmpxchg8() ? ", cx8" : ""), 5.217 - (supports_fxsr() ? ", fxsr" : ""), 5.218 - (supports_mmx() ? ", mmx" : ""), 5.219 - (supports_sse() ? ", sse" : ""), 5.220 - (supports_sse2() ? ", sse2" : ""), 5.221 - (supports_sse3() ? ", sse3" : ""), 5.222 - (supports_ssse3()? ", ssse3": ""), 5.223 - (supports_sse4_1() ? ", sse4.1" : ""), 5.224 - (supports_sse4_2() ? ", sse4.2" : ""), 5.225 - (supports_mmx_ext() ? ", mmxext" : ""), 5.226 - (supports_3dnow() ? ", 3dnow" : ""), 5.227 - (supports_3dnow2() ? ", 3dnowext" : ""), 5.228 - (supports_sse4a() ? ", sse4a": ""), 5.229 - (supports_ht() ? ", ht": "")); 5.230 - _features_str = strdup(buf); 5.231 - 5.232 - // UseSSE is set to the smaller of what hardware supports and what 5.233 - // the command line requires. 
I.e., you cannot set UseSSE to 2 on 5.234 - // older Pentiums which do not support it. 5.235 - if( UseSSE > 4 ) UseSSE=4; 5.236 - if( UseSSE < 0 ) UseSSE=0; 5.237 - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 5.238 - UseSSE = MIN2((intx)3,UseSSE); 5.239 - if( !supports_sse3() ) // Drop to 2 if no SSE3 support 5.240 - UseSSE = MIN2((intx)2,UseSSE); 5.241 - if( !supports_sse2() ) // Drop to 1 if no SSE2 support 5.242 - UseSSE = MIN2((intx)1,UseSSE); 5.243 - if( !supports_sse () ) // Drop to 0 if no SSE support 5.244 - UseSSE = 0; 5.245 - 5.246 - // On new cpus instructions which update whole XMM register should be used 5.247 - // to prevent partial register stall due to dependencies on high half. 5.248 - // 5.249 - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 5.250 - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 5.251 - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 5.252 - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 5.253 - 5.254 - if( is_amd() ) { // AMD cpus specific settings 5.255 - if( FLAG_IS_DEFAULT(UseAddressNop) ) { 5.256 - // Use it on all AMD cpus starting from Opteron (don't need 5.257 - // a cpu check since only Opteron and new cpus support 64-bits mode). 
5.258 - UseAddressNop = true; 5.259 - } 5.260 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 5.261 - if( supports_sse4a() ) { 5.262 - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 5.263 - } else { 5.264 - UseXmmLoadAndClearUpper = false; 5.265 - } 5.266 - } 5.267 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 5.268 - if( supports_sse4a() ) { 5.269 - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 5.270 - } else { 5.271 - UseXmmRegToRegMoveAll = false; 5.272 - } 5.273 - } 5.274 - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 5.275 - if( supports_sse4a() ) { 5.276 - UseXmmI2F = true; 5.277 - } else { 5.278 - UseXmmI2F = false; 5.279 - } 5.280 - } 5.281 - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 5.282 - if( supports_sse4a() ) { 5.283 - UseXmmI2D = true; 5.284 - } else { 5.285 - UseXmmI2D = false; 5.286 - } 5.287 - } 5.288 - } 5.289 - 5.290 - if( is_intel() ) { // Intel cpus specific settings 5.291 - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 5.292 - UseStoreImmI16 = false; // don't use it on Intel cpus 5.293 - } 5.294 - if( FLAG_IS_DEFAULT(UseAddressNop) ) { 5.295 - // Use it on all Intel cpus starting from PentiumPro 5.296 - // (don't need a cpu check since only new cpus support 64-bits mode). 
5.297 - UseAddressNop = true; 5.298 - } 5.299 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 5.300 - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 5.301 - } 5.302 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 5.303 - if( supports_sse3() ) { 5.304 - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 5.305 - } else { 5.306 - UseXmmRegToRegMoveAll = false; 5.307 - } 5.308 - } 5.309 - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 5.310 -#ifdef COMPILER2 5.311 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 5.312 - // For new Intel cpus do the next optimization: 5.313 - // don't align the beginning of a loop if there are enough instructions 5.314 - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 5.315 - // in current fetch line (OptoLoopAlignment) or the padding 5.316 - // is big (> MaxLoopPad). 5.317 - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 5.318 - // generated NOP instructions. 11 is the largest size of one 5.319 - // address NOP instruction '0F 1F' (see Assembler::nop(i)). 
5.320 - MaxLoopPad = 11; 5.321 - } 5.322 -#endif // COMPILER2 5.323 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 5.324 - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 5.325 - } 5.326 - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus 5.327 - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 5.328 - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 5.329 - } 5.330 - } 5.331 - } 5.332 - } 5.333 - 5.334 - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 5.335 - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 5.336 - 5.337 - // set valid Prefetch instruction 5.338 - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 5.339 - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 5.340 - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 5.341 - 5.342 - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 5.343 - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 5.344 - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 5.345 - 5.346 - // Allocation prefetch settings 5.347 - intx cache_line_size = L1_data_cache_line_size(); 5.348 - if( cache_line_size > AllocatePrefetchStepSize ) 5.349 - AllocatePrefetchStepSize = cache_line_size; 5.350 - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 5.351 - AllocatePrefetchLines = 3; // Optimistic value 5.352 - assert(AllocatePrefetchLines > 0, "invalid value"); 5.353 - if( AllocatePrefetchLines < 1 ) // set valid value in product VM 5.354 - AllocatePrefetchLines = 1; // Conservative value 5.355 - 5.356 - AllocatePrefetchDistance = allocate_prefetch_distance(); 5.357 - AllocatePrefetchStyle = allocate_prefetch_style(); 5.358 - 5.359 - if( AllocatePrefetchStyle == 2 && is_intel() && 5.360 - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 5.361 - AllocatePrefetchDistance = 384; 5.362 - } 5.363 - assert(AllocatePrefetchDistance 
% AllocatePrefetchStepSize == 0, "invalid value"); 5.364 - 5.365 - // Prefetch settings 5.366 - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); 5.367 - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); 5.368 - PrefetchFieldsAhead = prefetch_fields_ahead(); 5.369 - 5.370 -#ifndef PRODUCT 5.371 - if (PrintMiscellaneous && Verbose) { 5.372 - tty->print_cr("Logical CPUs per core: %u", 5.373 - logical_processors_per_package()); 5.374 - tty->print_cr("UseSSE=%d",UseSSE); 5.375 - tty->print("Allocation: "); 5.376 - if (AllocatePrefetchStyle <= 0) { 5.377 - tty->print_cr("no prefetching"); 5.378 - } else { 5.379 - if (AllocatePrefetchInstr == 0) { 5.380 - tty->print("PREFETCHNTA"); 5.381 - } else if (AllocatePrefetchInstr == 1) { 5.382 - tty->print("PREFETCHT0"); 5.383 - } else if (AllocatePrefetchInstr == 2) { 5.384 - tty->print("PREFETCHT2"); 5.385 - } else if (AllocatePrefetchInstr == 3) { 5.386 - tty->print("PREFETCHW"); 5.387 - } 5.388 - if (AllocatePrefetchLines > 1) { 5.389 - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 5.390 - } else { 5.391 - tty->print_cr(" %d, one line", AllocatePrefetchDistance); 5.392 - } 5.393 - } 5.394 - if (PrefetchCopyIntervalInBytes > 0) { 5.395 - tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); 5.396 - } 5.397 - if (PrefetchScanIntervalInBytes > 0) { 5.398 - tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); 5.399 - } 5.400 - if (PrefetchFieldsAhead > 0) { 5.401 - tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); 5.402 - } 5.403 - } 5.404 -#endif // !PRODUCT 5.405 -} 5.406 - 5.407 -void VM_Version::initialize() { 5.408 - ResourceMark rm; 5.409 - // Making this stub must be FIRST use of assembler 5.410 - 5.411 - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 5.412 - if (stub_blob == NULL) { 5.413 - vm_exit_during_initialization("Unable to 
allocate getPsrInfo_stub"); 5.414 - } 5.415 - CodeBuffer c(stub_blob->instructions_begin(), 5.416 - stub_blob->instructions_size()); 5.417 - VM_Version_StubGenerator g(&c); 5.418 - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 5.419 - g.generate_getPsrInfo()); 5.420 - 5.421 - get_processor_features(); 5.422 -}
6.1 --- a/src/cpu/x86/vm/vm_version_x86_64.hpp Thu Feb 19 17:38:53 2009 -0800 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,449 +0,0 @@ 6.4 -/* 6.5 - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. 6.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6.7 - * 6.8 - * This code is free software; you can redistribute it and/or modify it 6.9 - * under the terms of the GNU General Public License version 2 only, as 6.10 - * published by the Free Software Foundation. 6.11 - * 6.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 6.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 6.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 6.15 - * version 2 for more details (a copy is included in the LICENSE file that 6.16 - * accompanied this code). 6.17 - * 6.18 - * You should have received a copy of the GNU General Public License version 6.19 - * 2 along with this work; if not, write to the Free Software Foundation, 6.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 6.21 - * 6.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 6.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 6.24 - * have any questions. 6.25 - * 6.26 - */ 6.27 - 6.28 -class VM_Version : public Abstract_VM_Version { 6.29 -public: 6.30 - // cpuid result register layouts. These are all unions of a uint32_t 6.31 - // (in case anyone wants access to the register as a whole) and a bitfield. 
6.32 - 6.33 - union StdCpuid1Eax { 6.34 - uint32_t value; 6.35 - struct { 6.36 - uint32_t stepping : 4, 6.37 - model : 4, 6.38 - family : 4, 6.39 - proc_type : 2, 6.40 - : 2, 6.41 - ext_model : 4, 6.42 - ext_family : 8, 6.43 - : 4; 6.44 - } bits; 6.45 - }; 6.46 - 6.47 - union StdCpuid1Ebx { // example, unused 6.48 - uint32_t value; 6.49 - struct { 6.50 - uint32_t brand_id : 8, 6.51 - clflush_size : 8, 6.52 - threads_per_cpu : 8, 6.53 - apic_id : 8; 6.54 - } bits; 6.55 - }; 6.56 - 6.57 - union StdCpuid1Ecx { 6.58 - uint32_t value; 6.59 - struct { 6.60 - uint32_t sse3 : 1, 6.61 - : 2, 6.62 - monitor : 1, 6.63 - : 1, 6.64 - vmx : 1, 6.65 - : 1, 6.66 - est : 1, 6.67 - : 1, 6.68 - ssse3 : 1, 6.69 - cid : 1, 6.70 - : 2, 6.71 - cmpxchg16: 1, 6.72 - : 4, 6.73 - dca : 1, 6.74 - sse4_1 : 1, 6.75 - sse4_2 : 1, 6.76 - : 11; 6.77 - } bits; 6.78 - }; 6.79 - 6.80 - union StdCpuid1Edx { 6.81 - uint32_t value; 6.82 - struct { 6.83 - uint32_t : 4, 6.84 - tsc : 1, 6.85 - : 3, 6.86 - cmpxchg8 : 1, 6.87 - : 6, 6.88 - cmov : 1, 6.89 - : 7, 6.90 - mmx : 1, 6.91 - fxsr : 1, 6.92 - sse : 1, 6.93 - sse2 : 1, 6.94 - : 1, 6.95 - ht : 1, 6.96 - : 3; 6.97 - } bits; 6.98 - }; 6.99 - 6.100 - union DcpCpuid4Eax { 6.101 - uint32_t value; 6.102 - struct { 6.103 - uint32_t cache_type : 5, 6.104 - : 21, 6.105 - cores_per_cpu : 6; 6.106 - } bits; 6.107 - }; 6.108 - 6.109 - union DcpCpuid4Ebx { 6.110 - uint32_t value; 6.111 - struct { 6.112 - uint32_t L1_line_size : 12, 6.113 - partitions : 10, 6.114 - associativity : 10; 6.115 - } bits; 6.116 - }; 6.117 - 6.118 - union ExtCpuid1Edx { 6.119 - uint32_t value; 6.120 - struct { 6.121 - uint32_t : 22, 6.122 - mmx_amd : 1, 6.123 - mmx : 1, 6.124 - fxsr : 1, 6.125 - : 4, 6.126 - long_mode : 1, 6.127 - tdnow2 : 1, 6.128 - tdnow : 1; 6.129 - } bits; 6.130 - }; 6.131 - 6.132 - union ExtCpuid1Ecx { 6.133 - uint32_t value; 6.134 - struct { 6.135 - uint32_t LahfSahf : 1, 6.136 - CmpLegacy : 1, 6.137 - : 4, 6.138 - abm : 1, 6.139 - sse4a : 1, 6.140 - misalignsse : 
1, 6.141 - prefetchw : 1, 6.142 - : 22; 6.143 - } bits; 6.144 - }; 6.145 - 6.146 - union ExtCpuid5Ex { 6.147 - uint32_t value; 6.148 - struct { 6.149 - uint32_t L1_line_size : 8, 6.150 - L1_tag_lines : 8, 6.151 - L1_assoc : 8, 6.152 - L1_size : 8; 6.153 - } bits; 6.154 - }; 6.155 - 6.156 - union ExtCpuid8Ecx { 6.157 - uint32_t value; 6.158 - struct { 6.159 - uint32_t cores_per_cpu : 8, 6.160 - : 24; 6.161 - } bits; 6.162 - }; 6.163 - 6.164 -protected: 6.165 - static int _cpu; 6.166 - static int _model; 6.167 - static int _stepping; 6.168 - static int _cpuFeatures; // features returned by the "cpuid" instruction 6.169 - // 0 if this instruction is not available 6.170 - static const char* _features_str; 6.171 - 6.172 - enum { 6.173 - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) 6.174 - CPU_CMOV = (1 << 1), 6.175 - CPU_FXSR = (1 << 2), 6.176 - CPU_HT = (1 << 3), 6.177 - CPU_MMX = (1 << 4), 6.178 - CPU_3DNOW= (1 << 5), 6.179 - CPU_SSE = (1 << 6), 6.180 - CPU_SSE2 = (1 << 7), 6.181 - CPU_SSE3 = (1 << 8), 6.182 - CPU_SSSE3= (1 << 9), 6.183 - CPU_SSE4A= (1 <<10), 6.184 - CPU_SSE4_1 = (1 << 11), 6.185 - CPU_SSE4_2 = (1 << 12) 6.186 - } cpuFeatureFlags; 6.187 - 6.188 - // cpuid information block. All info derived from executing cpuid with 6.189 - // various function numbers is stored here. Intel and AMD info is 6.190 - // merged in this block: accessor methods disentangle it. 6.191 - // 6.192 - // The info block is laid out in subblocks of 4 dwords corresponding to 6.193 - // eax, ebx, ecx and edx, whether or not they contain anything useful. 
6.194 - struct CpuidInfo { 6.195 - // cpuid function 0 6.196 - uint32_t std_max_function; 6.197 - uint32_t std_vendor_name_0; 6.198 - uint32_t std_vendor_name_1; 6.199 - uint32_t std_vendor_name_2; 6.200 - 6.201 - // cpuid function 1 6.202 - StdCpuid1Eax std_cpuid1_eax; 6.203 - StdCpuid1Ebx std_cpuid1_ebx; 6.204 - StdCpuid1Ecx std_cpuid1_ecx; 6.205 - StdCpuid1Edx std_cpuid1_edx; 6.206 - 6.207 - // cpuid function 4 (deterministic cache parameters) 6.208 - DcpCpuid4Eax dcp_cpuid4_eax; 6.209 - DcpCpuid4Ebx dcp_cpuid4_ebx; 6.210 - uint32_t dcp_cpuid4_ecx; // unused currently 6.211 - uint32_t dcp_cpuid4_edx; // unused currently 6.212 - 6.213 - // cpuid function 0x80000000 // example, unused 6.214 - uint32_t ext_max_function; 6.215 - uint32_t ext_vendor_name_0; 6.216 - uint32_t ext_vendor_name_1; 6.217 - uint32_t ext_vendor_name_2; 6.218 - 6.219 - // cpuid function 0x80000001 6.220 - uint32_t ext_cpuid1_eax; // reserved 6.221 - uint32_t ext_cpuid1_ebx; // reserved 6.222 - ExtCpuid1Ecx ext_cpuid1_ecx; 6.223 - ExtCpuid1Edx ext_cpuid1_edx; 6.224 - 6.225 - // cpuid functions 0x80000002 thru 0x80000004: example, unused 6.226 - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; 6.227 - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; 6.228 - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; 6.229 - 6.230 - // cpuid function 0x80000005 //AMD L1, Intel reserved 6.231 - uint32_t ext_cpuid5_eax; // unused currently 6.232 - uint32_t ext_cpuid5_ebx; // reserved 6.233 - ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) 6.234 - ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) 6.235 - 6.236 - // cpuid function 0x80000008 6.237 - uint32_t ext_cpuid8_eax; // unused currently 6.238 - uint32_t ext_cpuid8_ebx; // reserved 6.239 - ExtCpuid8Ecx ext_cpuid8_ecx; 6.240 - uint32_t ext_cpuid8_edx; // reserved 6.241 - }; 6.242 - 6.243 - // The actual cpuid info block 6.244 - static CpuidInfo _cpuid_info; 6.245 - 6.246 - // Extractors and 
predicates 6.247 - static uint32_t extended_cpu_family() { 6.248 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; 6.249 - result += _cpuid_info.std_cpuid1_eax.bits.ext_family; 6.250 - return result; 6.251 - } 6.252 - static uint32_t extended_cpu_model() { 6.253 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; 6.254 - result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; 6.255 - return result; 6.256 - } 6.257 - static uint32_t cpu_stepping() { 6.258 - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; 6.259 - return result; 6.260 - } 6.261 - static uint logical_processor_count() { 6.262 - uint result = threads_per_core(); 6.263 - return result; 6.264 - } 6.265 - static uint32_t feature_flags() { 6.266 - uint32_t result = 0; 6.267 - if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 6.268 - result |= CPU_CX8; 6.269 - if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 6.270 - result |= CPU_CMOV; 6.271 - if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && 6.272 - _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) 6.273 - result |= CPU_FXSR; 6.274 - // HT flag is set for multi-core processors also. 
6.275 - if (threads_per_core() > 1) 6.276 - result |= CPU_HT; 6.277 - if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && 6.278 - _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) 6.279 - result |= CPU_MMX; 6.280 - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) 6.281 - result |= CPU_3DNOW; 6.282 - if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 6.283 - result |= CPU_SSE; 6.284 - if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 6.285 - result |= CPU_SSE2; 6.286 - if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 6.287 - result |= CPU_SSE3; 6.288 - if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 6.289 - result |= CPU_SSSE3; 6.290 - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 6.291 - result |= CPU_SSE4A; 6.292 - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 6.293 - result |= CPU_SSE4_1; 6.294 - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 6.295 - result |= CPU_SSE4_2; 6.296 - return result; 6.297 - } 6.298 - 6.299 - static void get_processor_features(); 6.300 - 6.301 -public: 6.302 - // Offsets for cpuid asm stub 6.303 - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } 6.304 - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } 6.305 - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } 6.306 - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 6.307 - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 6.308 - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 6.309 - 6.310 - // Initialization 6.311 - static void initialize(); 6.312 - 6.313 - // Asserts 6.314 - static void assert_is_initialized() { 6.315 - assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); 6.316 - } 6.317 - 6.318 - // 6.319 - // Processor family: 6.320 - // 3 - 386 6.321 - // 4 - 486 6.322 - // 5 - Pentium 
6.323 - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, 6.324 - // Pentium M, Core Solo, Core Duo, Core2 Duo 6.325 - // family 6 model: 9, 13, 14, 15 6.326 - // 0x0f - Pentium 4, Opteron 6.327 - // 6.328 - // Note: The cpu family should be used to select between 6.329 - // instruction sequences which are valid on all Intel 6.330 - // processors. Use the feature test functions below to 6.331 - // determine whether a particular instruction is supported. 6.332 - // 6.333 - static int cpu_family() { return _cpu;} 6.334 - static bool is_P6() { return cpu_family() >= 6; } 6.335 - 6.336 - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' 6.337 - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' 6.338 - 6.339 - static uint cores_per_cpu() { 6.340 - uint result = 1; 6.341 - if (is_intel()) { 6.342 - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 6.343 - } else if (is_amd()) { 6.344 - result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 6.345 - } 6.346 - return result; 6.347 - } 6.348 - 6.349 - static uint threads_per_core() { 6.350 - uint result = 1; 6.351 - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 6.352 - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 6.353 - cores_per_cpu(); 6.354 - } 6.355 - return result; 6.356 - } 6.357 - 6.358 - static intx L1_data_cache_line_size() { 6.359 - intx result = 0; 6.360 - if (is_intel()) { 6.361 - result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 6.362 - } else if (is_amd()) { 6.363 - result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 6.364 - } 6.365 - if (result < 32) // not defined ? 
6.366 - result = 32; // 32 bytes by default for other x64 6.367 - return result; 6.368 - } 6.369 - 6.370 - // 6.371 - // Feature identification 6.372 - // 6.373 - static bool supports_cpuid() { return _cpuFeatures != 0; } 6.374 - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } 6.375 - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } 6.376 - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } 6.377 - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } 6.378 - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } 6.379 - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } 6.380 - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } 6.381 - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } 6.382 - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 6.383 - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 6.384 - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 6.385 - // 6.386 - // AMD features 6.387 - // 6.388 - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } 6.389 - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } 6.390 - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } 6.391 - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } 6.392 - 6.393 - static bool supports_compare_and_exchange() { return true; } 6.394 - 6.395 - static const char* cpu_features() { return _features_str; } 6.396 - 6.397 - static intx allocate_prefetch_distance() { 6.398 - // This method should be called before allocate_prefetch_style(). 
6.399 - // 6.400 - // Hardware prefetching (distance/size in bytes): 6.401 - // Pentium 4 - 256 / 128 6.402 - // Opteron - 128 / 64 only when 2 sequential cache lines accessed 6.403 - // Core - 128 / 64 6.404 - // 6.405 - // Software prefetching (distance in bytes / instruction with best score): 6.406 - // Pentium 4 - 512 / prefetchnta 6.407 - // Opteron - 256 / prefetchnta 6.408 - // Core - 256 / prefetchnta 6.409 - // It will be used only when AllocatePrefetchStyle > 0 6.410 - 6.411 - intx count = AllocatePrefetchDistance; 6.412 - if (count < 0) { // default ? 6.413 - if (is_amd()) { // AMD 6.414 - count = 256; // Opteron 6.415 - } else { // Intel 6.416 - if (cpu_family() == 6) { 6.417 - count = 256;// Pentium M, Core, Core2 6.418 - } else { 6.419 - count = 512;// Pentium 4 6.420 - } 6.421 - } 6.422 - } 6.423 - return count; 6.424 - } 6.425 - static intx allocate_prefetch_style() { 6.426 - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); 6.427 - // Return 0 if AllocatePrefetchDistance was not defined. 6.428 - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; 6.429 - } 6.430 - 6.431 - // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 6.432 - // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 6.433 - // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 6.434 - // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 6.435 - 6.436 - // gc copy/scan is disabled if prefetchw isn't supported, because 6.437 - // Prefetch::write emits an inlined prefetchw on Linux. 6.438 - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 6.439 - // The used prefetcht0 instruction works for both amd64 and em64t. 6.440 - static intx prefetch_copy_interval_in_bytes() { 6.441 - intx interval = PrefetchCopyIntervalInBytes; 6.442 - return interval >= 0 ? 
interval : 576; 6.443 - } 6.444 - static intx prefetch_scan_interval_in_bytes() { 6.445 - intx interval = PrefetchScanIntervalInBytes; 6.446 - return interval >= 0 ? interval : 576; 6.447 - } 6.448 - static intx prefetch_fields_ahead() { 6.449 - intx count = PrefetchFieldsAhead; 6.450 - return count >= 0 ? count : 1; 6.451 - } 6.452 -};
7.1 --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Feb 19 17:38:53 2009 -0800 7.2 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Mon Feb 23 12:02:30 2009 -0800 7.3 @@ -1,5 +1,5 @@ 7.4 /* 7.5 - * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved. 7.6 + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. 7.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 7.8 * 7.9 * This code is free software; you can redistribute it and/or modify it 7.10 @@ -299,14 +299,18 @@ 7.11 7.12 } 7.13 7.14 +#endif // AMD64 7.15 + 7.16 bool os::supports_sse() { 7.17 +#ifdef AMD64 7.18 + return true; 7.19 +#else 7.20 if (sse_status == SSE_UNKNOWN) 7.21 check_for_sse_support(); 7.22 return sse_status == SSE_SUPPORTED; 7.23 +#endif // AMD64 7.24 } 7.25 7.26 -#endif // AMD64 7.27 - 7.28 bool os::is_allocatable(size_t bytes) { 7.29 #ifdef AMD64 7.30 return true;
8.1 --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Thu Feb 19 17:38:53 2009 -0800 8.2 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Mon Feb 23 12:02:30 2009 -0800 8.3 @@ -1,5 +1,5 @@ 8.4 /* 8.5 - * Copyright 1999-2004 Sun Microsystems, Inc. All Rights Reserved. 8.6 + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. 8.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 8.8 * 8.9 * This code is free software; you can redistribute it and/or modify it 8.10 @@ -41,8 +41,9 @@ 8.11 static void fence_bootstrap (); 8.12 8.13 static void setup_fpu(); 8.14 +#endif // AMD64 8.15 + 8.16 static bool supports_sse(); 8.17 -#endif // AMD64 8.18 8.19 static bool is_allocatable(size_t bytes); 8.20
9.1 --- a/src/share/vm/includeDB_core Thu Feb 19 17:38:53 2009 -0800 9.2 +++ b/src/share/vm/includeDB_core Mon Feb 23 12:02:30 2009 -0800 9.3 @@ -1,5 +1,5 @@ 9.4 // 9.5 -// Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 9.6 +// Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 9.7 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 9.8 // 9.9 // This code is free software; you can redistribute it and/or modify it 9.10 @@ -176,7 +176,7 @@ 9.11 arguments.cpp oop.inline.hpp 9.12 arguments.cpp os_<os_family>.inline.hpp 9.13 arguments.cpp universe.inline.hpp 9.14 -arguments.cpp vm_version_<arch_model>.hpp 9.15 +arguments.cpp vm_version_<arch>.hpp 9.16 9.17 arguments.hpp java.hpp 9.18 arguments.hpp perfData.hpp 9.19 @@ -241,7 +241,7 @@ 9.20 assembler.hpp register_<arch>.hpp 9.21 assembler.hpp relocInfo.hpp 9.22 assembler.hpp top.hpp 9.23 -assembler.hpp vm_version_<arch_model>.hpp 9.24 +assembler.hpp vm_version_<arch>.hpp 9.25 9.26 assembler.inline.hpp assembler.hpp 9.27 assembler.inline.hpp codeBuffer.hpp 9.28 @@ -280,7 +280,7 @@ 9.29 9.30 atomic_<os_arch>.inline.hpp atomic.hpp 9.31 atomic_<os_arch>.inline.hpp os.hpp 9.32 -atomic_<os_arch>.inline.hpp vm_version_<arch_model>.hpp 9.33 +atomic_<os_arch>.inline.hpp vm_version_<arch>.hpp 9.34 9.35 // attachListener is jck optional, put cpp deps in includeDB_features 9.36 9.37 @@ -2176,7 +2176,7 @@ 9.38 interpreterRuntime.cpp threadCritical.hpp 9.39 interpreterRuntime.cpp universe.inline.hpp 9.40 interpreterRuntime.cpp vmSymbols.hpp 9.41 -interpreterRuntime.cpp vm_version_<arch_model>.hpp 9.42 +interpreterRuntime.cpp vm_version_<arch>.hpp 9.43 9.44 interpreterRuntime.hpp bytecode.hpp 9.45 interpreterRuntime.hpp frame.inline.hpp 9.46 @@ -2279,7 +2279,7 @@ 9.47 java.cpp universe.hpp 9.48 java.cpp vmError.hpp 9.49 java.cpp vm_operations.hpp 9.50 -java.cpp vm_version_<arch_model>.hpp 9.51 +java.cpp vm_version_<arch>.hpp 9.52 java.cpp vtune.hpp 9.53 9.54 java.hpp os.hpp 9.55 @@ -3485,7 
+3485,7 @@ 9.56 register_<arch>.cpp register_<arch>.hpp 9.57 9.58 register_<arch>.hpp register.hpp 9.59 -register_<arch>.hpp vm_version_<arch_model>.hpp 9.60 +register_<arch>.hpp vm_version_<arch>.hpp 9.61 9.62 registerMap.hpp globalDefinitions.hpp 9.63 registerMap.hpp register_<arch>.hpp 9.64 @@ -3835,7 +3835,7 @@ 9.65 statSampler.cpp statSampler.hpp 9.66 statSampler.cpp systemDictionary.hpp 9.67 statSampler.cpp vmSymbols.hpp 9.68 -statSampler.cpp vm_version_<arch_model>.hpp 9.69 +statSampler.cpp vm_version_<arch>.hpp 9.70 9.71 statSampler.hpp perfData.hpp 9.72 statSampler.hpp task.hpp 9.73 @@ -4579,22 +4579,22 @@ 9.74 vm_version.cpp arguments.hpp 9.75 vm_version.cpp oop.inline.hpp 9.76 vm_version.cpp universe.hpp 9.77 -vm_version.cpp vm_version_<arch_model>.hpp 9.78 +vm_version.cpp vm_version_<arch>.hpp 9.79 9.80 vm_version.hpp allocation.hpp 9.81 vm_version.hpp ostream.hpp 9.82 9.83 -vm_version_<arch_model>.cpp assembler_<arch>.inline.hpp 9.84 -vm_version_<arch_model>.cpp java.hpp 9.85 -vm_version_<arch_model>.cpp os_<os_family>.inline.hpp 9.86 -vm_version_<arch_model>.cpp resourceArea.hpp 9.87 -vm_version_<arch_model>.cpp stubCodeGenerator.hpp 9.88 -vm_version_<arch_model>.cpp vm_version_<arch_model>.hpp 9.89 - 9.90 -vm_version_<arch_model>.hpp globals_extension.hpp 9.91 -vm_version_<arch_model>.hpp vm_version.hpp 9.92 - 9.93 -vm_version_<os_arch>.cpp vm_version_<arch_model>.hpp 9.94 +vm_version_<arch>.cpp assembler_<arch>.inline.hpp 9.95 +vm_version_<arch>.cpp java.hpp 9.96 +vm_version_<arch>.cpp os_<os_family>.inline.hpp 9.97 +vm_version_<arch>.cpp resourceArea.hpp 9.98 +vm_version_<arch>.cpp stubCodeGenerator.hpp 9.99 +vm_version_<arch>.cpp vm_version_<arch>.hpp 9.100 + 9.101 +vm_version_<arch>.hpp globals_extension.hpp 9.102 +vm_version_<arch>.hpp vm_version.hpp 9.103 + 9.104 +vm_version_<os_arch>.cpp vm_version_<arch>.hpp 9.105 9.106 vmreg.cpp assembler.hpp 9.107 vmreg.cpp vmreg.hpp