1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/cpu/x86/vm/vm_version_x86_32.cpp Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,443 @@ 1.4 +/* 1.5 + * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 1.25 + * 1.26 + */ 1.27 + 1.28 +# include "incls/_precompiled.incl" 1.29 +# include "incls/_vm_version_x86_32.cpp.incl" 1.30 + 1.31 + 1.32 +int VM_Version::_cpu; 1.33 +int VM_Version::_model; 1.34 +int VM_Version::_stepping; 1.35 +int VM_Version::_cpuFeatures; 1.36 +const char* VM_Version::_features_str = ""; 1.37 +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 1.38 + 1.39 +static BufferBlob* stub_blob; 1.40 +static const int stub_size = 300; 1.41 + 1.42 +extern "C" { 1.43 + typedef void (*getPsrInfo_stub_t)(void*); 1.44 +} 1.45 +static getPsrInfo_stub_t getPsrInfo_stub = NULL; 1.46 + 1.47 + 1.48 +class VM_Version_StubGenerator: public StubCodeGenerator { 1.49 + public: 1.50 + 1.51 + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 1.52 + 1.53 + address generate_getPsrInfo() { 1.54 + // Flags to test CPU type. 1.55 + const uint32_t EFL_AC = 0x40000; 1.56 + const uint32_t EFL_ID = 0x200000; 1.57 + // Values for when we don't have a CPUID instruction. 1.58 + const int CPU_FAMILY_SHIFT = 8; 1.59 + const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 1.60 + const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 1.61 + 1.62 + Label detect_486, cpu486, detect_586, std_cpuid1; 1.63 + Label ext_cpuid1, ext_cpuid5, done; 1.64 + 1.65 + StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 1.66 +# define __ _masm-> 1.67 + 1.68 + address start = __ pc(); 1.69 + 1.70 + // 1.71 + // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 1.72 + // 1.73 + __ pushl(rbp); 1.74 + __ movl(rbp, Address(rsp, 8)); // cpuid_info address 1.75 + __ pushl(rbx); 1.76 + __ pushl(rsi); 1.77 + __ pushfd(); // preserve rbx, and flags 1.78 + __ popl(rax); 1.79 + __ pushl(rax); 1.80 + __ movl(rcx, rax); 1.81 + // 1.82 + // if we are unable to change the AC flag, we have a 386 1.83 + // 1.84 + __ xorl(rax, EFL_AC); 1.85 + __ pushl(rax); 1.86 + __ popfd(); 1.87 + __ pushfd(); 1.88 + __ popl(rax); 1.89 + __ cmpl(rax, rcx); 1.90 + __ jccb(Assembler::notEqual, detect_486); 1.91 + 1.92 + __ movl(rax, CPU_FAMILY_386); 1.93 + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 1.94 + __ jmp(done); 1.95 + 1.96 + // 1.97 + // If we are unable to change the ID flag, we have a 486 which does 1.98 + // not support the "cpuid" instruction. 1.99 + // 1.100 + __ bind(detect_486); 1.101 + __ movl(rax, rcx); 1.102 + __ xorl(rax, EFL_ID); 1.103 + __ pushl(rax); 1.104 + __ popfd(); 1.105 + __ pushfd(); 1.106 + __ popl(rax); 1.107 + __ cmpl(rcx, rax); 1.108 + __ jccb(Assembler::notEqual, detect_586); 1.109 + 1.110 + __ bind(cpu486); 1.111 + __ movl(rax, CPU_FAMILY_486); 1.112 + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 1.113 + __ jmp(done); 1.114 + 1.115 + // 1.116 + // at this point, we have a chip which supports the "cpuid" instruction 1.117 + // 1.118 + __ bind(detect_586); 1.119 + __ xorl(rax, rax); 1.120 + __ cpuid(); 1.121 + __ orl(rax, rax); 1.122 + __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 1.123 + // value of at least 1, we give up and 1.124 + // assume a 486 1.125 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 1.126 + __ movl(Address(rsi, 0), rax); 1.127 + __ movl(Address(rsi, 4), rbx); 1.128 + __ movl(Address(rsi, 8), rcx); 1.129 + __ movl(Address(rsi,12), rdx); 1.130 + 1.131 + __ cmpl(rax, 3); // Is cpuid(0x4) supported? 1.132 + __ jccb(Assembler::belowEqual, std_cpuid1); 1.133 + 1.134 + // 1.135 + // cpuid(0x4) Deterministic cache params 1.136 + // 1.137 + __ movl(rax, 4); // and rcx already set to 0x0 1.138 + __ xorl(rcx, rcx); 1.139 + __ cpuid(); 1.140 + __ pushl(rax); 1.141 + __ andl(rax, 0x1f); // Determine if valid cache parameters used 1.142 + __ orl(rax, rax); // rax,[4:0] == 0 indicates invalid cache 1.143 + __ popl(rax); 1.144 + __ jccb(Assembler::equal, std_cpuid1); 1.145 + 1.146 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 1.147 + __ movl(Address(rsi, 0), rax); 1.148 + __ movl(Address(rsi, 4), rbx); 1.149 + __ movl(Address(rsi, 8), rcx); 1.150 + __ movl(Address(rsi,12), rdx); 1.151 + 1.152 + // 1.153 + // Standard cpuid(0x1) 1.154 + // 1.155 + __ bind(std_cpuid1); 1.156 + __ movl(rax, 1); 1.157 + __ cpuid(); 1.158 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 1.159 + __ movl(Address(rsi, 0), rax); 1.160 + __ movl(Address(rsi, 4), rbx); 1.161 + __ movl(Address(rsi, 8), rcx); 1.162 + __ movl(Address(rsi,12), rdx); 1.163 + 1.164 + __ movl(rax, 0x80000000); 1.165 + __ cpuid(); 1.166 + __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 1.167 + __ jcc(Assembler::belowEqual, done); 1.168 + __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 1.169 + __ jccb(Assembler::belowEqual, ext_cpuid1); 1.170 + __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 1.171 + __ jccb(Assembler::belowEqual, ext_cpuid5); 1.172 + // 1.173 + // Extended cpuid(0x80000008) 1.174 + // 1.175 + __ movl(rax, 0x80000008); 1.176 + __ cpuid(); 1.177 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 1.178 + __ movl(Address(rsi, 0), rax); 1.179 + __ movl(Address(rsi, 4), rbx); 1.180 + __ movl(Address(rsi, 8), rcx); 1.181 + __ movl(Address(rsi,12), rdx); 1.182 + 1.183 + // 1.184 + // Extended cpuid(0x80000005) 1.185 + // 1.186 + __ bind(ext_cpuid5); 1.187 + __ movl(rax, 0x80000005); 1.188 + __ cpuid(); 1.189 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 1.190 + __ movl(Address(rsi, 0), rax); 1.191 + __ movl(Address(rsi, 4), rbx); 1.192 + __ movl(Address(rsi, 8), rcx); 1.193 + __ movl(Address(rsi,12), rdx); 1.194 + 1.195 + // 1.196 + // Extended cpuid(0x80000001) 1.197 + // 1.198 + __ bind(ext_cpuid1); 1.199 + __ movl(rax, 0x80000001); 1.200 + __ cpuid(); 1.201 + __ leal(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 1.202 + __ movl(Address(rsi, 0), rax); 1.203 + __ movl(Address(rsi, 4), rbx); 1.204 + __ movl(Address(rsi, 8), rcx); 1.205 + __ movl(Address(rsi,12), rdx); 1.206 + 1.207 + // 1.208 + // return 1.209 + // 1.210 + __ bind(done); 1.211 + __ popfd(); 1.212 + __ popl(rsi); 1.213 + __ popl(rbx); 1.214 + __ popl(rbp); 1.215 + __ ret(0); 1.216 + 1.217 +# undef __ 1.218 + 1.219 + return start; 1.220 + }; 1.221 +}; 1.222 + 1.223 + 1.224 +void VM_Version::get_processor_features() { 1.225 + 1.226 + _cpu = 4; // 486 by default 1.227 + _model = 0; 1.228 + _stepping = 0; 1.229 + _cpuFeatures = 0; 1.230 + _logical_processors_per_package = 1; 1.231 + if (!Use486InstrsOnly) { 1.232 + // Get raw processor info 1.233 + getPsrInfo_stub(&_cpuid_info); 1.234 + assert_is_initialized(); 1.235 + _cpu = extended_cpu_family(); 1.236 + _model = extended_cpu_model(); 1.237 + _stepping = cpu_stepping(); 1.238 + if (cpu_family() > 4) { // it supports CPUID 1.239 + _cpuFeatures = feature_flags(); 1.240 + // Logical processors are only available on P4s and above, 1.241 + // and only if hyperthreading is available. 1.242 + _logical_processors_per_package = logical_processor_count(); 1.243 + } 1.244 + } 1.245 + _supports_cx8 = supports_cmpxchg8(); 1.246 + // if the OS doesn't support SSE, we can't use this feature even if the HW does 1.247 + if( !os::supports_sse()) 1.248 + _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4|CPU_SSE4A); 1.249 + if (UseSSE < 4) 1.250 + _cpuFeatures &= ~CPU_SSE4; 1.251 + if (UseSSE < 3) { 1.252 + _cpuFeatures &= ~CPU_SSE3; 1.253 + _cpuFeatures &= ~CPU_SSSE3; 1.254 + _cpuFeatures &= ~CPU_SSE4A; 1.255 + } 1.256 + if (UseSSE < 2) 1.257 + _cpuFeatures &= ~CPU_SSE2; 1.258 + if (UseSSE < 1) 1.259 + _cpuFeatures &= ~CPU_SSE; 1.260 + 1.261 + if (logical_processors_per_package() == 1) { 1.262 + // HT processor could be installed on a system which doesn't support HT. 1.263 + _cpuFeatures &= ~CPU_HT; 1.264 + } 1.265 + 1.266 + char buf[256]; 1.267 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.268 + cores_per_cpu(), threads_per_core(), 1.269 + cpu_family(), _model, _stepping, 1.270 + (supports_cmov() ? ", cmov" : ""), 1.271 + (supports_cmpxchg8() ? ", cx8" : ""), 1.272 + (supports_fxsr() ? ", fxsr" : ""), 1.273 + (supports_mmx() ? ", mmx" : ""), 1.274 + (supports_sse() ? ", sse" : ""), 1.275 + (supports_sse2() ? ", sse2" : ""), 1.276 + (supports_sse3() ? ", sse3" : ""), 1.277 + (supports_ssse3()? ", ssse3": ""), 1.278 + (supports_sse4() ? ", sse4" : ""), 1.279 + (supports_mmx_ext() ? ", mmxext" : ""), 1.280 + (supports_3dnow() ? ", 3dnow" : ""), 1.281 + (supports_3dnow2() ? ", 3dnowext" : ""), 1.282 + (supports_sse4a() ? ", sse4a": ""), 1.283 + (supports_ht() ? ", ht": "")); 1.284 + _features_str = strdup(buf); 1.285 + 1.286 + // UseSSE is set to the smaller of what hardware supports and what 1.287 + // the command line requires. I.e., you cannot set UseSSE to 2 on 1.288 + // older Pentiums which do not support it. 1.289 + if( UseSSE > 4 ) UseSSE=4; 1.290 + if( UseSSE < 0 ) UseSSE=0; 1.291 + if( !supports_sse4() ) // Drop to 3 if no SSE4 support 1.292 + UseSSE = MIN2((intx)3,UseSSE); 1.293 + if( !supports_sse3() ) // Drop to 2 if no SSE3 support 1.294 + UseSSE = MIN2((intx)2,UseSSE); 1.295 + if( !supports_sse2() ) // Drop to 1 if no SSE2 support 1.296 + UseSSE = MIN2((intx)1,UseSSE); 1.297 + if( !supports_sse () ) // Drop to 0 if no SSE support 1.298 + UseSSE = 0; 1.299 + 1.300 + // On new cpus instructions which update whole XMM register should be used 1.301 + // to prevent partial register stall due to dependencies on high half. 1.302 + // 1.303 + // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1.304 + // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1.305 + // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1.306 + // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1.307 + 1.308 + if( is_amd() ) { // AMD cpus specific settings 1.309 + if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { 1.310 + // Use it on new AMD cpus starting from Opteron. 1.311 + UseAddressNop = true; 1.312 + } 1.313 + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.314 + if( supports_sse4a() ) { 1.315 + UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1.316 + } else { 1.317 + UseXmmLoadAndClearUpper = false; 1.318 + } 1.319 + } 1.320 + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.321 + if( supports_sse4a() ) { 1.322 + UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1.323 + } else { 1.324 + UseXmmRegToRegMoveAll = false; 1.325 + } 1.326 + } 1.327 + } 1.328 + 1.329 + if( is_intel() ) { // Intel cpus specific settings 1.330 + if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 1.331 + UseStoreImmI16 = false; // don't use it on Intel cpus 1.332 + } 1.333 + if( cpu_family() == 6 || cpu_family() == 15 ) { 1.334 + if( FLAG_IS_DEFAULT(UseAddressNop) ) { 1.335 + // Use it on all Intel cpus starting from PentiumPro 1.336 + UseAddressNop = true; 1.337 + } 1.338 + } 1.339 + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.340 + UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1.341 + } 1.342 + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.343 + if( supports_sse3() ) { 1.344 + UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1.345 + } else { 1.346 + UseXmmRegToRegMoveAll = false; 1.347 + } 1.348 + } 1.349 + if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 1.350 +#ifdef COMPILER2 1.351 + if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 1.352 + // For new Intel cpus do the next optimization: 1.353 + // don't align the beginning of a loop if there are enough instructions 1.354 + // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1.355 + // in current fetch line (OptoLoopAlignment) or the padding 1.356 + // is big (> MaxLoopPad). 1.357 + // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1.358 + // generated NOP instructions. 11 is the largest size of one 1.359 + // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1.360 + MaxLoopPad = 11; 1.361 + } 1.362 +#endif // COMPILER2 1.363 + } 1.364 + } 1.365 + 1.366 + assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 1.367 + assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 1.368 + 1.369 + // set valid Prefetch instruction 1.370 + if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 1.371 + if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 1.372 + if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 1.373 + if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; 1.374 + 1.375 + if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 1.376 + if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 1.377 + if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 1.378 + if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; 1.379 + 1.380 + // Allocation prefetch settings 1.381 + intx cache_line_size = L1_data_cache_line_size(); 1.382 + if( cache_line_size > AllocatePrefetchStepSize ) 1.383 + AllocatePrefetchStepSize = cache_line_size; 1.384 + if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 1.385 + AllocatePrefetchLines = 3; // Optimistic value 1.386 + assert(AllocatePrefetchLines > 0, "invalid value"); 1.387 + if( AllocatePrefetchLines < 1 ) // set valid value in product VM 1.388 + AllocatePrefetchLines = 1; // Conservative value 1.389 + 1.390 + AllocatePrefetchDistance = allocate_prefetch_distance(); 1.391 + AllocatePrefetchStyle = allocate_prefetch_style(); 1.392 + 1.393 + if( AllocatePrefetchStyle == 2 && is_intel() && 1.394 + cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 1.395 + AllocatePrefetchDistance = 320; 1.396 + } 1.397 + assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 1.398 + 1.399 +#ifndef PRODUCT 1.400 + if (PrintMiscellaneous && Verbose) { 1.401 + tty->print_cr("Logical CPUs per package: %u", 1.402 + logical_processors_per_package()); 1.403 + tty->print_cr("UseSSE=%d",UseSSE); 1.404 + tty->print("Allocation: "); 1.405 + if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { 1.406 + tty->print_cr("no prefetching"); 1.407 + } else { 1.408 + if (UseSSE == 0 && supports_3dnow()) { 1.409 + tty->print("PREFETCHW"); 1.410 + } else if (UseSSE >= 1) { 1.411 + if (AllocatePrefetchInstr == 0) { 1.412 + tty->print("PREFETCHNTA"); 1.413 + } else if (AllocatePrefetchInstr == 1) { 1.414 + tty->print("PREFETCHT0"); 1.415 + } else if (AllocatePrefetchInstr == 2) { 1.416 + tty->print("PREFETCHT2"); 1.417 + } else if (AllocatePrefetchInstr == 3) { 1.418 + tty->print("PREFETCHW"); 1.419 + } 1.420 + } 1.421 + if (AllocatePrefetchLines > 1) { 1.422 + tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1.423 + } else { 1.424 + tty->print_cr(" %d, one line", AllocatePrefetchDistance); 1.425 + } 1.426 + } 1.427 + } 1.428 +#endif // !PRODUCT 1.429 +} 1.430 + 1.431 +void VM_Version::initialize() { 1.432 + ResourceMark rm; 1.433 + // Making this stub must be FIRST use of assembler 1.434 + 1.435 + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 1.436 + if (stub_blob == NULL) { 1.437 + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); 1.438 + } 1.439 + CodeBuffer c(stub_blob->instructions_begin(), 1.440 + stub_blob->instructions_size()); 1.441 + VM_Version_StubGenerator g(&c); 1.442 + getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 1.443 + g.generate_getPsrInfo()); 1.444 + 1.445 + get_processor_features(); 1.446 +}