1.1 --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Feb 18 18:20:02 2009 -0800 1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 1.3 @@ -1,419 +0,0 @@ 1.4 -/* 1.5 - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. 1.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 - * 1.8 - * This code is free software; you can redistribute it and/or modify it 1.9 - * under the terms of the GNU General Public License version 2 only, as 1.10 - * published by the Free Software Foundation. 1.11 - * 1.12 - * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 - * version 2 for more details (a copy is included in the LICENSE file that 1.16 - * accompanied this code). 1.17 - * 1.18 - * You should have received a copy of the GNU General Public License version 1.19 - * 2 along with this work; if not, write to the Free Software Foundation, 1.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 - * 1.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 - * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 - * have any questions. 1.25 - * 1.26 - */ 1.27 - 1.28 -# include "incls/_precompiled.incl" 1.29 -# include "incls/_vm_version_x86_64.cpp.incl" 1.30 - 1.31 -int VM_Version::_cpu; 1.32 -int VM_Version::_model; 1.33 -int VM_Version::_stepping; 1.34 -int VM_Version::_cpuFeatures; 1.35 -const char* VM_Version::_features_str = ""; 1.36 -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 1.37 - 1.38 -static BufferBlob* stub_blob; 1.39 -static const int stub_size = 300; 1.40 - 1.41 -extern "C" { 1.42 - typedef void (*getPsrInfo_stub_t)(void*); 1.43 -} 1.44 -static getPsrInfo_stub_t getPsrInfo_stub = NULL; 1.45 - 1.46 - 1.47 -class VM_Version_StubGenerator: public StubCodeGenerator { 1.48 - public: 1.49 - 1.50 - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 1.51 - 1.52 - address generate_getPsrInfo() { 1.53 - 1.54 - Label std_cpuid1, ext_cpuid1, ext_cpuid5, done; 1.55 - 1.56 - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 1.57 -# define __ _masm-> 1.58 - 1.59 - address start = __ pc(); 1.60 - 1.61 - // 1.62 - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); 1.63 - // 1.64 - // rcx and rdx are first and second argument registers on windows 1.65 - 1.66 - __ push(rbp); 1.67 - __ mov(rbp, c_rarg0); // cpuid_info address 1.68 - __ push(rbx); 1.69 - __ push(rsi); 1.70 - 1.71 - // 1.72 - // we have a chip which supports the "cpuid" instruction 1.73 - // 1.74 - __ xorl(rax, rax); 1.75 - __ cpuid(); 1.76 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 1.77 - __ movl(Address(rsi, 0), rax); 1.78 - __ movl(Address(rsi, 4), rbx); 1.79 - __ movl(Address(rsi, 8), rcx); 1.80 - __ movl(Address(rsi,12), rdx); 1.81 - 1.82 - __ cmpl(rax, 3); // Is cpuid(0x4) supported? 1.83 - __ jccb(Assembler::belowEqual, std_cpuid1); 1.84 - 1.85 - // 1.86 - // cpuid(0x4) Deterministic cache params 1.87 - // 1.88 - __ movl(rax, 4); 1.89 - __ xorl(rcx, rcx); // L1 cache 1.90 - __ cpuid(); 1.91 - __ push(rax); 1.92 - __ andl(rax, 0x1f); // Determine if valid cache parameters used 1.93 - __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 1.94 - __ pop(rax); 1.95 - __ jccb(Assembler::equal, std_cpuid1); 1.96 - 1.97 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 1.98 - __ movl(Address(rsi, 0), rax); 1.99 - __ movl(Address(rsi, 4), rbx); 1.100 - __ movl(Address(rsi, 8), rcx); 1.101 - __ movl(Address(rsi,12), rdx); 1.102 - 1.103 - // 1.104 - // Standard cpuid(0x1) 1.105 - // 1.106 - __ bind(std_cpuid1); 1.107 - __ movl(rax, 1); 1.108 - __ cpuid(); 1.109 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 1.110 - __ movl(Address(rsi, 0), rax); 1.111 - __ movl(Address(rsi, 4), rbx); 1.112 - __ movl(Address(rsi, 8), rcx); 1.113 - __ movl(Address(rsi,12), rdx); 1.114 - 1.115 - __ movl(rax, 0x80000000); 1.116 - __ cpuid(); 1.117 - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 1.118 - __ jcc(Assembler::belowEqual, done); 1.119 - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 1.120 - __ jccb(Assembler::belowEqual, ext_cpuid1); 1.121 - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 1.122 - __ jccb(Assembler::belowEqual, ext_cpuid5); 1.123 - // 1.124 - // Extended cpuid(0x80000008) 1.125 - // 1.126 - __ movl(rax, 0x80000008); 1.127 - __ cpuid(); 1.128 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 1.129 - __ movl(Address(rsi, 0), rax); 1.130 - __ movl(Address(rsi, 4), rbx); 1.131 - __ movl(Address(rsi, 8), rcx); 1.132 - __ movl(Address(rsi,12), rdx); 1.133 - 1.134 - // 1.135 - // Extended cpuid(0x80000005) 1.136 - // 1.137 - __ bind(ext_cpuid5); 1.138 - __ movl(rax, 0x80000005); 1.139 - __ cpuid(); 1.140 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 1.141 - __ movl(Address(rsi, 0), rax); 1.142 - __ movl(Address(rsi, 4), rbx); 1.143 - __ movl(Address(rsi, 8), rcx); 1.144 - __ movl(Address(rsi,12), rdx); 1.145 - 1.146 - // 1.147 - // Extended cpuid(0x80000001) 1.148 - // 1.149 - __ bind(ext_cpuid1); 1.150 - __ movl(rax, 0x80000001); 1.151 - __ cpuid(); 1.152 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 1.153 - __ movl(Address(rsi, 0), rax); 1.154 - __ movl(Address(rsi, 4), rbx); 1.155 - __ movl(Address(rsi, 8), rcx); 1.156 - __ movl(Address(rsi,12), rdx); 1.157 - 1.158 - // 1.159 - // return 1.160 - // 1.161 - __ bind(done); 1.162 - __ pop(rsi); 1.163 - __ pop(rbx); 1.164 - __ pop(rbp); 1.165 - __ ret(0); 1.166 - 1.167 -# undef __ 1.168 - 1.169 - return start; 1.170 - }; 1.171 -}; 1.172 - 1.173 - 1.174 -void VM_Version::get_processor_features() { 1.175 - 1.176 - _logical_processors_per_package = 1; 1.177 - // Get raw processor info 1.178 - getPsrInfo_stub(&_cpuid_info); 1.179 - assert_is_initialized(); 1.180 - _cpu = extended_cpu_family(); 1.181 - _model = extended_cpu_model(); 1.182 - _stepping = cpu_stepping(); 1.183 - _cpuFeatures = feature_flags(); 1.184 - // Logical processors are only available on P4s and above, 1.185 - // and only if hyperthreading is available. 1.186 - _logical_processors_per_package = logical_processor_count(); 1.187 - _supports_cx8 = supports_cmpxchg8(); 1.188 - // OS should support SSE for x64 and hardware should support at least SSE2. 1.189 - if (!VM_Version::supports_sse2()) { 1.190 - vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 1.191 - } 1.192 - if (UseSSE < 4) { 1.193 - _cpuFeatures &= ~CPU_SSE4_1; 1.194 - _cpuFeatures &= ~CPU_SSE4_2; 1.195 - } 1.196 - if (UseSSE < 3) { 1.197 - _cpuFeatures &= ~CPU_SSE3; 1.198 - _cpuFeatures &= ~CPU_SSSE3; 1.199 - _cpuFeatures &= ~CPU_SSE4A; 1.200 - } 1.201 - if (UseSSE < 2) 1.202 - _cpuFeatures &= ~CPU_SSE2; 1.203 - if (UseSSE < 1) 1.204 - _cpuFeatures &= ~CPU_SSE; 1.205 - 1.206 - if (logical_processors_per_package() == 1) { 1.207 - // HT processor could be installed on a system which doesn't support HT. 1.208 - _cpuFeatures &= ~CPU_HT; 1.209 - } 1.210 - 1.211 - char buf[256]; 1.212 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.213 - cores_per_cpu(), threads_per_core(), 1.214 - cpu_family(), _model, _stepping, 1.215 - (supports_cmov() ? ", cmov" : ""), 1.216 - (supports_cmpxchg8() ? ", cx8" : ""), 1.217 - (supports_fxsr() ? ", fxsr" : ""), 1.218 - (supports_mmx() ? ", mmx" : ""), 1.219 - (supports_sse() ? ", sse" : ""), 1.220 - (supports_sse2() ? ", sse2" : ""), 1.221 - (supports_sse3() ? ", sse3" : ""), 1.222 - (supports_ssse3()? ", ssse3": ""), 1.223 - (supports_sse4_1() ? ", sse4.1" : ""), 1.224 - (supports_sse4_2() ? ", sse4.2" : ""), 1.225 - (supports_mmx_ext() ? ", mmxext" : ""), 1.226 - (supports_3dnow() ? ", 3dnow" : ""), 1.227 - (supports_3dnow2() ? ", 3dnowext" : ""), 1.228 - (supports_sse4a() ? ", sse4a": ""), 1.229 - (supports_ht() ? ", ht": "")); 1.230 - _features_str = strdup(buf); 1.231 - 1.232 - // UseSSE is set to the smaller of what hardware supports and what 1.233 - // the command line requires. I.e., you cannot set UseSSE to 2 on 1.234 - // older Pentiums which do not support it. 1.235 - if( UseSSE > 4 ) UseSSE=4; 1.236 - if( UseSSE < 0 ) UseSSE=0; 1.237 - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 1.238 - UseSSE = MIN2((intx)3,UseSSE); 1.239 - if( !supports_sse3() ) // Drop to 2 if no SSE3 support 1.240 - UseSSE = MIN2((intx)2,UseSSE); 1.241 - if( !supports_sse2() ) // Drop to 1 if no SSE2 support 1.242 - UseSSE = MIN2((intx)1,UseSSE); 1.243 - if( !supports_sse () ) // Drop to 0 if no SSE support 1.244 - UseSSE = 0; 1.245 - 1.246 - // On new cpus instructions which update whole XMM register should be used 1.247 - // to prevent partial register stall due to dependencies on high half. 1.248 - // 1.249 - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1.250 - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1.251 - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1.252 - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1.253 - 1.254 - if( is_amd() ) { // AMD cpus specific settings 1.255 - if( FLAG_IS_DEFAULT(UseAddressNop) ) { 1.256 - // Use it on all AMD cpus starting from Opteron (don't need 1.257 - // a cpu check since only Opteron and new cpus support 64-bits mode). 1.258 - UseAddressNop = true; 1.259 - } 1.260 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.261 - if( supports_sse4a() ) { 1.262 - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1.263 - } else { 1.264 - UseXmmLoadAndClearUpper = false; 1.265 - } 1.266 - } 1.267 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.268 - if( supports_sse4a() ) { 1.269 - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1.270 - } else { 1.271 - UseXmmRegToRegMoveAll = false; 1.272 - } 1.273 - } 1.274 - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { 1.275 - if( supports_sse4a() ) { 1.276 - UseXmmI2F = true; 1.277 - } else { 1.278 - UseXmmI2F = false; 1.279 - } 1.280 - } 1.281 - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { 1.282 - if( supports_sse4a() ) { 1.283 - UseXmmI2D = true; 1.284 - } else { 1.285 - UseXmmI2D = false; 1.286 - } 1.287 - } 1.288 - } 1.289 - 1.290 - if( is_intel() ) { // Intel cpus specific settings 1.291 - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { 1.292 - UseStoreImmI16 = false; // don't use it on Intel cpus 1.293 - } 1.294 - if( FLAG_IS_DEFAULT(UseAddressNop) ) { 1.295 - // Use it on all Intel cpus starting from PentiumPro 1.296 - // (don't need a cpu check since only new cpus support 64-bits mode). 1.297 - UseAddressNop = true; 1.298 - } 1.299 - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { 1.300 - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1.301 - } 1.302 - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { 1.303 - if( supports_sse3() ) { 1.304 - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1.305 - } else { 1.306 - UseXmmRegToRegMoveAll = false; 1.307 - } 1.308 - } 1.309 - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus 1.310 -#ifdef COMPILER2 1.311 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { 1.312 - // For new Intel cpus do the next optimization: 1.313 - // don't align the beginning of a loop if there are enough instructions 1.314 - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1.315 - // in current fetch line (OptoLoopAlignment) or the padding 1.316 - // is big (> MaxLoopPad). 1.317 - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1.318 - // generated NOP instructions. 11 is the largest size of one 1.319 - // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1.320 - MaxLoopPad = 11; 1.321 - } 1.322 -#endif // COMPILER2 1.323 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { 1.324 - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1.325 - } 1.326 - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus 1.327 - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { 1.328 - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1.329 - } 1.330 - } 1.331 - } 1.332 - } 1.333 - 1.334 - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 1.335 - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 1.336 - 1.337 - // set valid Prefetch instruction 1.338 - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; 1.339 - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; 1.340 - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; 1.341 - 1.342 - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; 1.343 - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; 1.344 - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; 1.345 - 1.346 - // Allocation prefetch settings 1.347 - intx cache_line_size = L1_data_cache_line_size(); 1.348 - if( cache_line_size > AllocatePrefetchStepSize ) 1.349 - AllocatePrefetchStepSize = cache_line_size; 1.350 - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) 1.351 - AllocatePrefetchLines = 3; // Optimistic value 1.352 - assert(AllocatePrefetchLines > 0, "invalid value"); 1.353 - if( AllocatePrefetchLines < 1 ) // set valid value in product VM 1.354 - AllocatePrefetchLines = 1; // Conservative value 1.355 - 1.356 - AllocatePrefetchDistance = allocate_prefetch_distance(); 1.357 - AllocatePrefetchStyle = allocate_prefetch_style(); 1.358 - 1.359 - if( AllocatePrefetchStyle == 2 && is_intel() && 1.360 - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 1.361 - AllocatePrefetchDistance = 384; 1.362 - } 1.363 - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 1.364 - 1.365 - // Prefetch settings 1.366 - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); 1.367 - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); 1.368 - PrefetchFieldsAhead = prefetch_fields_ahead(); 1.369 - 1.370 -#ifndef PRODUCT 1.371 - if (PrintMiscellaneous && Verbose) { 1.372 - tty->print_cr("Logical CPUs per core: %u", 1.373 - logical_processors_per_package()); 1.374 - tty->print_cr("UseSSE=%d",UseSSE); 1.375 - tty->print("Allocation: "); 1.376 - if (AllocatePrefetchStyle <= 0) { 1.377 - tty->print_cr("no prefetching"); 1.378 - } else { 1.379 - if (AllocatePrefetchInstr == 0) { 1.380 - tty->print("PREFETCHNTA"); 1.381 - } else if (AllocatePrefetchInstr == 1) { 1.382 - tty->print("PREFETCHT0"); 1.383 - } else if (AllocatePrefetchInstr == 2) { 1.384 - tty->print("PREFETCHT2"); 1.385 - } else if (AllocatePrefetchInstr == 3) { 1.386 - tty->print("PREFETCHW"); 1.387 - } 1.388 - if (AllocatePrefetchLines > 1) { 1.389 - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1.390 - } else { 1.391 - tty->print_cr(" %d, one line", AllocatePrefetchDistance); 1.392 - } 1.393 - } 1.394 - if (PrefetchCopyIntervalInBytes > 0) { 1.395 - tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); 1.396 - } 1.397 - if (PrefetchScanIntervalInBytes > 0) { 1.398 - tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); 1.399 - } 1.400 - if (PrefetchFieldsAhead > 0) { 1.401 - tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); 1.402 - } 1.403 - } 1.404 -#endif // !PRODUCT 1.405 -} 1.406 - 1.407 -void VM_Version::initialize() { 1.408 - ResourceMark rm; 1.409 - // Making this stub must be FIRST use of assembler 1.410 - 1.411 - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); 1.412 - if (stub_blob == NULL) { 1.413 - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); 1.414 - } 1.415 - CodeBuffer c(stub_blob->instructions_begin(), 1.416 - stub_blob->instructions_size()); 1.417 - VM_Version_StubGenerator g(&c); 1.418 - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, 1.419 - g.generate_getPsrInfo()); 1.420 - 1.421 - get_processor_features(); 1.422 -}