1.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Tue Dec 13 17:10:52 2011 -0800 1.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Wed Dec 14 14:54:38 2011 -0800 1.3 @@ -50,7 +50,7 @@ 1.4 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 1.5 1.6 static BufferBlob* stub_blob; 1.7 -static const int stub_size = 400; 1.8 +static const int stub_size = 500; 1.9 1.10 extern "C" { 1.11 typedef void (*getPsrInfo_stub_t)(void*); 1.12 @@ -73,7 +73,7 @@ 1.13 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 1.14 1.15 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 1.16 - Label ext_cpuid1, ext_cpuid5, done; 1.17 + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done; 1.18 1.19 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 1.20 # define __ _masm-> 1.21 @@ -229,6 +229,41 @@ 1.22 __ movl(Address(rsi, 8), rcx); 1.23 __ movl(Address(rsi,12), rdx); 1.24 1.25 + // 1.26 + // Check if OS has enabled XGETBV instruction to access XCR0 1.27 + // (OSXSAVE feature flag) and CPU supports AVX 1.28 + // 1.29 + __ andl(rcx, 0x18000000); 1.30 + __ cmpl(rcx, 0x18000000); 1.31 + __ jccb(Assembler::notEqual, sef_cpuid); 1.32 + 1.33 + // 1.34 + // XCR0, XFEATURE_ENABLED_MASK register 1.35 + // 1.36 + __ xorl(rcx, rcx); // zero for XCR0 register 1.37 + __ xgetbv(); 1.38 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); 1.39 + __ movl(Address(rsi, 0), rax); 1.40 + __ movl(Address(rsi, 4), rdx); 1.41 + 1.42 + // 1.43 + // cpuid(0x7) Structured Extended Features 1.44 + // 1.45 + __ bind(sef_cpuid); 1.46 + __ movl(rax, 7); 1.47 + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 1.48 + __ jccb(Assembler::greater, ext_cpuid); 1.49 + 1.50 + __ xorl(rcx, rcx); 1.51 + __ cpuid(); 1.52 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 1.53 + __ movl(Address(rsi, 0), rax); 1.54 + __ movl(Address(rsi, 4), rbx); 1.55 + 1.56 + // 1.57 + // Extended cpuid(0x80000000) 1.58 + // 1.59 + __ bind(ext_cpuid); 1.60 __ movl(rax, 0x80000000); 1.61 __ cpuid(); 1.62 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 1.63 @@ -359,13 +394,19 @@ 1.64 if (UseSSE < 1) 1.65 _cpuFeatures &= ~CPU_SSE; 1.66 1.67 + if (UseAVX < 2) 1.68 + _cpuFeatures &= ~CPU_AVX2; 1.69 + 1.70 + if (UseAVX < 1) 1.71 + _cpuFeatures &= ~CPU_AVX; 1.72 + 1.73 if (logical_processors_per_package() == 1) { 1.74 // HT processor could be installed on a system which doesn't support HT. 1.75 _cpuFeatures &= ~CPU_HT; 1.76 } 1.77 1.78 char buf[256]; 1.79 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.80 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1.81 cores_per_cpu(), threads_per_core(), 1.82 cpu_family(), _model, _stepping, 1.83 (supports_cmov() ? ", cmov" : ""), 1.84 @@ -379,6 +420,8 @@ 1.85 (supports_sse4_1() ? ", sse4.1" : ""), 1.86 (supports_sse4_2() ? ", sse4.2" : ""), 1.87 (supports_popcnt() ? ", popcnt" : ""), 1.88 + (supports_avx() ? ", avx" : ""), 1.89 + (supports_avx2() ? ", avx2" : ""), 1.90 (supports_mmx_ext() ? ", mmxext" : ""), 1.91 (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), 1.92 (supports_lzcnt() ? ", lzcnt": ""), 1.93 @@ -389,17 +432,24 @@ 1.94 // UseSSE is set to the smaller of what hardware supports and what 1.95 // the command line requires. I.e., you cannot set UseSSE to 2 on 1.96 // older Pentiums which do not support it. 1.97 - if( UseSSE > 4 ) UseSSE=4; 1.98 - if( UseSSE < 0 ) UseSSE=0; 1.99 - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support 1.100 + if (UseSSE > 4) UseSSE=4; 1.101 + if (UseSSE < 0) UseSSE=0; 1.102 + if (!supports_sse4_1()) // Drop to 3 if no SSE4 support 1.103 UseSSE = MIN2((intx)3,UseSSE); 1.104 - if( !supports_sse3() ) // Drop to 2 if no SSE3 support 1.105 + if (!supports_sse3()) // Drop to 2 if no SSE3 support 1.106 UseSSE = MIN2((intx)2,UseSSE); 1.107 - if( !supports_sse2() ) // Drop to 1 if no SSE2 support 1.108 + if (!supports_sse2()) // Drop to 1 if no SSE2 support 1.109 UseSSE = MIN2((intx)1,UseSSE); 1.110 - if( !supports_sse () ) // Drop to 0 if no SSE support 1.111 + if (!supports_sse ()) // Drop to 0 if no SSE support 1.112 UseSSE = 0; 1.113 1.114 + if (UseAVX > 2) UseAVX=2; 1.115 + if (UseAVX < 0) UseAVX=0; 1.116 + if (!supports_avx2()) // Drop to 1 if no AVX2 support 1.117 + UseAVX = MIN2((intx)1,UseAVX); 1.118 + if (!supports_avx ()) // Drop to 0 if no AVX support 1.119 + UseAVX = 0; 1.120 + 1.121 // On new cpus instructions which update whole XMM register should be used 1.122 // to prevent partial register stall due to dependencies on high half. 1.123 // 1.124 @@ -534,6 +584,9 @@ 1.125 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1.126 UsePopCountInstruction = true; 1.127 } 1.128 + } else if (UsePopCountInstruction) { 1.129 + warning("POPCNT instruction is not available on this CPU"); 1.130 + FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1.131 } 1.132 1.133 #ifdef COMPILER2 1.134 @@ -605,7 +658,11 @@ 1.135 if (PrintMiscellaneous && Verbose) { 1.136 tty->print_cr("Logical CPUs per core: %u", 1.137 logical_processors_per_package()); 1.138 - tty->print_cr("UseSSE=%d",UseSSE); 1.139 + tty->print("UseSSE=%d",UseSSE); 1.140 + if (UseAVX > 0) { 1.141 + tty->print(" UseAVX=%d",UseAVX); 1.142 + } 1.143 + tty->cr(); 1.144 tty->print("Allocation"); 1.145 if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { 1.146 tty->print_cr(": no prefetching");