Tue, 29 Jun 2010 10:34:00 -0700
6964774: Adjust optimization flags setting
Summary: Adjust performance flags settings.
Reviewed-by: never, phh
1.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Mon Jun 28 14:54:39 2010 -0700 1.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Tue Jun 29 10:34:00 2010 -0700 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -34,7 +34,7 @@ 1.11 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 1.12 1.13 static BufferBlob* stub_blob; 1.14 -static const int stub_size = 300; 1.15 +static const int stub_size = 400; 1.16 1.17 extern "C" { 1.18 typedef void (*getPsrInfo_stub_t)(void*); 1.19 @@ -56,7 +56,7 @@ 1.20 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 1.21 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 1.22 1.23 - Label detect_486, cpu486, detect_586, std_cpuid1; 1.24 + Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 1.25 Label ext_cpuid1, ext_cpuid5, done; 1.26 1.27 StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); 1.28 @@ -131,13 +131,62 @@ 1.29 __ movl(Address(rsi, 8), rcx); 1.30 __ movl(Address(rsi,12), rdx); 1.31 1.32 - __ cmpl(rax, 3); // Is cpuid(0x4) supported? 1.33 - __ jccb(Assembler::belowEqual, std_cpuid1); 1.34 + __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 1.35 + __ jccb(Assembler::belowEqual, std_cpuid4); 1.36 + 1.37 + // 1.38 + // cpuid(0xB) Processor Topology 1.39 + // 1.40 + __ movl(rax, 0xb); 1.41 + __ xorl(rcx, rcx); // Threads level 1.42 + __ cpuid(); 1.43 + 1.44 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 1.45 + __ movl(Address(rsi, 0), rax); 1.46 + __ movl(Address(rsi, 4), rbx); 1.47 + __ movl(Address(rsi, 8), rcx); 1.48 + __ movl(Address(rsi,12), rdx); 1.49 + 1.50 + __ movl(rax, 0xb); 1.51 + __ movl(rcx, 1); // Cores level 1.52 + __ cpuid(); 1.53 + __ push(rax); 1.54 + __ andl(rax, 0x1f); // Determine if valid topology level 1.55 + __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 1.56 + __ andl(rax, 0xffff); 1.57 + __ pop(rax); 1.58 + __ jccb(Assembler::equal, std_cpuid4); 1.59 + 1.60 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 1.61 + __ movl(Address(rsi, 0), rax); 1.62 + __ movl(Address(rsi, 4), rbx); 1.63 + __ movl(Address(rsi, 8), rcx); 1.64 + __ movl(Address(rsi,12), rdx); 1.65 + 1.66 + __ movl(rax, 0xb); 1.67 + __ movl(rcx, 2); // Packages level 1.68 + __ cpuid(); 1.69 + __ push(rax); 1.70 + __ andl(rax, 0x1f); // Determine if valid topology level 1.71 + __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 1.72 + __ andl(rax, 0xffff); 1.73 + __ pop(rax); 1.74 + __ jccb(Assembler::equal, std_cpuid4); 1.75 + 1.76 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 1.77 + __ movl(Address(rsi, 0), rax); 1.78 + __ movl(Address(rsi, 4), rbx); 1.79 + __ movl(Address(rsi, 8), rcx); 1.80 + __ movl(Address(rsi,12), rdx); 1.81 1.82 // 1.83 // cpuid(0x4) Deterministic cache params 1.84 // 1.85 + __ bind(std_cpuid4); 1.86 __ movl(rax, 4); 1.87 + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 1.88 + __ jccb(Assembler::greater, std_cpuid1); 1.89 + 1.90 __ xorl(rcx, rcx); // L1 cache 1.91 __ cpuid(); 1.92 __ push(rax); 1.93 @@ -460,13 +509,18 @@ 1.94 AllocatePrefetchDistance = allocate_prefetch_distance(); 1.95 AllocatePrefetchStyle = allocate_prefetch_style(); 1.96 1.97 - if( AllocatePrefetchStyle == 2 && is_intel() && 1.98 - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core 1.99 + if( is_intel() && cpu_family() == 6 && supports_sse3() ) { 1.100 + if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core 1.101 #ifdef _LP64 1.102 - AllocatePrefetchDistance = 384; 1.103 + AllocatePrefetchDistance = 384; 1.104 #else 1.105 - AllocatePrefetchDistance = 320; 1.106 + AllocatePrefetchDistance = 320; 1.107 #endif 1.108 + } 1.109 + if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus 1.110 + AllocatePrefetchDistance = 192; 1.111 + AllocatePrefetchLines = 4; 1.112 + } 1.113 } 1.114 assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); 1.115
2.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Mon Jun 28 14:54:39 2010 -0700 2.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Tue Jun 29 10:34:00 2010 -0700 2.3 @@ -1,5 +1,5 @@ 2.4 /* 2.5 - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. 2.6 + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. 2.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 2.8 * 2.9 * This code is free software; you can redistribute it and/or modify it 2.10 @@ -114,6 +114,14 @@ 2.11 } bits; 2.12 }; 2.13 2.14 + union TplCpuidBEbx { 2.15 + uint32_t value; 2.16 + struct { 2.17 + uint32_t logical_cpus : 16, 2.18 + : 16; 2.19 + } bits; 2.20 + }; 2.21 + 2.22 union ExtCpuid1Ecx { 2.23 uint32_t value; 2.24 struct { 2.25 @@ -211,6 +219,25 @@ 2.26 uint32_t dcp_cpuid4_ecx; // unused currently 2.27 uint32_t dcp_cpuid4_edx; // unused currently 2.28 2.29 + // cpuid function 0xB (processor topology) 2.30 + // ecx = 0 2.31 + uint32_t tpl_cpuidB0_eax; 2.32 + TplCpuidBEbx tpl_cpuidB0_ebx; 2.33 + uint32_t tpl_cpuidB0_ecx; // unused currently 2.34 + uint32_t tpl_cpuidB0_edx; // unused currently 2.35 + 2.36 + // ecx = 1 2.37 + uint32_t tpl_cpuidB1_eax; 2.38 + TplCpuidBEbx tpl_cpuidB1_ebx; 2.39 + uint32_t tpl_cpuidB1_ecx; // unused currently 2.40 + uint32_t tpl_cpuidB1_edx; // unused currently 2.41 + 2.42 + // ecx = 2 2.43 + uint32_t tpl_cpuidB2_eax; 2.44 + TplCpuidBEbx tpl_cpuidB2_ebx; 2.45 + uint32_t tpl_cpuidB2_ecx; // unused currently 2.46 + uint32_t tpl_cpuidB2_edx; // unused currently 2.47 + 2.48 // cpuid function 0x80000000 // example, unused 2.49 uint32_t ext_max_function; 2.50 uint32_t ext_vendor_name_0; 2.51 @@ -316,6 +343,9 @@ 2.52 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } 2.53 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } 2.54 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } 2.55 + static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } 2.56 + static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } 2.57 + static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } 2.58 2.59 // Initialization 2.60 static void initialize(); 2.61 @@ -349,7 +379,12 @@ 2.62 static uint cores_per_cpu() { 2.63 uint result = 1; 2.64 if (is_intel()) { 2.65 - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 2.66 + if (_cpuid_info.std_max_function >= 0xB) { 2.67 + result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 2.68 + _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 2.69 + } else { 2.70 + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 2.71 + } 2.72 } else if (is_amd()) { 2.73 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 2.74 } 2.75 @@ -358,7 +393,9 @@ 2.76 2.77 static uint threads_per_core() { 2.78 uint result = 1; 2.79 - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 2.80 + if (is_intel() && _cpuid_info.std_max_function >= 0xB) { 2.81 + result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 2.82 + } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 2.83 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 2.84 cores_per_cpu(); 2.85 }
3.1 --- a/src/share/vm/runtime/arguments.cpp Mon Jun 28 14:54:39 2010 -0700 3.2 +++ b/src/share/vm/runtime/arguments.cpp Tue Jun 29 10:34:00 2010 -0700 3.3 @@ -1513,6 +1513,9 @@ 3.4 if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { 3.5 FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); 3.6 } 3.7 + if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) { 3.8 + FLAG_SET_DEFAULT(OptimizeStringConcat, true); 3.9 + } 3.10 #endif 3.11 3.12 if (AggressiveOpts) {