6964774: Adjust optimization flags setting

Tue, 29 Jun 2010 10:34:00 -0700

author
kvn
date
Tue, 29 Jun 2010 10:34:00 -0700
changeset 1977
76efbe666d6c
parent 1976
6027dddc26c6
child 1978
fcbb92a1ab3b

6964774: Adjust optimization flags setting
Summary: Adjust performance flags settings.
Reviewed-by: never, phh

src/cpu/x86/vm/vm_version_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/arguments.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Jun 28 14:54:39 2010 -0700
     1.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jun 29 10:34:00 2010 -0700
     1.3 @@ -1,5 +1,5 @@
     1.4  /*
     1.5 - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 1997, 2010, Oracle and/or its affiliates.  All Rights Reserved.
     1.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8   *
     1.9   * This code is free software; you can redistribute it and/or modify it
    1.10 @@ -34,7 +34,7 @@
    1.11  VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
    1.12  
    1.13  static BufferBlob* stub_blob;
    1.14 -static const int stub_size = 300;
    1.15 +static const int stub_size = 400;
    1.16  
    1.17  extern "C" {
    1.18    typedef void (*getPsrInfo_stub_t)(void*);
    1.19 @@ -56,7 +56,7 @@
    1.20      const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    1.21      const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
    1.22  
    1.23 -    Label detect_486, cpu486, detect_586, std_cpuid1;
    1.24 +    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    1.25      Label ext_cpuid1, ext_cpuid5, done;
    1.26  
    1.27      StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
    1.28 @@ -131,13 +131,62 @@
    1.29      __ movl(Address(rsi, 8), rcx);
    1.30      __ movl(Address(rsi,12), rdx);
    1.31  
    1.32 -    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
    1.33 -    __ jccb(Assembler::belowEqual, std_cpuid1);
    1.34 +    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    1.35 +    __ jccb(Assembler::belowEqual, std_cpuid4);
    1.36 +
    1.37 +    //
    1.38 +    // cpuid(0xB) Processor Topology
    1.39 +    //
    1.40 +    __ movl(rax, 0xb);
    1.41 +    __ xorl(rcx, rcx);   // Threads level
    1.42 +    __ cpuid();
    1.43 +
    1.44 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    1.45 +    __ movl(Address(rsi, 0), rax);
    1.46 +    __ movl(Address(rsi, 4), rbx);
    1.47 +    __ movl(Address(rsi, 8), rcx);
    1.48 +    __ movl(Address(rsi,12), rdx);
    1.49 +
    1.50 +    __ movl(rax, 0xb);
    1.51 +    __ movl(rcx, 1);     // Cores level
    1.52 +    __ cpuid();
    1.53 +    __ push(rax);
    1.54 +    __ andl(rax, 0x1f);  // Determine if valid topology level
    1.55 +    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    1.56 +    __ andl(rax, 0xffff);
    1.57 +    __ pop(rax);
    1.58 +    __ jccb(Assembler::equal, std_cpuid4);
    1.59 +
    1.60 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    1.61 +    __ movl(Address(rsi, 0), rax);
    1.62 +    __ movl(Address(rsi, 4), rbx);
    1.63 +    __ movl(Address(rsi, 8), rcx);
    1.64 +    __ movl(Address(rsi,12), rdx);
    1.65 +
    1.66 +    __ movl(rax, 0xb);
    1.67 +    __ movl(rcx, 2);     // Packages level
    1.68 +    __ cpuid();
    1.69 +    __ push(rax);
    1.70 +    __ andl(rax, 0x1f);  // Determine if valid topology level
    1.71 +    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    1.72 +    __ andl(rax, 0xffff);
    1.73 +    __ pop(rax);
    1.74 +    __ jccb(Assembler::equal, std_cpuid4);
    1.75 +
    1.76 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    1.77 +    __ movl(Address(rsi, 0), rax);
    1.78 +    __ movl(Address(rsi, 4), rbx);
    1.79 +    __ movl(Address(rsi, 8), rcx);
    1.80 +    __ movl(Address(rsi,12), rdx);
    1.81  
    1.82      //
    1.83      // cpuid(0x4) Deterministic cache params
    1.84      //
    1.85 +    __ bind(std_cpuid4);
    1.86      __ movl(rax, 4);
    1.87 +    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    1.88 +    __ jccb(Assembler::greater, std_cpuid1);
    1.89 +
    1.90      __ xorl(rcx, rcx);   // L1 cache
    1.91      __ cpuid();
    1.92      __ push(rax);
    1.93 @@ -460,13 +509,18 @@
    1.94    AllocatePrefetchDistance = allocate_prefetch_distance();
    1.95    AllocatePrefetchStyle    = allocate_prefetch_style();
    1.96  
    1.97 -  if( AllocatePrefetchStyle == 2 && is_intel() &&
    1.98 -      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
    1.99 +  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
   1.100 +    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
   1.101  #ifdef _LP64
   1.102 -    AllocatePrefetchDistance = 384;
   1.103 +      AllocatePrefetchDistance = 384;
   1.104  #else
   1.105 -    AllocatePrefetchDistance = 320;
   1.106 +      AllocatePrefetchDistance = 320;
   1.107  #endif
   1.108 +    }
   1.109 +    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
   1.110 +      AllocatePrefetchDistance = 192;
   1.111 +      AllocatePrefetchLines = 4;
   1.112 +    }
   1.113    }
   1.114    assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
   1.115  
     2.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Mon Jun 28 14:54:39 2010 -0700
     2.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Tue Jun 29 10:34:00 2010 -0700
     2.3 @@ -1,5 +1,5 @@
     2.4  /*
     2.5 - * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
     2.6 + * Copyright (c) 1997, 2010, Oracle and/or its affiliates.  All Rights Reserved.
     2.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.8   *
     2.9   * This code is free software; you can redistribute it and/or modify it
    2.10 @@ -114,6 +114,14 @@
    2.11      } bits;
    2.12    };
    2.13  
    2.14 +  union TplCpuidBEbx {
    2.15 +    uint32_t value;
    2.16 +    struct {
    2.17 +      uint32_t logical_cpus : 16,
    2.18 +                            : 16;
    2.19 +    } bits;
    2.20 +  };
    2.21 +
    2.22    union ExtCpuid1Ecx {
    2.23      uint32_t value;
    2.24      struct {
    2.25 @@ -211,6 +219,25 @@
    2.26      uint32_t     dcp_cpuid4_ecx; // unused currently
    2.27      uint32_t     dcp_cpuid4_edx; // unused currently
    2.28  
    2.29 +    // cpuid function 0xB (processor topology)
    2.30 +    // ecx = 0
    2.31 +    uint32_t     tpl_cpuidB0_eax;
    2.32 +    TplCpuidBEbx tpl_cpuidB0_ebx;
    2.33 +    uint32_t     tpl_cpuidB0_ecx; // unused currently
    2.34 +    uint32_t     tpl_cpuidB0_edx; // unused currently
    2.35 +
    2.36 +    // ecx = 1
    2.37 +    uint32_t     tpl_cpuidB1_eax;
    2.38 +    TplCpuidBEbx tpl_cpuidB1_ebx;
    2.39 +    uint32_t     tpl_cpuidB1_ecx; // unused currently
    2.40 +    uint32_t     tpl_cpuidB1_edx; // unused currently
    2.41 +
    2.42 +    // ecx = 2
    2.43 +    uint32_t     tpl_cpuidB2_eax;
    2.44 +    TplCpuidBEbx tpl_cpuidB2_ebx;
    2.45 +    uint32_t     tpl_cpuidB2_ecx; // unused currently
    2.46 +    uint32_t     tpl_cpuidB2_edx; // unused currently
    2.47 +
    2.48      // cpuid function 0x80000000 // example, unused
    2.49      uint32_t ext_max_function;
    2.50      uint32_t ext_vendor_name_0;
    2.51 @@ -316,6 +343,9 @@
    2.52    static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
    2.53    static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
    2.54    static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
    2.55 +  static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
    2.56 +  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
    2.57 +  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
    2.58  
    2.59    // Initialization
    2.60    static void initialize();
    2.61 @@ -349,7 +379,12 @@
    2.62    static uint cores_per_cpu()  {
    2.63      uint result = 1;
    2.64      if (is_intel()) {
    2.65 -      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    2.66 +      if (_cpuid_info.std_max_function >= 0xB) {
    2.67 +        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
    2.68 +                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    2.69 +      } else {
    2.70 +        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    2.71 +      }
    2.72      } else if (is_amd()) {
    2.73        result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
    2.74      }
    2.75 @@ -358,7 +393,9 @@
    2.76  
    2.77    static uint threads_per_core()  {
    2.78      uint result = 1;
    2.79 -    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    2.80 +    if (is_intel() && _cpuid_info.std_max_function >= 0xB) {
    2.81 +      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    2.82 +    } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    2.83        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
    2.84                 cores_per_cpu();
    2.85      }
     3.1 --- a/src/share/vm/runtime/arguments.cpp	Mon Jun 28 14:54:39 2010 -0700
     3.2 +++ b/src/share/vm/runtime/arguments.cpp	Tue Jun 29 10:34:00 2010 -0700
     3.3 @@ -1513,6 +1513,9 @@
     3.4    if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
     3.5      FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
     3.6    }
     3.7 +  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
     3.8 +    FLAG_SET_DEFAULT(OptimizeStringConcat, true);
     3.9 +  }
    3.10  #endif
    3.11  
    3.12    if (AggressiveOpts) {

mercurial