Merge

Thu, 26 Feb 2009 14:25:55 -0800

author
apangin
date
Thu, 26 Feb 2009 14:25:55 -0800
changeset 1029
7898caac2071
parent 1028
1b68c738c0d9
parent 1023
ef3b3df478b9
child 1033
1fa16c3565be

Merge

src/cpu/x86/vm/vm_version_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86_32.hpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/vm_version_x86_64.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/.hgtags	Sun Feb 22 17:21:13 2009 -0800
     1.2 +++ b/.hgtags	Thu Feb 26 14:25:55 2009 -0800
     1.3 @@ -20,3 +20,5 @@
     1.4  fc6a5ae3fef5ebacfa896dbb3ae37715e388e282 jdk7-b43
     1.5  809e899c638bd9b21836abf9d09ab2a30ff3900b jdk7-b44
     1.6  945bf754069766e76873c53102fae48abf04cf5b jdk7-b45
     1.7 +16bb38eeda35b46268eefa4c1f829eb086e0ca46 jdk7-b46
     1.8 +fcb923bad68e2b10380a030ea83a723f4dc3d4d6 jdk7-b47
     2.1 --- a/make/hotspot_version	Sun Feb 22 17:21:13 2009 -0800
     2.2 +++ b/make/hotspot_version	Thu Feb 26 14:25:55 2009 -0800
     2.3 @@ -35,7 +35,7 @@
     2.4  
     2.5  HS_MAJOR_VER=15
     2.6  HS_MINOR_VER=0
     2.7 -HS_BUILD_NUMBER=01
     2.8 +HS_BUILD_NUMBER=02
     2.9  
    2.10  JDK_MAJOR_VER=1
    2.11  JDK_MINOR_VER=7
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Feb 26 14:25:55 2009 -0800
     3.3 @@ -0,0 +1,514 @@
     3.4 +/*
     3.5 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     3.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     3.7 + *
     3.8 + * This code is free software; you can redistribute it and/or modify it
     3.9 + * under the terms of the GNU General Public License version 2 only, as
    3.10 + * published by the Free Software Foundation.
    3.11 + *
    3.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    3.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    3.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    3.15 + * version 2 for more details (a copy is included in the LICENSE file that
    3.16 + * accompanied this code).
    3.17 + *
    3.18 + * You should have received a copy of the GNU General Public License version
    3.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    3.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    3.21 + *
    3.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    3.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    3.24 + * have any questions.
    3.25 + *
    3.26 + */
    3.27 +
    3.28 +# include "incls/_precompiled.incl"
    3.29 +# include "incls/_vm_version_x86.cpp.incl"
    3.30 +
     3.31 +// Out-of-class definitions of the static data members of VM_Version.
     3.32 +int VM_Version::_cpu;           // assigned in get_processor_features()
     3.33 +int VM_Version::_model;         // assigned in get_processor_features()
     3.34 +int VM_Version::_stepping;      // assigned in get_processor_features()
     3.35 +int VM_Version::_cpuFeatures;   // CPU_* feature bit mask; assigned in get_processor_features()
     3.36 +const char*           VM_Version::_features_str = "";  // replaced by a strdup copy of the feature summary
     3.37 +VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };  // passed to the getPsrInfo stub, which fills it in
     3.38 +// Blob holding the generated getPsrInfo stub, and the size passed to BufferBlob::create for it.
     3.39 +static BufferBlob* stub_blob;
     3.40 +static const int stub_size = 300;
     3.41 +// Stub signature: a single pointer argument (the CpuidInfo block to fill), no return value.
     3.42 +extern "C" {
     3.43 +  typedef void (*getPsrInfo_stub_t)(void*);
     3.44 +}
     3.45 +static getPsrInfo_stub_t getPsrInfo_stub = NULL;  // assigned in VM_Version::initialize()
    3.46 +
     3.47 +// Generator for the getPsrInfo stub: emits code that probes the CPU and stores raw cpuid results into a VM_Version::CpuidInfo block.
     3.48 +class VM_Version_StubGenerator: public StubCodeGenerator {
     3.49 + public:
     3.50 +
     3.51 +  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
     3.52 +  // Emits the stub through _masm and returns the address of its first instruction.
     3.53 +  address generate_getPsrInfo() {
     3.54 +    // EFLAGS bits toggled below to classify the CPU: AC (386 vs. later) and ID (cpuid present or not).
     3.55 +    const uint32_t EFL_AC           = 0x40000;
     3.56 +    const uint32_t EFL_ID           = 0x200000;
     3.57 +    // Values stored in place of cpuid(1) eax when we don't have a CPUID instruction.
     3.58 +    const int      CPU_FAMILY_SHIFT = 8;   // lines up with StdCpuid1Eax.bits.family (after stepping:4, model:4)
     3.59 +    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
     3.60 +    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
     3.61 +
     3.62 +    Label detect_486, cpu486, detect_586, std_cpuid1;
     3.63 +    Label ext_cpuid1, ext_cpuid5, done;
     3.64 +
     3.65 +    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
     3.66 +#   define __ _masm->
     3.67 +
     3.68 +    address start = __ pc();
     3.69 +
     3.70 +    // Signature of the generated stub:
     3.71 +    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
     3.72 +    //
     3.73 +    // LP64: rcx and rdx are first and second argument registers on windows
     3.74 +    // rbp holds the cpuid_info address for the whole stub; rsi is used as a cursor into it.
     3.75 +    __ push(rbp);
     3.76 +#ifdef _LP64
     3.77 +    __ mov(rbp, c_rarg0); // cpuid_info address
     3.78 +#else
     3.79 +    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address (stack argument, read after the push of rbp above)
     3.80 +#endif
     3.81 +    __ push(rbx);
     3.82 +    __ push(rsi);
     3.83 +    __ pushf();          // preserve rbx, rsi, and flags
     3.84 +    __ pop(rax);
     3.85 +    __ push(rax);        // original flags stay on the stack for the popf at done
     3.86 +    __ mov(rcx, rax);    // rcx = original flags, used as the reference value below
     3.87 +    //
     3.88 +    // if we are unable to change the AC flag, we have a 386
     3.89 +    //
     3.90 +    __ xorl(rax, EFL_AC);
     3.91 +    __ push(rax);
     3.92 +    __ popf();
     3.93 +    __ pushf();
     3.94 +    __ pop(rax);
     3.95 +    __ cmpptr(rax, rcx);
     3.96 +    __ jccb(Assembler::notEqual, detect_486);   // flags changed: not a 386
     3.97 +
     3.98 +    __ movl(rax, CPU_FAMILY_386);
     3.99 +    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);   // presumably the std_cpuid1_eax slot -- confirm in the header
    3.100 +    __ jmp(done);
    3.101 +
    3.102 +    //
    3.103 +    // If we are unable to change the ID flag, we have a 486 which does
    3.104 +    // not support the "cpuid" instruction.
    3.105 +    //
    3.106 +    __ bind(detect_486);
    3.107 +    __ mov(rax, rcx);
    3.108 +    __ xorl(rax, EFL_ID);
    3.109 +    __ push(rax);
    3.110 +    __ popf();
    3.111 +    __ pushf();
    3.112 +    __ pop(rax);
    3.113 +    __ cmpptr(rcx, rax);
    3.114 +    __ jccb(Assembler::notEqual, detect_586);   // flags changed: cpuid is available
    3.115 +
    3.116 +    __ bind(cpu486);
    3.117 +    __ movl(rax, CPU_FAMILY_486);
    3.118 +    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    3.119 +    __ jmp(done);
    3.120 +
    3.121 +    //
    3.122 +    // At this point, we have a chip which supports the "cpuid" instruction
    3.123 +    //
    3.124 +    __ bind(detect_586);
    3.125 +    __ xorl(rax, rax);   // cpuid function 0
    3.126 +    __ cpuid();
    3.127 +    __ orl(rax, rax);    // sets ZF when eax (the value tested by the jcc below) is 0
    3.128 +    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
    3.129 +                                        // value of at least 1, we give up and
    3.130 +                                        // assume a 486
    3.131 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    3.132 +    __ movl(Address(rsi, 0), rax);
    3.133 +    __ movl(Address(rsi, 4), rbx);
    3.134 +    __ movl(Address(rsi, 8), rcx);
    3.135 +    __ movl(Address(rsi,12), rdx);
    3.136 +
    3.137 +    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
    3.138 +    __ jccb(Assembler::belowEqual, std_cpuid1);
    3.139 +
    3.140 +    //
    3.141 +    // cpuid(0x4) Deterministic cache params
    3.142 +    //
    3.143 +    __ movl(rax, 4);
    3.144 +    __ xorl(rcx, rcx);   // L1 cache
    3.145 +    __ cpuid();
    3.146 +    __ push(rax);        // keep the full eax; it is stored below if the cache entry is valid
    3.147 +    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    3.148 +    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    3.149 +    __ pop(rax);         // restore eax; the jccb below still tests the flags set by orl
    3.150 +    __ jccb(Assembler::equal, std_cpuid1);
    3.151 +
    3.152 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    3.153 +    __ movl(Address(rsi, 0), rax);
    3.154 +    __ movl(Address(rsi, 4), rbx);
    3.155 +    __ movl(Address(rsi, 8), rcx);
    3.156 +    __ movl(Address(rsi,12), rdx);
    3.157 +
    3.158 +    //
    3.159 +    // Standard cpuid(0x1)
    3.160 +    //
    3.161 +    __ bind(std_cpuid1);
    3.162 +    __ movl(rax, 1);
    3.163 +    __ cpuid();
    3.164 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    3.165 +    __ movl(Address(rsi, 0), rax);
    3.166 +    __ movl(Address(rsi, 4), rbx);
    3.167 +    __ movl(Address(rsi, 8), rcx);
    3.168 +    __ movl(Address(rsi,12), rdx);
    3.169 +    // Query the highest extended function; this result is only compared, not stored.
    3.170 +    __ movl(rax, 0x80000000);
    3.171 +    __ cpuid();
    3.172 +    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    3.173 +    __ jcc(Assembler::belowEqual, done);
    3.174 +    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    3.175 +    __ jccb(Assembler::belowEqual, ext_cpuid1);
    3.176 +    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    3.177 +    __ jccb(Assembler::belowEqual, ext_cpuid5);
    3.178 +    //
    3.179 +    // Extended cpuid(0x80000008); falls through to the 0x80000005 and 0x80000001 queries
    3.180 +    //
    3.181 +    __ movl(rax, 0x80000008);
    3.182 +    __ cpuid();
    3.183 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    3.184 +    __ movl(Address(rsi, 0), rax);
    3.185 +    __ movl(Address(rsi, 4), rbx);
    3.186 +    __ movl(Address(rsi, 8), rcx);
    3.187 +    __ movl(Address(rsi,12), rdx);
    3.188 +
    3.189 +    //
    3.190 +    // Extended cpuid(0x80000005)
    3.191 +    //
    3.192 +    __ bind(ext_cpuid5);
    3.193 +    __ movl(rax, 0x80000005);
    3.194 +    __ cpuid();
    3.195 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    3.196 +    __ movl(Address(rsi, 0), rax);
    3.197 +    __ movl(Address(rsi, 4), rbx);
    3.198 +    __ movl(Address(rsi, 8), rcx);
    3.199 +    __ movl(Address(rsi,12), rdx);
    3.200 +
    3.201 +    //
    3.202 +    // Extended cpuid(0x80000001)
    3.203 +    //
    3.204 +    __ bind(ext_cpuid1);
    3.205 +    __ movl(rax, 0x80000001);
    3.206 +    __ cpuid();
    3.207 +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    3.208 +    __ movl(Address(rsi, 0), rax);
    3.209 +    __ movl(Address(rsi, 4), rbx);
    3.210 +    __ movl(Address(rsi, 8), rcx);
    3.211 +    __ movl(Address(rsi,12), rdx);
    3.212 +
    3.213 +    //
    3.214 +    // return: restore the saved flags and registers in reverse order of the pushes
    3.215 +    //
    3.216 +    __ bind(done);
    3.217 +    __ popf();
    3.218 +    __ pop(rsi);
    3.219 +    __ pop(rbx);
    3.220 +    __ pop(rbp);
    3.221 +    __ ret(0);
    3.222 +
    3.223 +#   undef __
    3.224 +
    3.225 +    return start;
    3.226 +  };
    3.227 +};
   3.228 +
    3.229 +// Runs the getPsrInfo stub (unless Use486InstrsOnly), derives the CPU feature mask and summary string, and adjusts dependent VM flags.
    3.230 +void VM_Version::get_processor_features() {
    3.231 +
    3.232 +  _cpu = 4; // 486 by default
    3.233 +  _model = 0;
    3.234 +  _stepping = 0;
    3.235 +  _cpuFeatures = 0;
    3.236 +  _logical_processors_per_package = 1;
    3.237 +
    3.238 +  if (!Use486InstrsOnly) {
    3.239 +    // Get raw processor info
    3.240 +    getPsrInfo_stub(&_cpuid_info);
    3.241 +    assert_is_initialized();
    3.242 +    _cpu = extended_cpu_family();
    3.243 +    _model = extended_cpu_model();
    3.244 +    _stepping = cpu_stepping();
    3.245 +
    3.246 +    if (cpu_family() > 4) { // it supports CPUID
    3.247 +      _cpuFeatures = feature_flags();
    3.248 +      // Logical processors are only available on P4s and above,
    3.249 +      // and only if hyperthreading is available.
    3.250 +      _logical_processors_per_package = logical_processor_count();
    3.251 +    }
    3.252 +  }
    3.253 +
    3.254 +  _supports_cx8 = supports_cmpxchg8();
    3.255 +
    3.256 +#ifdef _LP64
    3.257 +  // OS should support SSE for x64 and hardware should support at least SSE2.
    3.258 +  if (!VM_Version::supports_sse2()) {
    3.259 +    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
    3.260 +  }
    3.261 +#endif
    3.262 +
    3.263 +  // If the OS doesn't support SSE, we can't use this feature even if the HW does
    3.264 +  if (!os::supports_sse())
    3.265 +    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
    3.266 +  // Clear feature bits for SSE levels above the value of the UseSSE flag.
    3.267 +  if (UseSSE < 4) {
    3.268 +    _cpuFeatures &= ~CPU_SSE4_1;
    3.269 +    _cpuFeatures &= ~CPU_SSE4_2;
    3.270 +  }
    3.271 +
    3.272 +  if (UseSSE < 3) {
    3.273 +    _cpuFeatures &= ~CPU_SSE3;
    3.274 +    _cpuFeatures &= ~CPU_SSSE3;
    3.275 +    _cpuFeatures &= ~CPU_SSE4A;
    3.276 +  }
    3.277 +
    3.278 +  if (UseSSE < 2)
    3.279 +    _cpuFeatures &= ~CPU_SSE2;
    3.280 +
    3.281 +  if (UseSSE < 1)
    3.282 +    _cpuFeatures &= ~CPU_SSE;
    3.283 +
    3.284 +  if (logical_processors_per_package() == 1) {
    3.285 +    // HT processor could be installed on a system which doesn't support HT.
    3.286 +    _cpuFeatures &= ~CPU_HT;
    3.287 +  }
    3.288 +  // Build the feature summary; it reflects the mask after the adjustments above.
    3.289 +  char buf[256];
    3.290 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    3.291 +               cores_per_cpu(), threads_per_core(),
    3.292 +               cpu_family(), _model, _stepping,
    3.293 +               (supports_cmov() ? ", cmov" : ""),
    3.294 +               (supports_cmpxchg8() ? ", cx8" : ""),
    3.295 +               (supports_fxsr() ? ", fxsr" : ""),
    3.296 +               (supports_mmx()  ? ", mmx"  : ""),
    3.297 +               (supports_sse()  ? ", sse"  : ""),
    3.298 +               (supports_sse2() ? ", sse2" : ""),
    3.299 +               (supports_sse3() ? ", sse3" : ""),
    3.300 +               (supports_ssse3()? ", ssse3": ""),
    3.301 +               (supports_sse4_1() ? ", sse4.1" : ""),
    3.302 +               (supports_sse4_2() ? ", sse4.2" : ""),
    3.303 +               (supports_mmx_ext() ? ", mmxext" : ""),
    3.304 +               (supports_3dnow()   ? ", 3dnow"  : ""),
    3.305 +               (supports_3dnow2()  ? ", 3dnowext" : ""),
    3.306 +               (supports_sse4a()   ? ", sse4a": ""),
    3.307 +               (supports_ht() ? ", ht": ""));
    3.308 +  _features_str = strdup(buf);   // buf is a local array, so keep a heap copy
    3.309 +
    3.310 +  // UseSSE is set to the smaller of what hardware supports and what
    3.311 +  // the command line requires.  I.e., you cannot set UseSSE to 2 on
    3.312 +  // older Pentiums which do not support it.
    3.313 +  if( UseSSE > 4 ) UseSSE=4;
    3.314 +  if( UseSSE < 0 ) UseSSE=0;
    3.315 +  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
    3.316 +    UseSSE = MIN2((intx)3,UseSSE);
    3.317 +  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
    3.318 +    UseSSE = MIN2((intx)2,UseSSE);
    3.319 +  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
    3.320 +    UseSSE = MIN2((intx)1,UseSSE);
    3.321 +  if( !supports_sse () ) // Drop to 0 if no SSE  support
    3.322 +    UseSSE = 0;
    3.323 +
    3.324 +  // On new cpus instructions which update whole XMM register should be used
    3.325 +  // to prevent partial register stall due to dependencies on high half.
    3.326 +  //
    3.327 +  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
    3.328 +  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
    3.329 +  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
    3.330 +  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
    3.331 +  // Vendor defaults below are applied only where FLAG_IS_DEFAULT holds for the flag.
    3.332 +  if( is_amd() ) { // AMD cpus specific settings
    3.333 +    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
    3.334 +      // Use it on new AMD cpus starting from Opteron.
    3.335 +      UseAddressNop = true;
    3.336 +    }
    3.337 +    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
    3.338 +      // Use it on new AMD cpus starting from Opteron.
    3.339 +      UseNewLongLShift = true;
    3.340 +    }
    3.341 +    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
    3.342 +      if( supports_sse4a() ) {
    3.343 +        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
    3.344 +      } else {
    3.345 +        UseXmmLoadAndClearUpper = false;
    3.346 +      }
    3.347 +    }
    3.348 +    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
    3.349 +      if( supports_sse4a() ) {
    3.350 +        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
    3.351 +      } else {
    3.352 +        UseXmmRegToRegMoveAll = false;
    3.353 +      }
    3.354 +    }
    3.355 +    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
    3.356 +      if( supports_sse4a() ) {
    3.357 +        UseXmmI2F = true;
    3.358 +      } else {
    3.359 +        UseXmmI2F = false;
    3.360 +      }
    3.361 +    }
    3.362 +    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
    3.363 +      if( supports_sse4a() ) {
    3.364 +        UseXmmI2D = true;
    3.365 +      } else {
    3.366 +        UseXmmI2D = false;
    3.367 +      }
    3.368 +    }
    3.369 +  }
    3.370 +
    3.371 +  if( is_intel() ) { // Intel cpus specific settings
    3.372 +    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
    3.373 +      UseStoreImmI16 = false; // don't use it on Intel cpus
    3.374 +    }
    3.375 +    if( cpu_family() == 6 || cpu_family() == 15 ) {
    3.376 +      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
    3.377 +        // Use it on all Intel cpus starting from PentiumPro
    3.378 +        UseAddressNop = true;
    3.379 +      }
    3.380 +    }
    3.381 +    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
    3.382 +      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    3.383 +    }
    3.384 +    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
    3.385 +      if( supports_sse3() ) {
    3.386 +        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
    3.387 +      } else {
    3.388 +        UseXmmRegToRegMoveAll = false;
    3.389 +      }
    3.390 +    }
    3.391 +    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
    3.392 +#ifdef COMPILER2
    3.393 +      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
    3.394 +        // For new Intel cpus do the next optimization:
    3.395 +        // don't align the beginning of a loop if there are enough instructions
    3.396 +        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
    3.397 +        // in current fetch line (OptoLoopAlignment) or the padding
    3.398 +        // is big (> MaxLoopPad).
    3.399 +        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
    3.400 +        // generated NOP instructions. 11 is the largest size of one
    3.401 +        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
    3.402 +        MaxLoopPad = 11;
    3.403 +      }
    3.404 +#endif // COMPILER2
    3.405 +      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
    3.406 +        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
    3.407 +      }
    3.408 +      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
    3.409 +        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
    3.410 +          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
    3.411 +        }
    3.412 +      }
    3.413 +    }
    3.414 +  }
    3.415 +  // Out-of-range prefetch selectors assert here; the clamps that follow handle builds where assert is compiled out.
    3.416 +  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
    3.417 +  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
    3.418 +
    3.419 +  // set valid Prefetch instruction
    3.420 +  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
    3.421 +  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
    3.422 +  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
    3.423 +  if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
    3.424 +  // Same normalization for the allocation prefetch selector.
    3.425 +  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
    3.426 +  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
    3.427 +  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
    3.428 +  if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
    3.429 +
    3.430 +  // Allocation prefetch settings
    3.431 +  intx cache_line_size = L1_data_cache_line_size();
    3.432 +  if( cache_line_size > AllocatePrefetchStepSize )   // only ever raises the step size
    3.433 +    AllocatePrefetchStepSize = cache_line_size;
    3.434 +  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
    3.435 +    AllocatePrefetchLines = 3; // Optimistic value
    3.436 +  assert(AllocatePrefetchLines > 0, "invalid value");
    3.437 +  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
    3.438 +    AllocatePrefetchLines = 1; // Conservative value
    3.439 +
    3.440 +  AllocatePrefetchDistance = allocate_prefetch_distance();
    3.441 +  AllocatePrefetchStyle    = allocate_prefetch_style();
    3.442 +
    3.443 +  if( AllocatePrefetchStyle == 2 && is_intel() &&
    3.444 +      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
    3.445 +#ifdef _LP64
    3.446 +    AllocatePrefetchDistance = 384;
    3.447 +#else
    3.448 +    AllocatePrefetchDistance = 320;
    3.449 +#endif
    3.450 +  }
    3.451 +  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
    3.452 +
    3.453 +#ifdef _LP64
    3.454 +  // Prefetch settings
    3.455 +  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
    3.456 +  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
    3.457 +  PrefetchFieldsAhead         = prefetch_fields_ahead();
    3.458 +#endif
    3.459 +  // Diagnostic dump of the chosen settings (non-PRODUCT builds, PrintMiscellaneous && Verbose).
    3.460 +#ifndef PRODUCT
    3.461 +  if (PrintMiscellaneous && Verbose) {
    3.462 +    tty->print_cr("Logical CPUs per core: %u",   // NOTE(review): label says per core, value is per package -- confirm intent
    3.463 +                  logical_processors_per_package());
    3.464 +    tty->print_cr("UseSSE=%d",UseSSE);   // NOTE(review): UseSSE is used with MIN2((intx)..) above, so %d may not match its width on LP64 -- confirm
    3.465 +    tty->print("Allocation: ");
    3.466 +    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {   // parses as A || (B && C)
    3.467 +      tty->print_cr("no prefetching");
    3.468 +    } else {
    3.469 +      if (UseSSE == 0 && supports_3dnow()) {
    3.470 +        tty->print("PREFETCHW");
    3.471 +      } else if (UseSSE >= 1) {
    3.472 +        if (AllocatePrefetchInstr == 0) {
    3.473 +          tty->print("PREFETCHNTA");
    3.474 +        } else if (AllocatePrefetchInstr == 1) {
    3.475 +          tty->print("PREFETCHT0");
    3.476 +        } else if (AllocatePrefetchInstr == 2) {
    3.477 +          tty->print("PREFETCHT2");
    3.478 +        } else if (AllocatePrefetchInstr == 3) {
    3.479 +          tty->print("PREFETCHW");
    3.480 +        }
    3.481 +      }
    3.482 +      if (AllocatePrefetchLines > 1) {
    3.483 +        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
    3.484 +      } else {
    3.485 +        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
    3.486 +      }
    3.487 +    }
    3.488 +    // NOTE(review): within this function the three flags below are assigned only under _LP64; elsewhere they print whatever value they already had.
    3.489 +    if (PrefetchCopyIntervalInBytes > 0) {
    3.490 +      tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
    3.491 +    }
    3.492 +    if (PrefetchScanIntervalInBytes > 0) {
    3.493 +      tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
    3.494 +    }
    3.495 +    if (PrefetchFieldsAhead > 0) {
    3.496 +      tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
    3.497 +    }
    3.498 +  }
    3.499 +#endif // !PRODUCT
    3.500 +}
    3.501 +// Creates the blob for the getPsrInfo stub, generates the stub into it, then runs get_processor_features().
    3.502 +void VM_Version::initialize() {
    3.503 +  ResourceMark rm;
    3.504 +  // Making this stub must be FIRST use of assembler
    3.505 +  // Exit the VM during initialization if the blob cannot be created.
    3.506 +  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
    3.507 +  if (stub_blob == NULL) {
    3.508 +    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
    3.509 +  }
    3.510 +  CodeBuffer c(stub_blob->instructions_begin(),
    3.511 +               stub_blob->instructions_size());
    3.512 +  VM_Version_StubGenerator g(&c);
    3.513 +  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
    3.514 +                                   g.generate_getPsrInfo());
    3.515 +  // The stub pointer must be set before this call, which invokes it.
    3.516 +  get_processor_features();
    3.517 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Thu Feb 26 14:25:55 2009 -0800
     4.3 @@ -0,0 +1,459 @@
     4.4 +/*
     4.5 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
     4.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4.7 + *
     4.8 + * This code is free software; you can redistribute it and/or modify it
     4.9 + * under the terms of the GNU General Public License version 2 only, as
    4.10 + * published by the Free Software Foundation.
    4.11 + *
    4.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    4.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    4.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    4.15 + * version 2 for more details (a copy is included in the LICENSE file that
    4.16 + * accompanied this code).
    4.17 + *
    4.18 + * You should have received a copy of the GNU General Public License version
    4.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    4.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    4.21 + *
    4.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    4.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
    4.24 + * have any questions.
    4.25 + *
    4.26 + */
    4.27 +
    4.28 +class VM_Version : public Abstract_VM_Version {
    4.29 +public:
    4.30 +  // cpuid result register layouts.  These are all unions of a uint32_t
    4.31 +  // (in case anyone wants access to the register as a whole) and a bitfield.
    4.32 +
    4.33 +  union StdCpuid1Eax {
    4.34 +    uint32_t value;
    4.35 +    struct {
    4.36 +      uint32_t stepping   : 4,
    4.37 +               model      : 4,
    4.38 +               family     : 4,
    4.39 +               proc_type  : 2,
    4.40 +                          : 2,
    4.41 +               ext_model  : 4,
    4.42 +               ext_family : 8,
    4.43 +                          : 4;
    4.44 +    } bits;
    4.45 +  };
    4.46 +
    4.47 +  union StdCpuid1Ebx { // example, unused
    4.48 +    uint32_t value;
    4.49 +    struct {
    4.50 +      uint32_t brand_id         : 8,
    4.51 +               clflush_size     : 8,
    4.52 +               threads_per_cpu  : 8,
    4.53 +               apic_id          : 8;
    4.54 +    } bits;
    4.55 +  };
    4.56 +
    4.57 +  union StdCpuid1Ecx {
    4.58 +    uint32_t value;
    4.59 +    struct {
    4.60 +      uint32_t sse3     : 1,
    4.61 +                        : 2,
    4.62 +               monitor  : 1,
    4.63 +                        : 1,
    4.64 +               vmx      : 1,
    4.65 +                        : 1,
    4.66 +               est      : 1,
    4.67 +                        : 1,
    4.68 +               ssse3    : 1,
    4.69 +               cid      : 1,
    4.70 +                        : 2,
    4.71 +               cmpxchg16: 1,
    4.72 +                        : 4,
    4.73 +               dca      : 1,
    4.74 +               sse4_1   : 1,
    4.75 +               sse4_2   : 1,
    4.76 +                        : 11;
    4.77 +    } bits;
    4.78 +  };
    4.79 +
    4.80 +  union StdCpuid1Edx {
    4.81 +    uint32_t value;
    4.82 +    struct {
    4.83 +      uint32_t          : 4,
    4.84 +               tsc      : 1,
    4.85 +                        : 3,
    4.86 +               cmpxchg8 : 1,
    4.87 +                        : 6,
    4.88 +               cmov     : 1,
    4.89 +                        : 7,
    4.90 +               mmx      : 1,
    4.91 +               fxsr     : 1,
    4.92 +               sse      : 1,
    4.93 +               sse2     : 1,
    4.94 +                        : 1,
    4.95 +               ht       : 1,
    4.96 +                        : 3;
    4.97 +    } bits;
    4.98 +  };
    4.99 +
   4.100 +  union DcpCpuid4Eax {
   4.101 +    uint32_t value;
   4.102 +    struct {
   4.103 +      uint32_t cache_type    : 5,
   4.104 +                             : 21,
   4.105 +               cores_per_cpu : 6;
   4.106 +    } bits;
   4.107 +  };
   4.108 +
   4.109 +  union DcpCpuid4Ebx {
   4.110 +    uint32_t value;
   4.111 +    struct {
   4.112 +      uint32_t L1_line_size  : 12,
   4.113 +               partitions    : 10,
   4.114 +               associativity : 10;
   4.115 +    } bits;
   4.116 +  };
   4.117 +
   4.118 +  union ExtCpuid1Ecx {
   4.119 +    uint32_t value;
   4.120 +    struct {
   4.121 +      uint32_t LahfSahf     : 1,
   4.122 +               CmpLegacy    : 1,
   4.123 +                            : 4,
   4.124 +               abm          : 1,
   4.125 +               sse4a        : 1,
   4.126 +               misalignsse  : 1,
   4.127 +               prefetchw    : 1,
   4.128 +                            : 22;
   4.129 +    } bits;
   4.130 +  };
   4.131 +
   4.132 +  union ExtCpuid1Edx {
   4.133 +    uint32_t value;
   4.134 +    struct {
   4.135 +      uint32_t           : 22,
   4.136 +               mmx_amd   : 1,
   4.137 +               mmx       : 1,
   4.138 +               fxsr      : 1,
   4.139 +                         : 4,
   4.140 +               long_mode : 1,
   4.141 +               tdnow2    : 1,
   4.142 +               tdnow     : 1;
   4.143 +    } bits;
   4.144 +  };
   4.145 +
   4.146 +  union ExtCpuid5Ex {
   4.147 +    uint32_t value;
   4.148 +    struct {
   4.149 +      uint32_t L1_line_size : 8,
   4.150 +               L1_tag_lines : 8,
   4.151 +               L1_assoc     : 8,
   4.152 +               L1_size      : 8;
   4.153 +    } bits;
   4.154 +  };
   4.155 +
   4.156 +  union ExtCpuid8Ecx {
   4.157 +    uint32_t value;
   4.158 +    struct {
   4.159 +      uint32_t cores_per_cpu : 8,
   4.160 +                             : 24;
   4.161 +    } bits;
   4.162 +  };
   4.163 +
   4.164 +protected:
   4.165 +   static int _cpu;
   4.166 +   static int _model;
   4.167 +   static int _stepping;
   4.168 +   static int _cpuFeatures;     // features returned by the "cpuid" instruction
   4.169 +                                // 0 if this instruction is not available
   4.170 +   static const char* _features_str;
   4.171 +
   4.172 +   enum {
   4.173 +     CPU_CX8    = (1 << 0), // next bits are from cpuid 1 (EDX)
   4.174 +     CPU_CMOV   = (1 << 1),
   4.175 +     CPU_FXSR   = (1 << 2),
   4.176 +     CPU_HT     = (1 << 3),
   4.177 +     CPU_MMX    = (1 << 4),
   4.178 +     CPU_3DNOW  = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
   4.179 +     CPU_SSE    = (1 << 6),
   4.180 +     CPU_SSE2   = (1 << 7),
   4.181 +     CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
   4.182 +     CPU_SSSE3  = (1 << 9),
   4.183 +     CPU_SSE4A  = (1 << 10),
   4.184 +     CPU_SSE4_1 = (1 << 11),
   4.185 +     CPU_SSE4_2 = (1 << 12)
   4.186 +   } cpuFeatureFlags;
   4.187 +
   4.188 +  // cpuid information block.  All info derived from executing cpuid with
   4.189 +  // various function numbers is stored here.  Intel and AMD info is
   4.190 +  // merged in this block: accessor methods disentangle it.
   4.191 +  //
   4.192 +  // The info block is laid out in subblocks of 4 dwords corresponding to
   4.193 +  // eax, ebx, ecx and edx, whether or not they contain anything useful.
   4.194 +  struct CpuidInfo { // raw cpuid results, one 4-dword subblock per recorded cpuid function
   4.195 +    // cpuid function 0
   4.196 +    uint32_t std_max_function;
   4.197 +    uint32_t std_vendor_name_0; // first vendor-name dword; the one is_amd() and is_intel() compare
   4.198 +    uint32_t std_vendor_name_1;
   4.199 +    uint32_t std_vendor_name_2;
   4.200 +
   4.201 +    // cpuid function 1
   4.202 +    StdCpuid1Eax std_cpuid1_eax; // family, model, stepping and their extended fields
   4.203 +    StdCpuid1Ebx std_cpuid1_ebx; // threads_per_cpu field used by threads_per_core()
   4.204 +    StdCpuid1Ecx std_cpuid1_ecx; // sse3, ssse3, sse4_1, sse4_2 bits
   4.205 +    StdCpuid1Edx std_cpuid1_edx; // cmpxchg8, cmov, fxsr, ht, mmx, sse, sse2 bits
   4.206 +
   4.207 +    // cpuid function 4 (deterministic cache parameters)
   4.208 +    DcpCpuid4Eax dcp_cpuid4_eax; // cores_per_cpu field used by cores_per_cpu() on Intel
   4.209 +    DcpCpuid4Ebx dcp_cpuid4_ebx; // L1_line_size field used by L1_data_cache_line_size() on Intel
   4.210 +    uint32_t     dcp_cpuid4_ecx; // unused currently
   4.211 +    uint32_t     dcp_cpuid4_edx; // unused currently
   4.212 +
   4.213 +    // cpuid function 0x80000000 // example, unused
   4.214 +    uint32_t ext_max_function;
   4.215 +    uint32_t ext_vendor_name_0;
   4.216 +    uint32_t ext_vendor_name_1;
   4.217 +    uint32_t ext_vendor_name_2;
   4.218 +
   4.219 +    // cpuid function 0x80000001
   4.220 +    uint32_t     ext_cpuid1_eax; // reserved
   4.221 +    uint32_t     ext_cpuid1_ebx; // reserved
   4.222 +    ExtCpuid1Ecx ext_cpuid1_ecx; // sse4a bit (consulted for AMD only)
   4.223 +    ExtCpuid1Edx ext_cpuid1_edx; // fxsr, mmx, tdnow, tdnow2, mmx_amd bits (consulted for AMD only)
   4.224 +
   4.225 +    // cpuid functions 0x80000002 thru 0x80000004: example, unused
   4.226 +    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
   4.227 +    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
   4.228 +    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
   4.229 +
   4.230 +    // cpuid function 0x80000005 //AMD L1, Intel reserved
   4.231 +    uint32_t     ext_cpuid5_eax; // unused currently
   4.232 +    uint32_t     ext_cpuid5_ebx; // reserved
   4.233 +    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
   4.234 +    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
   4.235 +
   4.236 +    // cpuid function 0x80000008
   4.237 +    uint32_t     ext_cpuid8_eax; // unused currently
   4.238 +    uint32_t     ext_cpuid8_ebx; // reserved
   4.239 +    ExtCpuid8Ecx ext_cpuid8_ecx; // cores_per_cpu field used by cores_per_cpu() on AMD
   4.240 +    uint32_t     ext_cpuid8_edx; // reserved
   4.241 +  };
   4.242 +
   4.243 +  // The actual cpuid info block
   4.244 +  static CpuidInfo _cpuid_info;
   4.245 +
   4.246 +  // Extractors and predicates
   4.247 +  static uint32_t extended_cpu_family() { // family + ext_family fields of the cpuid 1 EAX word
   4.248 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
   4.249 +    result += _cpuid_info.std_cpuid1_eax.bits.ext_family; // added unconditionally, whatever the base family is
   4.250 +    return result;
   4.251 +  }
   4.252 +  static uint32_t extended_cpu_model() { // model field combined with ext_model from the cpuid 1 EAX word
   4.253 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
   4.254 +    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; // ext_model is shifted left by 4 and OR-ed in, unconditionally
   4.255 +    return result;
   4.256 +  }
   4.257 +  static uint32_t cpu_stepping() { // stepping field of the cpuid 1 EAX word, returned unchanged
   4.258 +    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
   4.259 +    return result;
   4.260 +  }
   4.261 +  static uint logical_processor_count() { // currently just forwards the value of threads_per_core()
   4.262 +    uint result = threads_per_core();
   4.263 +    return result;
   4.264 +  }
   4.265 +  static uint32_t feature_flags() { // builds the CPU_* bit mask from the raw cpuid words in _cpuid_info
   4.266 +    uint32_t result = 0;
   4.267 +    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
   4.268 +      result |= CPU_CX8;
   4.269 +    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
   4.270 +      result |= CPU_CMOV;
   4.271 +    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && // && binds tighter than ||: std bit, or (AMD and ext bit)
   4.272 +        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
   4.273 +      result |= CPU_FXSR;
   4.274 +    // HT flag is set for multi-core processors also.
   4.275 +    if (threads_per_core() > 1)
   4.276 +      result |= CPU_HT;
   4.277 +    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && // same grouping as for fxsr above
   4.278 +        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
   4.279 +      result |= CPU_MMX;
   4.280 +    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) // 3DNow is only looked up for AMD
   4.281 +      result |= CPU_3DNOW;
   4.282 +    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
   4.283 +      result |= CPU_SSE;
   4.284 +    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
   4.285 +      result |= CPU_SSE2;
   4.286 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
   4.287 +      result |= CPU_SSE3;
   4.288 +    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
   4.289 +      result |= CPU_SSSE3;
   4.290 +    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) // SSE4A is only looked up for AMD
   4.291 +      result |= CPU_SSE4A;
   4.292 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
   4.293 +      result |= CPU_SSE4_1;
   4.294 +    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
   4.295 +      result |= CPU_SSE4_2;
   4.296 +    return result;
   4.297 +  }
   4.298 +
   4.299 +  static void get_processor_features();
   4.300 +
   4.301 +public:
   4.302 +  // Offsets for cpuid asm stub
   4.303 +  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } // start of the cpuid 0 subblock
   4.304 +  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } // start of the cpuid 1 subblock
   4.305 +  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } // start of the cpuid 4 subblock
   4.306 +  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } // start of the cpuid 0x80000001 subblock
   4.307 +  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } // start of the cpuid 0x80000005 subblock
   4.308 +  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } // start of the cpuid 0x80000008 subblock
   4.309 +
   4.310 +  // Initialization
   4.311 +  static void initialize();
   4.312 +
   4.313 +  // Asserts
   4.314 +  static void assert_is_initialized() { // treats a zero family field in the cpuid 1 EAX word as "info block not filled in"
   4.315 +    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
   4.316 +  }
   4.317 +
   4.318 +  //
   4.319 +  // Processor family:
   4.320 +  //       3   -  386
   4.321 +  //       4   -  486
   4.322 +  //       5   -  Pentium
   4.323 +  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
   4.324 +  //              Pentium M, Core Solo, Core Duo, Core2 Duo
   4.325 +  //    family 6 model:   9,        13,       14,        15
   4.326 +  //    0x0f   -  Pentium 4, Opteron
   4.327 +  //
   4.328 +  // Note: The cpu family should be used to select between
   4.329 +  //       instruction sequences which are valid on all Intel
   4.330 +  //       processors.  Use the feature test functions below to
   4.331 +  //       determine whether a particular instruction is supported.
   4.332 +  //
   4.333 +  static int  cpu_family()        { return _cpu;} // cached value; does not re-read _cpuid_info
   4.334 +  static bool is_P6()             { return cpu_family() >= 6; } // true for family 6 and anything above it
   4.335 +
   4.336 +  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' (bytes of "Auth", low byte first)
   4.337 +  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' (bytes of "Genu", low byte first)
   4.338 +
   4.339 +  static uint cores_per_cpu()  { // cores per package; stays 1 for vendors other than Intel and AMD
   4.340 +    uint result = 1;
   4.341 +    if (is_intel()) {
   4.342 +      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); // Intel: cpuid 4 EAX field, stored value + 1
   4.343 +    } else if (is_amd()) {
   4.344 +      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); // AMD: cpuid 0x80000008 ECX field, stored value + 1
   4.345 +    }
   4.346 +    return result;
   4.347 +  }
   4.348 +
   4.349 +  static uint threads_per_core()  { // logical threads per core; always returns at least 1
   4.350 +    uint result = 1; // used as-is when the ht bit of the cpuid 1 EDX word is clear
   4.351 +    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
   4.352 +      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
   4.353 +               cores_per_cpu(); // integer division: yields 0 when threads_per_cpu < cores_per_cpu()
   4.354 +    }
   4.355 +    return (result == 0 ? 1 : result); // clamp so callers never see 0 threads per core
   4.356 +  }
   4.357 +
   4.358 +  static intx L1_data_cache_line_size()  { // L1 data cache line size in bytes, never less than 32
   4.359 +    intx result = 0;
   4.360 +    if (is_intel()) {
   4.361 +      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); // Intel: cpuid 4 EBX field, stored value + 1
   4.362 +    } else if (is_amd()) {
   4.363 +      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; // AMD: cpuid 0x80000005 ECX field, used as stored
   4.364 +    }
   4.365 +    if (result < 32) // not defined ? (also the path taken for any other vendor, where result is still 0)
   4.366 +      result = 32;   // 32 bytes by default on x86 and other x64
   4.367 +    return result;
   4.368 +  }
   4.369 +
   4.370 +  //
   4.371 +  // Feature identification
   4.372 +  //
   4.373 +  static bool supports_cpuid()    { return _cpuFeatures  != 0; } // true as soon as any feature bit is set
   4.374 +  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
   4.375 +  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
   4.376 +  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
   4.377 +  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
   4.378 +  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
   4.379 +  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
   4.380 +  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   4.381 +  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   4.382 +  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   4.383 +  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   4.384 +  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   4.385 +  //
   4.386 +  // AMD features
   4.387 +  //
   4.388 +  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
   4.389 +  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } // reads the raw cpuid word, not _cpuFeatures
   4.390 +  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } // reads the raw cpuid word, not _cpuFeatures
   4.391 +  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
   4.392 +
   4.393 +  static bool supports_compare_and_exchange() { return true; } // unconditional: no cpuid data is consulted
   4.394 +
   4.395 +  static const char* cpu_features()           { return _features_str; } // returns the stored pointer as-is
   4.396 +
   4.397 +  static intx allocate_prefetch_distance() { // AllocatePrefetchDistance, or a per-CPU default when that flag is negative
   4.398 +    // This method should be called before allocate_prefetch_style().
   4.399 +    //
   4.400 +    // Hardware prefetching (distance/size in bytes):
   4.401 +    // Pentium 3 -  64 /  32
   4.402 +    // Pentium 4 - 256 / 128
   4.403 +    // Athlon    -  64 /  32 ????
   4.404 +    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
   4.405 +    // Core      - 128 /  64
   4.406 +    //
   4.407 +    // Software prefetching (distance in bytes / instruction with best score):
   4.408 +    // Pentium 3 - 128 / prefetchnta
   4.409 +    // Pentium 4 - 512 / prefetchnta
   4.410 +    // Athlon    - 128 / prefetchnta
   4.411 +    // Opteron   - 256 / prefetchnta
   4.412 +    // Core      - 256 / prefetchnta
   4.413 +    // It will be used only when AllocatePrefetchStyle > 0
   4.414 +
   4.415 +    intx count = AllocatePrefetchDistance; // a value >= 0 is returned unchanged
   4.416 +    if (count < 0) {   // default ?
   4.417 +      if (is_amd()) {  // AMD
   4.418 +        if (supports_sse2())
   4.419 +          count = 256; // Opteron
   4.420 +        else
   4.421 +          count = 128; // Athlon
   4.422 +      } else {         // Intel (in fact every vendor for which is_amd() is false)
   4.423 +        if (supports_sse2())
   4.424 +          if (cpu_family() == 6) {
   4.425 +            count = 256; // Pentium M, Core, Core2
   4.426 +          } else {
   4.427 +            count = 512; // Pentium 4
   4.428 +          }
   4.429 +        else // pairs with if (supports_sse2()): the inner if already has its own else
   4.430 +          count = 128; // Pentium 3 (and all other old CPUs)
   4.431 +      }
   4.432 +    }
   4.433 +    return count;
   4.434 +  }
   4.435 +  static intx allocate_prefetch_style() { // AllocatePrefetchStyle, or 0 when AllocatePrefetchDistance is not positive
   4.436 +    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be non-negative");
   4.437 +    // Return 0 unless AllocatePrefetchDistance is positive (0 and negative both disable the style).
   4.438 +    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
   4.439 +  }
   4.440 +
   4.441 +  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
   4.442 +  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
   4.443 +  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
   4.444 +  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
   4.445 +
   4.446 +  // gc copy/scan is disabled if prefetchw isn't supported, because
   4.447 +  // Prefetch::write emits an inlined prefetchw on Linux.
   4.448 +  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
   4.449 +  // The used prefetcht0 instruction works for both amd64 and em64t.
   4.450 +  static intx prefetch_copy_interval_in_bytes() { // PrefetchCopyIntervalInBytes, or 576 when that flag is negative
   4.451 +    intx interval = PrefetchCopyIntervalInBytes;
   4.452 +    return interval >= 0 ? interval : 576; // an explicit 0 is passed through unchanged
   4.453 +  }
   4.454 +  static intx prefetch_scan_interval_in_bytes() { // PrefetchScanIntervalInBytes, or 576 when that flag is negative
   4.455 +    intx interval = PrefetchScanIntervalInBytes;
   4.456 +    return interval >= 0 ? interval : 576; // an explicit 0 is passed through unchanged
   4.457 +  }
   4.458 +  static intx prefetch_fields_ahead() { // PrefetchFieldsAhead, or 1 when that flag is negative
   4.459 +    intx count = PrefetchFieldsAhead;
   4.460 +    return count >= 0 ? count : 1; // an explicit 0 is passed through unchanged
   4.461 +  }
   4.462 +};
     5.1 --- a/src/cpu/x86/vm/vm_version_x86_32.cpp	Sun Feb 22 17:21:13 2009 -0800
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,472 +0,0 @@
     5.4 -/*
     5.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     5.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     5.7 - *
     5.8 - * This code is free software; you can redistribute it and/or modify it
     5.9 - * under the terms of the GNU General Public License version 2 only, as
    5.10 - * published by the Free Software Foundation.
    5.11 - *
    5.12 - * This code is distributed in the hope that it will be useful, but WITHOUT
    5.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    5.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    5.15 - * version 2 for more details (a copy is included in the LICENSE file that
    5.16 - * accompanied this code).
    5.17 - *
    5.18 - * You should have received a copy of the GNU General Public License version
    5.19 - * 2 along with this work; if not, write to the Free Software Foundation,
    5.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    5.21 - *
    5.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    5.23 - * CA 95054 USA or visit www.sun.com if you need additional information or
    5.24 - * have any questions.
    5.25 - *
    5.26 - */
    5.27 -
    5.28 -# include "incls/_precompiled.incl"
    5.29 -# include "incls/_vm_version_x86_32.cpp.incl"
    5.30 -
    5.31 -
    5.32 -int VM_Version::_cpu;
    5.33 -int VM_Version::_model;
    5.34 -int VM_Version::_stepping;
    5.35 -int VM_Version::_cpuFeatures;
    5.36 -const char*           VM_Version::_features_str = "";
    5.37 -VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
    5.38 -
    5.39 -static BufferBlob* stub_blob;
    5.40 -static const int stub_size = 300;
    5.41 -
    5.42 -extern "C" {
    5.43 -  typedef void (*getPsrInfo_stub_t)(void*);
    5.44 -}
    5.45 -static getPsrInfo_stub_t getPsrInfo_stub = NULL;
    5.46 -
    5.47 -
    5.48 -class VM_Version_StubGenerator: public StubCodeGenerator {
    5.49 - public:
    5.50 -
    5.51 -  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
    5.52 -
    5.53 -  address generate_getPsrInfo() {
    5.54 -    // Flags to test CPU type.
    5.55 -    const uint32_t EFL_AC           = 0x40000;
    5.56 -    const uint32_t EFL_ID           = 0x200000;
    5.57 -    // Values for when we don't have a CPUID instruction.
    5.58 -    const int      CPU_FAMILY_SHIFT = 8;
    5.59 -    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    5.60 -    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
    5.61 -
    5.62 -    Label detect_486, cpu486, detect_586, std_cpuid1;
    5.63 -    Label ext_cpuid1, ext_cpuid5, done;
    5.64 -
    5.65 -    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
    5.66 -#   define __ _masm->
    5.67 -
    5.68 -    address start = __ pc();
    5.69 -
    5.70 -    //
    5.71 -    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
    5.72 -    //
    5.73 -    __ push(rbp);
    5.74 -    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
    5.75 -    __ push(rbx);
    5.76 -    __ push(rsi);
    5.77 -    __ pushf();          // preserve rbx, and flags
    5.78 -    __ pop(rax);
    5.79 -    __ push(rax);
    5.80 -    __ mov(rcx, rax);
    5.81 -    //
    5.82 -    // if we are unable to change the AC flag, we have a 386
    5.83 -    //
    5.84 -    __ xorl(rax, EFL_AC);
    5.85 -    __ push(rax);
    5.86 -    __ popf();
    5.87 -    __ pushf();
    5.88 -    __ pop(rax);
    5.89 -    __ cmpptr(rax, rcx);
    5.90 -    __ jccb(Assembler::notEqual, detect_486);
    5.91 -
    5.92 -    __ movl(rax, CPU_FAMILY_386);
    5.93 -    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    5.94 -    __ jmp(done);
    5.95 -
    5.96 -    //
    5.97 -    // If we are unable to change the ID flag, we have a 486 which does
    5.98 -    // not support the "cpuid" instruction.
    5.99 -    //
   5.100 -    __ bind(detect_486);
   5.101 -    __ mov(rax, rcx);
   5.102 -    __ xorl(rax, EFL_ID);
   5.103 -    __ push(rax);
   5.104 -    __ popf();
   5.105 -    __ pushf();
   5.106 -    __ pop(rax);
   5.107 -    __ cmpptr(rcx, rax);
   5.108 -    __ jccb(Assembler::notEqual, detect_586);
   5.109 -
   5.110 -    __ bind(cpu486);
   5.111 -    __ movl(rax, CPU_FAMILY_486);
   5.112 -    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
   5.113 -    __ jmp(done);
   5.114 -
   5.115 -    //
   5.116 -    // at this point, we have a chip which supports the "cpuid" instruction
   5.117 -    //
   5.118 -    __ bind(detect_586);
   5.119 -    __ xorptr(rax, rax);
   5.120 -    __ cpuid();
   5.121 -    __ orptr(rax, rax);
   5.122 -    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
   5.123 -                                        // value of at least 1, we give up and
   5.124 -                                        // assume a 486
   5.125 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
   5.126 -    __ movl(Address(rsi, 0), rax);
   5.127 -    __ movl(Address(rsi, 4), rbx);
   5.128 -    __ movl(Address(rsi, 8), rcx);
   5.129 -    __ movl(Address(rsi,12), rdx);
   5.130 -
   5.131 -    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
   5.132 -    __ jccb(Assembler::belowEqual, std_cpuid1);
   5.133 -
   5.134 -    //
   5.135 -    // cpuid(0x4) Deterministic cache params
   5.136 -    //
   5.137 -    __ movl(rax, 4);     // and rcx already set to 0x0
   5.138 -    __ xorl(rcx, rcx);
   5.139 -    __ cpuid();
   5.140 -    __ push(rax);
   5.141 -    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
   5.142 -    __ orl(rax, rax);    // rax,[4:0] == 0 indicates invalid cache
   5.143 -    __ pop(rax);
   5.144 -    __ jccb(Assembler::equal, std_cpuid1);
   5.145 -
   5.146 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
   5.147 -    __ movl(Address(rsi, 0), rax);
   5.148 -    __ movl(Address(rsi, 4), rbx);
   5.149 -    __ movl(Address(rsi, 8), rcx);
   5.150 -    __ movl(Address(rsi,12), rdx);
   5.151 -
   5.152 -    //
   5.153 -    // Standard cpuid(0x1)
   5.154 -    //
   5.155 -    __ bind(std_cpuid1);
   5.156 -    __ movl(rax, 1);
   5.157 -    __ cpuid();
   5.158 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
   5.159 -    __ movl(Address(rsi, 0), rax);
   5.160 -    __ movl(Address(rsi, 4), rbx);
   5.161 -    __ movl(Address(rsi, 8), rcx);
   5.162 -    __ movl(Address(rsi,12), rdx);
   5.163 -
   5.164 -    __ movl(rax, 0x80000000);
   5.165 -    __ cpuid();
   5.166 -    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
   5.167 -    __ jcc(Assembler::belowEqual, done);
   5.168 -    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
   5.169 -    __ jccb(Assembler::belowEqual, ext_cpuid1);
   5.170 -    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
   5.171 -    __ jccb(Assembler::belowEqual, ext_cpuid5);
   5.172 -    //
   5.173 -    // Extended cpuid(0x80000008)
   5.174 -    //
   5.175 -    __ movl(rax, 0x80000008);
   5.176 -    __ cpuid();
   5.177 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
   5.178 -    __ movl(Address(rsi, 0), rax);
   5.179 -    __ movl(Address(rsi, 4), rbx);
   5.180 -    __ movl(Address(rsi, 8), rcx);
   5.181 -    __ movl(Address(rsi,12), rdx);
   5.182 -
   5.183 -    //
   5.184 -    // Extended cpuid(0x80000005)
   5.185 -    //
   5.186 -    __ bind(ext_cpuid5);
   5.187 -    __ movl(rax, 0x80000005);
   5.188 -    __ cpuid();
   5.189 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
   5.190 -    __ movl(Address(rsi, 0), rax);
   5.191 -    __ movl(Address(rsi, 4), rbx);
   5.192 -    __ movl(Address(rsi, 8), rcx);
   5.193 -    __ movl(Address(rsi,12), rdx);
   5.194 -
   5.195 -    //
   5.196 -    // Extended cpuid(0x80000001)
   5.197 -    //
   5.198 -    __ bind(ext_cpuid1);
   5.199 -    __ movl(rax, 0x80000001);
   5.200 -    __ cpuid();
   5.201 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
   5.202 -    __ movl(Address(rsi, 0), rax);
   5.203 -    __ movl(Address(rsi, 4), rbx);
   5.204 -    __ movl(Address(rsi, 8), rcx);
   5.205 -    __ movl(Address(rsi,12), rdx);
   5.206 -
   5.207 -    //
   5.208 -    // return
   5.209 -    //
   5.210 -    __ bind(done);
   5.211 -    __ popf();
   5.212 -    __ pop(rsi);
   5.213 -    __ pop(rbx);
   5.214 -    __ pop(rbp);
   5.215 -    __ ret(0);
   5.216 -
   5.217 -#   undef __
   5.218 -
   5.219 -    return start;
   5.220 -  };
   5.221 -};
   5.222 -
   5.223 -
   5.224 -void VM_Version::get_processor_features() {
   5.225 -
   5.226 -  _cpu = 4; // 486 by default
   5.227 -  _model = 0;
   5.228 -  _stepping = 0;
   5.229 -  _cpuFeatures = 0;
   5.230 -  _logical_processors_per_package = 1;
   5.231 -  if (!Use486InstrsOnly) {
   5.232 -    // Get raw processor info
   5.233 -    getPsrInfo_stub(&_cpuid_info);
   5.234 -    assert_is_initialized();
   5.235 -    _cpu = extended_cpu_family();
   5.236 -    _model = extended_cpu_model();
   5.237 -    _stepping = cpu_stepping();
   5.238 -    if (cpu_family() > 4) { // it supports CPUID
   5.239 -      _cpuFeatures = feature_flags();
   5.240 -      // Logical processors are only available on P4s and above,
   5.241 -      // and only if hyperthreading is available.
   5.242 -      _logical_processors_per_package = logical_processor_count();
   5.243 -    }
   5.244 -  }
   5.245 -  _supports_cx8 = supports_cmpxchg8();
   5.246 -  // if the OS doesn't support SSE, we can't use this feature even if the HW does
   5.247 -  if( !os::supports_sse())
   5.248 -    _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
   5.249 -  if (UseSSE < 4) {
   5.250 -    _cpuFeatures &= ~CPU_SSE4_1;
   5.251 -    _cpuFeatures &= ~CPU_SSE4_2;
   5.252 -  }
   5.253 -  if (UseSSE < 3) {
   5.254 -    _cpuFeatures &= ~CPU_SSE3;
   5.255 -    _cpuFeatures &= ~CPU_SSSE3;
   5.256 -    _cpuFeatures &= ~CPU_SSE4A;
   5.257 -  }
   5.258 -  if (UseSSE < 2)
   5.259 -    _cpuFeatures &= ~CPU_SSE2;
   5.260 -  if (UseSSE < 1)
   5.261 -    _cpuFeatures &= ~CPU_SSE;
   5.262 -
   5.263 -  if (logical_processors_per_package() == 1) {
   5.264 -    // HT processor could be installed on a system which doesn't support HT.
   5.265 -    _cpuFeatures &= ~CPU_HT;
   5.266 -  }
   5.267 -
   5.268 -  char buf[256];
   5.269 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
   5.270 -               cores_per_cpu(), threads_per_core(),
   5.271 -               cpu_family(), _model, _stepping,
   5.272 -               (supports_cmov() ? ", cmov" : ""),
   5.273 -               (supports_cmpxchg8() ? ", cx8" : ""),
   5.274 -               (supports_fxsr() ? ", fxsr" : ""),
   5.275 -               (supports_mmx()  ? ", mmx"  : ""),
   5.276 -               (supports_sse()  ? ", sse"  : ""),
   5.277 -               (supports_sse2() ? ", sse2" : ""),
   5.278 -               (supports_sse3() ? ", sse3" : ""),
   5.279 -               (supports_ssse3()? ", ssse3": ""),
   5.280 -               (supports_sse4_1() ? ", sse4.1" : ""),
   5.281 -               (supports_sse4_2() ? ", sse4.2" : ""),
   5.282 -               (supports_mmx_ext() ? ", mmxext" : ""),
   5.283 -               (supports_3dnow()   ? ", 3dnow"  : ""),
   5.284 -               (supports_3dnow2()  ? ", 3dnowext" : ""),
   5.285 -               (supports_sse4a()   ? ", sse4a": ""),
   5.286 -               (supports_ht() ? ", ht": ""));
   5.287 -  _features_str = strdup(buf);
   5.288 -
   5.289 -  // UseSSE is set to the smaller of what hardware supports and what
   5.290 -  // the command line requires.  I.e., you cannot set UseSSE to 2 on
   5.291 -  // older Pentiums which do not support it.
   5.292 -  if( UseSSE > 4 ) UseSSE=4;
   5.293 -  if( UseSSE < 0 ) UseSSE=0;
   5.294 -  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
   5.295 -    UseSSE = MIN2((intx)3,UseSSE);
   5.296 -  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
   5.297 -    UseSSE = MIN2((intx)2,UseSSE);
   5.298 -  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
   5.299 -    UseSSE = MIN2((intx)1,UseSSE);
   5.300 -  if( !supports_sse () ) // Drop to 0 if no SSE  support
   5.301 -    UseSSE = 0;
   5.302 -
   5.303 -  // On new cpus instructions which update whole XMM register should be used
   5.304 -  // to prevent partial register stall due to dependencies on high half.
   5.305 -  //
   5.306 -  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
   5.307 -  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
   5.308 -  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
   5.309 -  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
   5.310 -
   5.311 -  if( is_amd() ) { // AMD cpus specific settings
   5.312 -    if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
   5.313 -      // Use it on new AMD cpus starting from Opteron.
   5.314 -      UseAddressNop = true;
   5.315 -    }
   5.316 -    if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) {
   5.317 -      // Use it on new AMD cpus starting from Opteron.
   5.318 -      UseNewLongLShift = true;
   5.319 -    }
   5.320 -    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
   5.321 -      if( supports_sse4a() ) {
   5.322 -        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
   5.323 -      } else {
   5.324 -        UseXmmLoadAndClearUpper = false;
   5.325 -      }
   5.326 -    }
   5.327 -    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
   5.328 -      if( supports_sse4a() ) {
   5.329 -        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
   5.330 -      } else {
   5.331 -        UseXmmRegToRegMoveAll = false;
   5.332 -      }
   5.333 -    }
   5.334 -    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
   5.335 -      if( supports_sse4a() ) {
   5.336 -        UseXmmI2F = true;
   5.337 -      } else {
   5.338 -        UseXmmI2F = false;
   5.339 -      }
   5.340 -    }
   5.341 -    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
   5.342 -      if( supports_sse4a() ) {
   5.343 -        UseXmmI2D = true;
   5.344 -      } else {
   5.345 -        UseXmmI2D = false;
   5.346 -      }
   5.347 -    }
   5.348 -  }
   5.349 -
   5.350 -  if( is_intel() ) { // Intel cpus specific settings
   5.351 -    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
   5.352 -      UseStoreImmI16 = false; // don't use it on Intel cpus
   5.353 -    }
   5.354 -    if( cpu_family() == 6 || cpu_family() == 15 ) {
   5.355 -      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
   5.356 -        // Use it on all Intel cpus starting from PentiumPro
   5.357 -        UseAddressNop = true;
   5.358 -      }
   5.359 -    }
   5.360 -    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
   5.361 -      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
   5.362 -    }
   5.363 -    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
   5.364 -      if( supports_sse3() ) {
   5.365 -        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
   5.366 -      } else {
   5.367 -        UseXmmRegToRegMoveAll = false;
   5.368 -      }
   5.369 -    }
   5.370 -    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
   5.371 -#ifdef COMPILER2
   5.372 -      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
   5.373 -        // For new Intel cpus do the next optimization:
   5.374 -        // don't align the beginning of a loop if there are enough instructions
   5.375 -        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
   5.376 -        // in current fetch line (OptoLoopAlignment) or the padding
   5.377 -        // is big (> MaxLoopPad).
   5.378 -        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
   5.379 -        // generated NOP instructions. 11 is the largest size of one
   5.380 -        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
   5.381 -        MaxLoopPad = 11;
   5.382 -      }
   5.383 -#endif // COMPILER2
   5.384 -      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
   5.385 -        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
   5.386 -      }
   5.387 -      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
   5.388 -        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
   5.389 -          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
   5.390 -        }
   5.391 -      }
   5.392 -    }
   5.393 -  }
   5.394 -
   5.395 -  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   5.396 -  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
   5.397 -
   5.398 -  // set valid Prefetch instruction
   5.399 -  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
   5.400 -  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
   5.401 -  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
   5.402 -  if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
   5.403 -
   5.404 -  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
   5.405 -  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
   5.406 -  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
   5.407 -  if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
   5.408 -
   5.409 -  // Allocation prefetch settings
   5.410 -  intx cache_line_size = L1_data_cache_line_size();
   5.411 -  if( cache_line_size > AllocatePrefetchStepSize )
   5.412 -    AllocatePrefetchStepSize = cache_line_size;
   5.413 -  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
   5.414 -    AllocatePrefetchLines = 3; // Optimistic value
   5.415 -  assert(AllocatePrefetchLines > 0, "invalid value");
   5.416 -  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
   5.417 -    AllocatePrefetchLines = 1; // Conservative value
   5.418 -
   5.419 -  AllocatePrefetchDistance = allocate_prefetch_distance();
   5.420 -  AllocatePrefetchStyle    = allocate_prefetch_style();
   5.421 -
   5.422 -  if( AllocatePrefetchStyle == 2 && is_intel() &&
   5.423 -      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
   5.424 -    AllocatePrefetchDistance = 320;
   5.425 -  }
   5.426 -  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
   5.427 -
   5.428 -#ifndef PRODUCT
   5.429 -  if (PrintMiscellaneous && Verbose) {
   5.430 -    tty->print_cr("Logical CPUs per core: %u",
   5.431 -                  logical_processors_per_package());
   5.432 -    tty->print_cr("UseSSE=%d",UseSSE);
   5.433 -    tty->print("Allocation: ");
   5.434 -    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
   5.435 -      tty->print_cr("no prefetching");
   5.436 -    } else {
   5.437 -      if (UseSSE == 0 && supports_3dnow()) {
   5.438 -        tty->print("PREFETCHW");
   5.439 -      } else if (UseSSE >= 1) {
   5.440 -        if (AllocatePrefetchInstr == 0) {
   5.441 -          tty->print("PREFETCHNTA");
   5.442 -        } else if (AllocatePrefetchInstr == 1) {
   5.443 -          tty->print("PREFETCHT0");
   5.444 -        } else if (AllocatePrefetchInstr == 2) {
   5.445 -          tty->print("PREFETCHT2");
   5.446 -        } else if (AllocatePrefetchInstr == 3) {
   5.447 -          tty->print("PREFETCHW");
   5.448 -        }
   5.449 -      }
   5.450 -      if (AllocatePrefetchLines > 1) {
   5.451 -        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
   5.452 -      } else {
   5.453 -        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
   5.454 -      }
   5.455 -    }
   5.456 -  }
   5.457 -#endif // !PRODUCT
   5.458 -}
   5.459 -
   5.460 -void VM_Version::initialize() { // Allocates and generates the getPsrInfo stub, then runs get_processor_features().
   5.461 -  ResourceMark rm;
   5.462 -  // Making this stub must be FIRST use of assembler
   5.463 -
   5.464 -  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); // blob that will hold the generated stub code
   5.465 -  if (stub_blob == NULL) { // allocation failed: report it through vm_exit_during_initialization
   5.466 -    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
   5.467 -  }
   5.468 -  CodeBuffer c(stub_blob->instructions_begin(), // code buffer laid over the instruction area of the blob
   5.469 -               stub_blob->instructions_size());
   5.470 -  VM_Version_StubGenerator g(&c); // generator emits into c
   5.471 -  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, // keep the generated entry address as a callable pointer
   5.472 -                                   g.generate_getPsrInfo());
   5.473 -
   5.474 -  get_processor_features(); // called only after getPsrInfo_stub has been assigned above
   5.475 -}
     6.1 --- a/src/cpu/x86/vm/vm_version_x86_32.hpp	Sun Feb 22 17:21:13 2009 -0800
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,439 +0,0 @@
     6.4 -/*
     6.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
     6.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     6.7 - *
     6.8 - * This code is free software; you can redistribute it and/or modify it
     6.9 - * under the terms of the GNU General Public License version 2 only, as
    6.10 - * published by the Free Software Foundation.
    6.11 - *
    6.12 - * This code is distributed in the hope that it will be useful, but WITHOUT
    6.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    6.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    6.15 - * version 2 for more details (a copy is included in the LICENSE file that
    6.16 - * accompanied this code).
    6.17 - *
    6.18 - * You should have received a copy of the GNU General Public License version
    6.19 - * 2 along with this work; if not, write to the Free Software Foundation,
    6.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    6.21 - *
    6.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    6.23 - * CA 95054 USA or visit www.sun.com if you need additional information or
    6.24 - * have any questions.
    6.25 - *
    6.26 - */
    6.27 -
    6.28 -class VM_Version: public Abstract_VM_Version { // raw cpuid data, derived feature flags and allocation-prefetch defaults
    6.29 -public:
    6.30 -  // cpuid result register layouts.  These are all unions of a uint32_t
    6.31 -  // (in case anyone wants access to the register as a whole) and a bitfield.
    6.32 -
    6.33 -  union StdCpuid1Eax { // cpuid function 1, eax (see CpuidInfo::std_cpuid1_rax)
    6.34 -    uint32_t value;
    6.35 -    struct { // NOTE(review): field positions rely on how the compiler allocates bitfields - confirm per toolchain
    6.36 -      uint32_t stepping   : 4,
    6.37 -               model      : 4,
    6.38 -               family     : 4,
    6.39 -               proc_type  : 2,
    6.40 -                          : 2, // unnamed field: skipped bits
    6.41 -               ext_model  : 4,
    6.42 -               ext_family : 8,
    6.43 -                          : 4;
    6.44 -    } bits;
    6.45 -  };
    6.46 -
    6.47 -  union StdCpuid1Ebx { // example, unused
    6.48 -    uint32_t value;
    6.49 -    struct {
    6.50 -      uint32_t brand_id         : 8,
    6.51 -               clflush_size     : 8,
    6.52 -               threads_per_cpu  : 8, // read by threads_per_core()
    6.53 -               apic_id          : 8;
    6.54 -    } bits;
    6.55 -  };
    6.56 -
    6.57 -  union StdCpuid1Ecx { // cpuid function 1, ecx (see CpuidInfo::std_cpuid1_rcx)
    6.58 -    uint32_t value;
    6.59 -    struct {
    6.60 -      uint32_t sse3     : 1,
    6.61 -                        : 2,
    6.62 -               monitor  : 1,
    6.63 -                        : 1,
    6.64 -               vmx      : 1,
    6.65 -                        : 1,
    6.66 -               est      : 1,
    6.67 -                        : 1,
    6.68 -               ssse3    : 1,
    6.69 -               cid      : 1,
    6.70 -                        : 2,
    6.71 -               cmpxchg16: 1,
    6.72 -                        : 4,
    6.73 -               dca      : 1,
    6.74 -               sse4_1   : 1,
    6.75 -               sse4_2   : 1,
    6.76 -                        : 11;
    6.77 -    } bits;
    6.78 -  };
    6.79 -
    6.80 -  union StdCpuid1Edx { // cpuid function 1, edx (see CpuidInfo::std_cpuid1_rdx)
    6.81 -    uint32_t value;
    6.82 -    struct {
    6.83 -      uint32_t          : 4,
    6.84 -               tsc      : 1,
    6.85 -                        : 3,
    6.86 -               cmpxchg8 : 1,
    6.87 -                        : 6,
    6.88 -               cmov     : 1,
    6.89 -                        : 7,
    6.90 -               mmx      : 1,
    6.91 -               fxsr     : 1,
    6.92 -               sse      : 1,
    6.93 -               sse2     : 1,
    6.94 -                        : 1,
    6.95 -               ht       : 1, // gates the division in threads_per_core()
    6.96 -                        : 3;
    6.97 -    } bits;
    6.98 -  };
    6.99 -
   6.100 -  union DcpCpuid4Eax { // cpuid function 4, eax (see CpuidInfo::dcp_cpuid4_rax)
   6.101 -    uint32_t value;
   6.102 -    struct {
   6.103 -      uint32_t cache_type    : 5,
   6.104 -                             : 21,
   6.105 -               cores_per_cpu : 6; // cores_per_cpu() adds 1 to this field
   6.106 -    } bits;
   6.107 -  };
   6.108 -
   6.109 -  union DcpCpuid4Ebx { // cpuid function 4, ebx (see CpuidInfo::dcp_cpuid4_rbx)
   6.110 -    uint32_t value;
   6.111 -    struct {
   6.112 -      uint32_t L1_line_size  : 12, // L1_data_cache_line_size() adds 1 to this field
   6.113 -               partitions    : 10,
   6.114 -               associativity : 10;
   6.115 -    } bits;
   6.116 -  };
   6.117 -
   6.118 -  union ExtCpuid1Ecx { // cpuid function 0x80000001, ecx (see CpuidInfo::ext_cpuid1_rcx)
   6.119 -    uint32_t value;
   6.120 -    struct {
   6.121 -      uint32_t LahfSahf     : 1,
   6.122 -               CmpLegacy    : 1,
   6.123 -                            : 4,
   6.124 -               abm          : 1,
   6.125 -               sse4a        : 1,
   6.126 -               misalignsse  : 1,
   6.127 -               prefetchw    : 1,
   6.128 -                            : 22;
   6.129 -    } bits;
   6.130 -  };
   6.131 -
   6.132 -  union ExtCpuid1Edx { // cpuid function 0x80000001, edx (see CpuidInfo::ext_cpuid1_rdx)
   6.133 -    uint32_t value;
   6.134 -    struct {
   6.135 -      uint32_t           : 22,
   6.136 -               mmx_amd   : 1,
   6.137 -               mmx       : 1,
   6.138 -               fxsr      : 1,
   6.139 -                         : 4,
   6.140 -               long_mode : 1,
   6.141 -               tdnow2    : 1,
   6.142 -               tdnow     : 1;
   6.143 -    } bits;
   6.144 -  };
   6.145 -
   6.146 -  union ExtCpuid5Ex { // shared layout for ecx and edx of cpuid function 0x80000005
   6.147 -    uint32_t value;
   6.148 -    struct {
   6.149 -      uint32_t L1_line_size : 8,
   6.150 -               L1_tag_lines : 8,
   6.151 -               L1_assoc     : 8,
   6.152 -               L1_size      : 8;
   6.153 -    } bits;
   6.154 -  };
   6.155 -
   6.156 -  union ExtCpuid8Ecx { // cpuid function 0x80000008, ecx (see CpuidInfo::ext_cpuid8_rcx)
   6.157 -    uint32_t value;
   6.158 -    struct {
   6.159 -      uint32_t cores_per_cpu : 8, // cores_per_cpu() adds 1 to this field on AMD
   6.160 -                             : 24;
   6.161 -    } bits;
   6.162 -  };
   6.163 -
   6.164 -protected:
   6.165 -   static int _cpu; // value returned by cpu_family()
   6.166 -   static int _model;
   6.167 -   static int _stepping;
   6.168 -   static int _cpuFeatures;     // features returned by the "cpuid" instruction
   6.169 -                                // 0 if this instruction is not available
   6.170 -   static const char* _features_str; // value returned by cpu_features()
   6.171 -
   6.172 -   enum { // single-bit masks; the supports_*() predicates test them against _cpuFeatures
   6.173 -     CPU_CX8  = (1 << 0), // next bits are from cpuid 1 (EDX)
   6.174 -     CPU_CMOV = (1 << 1),
   6.175 -     CPU_FXSR = (1 << 2),
   6.176 -     CPU_HT   = (1 << 3),
   6.177 -     CPU_MMX  = (1 << 4),
   6.178 -     CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
   6.179 -     CPU_SSE  = (1 << 6),
   6.180 -     CPU_SSE2 = (1 << 7),
   6.181 -     CPU_SSE3 = (1 << 8), // sse3  comes from cpuid 1 (ECX)
   6.182 -     CPU_SSSE3= (1 << 9),
   6.183 -     CPU_SSE4A= (1 <<10),
   6.184 -     CPU_SSE4_1 = (1 << 11),
   6.185 -     CPU_SSE4_2 = (1 << 12)
   6.186 -   } cpuFeatureFlags;
   6.187 -
   6.188 -  // cpuid information block.  All info derived from executing cpuid with
   6.189 -  // various function numbers is stored here.  Intel and AMD info is
   6.190 -  // merged in this block: accessor methods disentangle it.
   6.191 -  //
   6.192 -  // The info block is laid out in subblocks of 4 dwords corresponding to
   6.193 -  // rax, rbx, rcx and rdx, whether or not they contain anything useful.
   6.194 -  struct CpuidInfo { // member order is significant: the *_offset() accessors below expose subblock offsets to the asm stub
   6.195 -    // cpuid function 0
   6.196 -    uint32_t std_max_function;
   6.197 -    uint32_t std_vendor_name_0; // the only vendor dword examined by is_amd() and is_intel()
   6.198 -    uint32_t std_vendor_name_1;
   6.199 -    uint32_t std_vendor_name_2;
   6.200 -
   6.201 -    // cpuid function 1
   6.202 -    StdCpuid1Eax std_cpuid1_rax;
   6.203 -    StdCpuid1Ebx std_cpuid1_rbx;
   6.204 -    StdCpuid1Ecx std_cpuid1_rcx;
   6.205 -    StdCpuid1Edx std_cpuid1_rdx;
   6.206 -
   6.207 -    // cpuid function 4 (deterministic cache parameters)
   6.208 -    DcpCpuid4Eax dcp_cpuid4_rax;
   6.209 -    DcpCpuid4Ebx dcp_cpuid4_rbx;
   6.210 -    uint32_t     dcp_cpuid4_rcx; // unused currently
   6.211 -    uint32_t     dcp_cpuid4_rdx; // unused currently
   6.212 -
   6.213 -    // cpuid function 0x80000000 // example, unused
   6.214 -    uint32_t ext_max_function;
   6.215 -    uint32_t ext_vendor_name_0;
   6.216 -    uint32_t ext_vendor_name_1;
   6.217 -    uint32_t ext_vendor_name_2;
   6.218 -
   6.219 -    // cpuid function 0x80000001
   6.220 -    uint32_t     ext_cpuid1_rax; // reserved
   6.221 -    uint32_t     ext_cpuid1_rbx; // reserved
   6.222 -    ExtCpuid1Ecx ext_cpuid1_rcx;
   6.223 -    ExtCpuid1Edx ext_cpuid1_rdx;
   6.224 -
   6.225 -    // cpuid functions 0x80000002 thru 0x80000004: example, unused
   6.226 -    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
   6.227 -    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
   6.228 -    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
   6.229 -
   6.230 -    // cpuid function 0x80000005 //AMD L1, Intel reserved
   6.231 -    uint32_t     ext_cpuid5_rax; // unused currently
   6.232 -    uint32_t     ext_cpuid5_rbx; // reserved
   6.233 -    ExtCpuid5Ex  ext_cpuid5_rcx; // L1 data cache info (AMD)
   6.234 -    ExtCpuid5Ex  ext_cpuid5_rdx; // L1 instruction cache info (AMD)
   6.235 -
   6.236 -    // cpuid function 0x80000008
   6.237 -    uint32_t     ext_cpuid8_rax; // unused currently
   6.238 -    uint32_t     ext_cpuid8_rbx; // reserved
   6.239 -    ExtCpuid8Ecx ext_cpuid8_rcx;
   6.240 -    uint32_t     ext_cpuid8_rdx; // reserved
   6.241 -  };
   6.242 -
   6.243 -  // The actual cpuid info block
   6.244 -  static CpuidInfo _cpuid_info;
   6.245 -
   6.246 -  // Extractors and predicates
   6.247 -  static uint32_t extended_cpu_family() { // returns family + ext_family from cpuid 1 eax
   6.248 -    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family;
   6.249 -    result += _cpuid_info.std_cpuid1_rax.bits.ext_family; // added unconditionally, whatever the base family value is
   6.250 -    return result;
   6.251 -  }
   6.252 -  static uint32_t extended_cpu_model() { // returns model with ext_model placed in bits 4 and up
   6.253 -    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model;
   6.254 -    result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4;
   6.255 -    return result;
   6.256 -  }
   6.257 -  static uint32_t cpu_stepping() { // returns the stepping field of cpuid 1 eax
   6.258 -    uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping;
   6.259 -    return result;
   6.260 -  }
   6.261 -  static uint logical_processor_count() { // same value as threads_per_core()
   6.262 -    uint result = threads_per_core();
   6.263 -    return result;
   6.264 -  }
   6.265 -  static uint32_t feature_flags() { // builds a CPU_* bit mask from the raw cpuid bits in _cpuid_info
   6.266 -    uint32_t result = 0;
   6.267 -    if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0)
   6.268 -      result |= CPU_CX8;
   6.269 -    if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0)
   6.270 -      result |= CPU_CMOV;
   6.271 -    if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && // && binds tighter: standard bit, or (AMD and extended bit)
   6.272 -        _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0)
   6.273 -      result |= CPU_FXSR;
   6.274 -    // HT flag is set for multi-core processors also.
   6.275 -    if (threads_per_core() > 1) // so CPU_HT is derived from the computed thread count, not from the raw ht bit alone
   6.276 -      result |= CPU_HT;
   6.277 -    if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && // same grouping as the fxsr test above
   6.278 -        _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0)
   6.279 -      result |= CPU_MMX;
   6.280 -    if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0) // extended bit is consulted only when is_amd()
   6.281 -      result |= CPU_3DNOW;
   6.282 -    if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0)
   6.283 -      result |= CPU_SSE;
   6.284 -    if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0)
   6.285 -      result |= CPU_SSE2;
   6.286 -    if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0)
   6.287 -      result |= CPU_SSE3;
   6.288 -    if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0)
   6.289 -      result |= CPU_SSSE3;
   6.290 -    if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) // extended bit is consulted only when is_amd()
   6.291 -      result |= CPU_SSE4A;
   6.292 -    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0)
   6.293 -      result |= CPU_SSE4_1;
   6.294 -    if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0)
   6.295 -      result |= CPU_SSE4_2;
   6.296 -    return result;
   6.297 -  }
   6.298 -
   6.299 -  static void get_processor_features(); // declared here; defined outside this header
   6.300 -
   6.301 -public:
   6.302 -  // Offsets for cpuid asm stub
   6.303 -  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } // start of the function 0 subblock
   6.304 -  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); } // start of the function 1 subblock
   6.305 -  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); } // start of the function 4 subblock
   6.306 -  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); } // start of the 0x80000001 subblock
   6.307 -  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); } // start of the 0x80000005 subblock
   6.308 -  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); } // start of the 0x80000008 subblock
   6.309 -
   6.310 -  // Initialization
   6.311 -  static void initialize();
   6.312 -
   6.313 -  // Asserts
   6.314 -  static void assert_is_initialized() { // treats a zero family field as meaning _cpuid_info has not been filled in yet
   6.315 -    assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized");
   6.316 -  }
   6.317 -
   6.318 -  //
   6.319 -  // Processor family:
   6.320 -  //       3   -  386
   6.321 -  //       4   -  486
   6.322 -  //       5   -  Pentium
   6.323 -  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
   6.324 -  //              Pentium M, Core Solo, Core Duo, Core2 Duo
   6.325 -  //    family 6 model:   9,        13,       14,        15
   6.326 -  //    0x0f   -  Pentium 4, Opteron
   6.327 -  //
   6.328 -  // Note: The cpu family should be used to select between
   6.329 -  //       instruction sequences which are valid on all Intel
   6.330 -  //       processors.  Use the feature test functions below to
   6.331 -  //       determine whether a particular instruction is supported.
   6.332 -  //
   6.333 -  static int  cpu_family()        { return _cpu;}
   6.334 -  static bool is_P6()             { return cpu_family() >= 6; } // true for family 6 and every higher family number
   6.335 -
   6.336 -  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' - only the first vendor dword is compared
   6.337 -  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' - only the first vendor dword is compared
   6.338 -
   6.339 -  static uint cores_per_cpu()  { // returns the vendor-specific core count field plus 1; 1 when neither Intel nor AMD
   6.340 -    uint result = 1;
   6.341 -    if (is_intel()) {
   6.342 -      result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1);
   6.343 -    } else if (is_amd()) {
   6.344 -      result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1);
   6.345 -    }
   6.346 -    return result;
   6.347 -  }
   6.348 -
   6.349 -  static uint threads_per_core()  { // returns 1 unless the ht bit is set, else threads_per_cpu / cores_per_cpu()
   6.350 -    uint result = 1;
   6.351 -    if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) {
   6.352 -      result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / // unsigned integer division; divisor is at least 1 (see cores_per_cpu)
   6.353 -               cores_per_cpu();
   6.354 -    }
   6.355 -    return result;
   6.356 -  }
   6.357 -
   6.358 -  static intx L1_data_cache_line_size()  { // returns the reported L1 data line size, never less than 32
   6.359 -    intx result = 0;
   6.360 -    if (is_intel()) {
   6.361 -      result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1); // Intel path adds 1 to the raw field
   6.362 -    } else if (is_amd()) {
   6.363 -      result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size; // AMD path uses the raw field as is
   6.364 -    }
   6.365 -    if (result < 32) // not defined ?
   6.366 -      result = 32;   // 32 bytes by default on x86
   6.367 -    return result;
   6.368 -  }
   6.369 -
   6.370 -  //
   6.371 -  // Feature identification
   6.372 -  //
   6.373 -  static bool supports_cpuid()    { return _cpuFeatures  != 0; } // any feature bit set counts as cpuid being available
   6.374 -  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
   6.375 -  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
   6.376 -  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
   6.377 -  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
   6.378 -  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
   6.379 -  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
   6.380 -  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   6.381 -  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   6.382 -  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   6.383 -  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   6.384 -  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   6.385 -  //
   6.386 -  // AMD features
   6.387 -  //
   6.388 -  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
   6.389 -  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; } // reads the raw cpuid bit, not _cpuFeatures
   6.390 -  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; } // reads the raw cpuid bit, not _cpuFeatures
   6.391 -  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
   6.392 -
   6.393 -  static bool supports_compare_and_exchange() { return true; } // unconditionally true in this class
   6.394 -
   6.395 -  static const char* cpu_features()           { return _features_str; }
   6.396 -
   6.397 -  static intx allocate_prefetch_distance() { // returns AllocatePrefetchDistance, or a per-CPU default when that flag is negative
   6.398 -    // This method should be called before allocate_prefetch_style().
   6.399 -    //
   6.400 -    // Hardware prefetching (distance/size in bytes):
   6.401 -    // Pentium 3 -  64 /  32
   6.402 -    // Pentium 4 - 256 / 128
   6.403 -    // Athlon    -  64 /  32 ????
   6.404 -    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
   6.405 -    // Core      - 128 /  64
   6.406 -    //
   6.407 -    // Software prefetching (distance in bytes / instruction with best score):
   6.408 -    // Pentium 3 - 128 / prefetchnta
   6.409 -    // Pentium 4 - 512 / prefetchnta
   6.410 -    // Athlon    - 128 / prefetchnta
   6.411 -    // Opteron   - 256 / prefetchnta
   6.412 -    // Core      - 256 / prefetchnta
   6.413 -    // It will be used only when AllocatePrefetchStyle > 0
   6.414 -
   6.415 -    intx count = AllocatePrefetchDistance;
   6.416 -    if (count < 0) {   // default ?
   6.417 -      if (is_amd()) {  // AMD
   6.418 -        if (supports_sse2())
   6.419 -          count = 256; // Opteron
   6.420 -        else
   6.421 -          count = 128; // Athlon
   6.422 -      } else {         // Intel
   6.423 -        if (supports_sse2())
   6.424 -          if (cpu_family() == 6) {
   6.425 -            count = 256; // Pentium M, Core, Core2
   6.426 -          } else {
   6.427 -            count = 512; // Pentium 4
   6.428 -          }
   6.429 -        else // pairs with the outer if (supports_sse2()); the inner if already has its own else
   6.430 -          count = 128; // Pentium 3 (and all other old CPUs)
   6.431 -      }
   6.432 -    }
   6.433 -    return count;
   6.434 -  }
   6.435 -  static intx allocate_prefetch_style() { // returns AllocatePrefetchStyle, or 0 when prefetching cannot be used
   6.436 -    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); // note: the check also accepts 0
   6.437 -    // Return 0 if AllocatePrefetchDistance was not defined or
   6.438 -    // prefetch instruction is not supported.
   6.439 -    return (AllocatePrefetchDistance > 0 &&
   6.440 -            (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0;
   6.441 -  }
   6.442 -};
     7.1 --- a/src/cpu/x86/vm/vm_version_x86_64.cpp	Sun Feb 22 17:21:13 2009 -0800
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,419 +0,0 @@
     7.4 -/*
     7.5 - * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
     7.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     7.7 - *
     7.8 - * This code is free software; you can redistribute it and/or modify it
     7.9 - * under the terms of the GNU General Public License version 2 only, as
    7.10 - * published by the Free Software Foundation.
    7.11 - *
    7.12 - * This code is distributed in the hope that it will be useful, but WITHOUT
    7.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    7.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    7.15 - * version 2 for more details (a copy is included in the LICENSE file that
    7.16 - * accompanied this code).
    7.17 - *
    7.18 - * You should have received a copy of the GNU General Public License version
    7.19 - * 2 along with this work; if not, write to the Free Software Foundation,
    7.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    7.21 - *
    7.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    7.23 - * CA 95054 USA or visit www.sun.com if you need additional information or
    7.24 - * have any questions.
    7.25 - *
    7.26 - */
    7.27 -
    7.28 -# include "incls/_precompiled.incl"
    7.29 -# include "incls/_vm_version_x86_64.cpp.incl"
    7.30 -
    7.31 -int VM_Version::_cpu; // assigned from extended_cpu_family() in get_processor_features()
    7.32 -int VM_Version::_model; // assigned from extended_cpu_model() in get_processor_features()
    7.33 -int VM_Version::_stepping; // assigned from cpu_stepping() in get_processor_features()
    7.34 -int VM_Version::_cpuFeatures; // assigned from feature_flags(), then masked according to UseSSE
    7.35 -const char*           VM_Version::_features_str = ""; // replaced by a strdup-ed description in get_processor_features()
    7.36 -VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, }; // filled in by the getPsrInfo stub
    7.37 -
    7.38 -static BufferBlob* stub_blob; // NOTE(review): presumably the blob holding the generated stub - its assignment is not visible in this chunk
    7.39 -static const int stub_size = 300; // NOTE(review): presumably the byte size requested for that blob - confirm at the allocation site
    7.40 -
    7.41 -extern "C" {
    7.42 -  typedef void (*getPsrInfo_stub_t)(void*); // stub signature: one pointer argument, no return value
    7.43 -}
    7.44 -static getPsrInfo_stub_t getPsrInfo_stub = NULL; // called as getPsrInfo_stub(&_cpuid_info) in get_processor_features()
    7.45 -
    7.46 -
    7.47 -class VM_Version_StubGenerator: public StubCodeGenerator { // emits the machine-code stub that fills a VM_Version::CpuidInfo via cpuid
    7.48 - public:
    7.49 -
    7.50 -  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
    7.51 -
    7.52 -  address generate_getPsrInfo() { // emits the stub and returns the address of its first instruction
    7.53 -
    7.54 -    Label std_cpuid1, ext_cpuid1, ext_cpuid5, done; // skip targets: each lets a CPU with a lower max function bypass higher leaves
    7.55 -
    7.56 -    StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
    7.57 -#   define __ _masm->
    7.58 -
    7.59 -    address start = __ pc();
    7.60 -
    7.61 -    //
    7.62 -    // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info);
    7.63 -    //
    7.64 -    // rcx and rdx are first and second argument registers on windows
    7.65 -
    7.66 -    __ push(rbp);
    7.67 -    __ mov(rbp, c_rarg0); // cpuid_info address
    7.68 -    __ push(rbx); // rbx is overwritten by every cpuid below
    7.69 -    __ push(rsi); // rsi is used as the store base for each subblock
    7.70 -
    7.71 -    //
    7.72 -    // we have a chip which supports the "cpuid" instruction
    7.73 -    //
    7.74 -    __ xorl(rax, rax); // function 0
    7.75 -    __ cpuid();
    7.76 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    7.77 -    __ movl(Address(rsi, 0), rax); // the four results are stored as consecutive dwords: rax, rbx, rcx, rdx
    7.78 -    __ movl(Address(rsi, 4), rbx);
    7.79 -    __ movl(Address(rsi, 8), rcx);
    7.80 -    __ movl(Address(rsi,12), rdx);
    7.81 -
    7.82 -    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
    7.83 -    __ jccb(Assembler::belowEqual, std_cpuid1); // max standard function <= 3: skip function 4
    7.84 -
    7.85 -    //
    7.86 -    // cpuid(0x4) Deterministic cache params
    7.87 -    //
    7.88 -    __ movl(rax, 4);
    7.89 -    __ xorl(rcx, rcx);   // L1 cache
    7.90 -    __ cpuid();
    7.91 -    __ push(rax); // keep the full eax result while its low 5 bits are tested
    7.92 -    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    7.93 -    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    7.94 -    __ pop(rax); // restore eax; the jccb below still consumes the flags set by orl
    7.95 -    __ jccb(Assembler::equal, std_cpuid1); // invalid cache type: leave the function 4 subblock untouched
    7.96 -
    7.97 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    7.98 -    __ movl(Address(rsi, 0), rax);
    7.99 -    __ movl(Address(rsi, 4), rbx);
   7.100 -    __ movl(Address(rsi, 8), rcx);
   7.101 -    __ movl(Address(rsi,12), rdx);
   7.102 -
   7.103 -    //
   7.104 -    // Standard cpuid(0x1)
   7.105 -    //
   7.106 -    __ bind(std_cpuid1);
   7.107 -    __ movl(rax, 1);
   7.108 -    __ cpuid();
   7.109 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
   7.110 -    __ movl(Address(rsi, 0), rax);
   7.111 -    __ movl(Address(rsi, 4), rbx);
   7.112 -    __ movl(Address(rsi, 8), rcx);
   7.113 -    __ movl(Address(rsi,12), rdx);
   7.114 -
   7.115 -    __ movl(rax, 0x80000000); // query the highest extended function; this result is only compared, not stored
   7.116 -    __ cpuid();
   7.117 -    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
   7.118 -    __ jcc(Assembler::belowEqual, done); // long-form jump; NOTE(review): presumably because done is out of short-branch range
   7.119 -    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
   7.120 -    __ jccb(Assembler::belowEqual, ext_cpuid1);
   7.121 -    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
   7.122 -    __ jccb(Assembler::belowEqual, ext_cpuid5);
   7.123 -    //
   7.124 -    // Extended cpuid(0x80000008)
   7.125 -    //
   7.126 -    __ movl(rax, 0x80000008);
   7.127 -    __ cpuid();
   7.128 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
   7.129 -    __ movl(Address(rsi, 0), rax);
   7.130 -    __ movl(Address(rsi, 4), rbx);
   7.131 -    __ movl(Address(rsi, 8), rcx);
   7.132 -    __ movl(Address(rsi,12), rdx);
   7.133 -
   7.134 -    //
   7.135 -    // Extended cpuid(0x80000005)
   7.136 -    //
   7.137 -    __ bind(ext_cpuid5); // reached by fall-through from 0x80000008 or by the skip above
   7.138 -    __ movl(rax, 0x80000005);
   7.139 -    __ cpuid();
   7.140 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
   7.141 -    __ movl(Address(rsi, 0), rax);
   7.142 -    __ movl(Address(rsi, 4), rbx);
   7.143 -    __ movl(Address(rsi, 8), rcx);
   7.144 -    __ movl(Address(rsi,12), rdx);
   7.145 -
   7.146 -    //
   7.147 -    // Extended cpuid(0x80000001)
   7.148 -    //
   7.149 -    __ bind(ext_cpuid1); // reached by fall-through from 0x80000005 or by the skip above
   7.150 -    __ movl(rax, 0x80000001);
   7.151 -    __ cpuid();
   7.152 -    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
   7.153 -    __ movl(Address(rsi, 0), rax);
   7.154 -    __ movl(Address(rsi, 4), rbx);
   7.155 -    __ movl(Address(rsi, 8), rcx);
   7.156 -    __ movl(Address(rsi,12), rdx);
   7.157 -
   7.158 -    //
   7.159 -    // return
   7.160 -    //
   7.161 -    __ bind(done);
   7.162 -    __ pop(rsi); // restore in reverse order of the pushes at the top of the stub
   7.163 -    __ pop(rbx);
   7.164 -    __ pop(rbp);
   7.165 -    __ ret(0);
   7.166 -
   7.167 -#   undef __
   7.168 -
   7.169 -    return start;
   7.170 -  };
   7.171 -};
   7.172 -
   7.173 -
   7.174 -void VM_Version::get_processor_features() {
   7.175 -
   7.176 -  _logical_processors_per_package = 1;
   7.177 -  // Get raw processor info
   7.178 -  getPsrInfo_stub(&_cpuid_info);
   7.179 -  assert_is_initialized();
   7.180 -  _cpu = extended_cpu_family();
   7.181 -  _model = extended_cpu_model();
   7.182 -  _stepping = cpu_stepping();
   7.183 -  _cpuFeatures = feature_flags();
   7.184 -  // Logical processors are only available on P4s and above,
   7.185 -  // and only if hyperthreading is available.
   7.186 -  _logical_processors_per_package = logical_processor_count();
   7.187 -  _supports_cx8    = supports_cmpxchg8();
   7.188 -  // OS should support SSE for x64 and hardware should support at least SSE2.
   7.189 -  if (!VM_Version::supports_sse2()) {
   7.190 -    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
   7.191 -  }
   7.192 -  if (UseSSE < 4) {
   7.193 -    _cpuFeatures &= ~CPU_SSE4_1;
   7.194 -    _cpuFeatures &= ~CPU_SSE4_2;
   7.195 -  }
   7.196 -  if (UseSSE < 3) {
   7.197 -    _cpuFeatures &= ~CPU_SSE3;
   7.198 -    _cpuFeatures &= ~CPU_SSSE3;
   7.199 -    _cpuFeatures &= ~CPU_SSE4A;
   7.200 -  }
   7.201 -  if (UseSSE < 2)
   7.202 -    _cpuFeatures &= ~CPU_SSE2;
   7.203 -  if (UseSSE < 1)
   7.204 -    _cpuFeatures &= ~CPU_SSE;
   7.205 -
   7.206 -  if (logical_processors_per_package() == 1) {
   7.207 -    // HT processor could be installed on a system which doesn't support HT.
   7.208 -    _cpuFeatures &= ~CPU_HT;
   7.209 -  }
   7.210 -
   7.211 -  char buf[256];
   7.212 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
   7.213 -               cores_per_cpu(), threads_per_core(),
   7.214 -               cpu_family(), _model, _stepping,
   7.215 -               (supports_cmov() ? ", cmov" : ""),
   7.216 -               (supports_cmpxchg8() ? ", cx8" : ""),
   7.217 -               (supports_fxsr() ? ", fxsr" : ""),
   7.218 -               (supports_mmx()  ? ", mmx"  : ""),
   7.219 -               (supports_sse()  ? ", sse"  : ""),
   7.220 -               (supports_sse2() ? ", sse2" : ""),
   7.221 -               (supports_sse3() ? ", sse3" : ""),
   7.222 -               (supports_ssse3()? ", ssse3": ""),
   7.223 -               (supports_sse4_1() ? ", sse4.1" : ""),
   7.224 -               (supports_sse4_2() ? ", sse4.2" : ""),
   7.225 -               (supports_mmx_ext() ? ", mmxext" : ""),
   7.226 -               (supports_3dnow()   ? ", 3dnow"  : ""),
   7.227 -               (supports_3dnow2()  ? ", 3dnowext" : ""),
   7.228 -               (supports_sse4a()   ? ", sse4a": ""),
   7.229 -               (supports_ht() ? ", ht": ""));
   7.230 -  _features_str = strdup(buf);
   7.231 -
   7.232 -  // UseSSE is set to the smaller of what hardware supports and what
   7.233 -  // the command line requires.  I.e., you cannot set UseSSE to 2 on
   7.234 -  // older Pentiums which do not support it.
   7.235 -  if( UseSSE > 4 ) UseSSE=4;
   7.236 -  if( UseSSE < 0 ) UseSSE=0;
   7.237 -  if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
   7.238 -    UseSSE = MIN2((intx)3,UseSSE);
   7.239 -  if( !supports_sse3() ) // Drop to 2 if no SSE3 support
   7.240 -    UseSSE = MIN2((intx)2,UseSSE);
   7.241 -  if( !supports_sse2() ) // Drop to 1 if no SSE2 support
   7.242 -    UseSSE = MIN2((intx)1,UseSSE);
   7.243 -  if( !supports_sse () ) // Drop to 0 if no SSE  support
   7.244 -    UseSSE = 0;
   7.245 -
   7.246 -  // On new cpus instructions which update whole XMM register should be used
   7.247 -  // to prevent partial register stall due to dependencies on high half.
   7.248 -  //
   7.249 -  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
   7.250 -  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
   7.251 -  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
   7.252 -  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
   7.253 -
   7.254 -  if( is_amd() ) { // AMD cpus specific settings
   7.255 -    if( FLAG_IS_DEFAULT(UseAddressNop) ) {
   7.256 -      // Use it on all AMD cpus starting from Opteron (don't need
   7.257 -      // a cpu check since only Opteron and new cpus support 64-bits mode).
   7.258 -      UseAddressNop = true;
   7.259 -    }
   7.260 -    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
   7.261 -      if( supports_sse4a() ) {
   7.262 -        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
   7.263 -      } else {
   7.264 -        UseXmmLoadAndClearUpper = false;
   7.265 -      }
   7.266 -    }
   7.267 -    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
   7.268 -      if( supports_sse4a() ) {
   7.269 -        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
   7.270 -      } else {
   7.271 -        UseXmmRegToRegMoveAll = false;
   7.272 -      }
   7.273 -    }
   7.274 -    if( FLAG_IS_DEFAULT(UseXmmI2F) ) {
   7.275 -      if( supports_sse4a() ) {
   7.276 -        UseXmmI2F = true;
   7.277 -      } else {
   7.278 -        UseXmmI2F = false;
   7.279 -      }
   7.280 -    }
   7.281 -    if( FLAG_IS_DEFAULT(UseXmmI2D) ) {
   7.282 -      if( supports_sse4a() ) {
   7.283 -        UseXmmI2D = true;
   7.284 -      } else {
   7.285 -        UseXmmI2D = false;
   7.286 -      }
   7.287 -    }
   7.288 -  }
   7.289 -
   7.290 -  if( is_intel() ) { // Intel cpus specific settings
   7.291 -    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
   7.292 -      UseStoreImmI16 = false; // don't use it on Intel cpus
   7.293 -    }
   7.294 -    if( FLAG_IS_DEFAULT(UseAddressNop) ) {
   7.295 -      // Use it on all Intel cpus starting from PentiumPro
   7.296 -      // (don't need a cpu check since only new cpus support 64-bits mode).
   7.297 -      UseAddressNop = true;
   7.298 -    }
   7.299 -    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
   7.300 -      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
   7.301 -    }
   7.302 -    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
   7.303 -      if( supports_sse3() ) {
   7.304 -        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
   7.305 -      } else {
   7.306 -        UseXmmRegToRegMoveAll = false;
   7.307 -      }
   7.308 -    }
   7.309 -    if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus
   7.310 -#ifdef COMPILER2
   7.311 -      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
   7.312 -        // For new Intel cpus do the next optimization:
   7.313 -        // don't align the beginning of a loop if there are enough instructions
   7.314 -        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
   7.315 -        // in current fetch line (OptoLoopAlignment) or the padding
   7.316 -        // is big (> MaxLoopPad).
   7.317 -        // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
   7.318 -        // generated NOP instructions. 11 is the largest size of one
   7.319 -        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
   7.320 -        MaxLoopPad = 11;
   7.321 -      }
   7.322 -#endif // COMPILER2
   7.323 -      if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
   7.324 -        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
   7.325 -      }
   7.326 -      if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus
   7.327 -        if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
   7.328 -          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
   7.329 -        }
   7.330 -      }
   7.331 -    }
   7.332 -  }
   7.333 -
   7.334 -  assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   7.335 -  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
   7.336 -
   7.337 -  // set valid Prefetch instruction
   7.338 -  if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
   7.339 -  if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
   7.340 -  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
   7.341 -
   7.342 -  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
   7.343 -  if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
   7.344 -  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
   7.345 -
   7.346 -  // Allocation prefetch settings
   7.347 -  intx cache_line_size = L1_data_cache_line_size();
   7.348 -  if( cache_line_size > AllocatePrefetchStepSize )
   7.349 -    AllocatePrefetchStepSize = cache_line_size;
   7.350 -  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
   7.351 -    AllocatePrefetchLines = 3; // Optimistic value
   7.352 -  assert(AllocatePrefetchLines > 0, "invalid value");
   7.353 -  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
   7.354 -    AllocatePrefetchLines = 1; // Conservative value
   7.355 -
   7.356 -  AllocatePrefetchDistance = allocate_prefetch_distance();
   7.357 -  AllocatePrefetchStyle    = allocate_prefetch_style();
   7.358 -
   7.359 -  if( AllocatePrefetchStyle == 2 && is_intel() &&
   7.360 -      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
   7.361 -    AllocatePrefetchDistance = 384;
   7.362 -  }
   7.363 -  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
   7.364 -
   7.365 -  // Prefetch settings
   7.366 -  PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
   7.367 -  PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
   7.368 -  PrefetchFieldsAhead         = prefetch_fields_ahead();
   7.369 -
   7.370 -#ifndef PRODUCT
   7.371 -  if (PrintMiscellaneous && Verbose) {
   7.372 -    tty->print_cr("Logical CPUs per core: %u",
   7.373 -                  logical_processors_per_package());
   7.374 -    tty->print_cr("UseSSE=%d",UseSSE);
   7.375 -    tty->print("Allocation: ");
   7.376 -    if (AllocatePrefetchStyle <= 0) {
   7.377 -      tty->print_cr("no prefetching");
   7.378 -    } else {
   7.379 -      if (AllocatePrefetchInstr == 0) {
   7.380 -        tty->print("PREFETCHNTA");
   7.381 -      } else if (AllocatePrefetchInstr == 1) {
   7.382 -        tty->print("PREFETCHT0");
   7.383 -      } else if (AllocatePrefetchInstr == 2) {
   7.384 -        tty->print("PREFETCHT2");
   7.385 -      } else if (AllocatePrefetchInstr == 3) {
   7.386 -        tty->print("PREFETCHW");
   7.387 -      }
   7.388 -      if (AllocatePrefetchLines > 1) {
   7.389 -        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
   7.390 -      } else {
   7.391 -        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
   7.392 -      }
   7.393 -    }
   7.394 -    if (PrefetchCopyIntervalInBytes > 0) {
   7.395 -      tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes);
   7.396 -    }
   7.397 -    if (PrefetchScanIntervalInBytes > 0) {
   7.398 -      tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes);
   7.399 -    }
   7.400 -    if (PrefetchFieldsAhead > 0) {
   7.401 -      tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead);
   7.402 -    }
   7.403 -  }
   7.404 -#endif // !PRODUCT
   7.405 -}
   7.406 -
   7.407 -void VM_Version::initialize() {
   7.408 -  ResourceMark rm;
   7.409 -  // Making this stub must be FIRST use of assembler
   7.410 -
   7.411 -  stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size);
   7.412 -  if (stub_blob == NULL) {
   7.413 -    vm_exit_during_initialization("Unable to allocate getPsrInfo_stub");
   7.414 -  }
   7.415 -  CodeBuffer c(stub_blob->instructions_begin(),
   7.416 -               stub_blob->instructions_size());
   7.417 -  VM_Version_StubGenerator g(&c);
   7.418 -  getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t,
   7.419 -                                   g.generate_getPsrInfo());
   7.420 -
   7.421 -  get_processor_features();
   7.422 -}
     8.1 --- a/src/cpu/x86/vm/vm_version_x86_64.hpp	Sun Feb 22 17:21:13 2009 -0800
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,449 +0,0 @@
     8.4 -/*
     8.5 - * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
     8.6 - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     8.7 - *
     8.8 - * This code is free software; you can redistribute it and/or modify it
     8.9 - * under the terms of the GNU General Public License version 2 only, as
    8.10 - * published by the Free Software Foundation.
    8.11 - *
    8.12 - * This code is distributed in the hope that it will be useful, but WITHOUT
    8.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    8.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    8.15 - * version 2 for more details (a copy is included in the LICENSE file that
    8.16 - * accompanied this code).
    8.17 - *
    8.18 - * You should have received a copy of the GNU General Public License version
    8.19 - * 2 along with this work; if not, write to the Free Software Foundation,
    8.20 - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    8.21 - *
    8.22 - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    8.23 - * CA 95054 USA or visit www.sun.com if you need additional information or
    8.24 - * have any questions.
    8.25 - *
    8.26 - */
    8.27 -
    8.28 -class VM_Version : public Abstract_VM_Version {
    8.29 -public:
    8.30 -  // cpuid result register layouts.  These are all unions of a uint32_t
    8.31 -  // (in case anyone wants access to the register as a whole) and a bitfield.
    8.32 -
    8.33 -  union StdCpuid1Eax {
    8.34 -    uint32_t value;
    8.35 -    struct {
    8.36 -      uint32_t stepping   : 4,
    8.37 -               model      : 4,
    8.38 -               family     : 4,
    8.39 -               proc_type  : 2,
    8.40 -                          : 2,
    8.41 -               ext_model  : 4,
    8.42 -               ext_family : 8,
    8.43 -                          : 4;
    8.44 -    } bits;
    8.45 -  };
    8.46 -
    8.47 -  union StdCpuid1Ebx { // example, unused
    8.48 -    uint32_t value;
    8.49 -    struct {
    8.50 -      uint32_t brand_id         : 8,
    8.51 -               clflush_size     : 8,
    8.52 -               threads_per_cpu  : 8,
    8.53 -               apic_id          : 8;
    8.54 -    } bits;
    8.55 -  };
    8.56 -
    8.57 -  union StdCpuid1Ecx {
    8.58 -    uint32_t value;
    8.59 -    struct {
    8.60 -      uint32_t sse3     : 1,
    8.61 -                        : 2,
    8.62 -               monitor  : 1,
    8.63 -                        : 1,
    8.64 -               vmx      : 1,
    8.65 -                        : 1,
    8.66 -               est      : 1,
    8.67 -                        : 1,
    8.68 -               ssse3    : 1,
    8.69 -               cid      : 1,
    8.70 -                        : 2,
    8.71 -               cmpxchg16: 1,
    8.72 -                        : 4,
    8.73 -               dca      : 1,
    8.74 -               sse4_1   : 1,
    8.75 -               sse4_2   : 1,
    8.76 -                        : 11;
    8.77 -    } bits;
    8.78 -  };
    8.79 -
    8.80 -  union StdCpuid1Edx {
    8.81 -    uint32_t value;
    8.82 -    struct {
    8.83 -      uint32_t          : 4,
    8.84 -               tsc      : 1,
    8.85 -                        : 3,
    8.86 -               cmpxchg8 : 1,
    8.87 -                        : 6,
    8.88 -               cmov     : 1,
    8.89 -                        : 7,
    8.90 -               mmx      : 1,
    8.91 -               fxsr     : 1,
    8.92 -               sse      : 1,
    8.93 -               sse2     : 1,
    8.94 -                        : 1,
    8.95 -               ht       : 1,
    8.96 -                        : 3;
    8.97 -    } bits;
    8.98 -  };
    8.99 -
   8.100 -  union DcpCpuid4Eax {
   8.101 -    uint32_t value;
   8.102 -    struct {
   8.103 -      uint32_t cache_type    : 5,
   8.104 -                             : 21,
   8.105 -               cores_per_cpu : 6;
   8.106 -    } bits;
   8.107 -  };
   8.108 -
   8.109 -  union DcpCpuid4Ebx {
   8.110 -    uint32_t value;
   8.111 -    struct {
   8.112 -      uint32_t L1_line_size  : 12,
   8.113 -               partitions    : 10,
   8.114 -               associativity : 10;
   8.115 -    } bits;
   8.116 -  };
   8.117 -
   8.118 -  union ExtCpuid1Edx {
   8.119 -    uint32_t value;
   8.120 -    struct {
   8.121 -      uint32_t           : 22,
   8.122 -               mmx_amd   : 1,
   8.123 -               mmx       : 1,
   8.124 -               fxsr      : 1,
   8.125 -                         : 4,
   8.126 -               long_mode : 1,
   8.127 -               tdnow2    : 1,
   8.128 -               tdnow     : 1;
   8.129 -    } bits;
   8.130 -  };
   8.131 -
   8.132 -  union ExtCpuid1Ecx {
   8.133 -    uint32_t value;
   8.134 -    struct {
   8.135 -      uint32_t LahfSahf     : 1,
   8.136 -               CmpLegacy    : 1,
   8.137 -                            : 4,
   8.138 -               abm          : 1,
   8.139 -               sse4a        : 1,
   8.140 -               misalignsse  : 1,
   8.141 -               prefetchw    : 1,
   8.142 -                            : 22;
   8.143 -    } bits;
   8.144 -  };
   8.145 -
   8.146 -  union ExtCpuid5Ex {
   8.147 -    uint32_t value;
   8.148 -    struct {
   8.149 -      uint32_t L1_line_size : 8,
   8.150 -               L1_tag_lines : 8,
   8.151 -               L1_assoc     : 8,
   8.152 -               L1_size      : 8;
   8.153 -    } bits;
   8.154 -  };
   8.155 -
   8.156 -  union ExtCpuid8Ecx {
   8.157 -    uint32_t value;
   8.158 -    struct {
   8.159 -      uint32_t cores_per_cpu : 8,
   8.160 -                             : 24;
   8.161 -    } bits;
   8.162 -  };
   8.163 -
   8.164 -protected:
   8.165 -   static int _cpu;
   8.166 -   static int _model;
   8.167 -   static int _stepping;
   8.168 -   static int _cpuFeatures;     // features returned by the "cpuid" instruction
   8.169 -                                // 0 if this instruction is not available
   8.170 -   static const char* _features_str;
   8.171 -
   8.172 -   enum {
   8.173 -     CPU_CX8  = (1 << 0), // next bits are from cpuid 1 (EDX)
   8.174 -     CPU_CMOV = (1 << 1),
   8.175 -     CPU_FXSR = (1 << 2),
   8.176 -     CPU_HT   = (1 << 3),
   8.177 -     CPU_MMX  = (1 << 4),
   8.178 -     CPU_3DNOW= (1 << 5),
   8.179 -     CPU_SSE  = (1 << 6),
   8.180 -     CPU_SSE2 = (1 << 7),
   8.181 -     CPU_SSE3 = (1 << 8),
   8.182 -     CPU_SSSE3= (1 << 9),
   8.183 -     CPU_SSE4A= (1 <<10),
   8.184 -     CPU_SSE4_1 = (1 << 11),
   8.185 -     CPU_SSE4_2 = (1 << 12)
   8.186 -   } cpuFeatureFlags;
   8.187 -
   8.188 -  // cpuid information block.  All info derived from executing cpuid with
   8.189 -  // various function numbers is stored here.  Intel and AMD info is
   8.190 -  // merged in this block: accessor methods disentangle it.
   8.191 -  //
   8.192 -  // The info block is laid out in subblocks of 4 dwords corresponding to
   8.193 -  // eax, ebx, ecx and edx, whether or not they contain anything useful.
   8.194 -  struct CpuidInfo {
   8.195 -    // cpuid function 0
   8.196 -    uint32_t std_max_function;
   8.197 -    uint32_t std_vendor_name_0;
   8.198 -    uint32_t std_vendor_name_1;
   8.199 -    uint32_t std_vendor_name_2;
   8.200 -
   8.201 -    // cpuid function 1
   8.202 -    StdCpuid1Eax std_cpuid1_eax;
   8.203 -    StdCpuid1Ebx std_cpuid1_ebx;
   8.204 -    StdCpuid1Ecx std_cpuid1_ecx;
   8.205 -    StdCpuid1Edx std_cpuid1_edx;
   8.206 -
   8.207 -    // cpuid function 4 (deterministic cache parameters)
   8.208 -    DcpCpuid4Eax dcp_cpuid4_eax;
   8.209 -    DcpCpuid4Ebx dcp_cpuid4_ebx;
   8.210 -    uint32_t     dcp_cpuid4_ecx; // unused currently
   8.211 -    uint32_t     dcp_cpuid4_edx; // unused currently
   8.212 -
   8.213 -    // cpuid function 0x80000000 // example, unused
   8.214 -    uint32_t ext_max_function;
   8.215 -    uint32_t ext_vendor_name_0;
   8.216 -    uint32_t ext_vendor_name_1;
   8.217 -    uint32_t ext_vendor_name_2;
   8.218 -
   8.219 -    // cpuid function 0x80000001
   8.220 -    uint32_t     ext_cpuid1_eax; // reserved
   8.221 -    uint32_t     ext_cpuid1_ebx; // reserved
   8.222 -    ExtCpuid1Ecx ext_cpuid1_ecx;
   8.223 -    ExtCpuid1Edx ext_cpuid1_edx;
   8.224 -
   8.225 -    // cpuid functions 0x80000002 thru 0x80000004: example, unused
   8.226 -    uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
   8.227 -    uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
   8.228 -    uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
   8.229 -
   8.230 -    // cpuid function 0x80000005 //AMD L1, Intel reserved
   8.231 -    uint32_t     ext_cpuid5_eax; // unused currently
   8.232 -    uint32_t     ext_cpuid5_ebx; // reserved
   8.233 -    ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
   8.234 -    ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)
   8.235 -
   8.236 -    // cpuid function 0x80000008
   8.237 -    uint32_t     ext_cpuid8_eax; // unused currently
   8.238 -    uint32_t     ext_cpuid8_ebx; // reserved
   8.239 -    ExtCpuid8Ecx ext_cpuid8_ecx;
   8.240 -    uint32_t     ext_cpuid8_edx; // reserved
   8.241 -  };
   8.242 -
   8.243 -  // The actual cpuid info block
   8.244 -  static CpuidInfo _cpuid_info;
   8.245 -
   8.246 -  // Extractors and predicates
   8.247 -  static uint32_t extended_cpu_family() {
   8.248 -    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family;
   8.249 -    result += _cpuid_info.std_cpuid1_eax.bits.ext_family;
   8.250 -    return result;
   8.251 -  }
   8.252 -  static uint32_t extended_cpu_model() {
   8.253 -    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model;
   8.254 -    result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4;
   8.255 -    return result;
   8.256 -  }
   8.257 -  static uint32_t cpu_stepping() {
   8.258 -    uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping;
   8.259 -    return result;
   8.260 -  }
   8.261 -  static uint logical_processor_count() {
   8.262 -    uint result = threads_per_core();
   8.263 -    return result;
   8.264 -  }
   8.265 -  static uint32_t feature_flags() {
   8.266 -    uint32_t result = 0;
   8.267 -    if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
   8.268 -      result |= CPU_CX8;
   8.269 -    if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
   8.270 -      result |= CPU_CMOV;
   8.271 -    if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() &&
   8.272 -        _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)
   8.273 -      result |= CPU_FXSR;
   8.274 -    // HT flag is set for multi-core processors also.
   8.275 -    if (threads_per_core() > 1)
   8.276 -      result |= CPU_HT;
   8.277 -    if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() &&
   8.278 -        _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)
   8.279 -      result |= CPU_MMX;
   8.280 -    if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
   8.281 -      result |= CPU_3DNOW;
   8.282 -    if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
   8.283 -      result |= CPU_SSE;
   8.284 -    if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
   8.285 -      result |= CPU_SSE2;
   8.286 -    if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
   8.287 -      result |= CPU_SSE3;
   8.288 -    if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
   8.289 -      result |= CPU_SSSE3;
   8.290 -    if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
   8.291 -      result |= CPU_SSE4A;
   8.292 -    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
   8.293 -      result |= CPU_SSE4_1;
   8.294 -    if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
   8.295 -      result |= CPU_SSE4_2;
   8.296 -    return result;
   8.297 -  }
   8.298 -
   8.299 -  static void get_processor_features();
   8.300 -
   8.301 -public:
   8.302 -  // Offsets for cpuid asm stub
   8.303 -  static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
   8.304 -  static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
   8.305 -  static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
   8.306 -  static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   8.307 -  static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   8.308 -  static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
   8.309 -
   8.310 -  // Initialization
   8.311 -  static void initialize();
   8.312 -
   8.313 -  // Asserts
   8.314 -  static void assert_is_initialized() {
   8.315 -    assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
   8.316 -  }
   8.317 -
   8.318 -  //
   8.319 -  // Processor family:
   8.320 -  //       3   -  386
   8.321 -  //       4   -  486
   8.322 -  //       5   -  Pentium
   8.323 -  //       6   -  PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
   8.324 -  //              Pentium M, Core Solo, Core Duo, Core2 Duo
   8.325 -  //    family 6 model:   9,        13,       14,        15
   8.326 -  //    0x0f   -  Pentium 4, Opteron
   8.327 -  //
   8.328 -  // Note: The cpu family should be used to select between
   8.329 -  //       instruction sequences which are valid on all Intel
   8.330 -  //       processors.  Use the feature test functions below to
   8.331 -  //       determine whether a particular instruction is supported.
   8.332 -  //
   8.333 -  static int  cpu_family()        { return _cpu;}
   8.334 -  static bool is_P6()             { return cpu_family() >= 6; }
   8.335 -
   8.336 -  static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
   8.337 -  static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
   8.338 -
   8.339 -  static uint cores_per_cpu()  {
   8.340 -    uint result = 1;
   8.341 -    if (is_intel()) {
   8.342 -      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
   8.343 -    } else if (is_amd()) {
   8.344 -      result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
   8.345 -    }
   8.346 -    return result;
   8.347 -  }
   8.348 -
   8.349 -  static uint threads_per_core()  {
   8.350 -    uint result = 1;
   8.351 -    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
   8.352 -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
   8.353 -               cores_per_cpu();
   8.354 -    }
   8.355 -    return result;
   8.356 -  }
   8.357 -
   8.358 -  static intx L1_data_cache_line_size()  {
   8.359 -    intx result = 0;
   8.360 -    if (is_intel()) {
   8.361 -      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
   8.362 -    } else if (is_amd()) {
   8.363 -      result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
   8.364 -    }
   8.365 -    if (result < 32) // not defined ?
   8.366 -      result = 32;   // 32 bytes by default for other x64
   8.367 -    return result;
   8.368 -  }
   8.369 -
   8.370 -  //
   8.371 -  // Feature identification
   8.372 -  //
   8.373 -  static bool supports_cpuid()    { return _cpuFeatures  != 0; }
   8.374 -  static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; }
   8.375 -  static bool supports_cmov()     { return (_cpuFeatures & CPU_CMOV) != 0; }
   8.376 -  static bool supports_fxsr()     { return (_cpuFeatures & CPU_FXSR) != 0; }
   8.377 -  static bool supports_ht()       { return (_cpuFeatures & CPU_HT) != 0; }
   8.378 -  static bool supports_mmx()      { return (_cpuFeatures & CPU_MMX) != 0; }
   8.379 -  static bool supports_sse()      { return (_cpuFeatures & CPU_SSE) != 0; }
   8.380 -  static bool supports_sse2()     { return (_cpuFeatures & CPU_SSE2) != 0; }
   8.381 -  static bool supports_sse3()     { return (_cpuFeatures & CPU_SSE3) != 0; }
   8.382 -  static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   8.383 -  static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   8.384 -  static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   8.385 -  //
   8.386 -  // AMD features
   8.387 -  //
   8.388 -  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
   8.389 -  static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
   8.390 -  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
   8.391 -  static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
   8.392 -
   8.393 -  static bool supports_compare_and_exchange() { return true; }
   8.394 -
   8.395 -  static const char* cpu_features()           { return _features_str; }
   8.396 -
   8.397 -  static intx allocate_prefetch_distance() {
   8.398 -    // This method should be called before allocate_prefetch_style().
   8.399 -    //
   8.400 -    // Hardware prefetching (distance/size in bytes):
   8.401 -    // Pentium 4 - 256 / 128
   8.402 -    // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
   8.403 -    // Core      - 128 /  64
   8.404 -    //
   8.405 -    // Software prefetching (distance in bytes / instruction with best score):
   8.406 -    // Pentium 4 - 512 / prefetchnta
   8.407 -    // Opteron   - 256 / prefetchnta
   8.408 -    // Core      - 256 / prefetchnta
   8.409 -    // It will be used only when AllocatePrefetchStyle > 0
   8.410 -
   8.411 -    intx count = AllocatePrefetchDistance;
   8.412 -    if (count < 0) {  // default ?
   8.413 -      if (is_amd()) { // AMD
   8.414 -        count = 256;  // Opteron
   8.415 -      } else {        // Intel
   8.416 -        if (cpu_family() == 6) {
   8.417 -          count = 256;// Pentium M, Core, Core2
   8.418 -        } else {
   8.419 -          count = 512;// Pentium 4
   8.420 -        }
   8.421 -      }
   8.422 -    }
   8.423 -    return count;
   8.424 -  }
   8.425 -  static intx allocate_prefetch_style() {
   8.426 -    assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
   8.427 -    // Return 0 if AllocatePrefetchDistance was not defined.
   8.428 -    return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0;
   8.429 -  }
   8.430 -
   8.431 -  // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
   8.432 -  // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
   8.433 -  // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
   8.434 -  // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
   8.435 -
   8.436 -  // gc copy/scan is disabled if prefetchw isn't supported, because
   8.437 -  // Prefetch::write emits an inlined prefetchw on Linux.
   8.438 -  // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
   8.439 -  // The used prefetcht0 instruction works for both amd64 and em64t.
   8.440 -  static intx prefetch_copy_interval_in_bytes() {
   8.441 -    intx interval = PrefetchCopyIntervalInBytes;
   8.442 -    return interval >= 0 ? interval : 576;
   8.443 -  }
   8.444 -  static intx prefetch_scan_interval_in_bytes() {
   8.445 -    intx interval = PrefetchScanIntervalInBytes;
   8.446 -    return interval >= 0 ? interval : 576;
   8.447 -  }
   8.448 -  static intx prefetch_fields_ahead() {
   8.449 -    intx count = PrefetchFieldsAhead;
   8.450 -    return count >= 0 ? count : 1;
   8.451 -  }
   8.452 -};
     9.1 --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Sun Feb 22 17:21:13 2009 -0800
     9.2 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp	Thu Feb 26 14:25:55 2009 -0800
     9.3 @@ -1,5 +1,5 @@
     9.4  /*
     9.5 - * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
     9.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
     9.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     9.8   *
     9.9   * This code is free software; you can redistribute it and/or modify it
    9.10 @@ -299,14 +299,18 @@
    9.11  
    9.12  }
    9.13  
    9.14 +#endif // AMD64
    9.15 +
    9.16  bool os::supports_sse() {
    9.17 +#ifdef AMD64
    9.18 +  return true;
    9.19 +#else
    9.20    if (sse_status == SSE_UNKNOWN)
    9.21      check_for_sse_support();
    9.22    return sse_status == SSE_SUPPORTED;
    9.23 +#endif // AMD64
    9.24  }
    9.25  
    9.26 -#endif // AMD64
    9.27 -
    9.28  bool os::is_allocatable(size_t bytes) {
    9.29  #ifdef AMD64
    9.30    return true;
    10.1 --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Sun Feb 22 17:21:13 2009 -0800
    10.2 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp	Thu Feb 26 14:25:55 2009 -0800
    10.3 @@ -1,5 +1,5 @@
    10.4  /*
    10.5 - * Copyright 1999-2004 Sun Microsystems, Inc.  All Rights Reserved.
    10.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
    10.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    10.8   *
    10.9   * This code is free software; you can redistribute it and/or modify it
   10.10 @@ -41,8 +41,9 @@
   10.11    static void  fence_bootstrap              ();
   10.12  
   10.13    static void setup_fpu();
   10.14 +#endif // AMD64
   10.15 +
   10.16    static bool supports_sse();
   10.17 -#endif // AMD64
   10.18  
   10.19    static bool is_allocatable(size_t bytes);
   10.20  
    11.1 --- a/src/share/vm/includeDB_core	Sun Feb 22 17:21:13 2009 -0800
    11.2 +++ b/src/share/vm/includeDB_core	Thu Feb 26 14:25:55 2009 -0800
    11.3 @@ -1,5 +1,5 @@
    11.4  //
    11.5 -// Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    11.6 +// Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    11.7  // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    11.8  //
    11.9  // This code is free software; you can redistribute it and/or modify it
   11.10 @@ -176,7 +176,7 @@
   11.11  arguments.cpp                           oop.inline.hpp
   11.12  arguments.cpp                           os_<os_family>.inline.hpp
   11.13  arguments.cpp                           universe.inline.hpp
   11.14 -arguments.cpp                           vm_version_<arch_model>.hpp
   11.15 +arguments.cpp                           vm_version_<arch>.hpp
   11.16  
   11.17  arguments.hpp                           java.hpp
   11.18  arguments.hpp                           perfData.hpp
   11.19 @@ -241,7 +241,7 @@
   11.20  assembler.hpp                           register_<arch>.hpp
   11.21  assembler.hpp                           relocInfo.hpp
   11.22  assembler.hpp                           top.hpp
   11.23 -assembler.hpp                           vm_version_<arch_model>.hpp
   11.24 +assembler.hpp                           vm_version_<arch>.hpp
   11.25  
   11.26  assembler.inline.hpp                    assembler.hpp
   11.27  assembler.inline.hpp                    codeBuffer.hpp
   11.28 @@ -280,7 +280,7 @@
   11.29  
   11.30  atomic_<os_arch>.inline.hpp             atomic.hpp
   11.31  atomic_<os_arch>.inline.hpp             os.hpp
   11.32 -atomic_<os_arch>.inline.hpp             vm_version_<arch_model>.hpp
   11.33 +atomic_<os_arch>.inline.hpp             vm_version_<arch>.hpp
   11.34  
   11.35  // attachListener is jck optional, put cpp deps in includeDB_features
   11.36  
   11.37 @@ -2176,7 +2176,7 @@
   11.38  interpreterRuntime.cpp                  threadCritical.hpp
   11.39  interpreterRuntime.cpp                  universe.inline.hpp
   11.40  interpreterRuntime.cpp                  vmSymbols.hpp
   11.41 -interpreterRuntime.cpp                  vm_version_<arch_model>.hpp
   11.42 +interpreterRuntime.cpp                  vm_version_<arch>.hpp
   11.43  
   11.44  interpreterRuntime.hpp                  bytecode.hpp
   11.45  interpreterRuntime.hpp                  frame.inline.hpp
   11.46 @@ -2279,7 +2279,7 @@
   11.47  java.cpp                                universe.hpp
   11.48  java.cpp                                vmError.hpp
   11.49  java.cpp                                vm_operations.hpp
   11.50 -java.cpp                                vm_version_<arch_model>.hpp
   11.51 +java.cpp                                vm_version_<arch>.hpp
   11.52  java.cpp                                vtune.hpp
   11.53  
   11.54  java.hpp                                os.hpp
   11.55 @@ -3485,7 +3485,7 @@
   11.56  register_<arch>.cpp                     register_<arch>.hpp
   11.57  
   11.58  register_<arch>.hpp                     register.hpp
   11.59 -register_<arch>.hpp                     vm_version_<arch_model>.hpp
   11.60 +register_<arch>.hpp                     vm_version_<arch>.hpp
   11.61  
   11.62  registerMap.hpp                         globalDefinitions.hpp
   11.63  registerMap.hpp                         register_<arch>.hpp
   11.64 @@ -3835,7 +3835,7 @@
   11.65  statSampler.cpp                         statSampler.hpp
   11.66  statSampler.cpp                         systemDictionary.hpp
   11.67  statSampler.cpp                         vmSymbols.hpp
   11.68 -statSampler.cpp                         vm_version_<arch_model>.hpp
   11.69 +statSampler.cpp                         vm_version_<arch>.hpp
   11.70  
   11.71  statSampler.hpp                         perfData.hpp
   11.72  statSampler.hpp                         task.hpp
   11.73 @@ -4579,22 +4579,22 @@
   11.74  vm_version.cpp                          arguments.hpp
   11.75  vm_version.cpp                          oop.inline.hpp
   11.76  vm_version.cpp                          universe.hpp
   11.77 -vm_version.cpp                          vm_version_<arch_model>.hpp
   11.78 +vm_version.cpp                          vm_version_<arch>.hpp
   11.79  
   11.80  vm_version.hpp                          allocation.hpp
   11.81  vm_version.hpp                          ostream.hpp
   11.82  
   11.83 -vm_version_<arch_model>.cpp             assembler_<arch>.inline.hpp
   11.84 -vm_version_<arch_model>.cpp             java.hpp
   11.85 -vm_version_<arch_model>.cpp             os_<os_family>.inline.hpp
   11.86 -vm_version_<arch_model>.cpp             resourceArea.hpp
   11.87 -vm_version_<arch_model>.cpp             stubCodeGenerator.hpp
   11.88 -vm_version_<arch_model>.cpp             vm_version_<arch_model>.hpp
   11.89 -
   11.90 -vm_version_<arch_model>.hpp             globals_extension.hpp
   11.91 -vm_version_<arch_model>.hpp             vm_version.hpp
   11.92 -
   11.93 -vm_version_<os_arch>.cpp                vm_version_<arch_model>.hpp
   11.94 +vm_version_<arch>.cpp                   assembler_<arch>.inline.hpp
   11.95 +vm_version_<arch>.cpp                   java.hpp
   11.96 +vm_version_<arch>.cpp                   os_<os_family>.inline.hpp
   11.97 +vm_version_<arch>.cpp                   resourceArea.hpp
   11.98 +vm_version_<arch>.cpp                   stubCodeGenerator.hpp
   11.99 +vm_version_<arch>.cpp                   vm_version_<arch>.hpp
  11.100 +
  11.101 +vm_version_<arch>.hpp                   globals_extension.hpp
  11.102 +vm_version_<arch>.hpp                   vm_version.hpp
  11.103 +
  11.104 +vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
  11.105  
  11.106  vmreg.cpp                               assembler.hpp
  11.107  vmreg.cpp                               vmreg.hpp
    12.1 --- a/src/share/vm/opto/escape.cpp	Sun Feb 22 17:21:13 2009 -0800
    12.2 +++ b/src/share/vm/opto/escape.cpp	Thu Feb 26 14:25:55 2009 -0800
    12.3 @@ -756,6 +756,16 @@
    12.4        } else {
    12.5          break;
    12.6        }
    12.7 +    } else if (result->Opcode() == Op_SCMemProj) {
    12.8 +      assert(result->in(0)->is_LoadStore(), "sanity");
    12.9 +      const Type *at = phase->type(result->in(0)->in(MemNode::Address));
   12.10 +      if (at != Type::TOP) {
   12.11 +        assert (at->isa_ptr() != NULL, "pointer type required.");
   12.12 +        int idx = C->get_alias_index(at->is_ptr());
   12.13 +        assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field");
   12.14 +        break;
   12.15 +      }
   12.16 +      result = result->in(0)->in(MemNode::Memory);
   12.17      }
   12.18    }
   12.19    if (result->is_Phi()) {
    13.1 --- a/src/share/vm/opto/macro.cpp	Sun Feb 22 17:21:13 2009 -0800
    13.2 +++ b/src/share/vm/opto/macro.cpp	Thu Feb 26 14:25:55 2009 -0800
    13.3 @@ -250,6 +250,15 @@
    13.4          assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw");
    13.5        }
    13.6        mem = mem->in(MemNode::Memory);
    13.7 +    } else if (mem->Opcode() == Op_SCMemProj) {
    13.8 +      assert(mem->in(0)->is_LoadStore(), "sanity");
    13.9 +      const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
   13.10 +      int adr_idx = Compile::current()->get_alias_index(atype);
   13.11 +      if (adr_idx == alias_idx) {
   13.12 +        assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
   13.13 +        return NULL;
   13.14 +      }
   13.15 +      mem = mem->in(0)->in(MemNode::Memory);
   13.16      } else {
   13.17        return mem;
   13.18      }
   13.19 @@ -329,8 +338,15 @@
   13.20            return NULL;
   13.21          }
   13.22          values.at_put(j, val);
   13.23 +      } else if (val->Opcode() == Op_SCMemProj) {
   13.24 +        assert(val->in(0)->is_LoadStore(), "sanity");
   13.25 +        assert(false, "Object is not scalar replaceable if a LoadStore node access its field");
   13.26 +        return NULL;
   13.27        } else {
   13.28 +#ifdef ASSERT
   13.29 +        val->dump();
   13.30          assert(false, "unknown node on this path");
   13.31 +#endif
   13.32          return NULL;  // unknown node on this path
   13.33        }
   13.34      }
    14.1 --- a/src/share/vm/opto/matcher.cpp	Sun Feb 22 17:21:13 2009 -0800
    14.2 +++ b/src/share/vm/opto/matcher.cpp	Thu Feb 26 14:25:55 2009 -0800
    14.3 @@ -1707,11 +1707,18 @@
    14.4  void Matcher::find_shared( Node *n ) {
    14.5    // Allocate stack of size C->unique() * 2 to avoid frequent realloc
    14.6    MStack mstack(C->unique() * 2);
    14.7 +  // Mark nodes as address_visited if they are inputs to an address expression
    14.8 +  VectorSet address_visited(Thread::current()->resource_area());
    14.9    mstack.push(n, Visit);     // Don't need to pre-visit root node
   14.10    while (mstack.is_nonempty()) {
   14.11      n = mstack.node();       // Leave node on stack
   14.12      Node_State nstate = mstack.state();
   14.13 +    uint nop = n->Opcode();
   14.14      if (nstate == Pre_Visit) {
   14.15 +      if (address_visited.test(n->_idx)) { // Visited in address already?
   14.16 +        // Flag as visited and shared now.
   14.17 +        set_visited(n);
   14.18 +      }
   14.19        if (is_visited(n)) {   // Visited already?
   14.20          // Node is shared and has no reason to clone.  Flag it as shared.
   14.21          // This causes it to match into a register for the sharing.
   14.22 @@ -1726,7 +1733,7 @@
   14.23        set_visited(n);   // Flag as visited now
   14.24        bool mem_op = false;
   14.25  
   14.26 -      switch( n->Opcode() ) {  // Handle some opcodes special
   14.27 +      switch( nop ) {  // Handle some opcodes special
   14.28        case Op_Phi:             // Treat Phis as shared roots
   14.29        case Op_Parm:
   14.30        case Op_Proj:            // All handled specially during matching
   14.31 @@ -1887,34 +1894,51 @@
   14.32              // to have a single use so force sharing here.
   14.33              set_shared(m->in(AddPNode::Base)->in(1));
   14.34            }
   14.35 +
   14.36 +          // Some inputs for address expression are not put on stack
   14.37 +          // to avoid marking them as shared and forcing them into register
   14.38 +          // if they are used only in address expressions.
   14.39 +          // But they should be marked as shared if there are other uses
   14.40 +          // besides address expressions.
   14.41 +
   14.42            Node *off = m->in(AddPNode::Offset);
   14.43 -          if( off->is_Con() ) {
   14.44 -            set_visited(m);  // Flag as visited now
   14.45 +          if( off->is_Con() &&
   14.46 +              // When there are other uses besides address expressions
   14.47 +              // put it on stack and mark as shared.
   14.48 +              !is_visited(m) ) {
   14.49 +            address_visited.test_set(m->_idx); // Flag as address_visited
   14.50              Node *adr = m->in(AddPNode::Address);
   14.51  
   14.52              // Intel, ARM and friends can handle 2 adds in addressing mode
   14.53              if( clone_shift_expressions && adr->is_AddP() &&
   14.54                  // AtomicAdd is not an addressing expression.
   14.55                  // Cheap to find it by looking for screwy base.
   14.56 -                !adr->in(AddPNode::Base)->is_top() ) {
   14.57 -              set_visited(adr);  // Flag as visited now
   14.58 +                !adr->in(AddPNode::Base)->is_top() &&
   14.59 +                // Are there other uses besides address expressions?
   14.60 +                !is_visited(adr) ) {
   14.61 +              address_visited.set(adr->_idx); // Flag as address_visited
   14.62                Node *shift = adr->in(AddPNode::Offset);
   14.63                // Check for shift by small constant as well
   14.64                if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
   14.65 -                  shift->in(2)->get_int() <= 3 ) {
   14.66 -                set_visited(shift);  // Flag as visited now
   14.67 +                  shift->in(2)->get_int() <= 3 &&
   14.68 +                  // Are there other uses besides address expressions?
   14.69 +                  !is_visited(shift) ) {
   14.70 +                address_visited.set(shift->_idx); // Flag as address_visited
   14.71                  mstack.push(shift->in(2), Visit);
   14.72 +                Node *conv = shift->in(1);
   14.73  #ifdef _LP64
   14.74                  // Allow Matcher to match the rule which bypass
   14.75                  // ConvI2L operation for an array index on LP64
   14.76                  // if the index value is positive.
   14.77 -                if( shift->in(1)->Opcode() == Op_ConvI2L &&
   14.78 -                    shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
   14.79 -                  set_visited(shift->in(1));  // Flag as visited now
   14.80 -                  mstack.push(shift->in(1)->in(1), Pre_Visit);
   14.81 +                if( conv->Opcode() == Op_ConvI2L &&
   14.82 +                    conv->as_Type()->type()->is_long()->_lo >= 0 &&
   14.83 +                    // Are there other uses besides address expressions?
   14.84 +                    !is_visited(conv) ) {
   14.85 +                  address_visited.set(conv->_idx); // Flag as address_visited
   14.86 +                  mstack.push(conv->in(1), Pre_Visit);
   14.87                  } else
   14.88  #endif
   14.89 -                mstack.push(shift->in(1), Pre_Visit);
   14.90 +                mstack.push(conv, Pre_Visit);
   14.91                } else {
   14.92                  mstack.push(shift, Pre_Visit);
   14.93                }
    15.1 --- a/src/share/vm/opto/memnode.cpp	Sun Feb 22 17:21:13 2009 -0800
    15.2 +++ b/src/share/vm/opto/memnode.cpp	Thu Feb 26 14:25:55 2009 -0800
    15.3 @@ -1066,11 +1066,11 @@
    15.4          break;
    15.5        }
    15.6      }
    15.7 -    LoadNode* load = NULL;
    15.8 -    if (allocation != NULL && base->in(load_index)->is_Load()) {
    15.9 -      load = base->in(load_index)->as_Load();
   15.10 -    }
   15.11 -    if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
   15.12 +    bool has_load = ( allocation != NULL &&
   15.13 +                      (base->in(load_index)->is_Load() ||
   15.14 +                       base->in(load_index)->is_DecodeN() &&
   15.15 +                       base->in(load_index)->in(1)->is_Load()) );
   15.16 +    if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) {
   15.17        // Push the loads from the phi that comes from valueOf up
   15.18        // through it to allow elimination of the loads and the recovery
   15.19        // of the original value.
   15.20 @@ -1106,11 +1106,20 @@
   15.21        result->set_req(load_index, in2);
   15.22        return result;
   15.23      }
   15.24 -  } else if (base->is_Load()) {
   15.25 +  } else if (base->is_Load() ||
   15.26 +             base->is_DecodeN() && base->in(1)->is_Load()) {
   15.27 +    if (base->is_DecodeN()) {
   15.28 +      // Get LoadN node which loads cached Integer object
   15.29 +      base = base->in(1);
   15.30 +    }
   15.31      // Eliminate the load of Integer.value for integers from the cache
   15.32      // array by deriving the value from the index into the array.
   15.33      // Capture the offset of the load and then reverse the computation.
   15.34      Node* load_base = base->in(Address)->in(AddPNode::Base);
   15.35 +    if (load_base->is_DecodeN()) {
   15.36 +      // Get LoadN node which loads IntegerCache.cache field
   15.37 +      load_base = load_base->in(1);
   15.38 +    }
   15.39      if (load_base != NULL) {
   15.40        Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type());
   15.41        intptr_t cache_offset;

mercurial