Merge

changeset:   1090:60bfce711da4
parent:      1089:c664a0794f85
parent:      1082:bd441136a5ce
child:       1091:6bdd6923ba16
child:       1092:715dceaa89b7
author:      acorn
date:        Mon, 23 Mar 2009 10:42:20 -0400
summary:     Merge

files:
    agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
    src/share/vm/classfile/vmSymbols.hpp

     1.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Fri Mar 20 22:08:48 2009 -0400
     1.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Mon Mar 23 10:42:20 2009 -0400
     1.3 @@ -118,9 +118,9 @@
     1.4    public long getJIntSize();
     1.5    public long getJLongSize();
     1.6    public long getJShortSize();
     1.7 -  public long getHeapBase();
     1.8    public long getHeapOopSize();
     1.9 -  public long getLogMinObjAlignmentInBytes();
    1.10 +  public long getNarrowOopBase();
    1.11 +  public int  getNarrowOopShift();
    1.12  
    1.13    public ReadResult readBytesFromProcess(long address, long numBytes)
    1.14      throws DebuggerException;
     2.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Fri Mar 20 22:08:48 2009 -0400
     2.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Mon Mar 23 10:42:20 2009 -0400
     2.3 @@ -56,8 +56,8 @@
     2.4    // heap data.
     2.5    protected long oopSize;
     2.6    protected long heapOopSize;
     2.7 -  protected long heapBase;                 // heap base for compressed oops.
     2.8 -  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
     2.9 +  protected long narrowOopBase;  // heap base for compressed oops.
    2.10 +  protected int  narrowOopShift; // shift to decode compressed oops.
    2.11    // Should be initialized if desired by calling initCache()
    2.12    private PageCache cache;
    2.13  
    2.14 @@ -159,10 +159,10 @@
    2.15      javaPrimitiveTypesConfigured = true;
    2.16    }
    2.17  
    2.18 -  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
    2.19 -    this.heapBase = heapBase;
    2.20 +  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
    2.21      this.heapOopSize = heapOopSize;
    2.22 -    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
    2.23 +    this.narrowOopBase = narrowOopBase;
    2.24 +    this.narrowOopShift = narrowOopShift;
    2.25    }
    2.26  
    2.27    /** May be called by subclasses if desired to initialize the page
    2.28 @@ -459,7 +459,7 @@
    2.29      long value = readCInteger(address, getHeapOopSize(), true);
    2.30      if (value != 0) {
    2.31        // See oop.inline.hpp decode_heap_oop
    2.32 -      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
    2.33 +      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
    2.34      }
    2.35      return value;
    2.36    }
    2.37 @@ -545,10 +545,10 @@
    2.38      return heapOopSize;
    2.39    }
    2.40  
    2.41 -  public long getHeapBase() {
    2.42 -    return heapBase;
    2.43 +  public long getNarrowOopBase() {
    2.44 +    return narrowOopBase;
    2.45    }
    2.46 -  public long getLogMinObjAlignmentInBytes() {
    2.47 -    return logMinObjAlignmentInBytes;
    2.48 +  public int getNarrowOopShift() {
    2.49 +    return narrowOopShift;
    2.50    }
    2.51  }
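
The decode in readCompOopAddressValue above mirrors decode_heap_oop in
oop.inline.hpp: a non-null narrow oop is widened by shifting left by
narrowOopShift and adding narrowOopBase, which is 0 for zero-based
compressed oops. A minimal standalone sketch of that arithmetic, with
illustrative values; in the SA the real constants arrive from the target
VM via putHeapConst():

    // Hypothetical helper; the name and the example values are illustrative only.
    class NarrowOopDecoder {
        private final long narrowOopBase;  // 0 when the heap is zero-based
        private final int  narrowOopShift; // log2 of object alignment, e.g. 3

        NarrowOopDecoder(long base, int shift) {
            this.narrowOopBase = base;
            this.narrowOopShift = shift;
        }

        long decode(long narrowOop) {
            if (narrowOop == 0) return 0;  // null decodes to null
            return narrowOopBase + (narrowOop << narrowOopShift);
        }
    }
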
     3.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Fri Mar 20 22:08:48 2009 -0400
     3.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Mon Mar 23 10:42:20 2009 -0400
     3.3 @@ -42,5 +42,5 @@
     3.4                                                long jintSize,
     3.5                                                long jlongSize,
     3.6                                                long jshortSize);
     3.7 -  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
     3.8 +  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
     3.9  }
     4.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Fri Mar 20 22:08:48 2009 -0400
     4.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Mon Mar 23 10:42:20 2009 -0400
     4.3 @@ -65,9 +65,10 @@
     4.4    public long      getJIntSize() throws RemoteException;
     4.5    public long      getJLongSize() throws RemoteException;
     4.6    public long      getJShortSize() throws RemoteException;
     4.7 -  public long      getHeapBase() throws RemoteException;
     4.8    public long      getHeapOopSize() throws RemoteException;
     4.9 -  public long      getLogMinObjAlignmentInBytes() throws RemoteException;
    4.10 +  public long      getNarrowOopBase() throws RemoteException;
    4.11 +  public int       getNarrowOopShift() throws RemoteException;
    4.12 +
    4.13    public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
    4.14                                     long addrOrId2, boolean isAddress2) throws RemoteException;
    4.15    public int       getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;
     5.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Fri Mar 20 22:08:48 2009 -0400
     5.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Mon Mar 23 10:42:20 2009 -0400
     5.3 @@ -85,9 +85,9 @@
     5.4        jlongSize    = remoteDebugger.getJLongSize();
     5.5        jshortSize   = remoteDebugger.getJShortSize();
     5.6        javaPrimitiveTypesConfigured = true;
     5.7 -      heapBase     = remoteDebugger.getHeapBase();
     5.8 +      narrowOopBase  = remoteDebugger.getNarrowOopBase();
     5.9 +      narrowOopShift = remoteDebugger.getNarrowOopShift();
    5.10        heapOopSize  = remoteDebugger.getHeapOopSize();
    5.11 -      logMinObjAlignmentInBytes  = remoteDebugger.getLogMinObjAlignmentInBytes();
    5.12      }
    5.13      catch (RemoteException e) {
    5.14        throw new DebuggerException(e);
     6.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Fri Mar 20 22:08:48 2009 -0400
     6.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Mon Mar 23 10:42:20 2009 -0400
     6.3 @@ -114,17 +114,18 @@
     6.4      return debugger.getJShortSize();
     6.5    }
     6.6  
     6.7 -  public long getHeapBase() throws RemoteException {
     6.8 -    return debugger.getHeapBase();
     6.9 -  }
    6.10 -
    6.11    public long getHeapOopSize() throws RemoteException {
    6.12      return debugger.getHeapOopSize();
    6.13    }
    6.14  
    6.15 -  public long getLogMinObjAlignmentInBytes() throws RemoteException {
    6.16 -    return debugger.getLogMinObjAlignmentInBytes();
    6.17 +  public long getNarrowOopBase() throws RemoteException {
    6.18 +    return debugger.getNarrowOopBase();
    6.19    }
    6.20 +
    6.21 +  public int  getNarrowOopShift() throws RemoteException {
    6.22 +    return debugger.getNarrowOopShift();
    6.23 +  }
    6.24 +
    6.25    public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
    6.26                                     long addrOrId2, boolean isAddress2) throws RemoteException {
    6.27      ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);
     7.1 --- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Fri Mar 20 22:08:48 2009 -0400
     7.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Mon Mar 23 10:42:20 2009 -0400
     7.3 @@ -53,7 +53,8 @@
     7.4    // system obj array klass object
     7.5    private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;
     7.6  
     7.7 -  private static AddressField heapBaseField;
     7.8 +  private static AddressField narrowOopBaseField;
     7.9 +  private static CIntegerField narrowOopShiftField;
    7.10  
    7.11    static {
    7.12      VM.registerVMInitializedObserver(new Observer() {
    7.13 @@ -86,7 +87,8 @@
    7.14  
    7.15      systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");
    7.16  
    7.17 -    heapBaseField = type.getAddressField("_heap_base");
    7.18 +    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
    7.19 +    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
    7.20    }
    7.21  
    7.22    public Universe() {
    7.23 @@ -100,14 +102,18 @@
    7.24      }
    7.25    }
    7.26  
    7.27 -  public static long getHeapBase() {
    7.28 -    if (heapBaseField.getValue() == null) {
    7.29 +  public static long getNarrowOopBase() {
    7.30 +    if (narrowOopBaseField.getValue() == null) {
    7.31        return 0;
    7.32      } else {
    7.33 -      return heapBaseField.getValue().minus(null);
    7.34 +      return narrowOopBaseField.getValue().minus(null);
    7.35      }
    7.36    }
    7.37  
    7.38 +  public static int getNarrowOopShift() {
    7.39 +    return (int)narrowOopShiftField.getValue();
    7.40 +  }
    7.41 +
    7.42    /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
    7.43    public boolean isIn(Address p) {
    7.44      return heap().isIn(p);
     8.1 --- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Mar 20 22:08:48 2009 -0400
     8.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Mon Mar 23 10:42:20 2009 -0400
     8.3 @@ -342,13 +342,12 @@
     8.4        throw new RuntimeException("Attempt to initialize VM twice");
     8.5      }
     8.6      soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
     8.7 -
     8.8 +    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
     8.9 +                          Universe.getNarrowOopShift());
    8.10      for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
    8.11        ((Observer) iter.next()).update(null, null);
    8.12      }
    8.13  
    8.14 -    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
    8.15 -                        soleInstance.logMinObjAlignmentInBytes);
    8.16    }
    8.17  
    8.18    /** This is used by the debugging system */
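
The reordering in VM.java is deliberate: observers registered through
VM.registerVMInitializedObserver may dereference compressed oops as soon as
they fire, so the debugger must learn the narrow-oop constants before the
notification loop runs. A condensed sketch of the intended initialization
order (illustrative, not the full method body):

    // Hypothetical condensed flow of VM.initialize(); see the diff above.
    soleInstance = new VM(db, debugger, isBigEndian);
    debugger.putHeapConst(soleInstance.getHeapOopSize(),
                          Universe.getNarrowOopBase(),
                          Universe.getNarrowOopShift());  // constants first
    for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
        ((Observer) iter.next()).update(null, null);      // now safe to read oops
    }
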
     9.1 --- a/make/jprt.properties	Fri Mar 20 22:08:48 2009 -0400
     9.2 +++ b/make/jprt.properties	Mon Mar 23 10:42:20 2009 -0400
     9.3 @@ -19,12 +19,12 @@
     9.4  # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
     9.5  # CA 95054 USA or visit www.sun.com if you need additional information or
     9.6  # have any questions.
     9.7 -#  
     9.8 +#
     9.9  #
    9.10  
    9.11  # Properties for jprt
    9.12  
    9.13 -# All build result bundles are full jdks, so the 64bit testing does not 
    9.14 +# All build result bundles are full jdks, so the 64bit testing does not
    9.15  #    need the 32bit sibling bundle installed.
    9.16  #    Note: If the hotspot/make/Makefile changed to only bundle the 64bit files
    9.17  #          when bundling 64bit, and stripped out the 64bit files from any 32bit
    9.18 @@ -89,60 +89,52 @@
    9.19      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jvm98, \
    9.20      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark, \
    9.21      ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese, \
    9.22 -    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp, \
    9.23 -    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_2, \
    9.24 -    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_3, \
    9.25      ${jprt.my.solaris.sparc}-fastdebug-c1-runThese_Xshare, \
    9.26      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
    9.27      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
    9.28      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
    9.29      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
    9.30      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
    9.31 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default_2, \
    9.32 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC_2, \
    9.33 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC_2, \
    9.34 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC_2, \
    9.35 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS_2, \
    9.36 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
    9.37 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
    9.38      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_default, \
    9.39      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_SerialGC, \
    9.40      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParallelGC, \
    9.41      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParNewGC, \
    9.42      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_CMS, \
    9.43 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_G1, \
    9.44 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParOldGC, \
    9.45      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_default, \
    9.46      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_SerialGC, \
    9.47      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParallelGC, \
    9.48      ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_CMS, \
    9.49 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_2, \
    9.50 -    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_3
    9.51 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_G1, \
    9.52 +    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParOldGC
    9.53  
    9.54  jprt.my.solaris.sparcv9.test.targets= \
    9.55      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98, \
    9.56      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark, \
    9.57      ${jprt.my.solaris.sparcv9}-product-c2-runThese, \
    9.58 -    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp, \
    9.59 -    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_2, \
    9.60 -    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_3, \
    9.61      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default, \
    9.62      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC, \
    9.63      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
    9.64      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
    9.65      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS, \
    9.66 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default_2, \
    9.67 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
    9.68 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
    9.69 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
    9.70 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS_2, \
    9.71 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_G1, \
    9.72 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
    9.73      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_default, \
    9.74      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_SerialGC, \
    9.75      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParallelGC, \
    9.76      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParNewGC, \
    9.77      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_CMS, \
    9.78 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_G1, \
    9.79 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParOldGC, \
    9.80      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default, \
    9.81      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_SerialGC, \
    9.82      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParallelGC, \
    9.83      ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_CMS, \
    9.84 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_2, \
    9.85 -    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_3
    9.86 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_G1, \
    9.87 +    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParOldGC
    9.88  
    9.89  jprt.my.solaris.x64.test.targets= \
    9.90      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98, \
    9.91 @@ -154,73 +146,80 @@
    9.92      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
    9.93      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
    9.94      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
    9.95 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_default_2, \
    9.96 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
    9.97 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
    9.98 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
    9.99 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS_2, \
   9.100 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_G1, \
   9.101 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
   9.102      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_default, \
   9.103      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
   9.104      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
   9.105      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
   9.106      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
   9.107 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
   9.108 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
   9.109      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default, \
   9.110      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_SerialGC, \
   9.111      ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
   9.112 -    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_CMS
    9.113 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_CMS, \
    9.114 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_G1, \
    9.115 +    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_ParOldGC
   9.116  
   9.117  jprt.my.solaris.i586.test.targets= \
   9.118      ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
   9.119      ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark, \
   9.120      ${jprt.my.solaris.i586}-product-{c1|c2}-runThese_Xcomp, \
   9.121 -    ${jprt.my.solaris.i586}-product-c2-runThese_Xcomp_2, \
   9.122 -    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp_2, \
   9.123 +    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp, \
   9.124      ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xshare, \
   9.125      ${jprt.my.solaris.i586}-product-c1-GCBasher_default, \
   9.126      ${jprt.my.solaris.i586}-product-c1-GCBasher_SerialGC, \
   9.127      ${jprt.my.solaris.i586}-product-c1-GCBasher_ParallelGC, \
   9.128      ${jprt.my.solaris.i586}-product-c1-GCBasher_ParNewGC, \
   9.129      ${jprt.my.solaris.i586}-product-c1-GCBasher_CMS, \
   9.130 +    ${jprt.my.solaris.i586}-product-c1-GCBasher_G1, \
   9.131 +    ${jprt.my.solaris.i586}-product-c1-GCBasher_ParOldGC, \
   9.132      ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_default, \
   9.133      ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_SerialGC, \
   9.134      ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParallelGC, \
   9.135      ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParNewGC, \
   9.136      ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_CMS, \
   9.137 +    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_G1, \
   9.138 +    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParOldGC, \
   9.139      ${jprt.my.solaris.i586}-product-c1-GCOld_default, \
   9.140      ${jprt.my.solaris.i586}-product-c1-GCOld_SerialGC, \
   9.141      ${jprt.my.solaris.i586}-product-c1-GCOld_ParallelGC, \
   9.142      ${jprt.my.solaris.i586}-product-c1-GCOld_ParNewGC, \
   9.143      ${jprt.my.solaris.i586}-product-c1-GCOld_CMS, \
   9.144 +    ${jprt.my.solaris.i586}-product-c1-GCOld_G1, \
   9.145 +    ${jprt.my.solaris.i586}-product-c1-GCOld_ParOldGC, \
   9.146      ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default, \
   9.147      ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParallelGC, \
   9.148      ${jprt.my.solaris.i586}-fastdebug-c2-jbb_CMS, \
   9.149 -    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_2, \
   9.150 -    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_3
   9.151 +    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_G1, \
   9.152 +    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParOldGC
   9.153  
   9.154  jprt.my.linux.i586.test.targets = \
   9.155      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
   9.156      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-scimark, \
   9.157      ${jprt.my.linux.i586}-product-c1-runThese_Xcomp, \
   9.158 -    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_2, \
   9.159 -    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_3, \
   9.160      ${jprt.my.linux.i586}-fastdebug-c1-runThese_Xshare, \
   9.161      ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp, \
   9.162 -    ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp_2, \
   9.163      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
   9.164      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
   9.165      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
   9.166      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
   9.167      ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
   9.168 +    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
   9.169 +    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
   9.170      ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_default, \
   9.171      ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_SerialGC, \
   9.172      ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParallelGC, \
   9.173      ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParNewGC, \
   9.174      ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_CMS, \
   9.175 +    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_G1, \
   9.176 +    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParOldGC, \
   9.177      ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_default, \
   9.178      ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParallelGC, \
   9.179      ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_CMS, \
   9.180 -    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_2, \
   9.181 -    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_3
   9.182 +    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_G1, \
   9.183 +    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParOldGC
   9.184  
   9.185  jprt.my.linux.x64.test.targets = \
   9.186      ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98, \
   9.187 @@ -230,15 +229,19 @@
   9.188      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
   9.189      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
   9.190      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
   9.191 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_G1, \
   9.192 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
   9.193      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_default, \
   9.194      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
   9.195      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
   9.196      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
   9.197      ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_CMS, \
   9.198 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_G1, \
   9.199 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
   9.200      ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default, \
   9.201      ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
   9.202 -    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_2, \
   9.203 -    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_3
   9.204 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_G1, \
   9.205 +    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParOldGC
   9.206  
   9.207  jprt.my.windows.i586.test.targets = \
   9.208      ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
   9.209 @@ -251,16 +254,20 @@
   9.210      ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
   9.211      ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
   9.212      ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
   9.213 +    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
   9.214 +    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
   9.215      ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_default, \
   9.216      ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_SerialGC, \
   9.217      ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParallelGC, \
   9.218      ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParNewGC, \
   9.219      ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_CMS, \
   9.220 +    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_G1, \
   9.221 +    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParOldGC, \
   9.222      ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jbb_default, \
   9.223      ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParallelGC, \
   9.224      ${jprt.my.windows.i586}-product-{c1|c2}-jbb_CMS, \
   9.225 -    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_2, \
   9.226 -    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_3
   9.227 +    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_G1, \
   9.228 +    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParOldGC
   9.229  
   9.230  jprt.my.windows.x64.test.targets = \
   9.231      ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98, \
   9.232 @@ -272,16 +279,20 @@
   9.233      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
   9.234      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
   9.235      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
   9.236 +    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_G1, \
   9.237 +    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
   9.238      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_default, \
   9.239      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
   9.240      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
   9.241      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
   9.242      ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_CMS, \
   9.243 +    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_G1, \
   9.244 +    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
   9.245      ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default, \
   9.246      ${jprt.my.windows.x64}-product-c2-jbb_CMS, \
   9.247      ${jprt.my.windows.x64}-product-c2-jbb_ParallelGC, \
   9.248 -    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_2, \
   9.249 -    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_3
   9.250 +    ${jprt.my.windows.x64}-product-c2-jbb_G1, \
   9.251 +    ${jprt.my.windows.x64}-product-c2-jbb_ParOldGC
   9.252  
   9.253  # The complete list of test targets for jprt
   9.254  
    10.1 --- a/make/windows/get_msc_ver.sh	Fri Mar 20 22:08:48 2009 -0400
    10.2 +++ b/make/windows/get_msc_ver.sh	Mon Mar 23 10:42:20 2009 -0400
    10.3 @@ -29,6 +29,7 @@
    10.4  # cl version 13.10.3077 returns "MSC_VER=1310"
    10.5  # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
    10.6  # cl version 14.00.40310.41 returns "MSC_VER=1400"
    10.7 +# cl version 15.00.21022.8 returns "MSC_VER=1500"
    10.8  
    10.9  # Note that we currently do not have a way to set HotSpotMksHome in
   10.10  # the batch build, but so far this has not seemed to be a problem. The
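
From the table of examples above, MSC_VER is essentially the first two
dotted components of cl.exe's reported version combined as major*100 +
minor (13.10 gives 1310, 15.00 gives 1500), with the OLD_MSSDK 14.00.30701
compiler special-cased as 1399. A hedged sketch of that rule; the actual
script derives the value with shell tools, and this helper, including the
special case, is an assumption drawn only from the comments above:

    // Hypothetical illustration of the MSC_VER mapping described above.
    static int mscVer(String clVersion) {
        if (clVersion.startsWith("14.00.30701")) return 1399; // OLD_MSSDK quirk
        String[] parts = clVersion.split("\\.");
        return Integer.parseInt(parts[0]) * 100 + Integer.parseInt(parts[1]);
    }
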
    11.1 --- a/make/windows/makefiles/compile.make	Fri Mar 20 22:08:48 2009 -0400
    11.2 +++ b/make/windows/makefiles/compile.make	Mon Mar 23 10:42:20 2009 -0400
    11.3 @@ -170,11 +170,9 @@
    11.4  # Manifest Tool - used in VS2005 and later to adjust manifests stored
    11.5  # as resources inside build artifacts.
    11.6  MT=mt.exe
    11.7 -!if "$(BUILDARCH)" == "i486"
    11.8 -# VS2005 on x86 restricts the use of certain libc functions without this
    11.9 +# VS2005 and later restricts the use of certain libc functions without this
   11.10  CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
   11.11  !endif
   11.12 -!endif
   11.13  
   11.14  !if "$(COMPILER_NAME)" == "VS2008"
   11.15  PRODUCT_OPT_OPTION   = /O2 /Oy-
   11.16 @@ -185,11 +183,9 @@
   11.17  # Manifest Tool - used in VS2005 and later to adjust manifests stored
   11.18  # as resources inside build artifacts.
   11.19  MT=mt.exe
   11.20 -!if "$(BUILDARCH)" == "i486"
   11.21 -# VS2005 on x86 restricts the use of certain libc functions without this
   11.22 +# VS2005 and later restricts the use of certain libc functions without this
   11.23  CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
   11.24  !endif
   11.25 -!endif
   11.26  
   11.27  # Compile for space above time.
   11.28  !if "$(Variant)" == "kernel"
    12.1 --- a/make/windows/makefiles/sa.make	Fri Mar 20 22:08:48 2009 -0400
    12.2 +++ b/make/windows/makefiles/sa.make	Mon Mar 23 10:42:20 2009 -0400
    12.3 @@ -89,9 +89,11 @@
    12.4  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
    12.5  !elseif "$(BUILDARCH)" == "amd64"
    12.6  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
    12.7 +!if "$(COMPILER_NAME)" == "VS2005"
    12.8  # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
    12.9  # otherwise we get missing __security_check_cookie externals at link time. 
   12.10  SA_LINK_FLAGS = bufferoverflowU.lib
   12.11 +!endif
   12.12  !else
   12.13  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
   12.14  !endif
    13.1 --- a/make/windows/makefiles/sanity.make	Fri Mar 20 22:08:48 2009 -0400
    13.2 +++ b/make/windows/makefiles/sanity.make	Mon Mar 23 10:42:20 2009 -0400
    13.3 @@ -27,9 +27,9 @@
    13.4  all: checkCL checkLink
    13.5  
    13.6  checkCL:
    13.7 -	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
    13.8 +	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
    13.9  	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
   13.10  
   13.11  checkLink:
   13.12 -	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
   13.13 +	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
   13.14  	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
    14.1 --- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    14.2 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    14.3 @@ -2767,6 +2767,268 @@
    14.4  }
    14.5  
    14.6  
    14.7 +void MacroAssembler::check_klass_subtype(Register sub_klass,
    14.8 +                                         Register super_klass,
    14.9 +                                         Register temp_reg,
   14.10 +                                         Register temp2_reg,
   14.11 +                                         Label& L_success) {
   14.12 +  Label L_failure, L_pop_to_failure;
   14.13 +  check_klass_subtype_fast_path(sub_klass, super_klass,
   14.14 +                                temp_reg, temp2_reg,
   14.15 +                                &L_success, &L_failure, NULL);
   14.16 +  Register sub_2 = sub_klass;
   14.17 +  Register sup_2 = super_klass;
   14.18 +  if (!sub_2->is_global())  sub_2 = L0;
   14.19 +  if (!sup_2->is_global())  sup_2 = L1;
   14.20 +
   14.21 +  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
   14.22 +  check_klass_subtype_slow_path(sub_2, sup_2,
   14.23 +                                L2, L3, L4, L5,
   14.24 +                                NULL, &L_pop_to_failure);
   14.25 +
   14.26 +  // on success:
   14.27 +  restore();
   14.28 +  ba(false, L_success);
   14.29 +  delayed()->nop();
   14.30 +
   14.31 +  // on failure:
   14.32 +  bind(L_pop_to_failure);
   14.33 +  restore();
   14.34 +  bind(L_failure);
   14.35 +}
   14.36 +
   14.37 +
   14.38 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
   14.39 +                                                   Register super_klass,
   14.40 +                                                   Register temp_reg,
   14.41 +                                                   Register temp2_reg,
   14.42 +                                                   Label* L_success,
   14.43 +                                                   Label* L_failure,
   14.44 +                                                   Label* L_slow_path,
   14.45 +                                        RegisterConstant super_check_offset,
   14.46 +                                        Register instanceof_hack) {
   14.47 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   14.48 +                   Klass::secondary_super_cache_offset_in_bytes());
   14.49 +  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
   14.50 +                    Klass::super_check_offset_offset_in_bytes());
   14.51 +
   14.52 +  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
   14.53 +  bool need_slow_path = (must_load_sco ||
   14.54 +                         super_check_offset.constant_or_zero() == sco_offset);
   14.55 +
   14.56 +  assert_different_registers(sub_klass, super_klass, temp_reg);
   14.57 +  if (super_check_offset.is_register()) {
   14.58 +    assert_different_registers(sub_klass, super_klass,
   14.59 +                               super_check_offset.as_register());
   14.60 +  } else if (must_load_sco) {
   14.61 +    assert(temp2_reg != noreg, "supply either a temp or a register offset");
   14.62 +  }
   14.63 +
   14.64 +  Label L_fallthrough;
   14.65 +  int label_nulls = 0;
   14.66 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
   14.67 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
   14.68 +  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
   14.69 +  assert(label_nulls <= 1 || instanceof_hack != noreg ||
   14.70 +         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
   14.71 +         "at most one NULL in the batch, usually");
   14.72 +
   14.73 +  // Support for the instanceof hack, which uses delay slots to
   14.74 +  // set a destination register to zero or one.
   14.75 +  bool do_bool_sets = (instanceof_hack != noreg);
   14.76 +#define BOOL_SET(bool_value)                            \
   14.77 +  if (do_bool_sets && bool_value >= 0)                  \
   14.78 +    set(bool_value, instanceof_hack)
   14.79 +#define DELAYED_BOOL_SET(bool_value)                    \
   14.80 +  if (do_bool_sets && bool_value >= 0)                  \
   14.81 +    delayed()->set(bool_value, instanceof_hack);        \
   14.82 +  else delayed()->nop()
   14.83 +  // Hacked ba(), which may only be used just before L_fallthrough.
   14.84 +#define FINAL_JUMP(label, bool_value)                   \
   14.85 +  if (&(label) == &L_fallthrough) {                     \
   14.86 +    BOOL_SET(bool_value);                               \
   14.87 +  } else {                                              \
   14.88 +    ba((do_bool_sets && bool_value >= 0), label);       \
   14.89 +    DELAYED_BOOL_SET(bool_value);                       \
   14.90 +  }
   14.91 +
   14.92 +  // If the pointers are equal, we are done (e.g., String[] elements).
   14.93 +  // This self-check enables sharing of secondary supertype arrays among
   14.94 +  // non-primary types such as array-of-interface.  Otherwise, each such
   14.95 +  // type would need its own customized SSA.
   14.96 +  // We move this check to the front of the fast path because many
   14.97 +  // type checks are in fact trivially successful in this manner,
   14.98 +  // so we get a nicely predicted branch right at the start of the check.
   14.99 +  cmp(super_klass, sub_klass);
  14.100 +  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
  14.101 +  DELAYED_BOOL_SET(1);
  14.102 +
  14.103 +  // Check the supertype display:
  14.104 +  if (must_load_sco) {
  14.105 +    // The super check offset is always positive...
  14.106 +    lduw(super_klass, sco_offset, temp2_reg);
  14.107 +    super_check_offset = RegisterConstant(temp2_reg);
  14.108 +  }
  14.109 +  ld_ptr(sub_klass, super_check_offset, temp_reg);
  14.110 +  cmp(super_klass, temp_reg);
  14.111 +
  14.112 +  // This check has worked decisively for primary supers.
  14.113 +  // Secondary supers are sought in the super_cache ('super_cache_addr').
  14.114 +  // (Secondary supers are interfaces and very deeply nested subtypes.)
  14.115 +  // This works in the same check above because of a tricky aliasing
  14.116 +  // between the super_cache and the primary super display elements.
  14.117 +  // (The 'super_check_addr' can address either, as the case requires.)
  14.118 +  // Note that the cache is updated below if it does not help us find
  14.119 +  // what we need immediately.
  14.120 +  // So if it was a primary super, we can just fail immediately.
  14.121 +  // Otherwise, it's the slow path for us (no success at this point).
  14.122 +
  14.123 +  if (super_check_offset.is_register()) {
  14.124 +    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
  14.125 +    delayed(); if (do_bool_sets)  BOOL_SET(1);
  14.126 +    // if !do_bool_sets, sneak the next cmp into the delay slot:
  14.127 +    cmp(super_check_offset.as_register(), sc_offset);
  14.128 +
  14.129 +    if (L_failure == &L_fallthrough) {
  14.130 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
  14.131 +      delayed()->nop();
  14.132 +      BOOL_SET(0);  // fallthrough on failure
  14.133 +    } else {
  14.134 +      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
  14.135 +      DELAYED_BOOL_SET(0);
  14.136 +      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
  14.137 +    }
  14.138 +  } else if (super_check_offset.as_constant() == sc_offset) {
  14.139 +    // Need a slow path; fast failure is impossible.
  14.140 +    if (L_slow_path == &L_fallthrough) {
  14.141 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
  14.142 +      DELAYED_BOOL_SET(1);
  14.143 +    } else {
  14.144 +      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
  14.145 +      delayed()->nop();
  14.146 +      FINAL_JUMP(*L_success, 1);
  14.147 +    }
  14.148 +  } else {
  14.149 +    // No slow path; it's a fast decision.
  14.150 +    if (L_failure == &L_fallthrough) {
  14.151 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
  14.152 +      DELAYED_BOOL_SET(1);
  14.153 +      BOOL_SET(0);
  14.154 +    } else {
  14.155 +      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
  14.156 +      DELAYED_BOOL_SET(0);
  14.157 +      FINAL_JUMP(*L_success, 1);
  14.158 +    }
  14.159 +  }
  14.160 +
  14.161 +  bind(L_fallthrough);
  14.162 +
   14.163 +#undef BOOL_SET
   14.164 +#undef DELAYED_BOOL_SET
   14.165 +#undef FINAL_JUMP
   14.166 +
  14.167 +}
  14.168 +
  14.169 +
  14.170 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  14.171 +                                                   Register super_klass,
  14.172 +                                                   Register count_temp,
  14.173 +                                                   Register scan_temp,
  14.174 +                                                   Register scratch_reg,
  14.175 +                                                   Register coop_reg,
  14.176 +                                                   Label* L_success,
  14.177 +                                                   Label* L_failure) {
  14.178 +  assert_different_registers(sub_klass, super_klass,
  14.179 +                             count_temp, scan_temp, scratch_reg, coop_reg);
  14.180 +
  14.181 +  Label L_fallthrough, L_loop;
  14.182 +  int label_nulls = 0;
  14.183 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  14.184 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  14.185 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  14.186 +
  14.187 +  // a couple of useful fields in sub_klass:
  14.188 +  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  14.189 +                   Klass::secondary_supers_offset_in_bytes());
  14.190 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  14.191 +                   Klass::secondary_super_cache_offset_in_bytes());
  14.192 +
  14.193 +  // Do a linear scan of the secondary super-klass chain.
  14.194 +  // This code is rarely used, so simplicity is a virtue here.
  14.195 +
  14.196 +#ifndef PRODUCT
  14.197 +  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  14.198 +  inc_counter((address) pst_counter, count_temp, scan_temp);
  14.199 +#endif
  14.200 +
  14.201 +  // We will consult the secondary-super array.
  14.202 +  ld_ptr(sub_klass, ss_offset, scan_temp);
  14.203 +
  14.204 +  // Compress superclass if necessary.
  14.205 +  Register search_key = super_klass;
  14.206 +  bool decode_super_klass = false;
  14.207 +  if (UseCompressedOops) {
  14.208 +    if (coop_reg != noreg) {
  14.209 +      encode_heap_oop_not_null(super_klass, coop_reg);
  14.210 +      search_key = coop_reg;
  14.211 +    } else {
  14.212 +      encode_heap_oop_not_null(super_klass);
  14.213 +      decode_super_klass = true; // scarce temps!
  14.214 +    }
  14.215 +    // The superclass is never null; it would be a basic system error if a null
  14.216 +    // pointer were to sneak in here.  Note that we have already loaded the
  14.217 +    // Klass::super_check_offset from the super_klass in the fast path,
  14.218 +    // so if there is a null in that register, we are already in the afterlife.
  14.219 +  }
  14.220 +
  14.221 +  // Load the array length.  (Positive movl does right thing on LP64.)
  14.222 +  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
  14.223 +
  14.224 +  // Check for empty secondary super list
  14.225 +  tst(count_temp);
  14.226 +
  14.227 +  // Top of search loop
  14.228 +  bind(L_loop);
  14.229 +  br(Assembler::equal, false, Assembler::pn, *L_failure);
  14.230 +  delayed()->add(scan_temp, heapOopSize, scan_temp);
  14.231 +  assert(heapOopSize != 0, "heapOopSize should be initialized");
  14.232 +
  14.233 +  // Skip the array header in all array accesses.
  14.234 +  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
  14.235 +  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
  14.236 +
  14.237 +  // Load next super to check
  14.238 +  if (UseCompressedOops) {
  14.239 +    // Don't use load_heap_oop; we don't want to decode the element.
  14.240 +    lduw(   scan_temp, elem_offset, scratch_reg );
  14.241 +  } else {
  14.242 +    ld_ptr( scan_temp, elem_offset, scratch_reg );
  14.243 +  }
  14.244 +
  14.245 +  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  14.246 +  cmp(scratch_reg, search_key);
  14.247 +
  14.248 +  // A miss means we are NOT a subtype and need to keep looping
  14.249 +  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
  14.250 +  delayed()->deccc(count_temp); // decrement trip counter in delay slot
  14.251 +
  14.252 +  // Falling out the bottom means we found a hit; we ARE a subtype
  14.253 +  if (decode_super_klass) decode_heap_oop(super_klass);
  14.254 +
  14.255 +  // Success.  Cache the super we found and proceed in triumph.
  14.256 +  st_ptr(super_klass, sub_klass, sc_offset);
  14.257 +
  14.258 +  if (L_success != &L_fallthrough) {
  14.259 +    ba(false, *L_success);
  14.260 +    delayed()->nop();
  14.261 +  }
  14.262 +
  14.263 +  bind(L_fallthrough);
  14.264 +}
  14.265 +
  14.266 +
  14.267 +
  14.268 +
  14.269  void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
  14.270                                            Register temp_reg,
  14.271                                            Label& done, Label* slow_case,
  14.272 @@ -4316,7 +4578,13 @@
  14.273  
  14.274  void MacroAssembler::encode_heap_oop(Register src, Register dst) {
  14.275    assert (UseCompressedOops, "must be compressed");
  14.276 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.277 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.278    verify_oop(src);
  14.279 +  if (Universe::narrow_oop_base() == NULL) {
  14.280 +    srlx(src, LogMinObjAlignmentInBytes, dst);
  14.281 +    return;
  14.282 +  }
  14.283    Label done;
  14.284    if (src == dst) {
  14.285      // optimize for frequent case src == dst
  14.286 @@ -4338,26 +4606,39 @@
  14.287  
  14.288  void MacroAssembler::encode_heap_oop_not_null(Register r) {
  14.289    assert (UseCompressedOops, "must be compressed");
  14.290 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.291 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.292    verify_oop(r);
  14.293 -  sub(r, G6_heapbase, r);
  14.294 +  if (Universe::narrow_oop_base() != NULL)
  14.295 +    sub(r, G6_heapbase, r);
  14.296    srlx(r, LogMinObjAlignmentInBytes, r);
  14.297  }
  14.298  
  14.299  void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
  14.300    assert (UseCompressedOops, "must be compressed");
  14.301 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.302 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.303    verify_oop(src);
  14.304 -  sub(src, G6_heapbase, dst);
  14.305 -  srlx(dst, LogMinObjAlignmentInBytes, dst);
  14.306 +  if (Universe::narrow_oop_base() == NULL) {
  14.307 +    srlx(src, LogMinObjAlignmentInBytes, dst);
  14.308 +  } else {
  14.309 +    sub(src, G6_heapbase, dst);
  14.310 +    srlx(dst, LogMinObjAlignmentInBytes, dst);
  14.311 +  }
  14.312  }
  14.313  
  14.314  // Same algorithm as oops.inline.hpp decode_heap_oop.
  14.315  void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
  14.316    assert (UseCompressedOops, "must be compressed");
  14.317 -  Label done;
  14.318 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.319 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.320    sllx(src, LogMinObjAlignmentInBytes, dst);
  14.321 -  bpr(rc_nz, true, Assembler::pt, dst, done);
  14.322 -  delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
  14.323 -  bind(done);
  14.324 +  if (Universe::narrow_oop_base() != NULL) {
  14.325 +    Label done;
  14.326 +    bpr(rc_nz, true, Assembler::pt, dst, done);
   14.327 +    delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
  14.328 +    bind(done);
  14.329 +  }
  14.330    verify_oop(dst);
  14.331  }
  14.332  
  14.333 @@ -4366,8 +4647,11 @@
  14.334    // pd_code_size_limit.
  14.335    // Also do not verify_oop as this is called by verify_oop.
  14.336    assert (UseCompressedOops, "must be compressed");
  14.337 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.338 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.339    sllx(r, LogMinObjAlignmentInBytes, r);
  14.340 -  add(r, G6_heapbase, r);
  14.341 +  if (Universe::narrow_oop_base() != NULL)
  14.342 +    add(r, G6_heapbase, r);
  14.343  }
  14.344  
  14.345  void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
  14.346 @@ -4375,14 +4659,17 @@
  14.347    // pd_code_size_limit.
  14.348    // Also do not verify_oop as this is called by verify_oop.
  14.349    assert (UseCompressedOops, "must be compressed");
  14.350 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  14.351 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  14.352    sllx(src, LogMinObjAlignmentInBytes, dst);
  14.353 -  add(dst, G6_heapbase, dst);
  14.354 +  if (Universe::narrow_oop_base() != NULL)
  14.355 +    add(dst, G6_heapbase, dst);
  14.356  }
  14.357  
  14.358  void MacroAssembler::reinit_heapbase() {
  14.359    if (UseCompressedOops) {
  14.360      // call indirectly to solve generation ordering problem
  14.361 -    Address base(G6_heapbase, (address)Universe::heap_base_addr());
  14.362 +    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
  14.363      load_ptr_contents(base, G6_heapbase);
  14.364    }
  14.365  }
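
The new check_klass_subtype_fast_path/_slow_path pair above implements
HotSpot's standard two-phase subtype test: first the trivial self check and
a single load/compare against the slot named by the super's
super_check_offset (which decides primary supertypes outright), then, only
when that offset aliases the secondary super cache, a rarely taken linear
scan of the secondary-supers array that caches any hit. (The same file also
teaches the oop encode/decode paths to skip the G6_heapbase add/sub when
Universe::narrow_oop_base() is NULL, matching the zero-based decode sketch
earlier.) A simplified sketch of the algorithm the assembler emits; the
Klass layout below is an invented stand-in, not the VM's real data
structures:

    // Illustrative model only: field names and CACHE_SLOT are invented.
    class Klass {
        Klass[] primarySupers;       // fixed-depth display of primary supers
        Klass[] secondarySupers;     // interfaces and very deep supertypes
        Klass   secondarySuperCache; // last secondary super that matched
        int     superCheckOffset;    // display index, or CACHE_SLOT

        static final int CACHE_SLOT = -1; // stand-in for the cache's offset

        static Klass slotAt(Klass k, int offset) {
            return offset == CACHE_SLOT ? k.secondarySuperCache
                                        : k.primarySupers[offset];
        }

        static boolean isSubtype(Klass sub, Klass sup) {
            if (sub == sup) return true;                          // self check
            // Fast path: one load + compare decides primary supertypes.
            if (slotAt(sub, sup.superCheckOffset) == sup) return true;
            if (sup.superCheckOffset != CACHE_SLOT) return false; // decisive miss
            // Slow path: linear scan, updating the cache on a hit.
            for (Klass s : sub.secondarySupers) {
                if (s == sup) { sub.secondarySuperCache = sup; return true; }
            }
            return false;
        }
    }
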
    15.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Mar 20 22:08:48 2009 -0400
    15.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Mar 23 10:42:20 2009 -0400
    15.3 @@ -2327,6 +2327,46 @@
    15.4                                 Register temp_reg, Register temp2_reg,
    15.5                                 Label& no_such_interface);
    15.6  
    15.7 +  // Test sub_klass against super_klass, with fast and slow paths.
    15.8 +
    15.9 +  // The fast path produces a tri-state answer: yes / no / maybe-slow.
   15.10 +  // One of the three labels can be NULL, meaning take the fall-through.
   15.11 +  // If super_check_offset is -1, the value is loaded up from super_klass.
   15.12 +  // No registers are killed, except temp_reg and temp2_reg.
   15.13 +  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
   15.14 +  void check_klass_subtype_fast_path(Register sub_klass,
   15.15 +                                     Register super_klass,
   15.16 +                                     Register temp_reg,
   15.17 +                                     Register temp2_reg,
   15.18 +                                     Label* L_success,
   15.19 +                                     Label* L_failure,
   15.20 +                                     Label* L_slow_path,
   15.21 +                RegisterConstant super_check_offset = RegisterConstant(-1),
   15.22 +                Register instanceof_hack = noreg);
   15.23 +
   15.24 +  // The rest of the type check; must be wired to a corresponding fast path.
   15.25 +  // It does not repeat the fast path logic, so don't use it standalone.
   15.26 +  // The temp_reg can be noreg, if no temps are available.
   15.27 +  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
   15.28 +  // Updates the sub's secondary super cache as necessary.
   15.29 +  void check_klass_subtype_slow_path(Register sub_klass,
   15.30 +                                     Register super_klass,
   15.31 +                                     Register temp_reg,
   15.32 +                                     Register temp2_reg,
   15.33 +                                     Register temp3_reg,
   15.34 +                                     Register temp4_reg,
   15.35 +                                     Label* L_success,
   15.36 +                                     Label* L_failure);
   15.37 +
   15.38 +  // Simplified, combined version, good for typical uses.
   15.39 +  // Falls through on failure.
   15.40 +  void check_klass_subtype(Register sub_klass,
   15.41 +                           Register super_klass,
   15.42 +                           Register temp_reg,
   15.43 +                           Register temp2_reg,
   15.44 +                           Label& L_success);
   15.45 +
   15.46 +
   15.47    // Stack overflow checking
   15.48  
   15.49    // Note: this clobbers G3_scratch
    16.1 --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    16.2 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    16.3 @@ -2393,23 +2393,11 @@
    16.4  
    16.5      // get instance klass
    16.6      load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
    16.7 -    // get super_check_offset
    16.8 -    load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
    16.9 -    // See if we get an immediate positive hit
   16.10 -    __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
   16.11 -    __ cmp(k_RInfo, O7);
   16.12 -    __ br(Assembler::equal, false, Assembler::pn, done);
   16.13 -    __ delayed()->nop();
   16.14 -    // check for immediate negative hit
   16.15 -    __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   16.16 -    __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   16.17 -    __ delayed()->nop();
   16.18 -    // check for self
   16.19 -    __ cmp(klass_RInfo, k_RInfo);
   16.20 -    __ br(Assembler::equal, false, Assembler::pn, done);
   16.21 -    __ delayed()->nop();
   16.22 -
   16.23 -    // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   16.24 +    // perform the fast part of the checking logic
   16.25 +    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
   16.26 +
   16.27 +    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   16.28 +    assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
   16.29      __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   16.30      __ delayed()->nop();
   16.31      __ cmp(G3, 0);
   16.32 @@ -2493,58 +2481,30 @@
   16.33        __ delayed()->nop();
   16.34        __ bind(done);
   16.35      } else {
   16.36 +      bool need_slow_path = true;
   16.37        if (k->is_loaded()) {
   16.38 -        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
   16.39 -
   16.40 -        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
   16.41 -          // See if we get an immediate positive hit
   16.42 -          __ cmp(Rtmp1, k_RInfo );
   16.43 -          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   16.44 -          __ delayed()->nop();
   16.45 -        } else {
   16.46 -          // See if we get an immediate positive hit
   16.47 -          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
   16.48 -          __ cmp(Rtmp1, k_RInfo );
   16.49 -          __ br(Assembler::equal, false, Assembler::pn, done);
   16.50 -          // check for self
   16.51 -          __ delayed()->cmp(klass_RInfo, k_RInfo);
   16.52 -          __ br(Assembler::equal, false, Assembler::pn, done);
   16.53 -          __ delayed()->nop();
   16.54 -
   16.55 -          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   16.56 -          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   16.57 -          __ delayed()->nop();
   16.58 -          __ cmp(G3, 0);
   16.59 -          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
   16.60 -          __ delayed()->nop();
   16.61 -        }
   16.62 -        __ bind(done);
   16.63 +        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
   16.64 +          need_slow_path = false;
   16.65 +        // perform the fast part of the checking logic
   16.66 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
   16.67 +                                         (need_slow_path ? &done : NULL),
   16.68 +                                         stub->entry(), NULL,
   16.69 +                                         RegisterConstant(k->super_check_offset()));
   16.70        } else {
   16.71 -        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
   16.72 -
   16.73 -        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
   16.74 -        // See if we get an immediate positive hit
   16.75 -        load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
   16.76 -        __ cmp(k_RInfo, O7);
   16.77 -        __ br(Assembler::equal, false, Assembler::pn, done);
   16.78 -        __ delayed()->nop();
   16.79 -        // check for immediate negative hit
   16.80 -        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   16.81 -        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   16.82 -        // check for self
   16.83 -        __ delayed()->cmp(klass_RInfo, k_RInfo);
   16.84 -        __ br(Assembler::equal, false, Assembler::pn, done);
   16.85 -        __ delayed()->nop();
   16.86 -
   16.87 -        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   16.88 +        // perform the fast part of the checking logic
   16.89 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
   16.90 +                                         &done, stub->entry(), NULL);
   16.91 +      }
   16.92 +      if (need_slow_path) {
   16.93 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   16.94 +        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
   16.95          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   16.96          __ delayed()->nop();
   16.97          __ cmp(G3, 0);
   16.98          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
   16.99          __ delayed()->nop();
  16.100 -        __ bind(done);
  16.101        }
  16.102 -
  16.103 +      __ bind(done);
  16.104      }
  16.105      __ mov(obj, dst);
  16.106    } else if (code == lir_instanceof) {
  16.107 @@ -2582,58 +2542,32 @@
  16.108        __ set(0, dst);
  16.109        __ bind(done);
  16.110      } else {
  16.111 +      bool need_slow_path = true;
  16.112        if (k->is_loaded()) {
  16.113 -        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
  16.114 -        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
  16.115 -
  16.116 -        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
  16.117 -          // See if we get an immediate positive hit
  16.118 -          __ cmp(Rtmp1, k_RInfo );
  16.119 -          __ br(Assembler::equal, true, Assembler::pt, done);
  16.120 -          __ delayed()->set(1, dst);
  16.121 -          __ set(0, dst);
  16.122 -          __ bind(done);
  16.123 -        } else {
  16.124 -          // See if we get an immediate positive hit
  16.125 -          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
  16.126 -          __ cmp(Rtmp1, k_RInfo );
  16.127 -          __ br(Assembler::equal, true, Assembler::pt, done);
  16.128 -          __ delayed()->set(1, dst);
  16.129 -          // check for self
  16.130 -          __ cmp(klass_RInfo, k_RInfo);
  16.131 -          __ br(Assembler::equal, true, Assembler::pt, done);
  16.132 -          __ delayed()->set(1, dst);
  16.133 -
  16.134 -          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
  16.135 -          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
  16.136 -          __ delayed()->nop();
  16.137 -          __ mov(G3, dst);
  16.138 -          __ bind(done);
  16.139 -        }
  16.140 +        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
  16.141 +          need_slow_path = false;
  16.142 +        // perform the fast part of the checking logic
  16.143 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
  16.144 +                                         (need_slow_path ? &done : NULL),
  16.145 +                                         (need_slow_path ? &done : NULL), NULL,
  16.146 +                                         RegisterConstant(k->super_check_offset()),
  16.147 +                                         dst);
  16.148        } else {
  16.149          assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
  16.150 -
  16.151 -        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
  16.152 -        // See if we get an immediate positive hit
  16.153 -        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
  16.154 -        __ cmp(k_RInfo, O7);
  16.155 -        __ br(Assembler::equal, true, Assembler::pt, done);
  16.156 -        __ delayed()->set(1, dst);
  16.157 -        // check for immediate negative hit
  16.158 -        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
  16.159 -        __ br(Assembler::notEqual, true, Assembler::pt, done);
  16.160 -        __ delayed()->set(0, dst);
  16.161 -        // check for self
  16.162 -        __ cmp(klass_RInfo, k_RInfo);
  16.163 -        __ br(Assembler::equal, true, Assembler::pt, done);
  16.164 -        __ delayed()->set(1, dst);
  16.165 -
  16.166 -        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
  16.167 +        // perform the fast part of the checking logic
  16.168 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
  16.169 +                                         &done, &done, NULL,
  16.170 +                                         RegisterConstant(-1),
  16.171 +                                         dst);
  16.172 +      }
  16.173 +      if (need_slow_path) {
  16.174 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
  16.175 +        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
  16.176          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
  16.177          __ delayed()->nop();
  16.178          __ mov(G3, dst);
  16.179 -        __ bind(done);
  16.180        }
  16.181 +      __ bind(done);
  16.182      }
  16.183    } else {
  16.184      ShouldNotReachHere();
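
For reference, the fast path that these c1 hunks now delegate to can be modeled in portable C++. This is a sketch only, with simplified stand-ins for the real klassOop layout (SimpleKlass, the fixed-size display, and the explicit secondary-cache offset are assumptions, not HotSpot's actual types):

    #include <cstddef>

    // Simplified stand-in for the klass layout; not HotSpot's real types.
    struct SimpleKlass {
      SimpleKlass* primary_supers[8];     // supertype display, indexed by depth
      SimpleKlass* secondary_super_cache; // 1-word cache of a secondary hit
      int          super_check_offset;    // byte offset of the slot to compare
    };

    enum class FastPath { kSuccess, kFailure, kNeedSlowPath };

    // Fast part of the check: self test, then one load-and-compare at
    // super_check_offset.  The result is undecided only when that offset
    // designates the secondary_super_cache slot.
    FastPath check_klass_subtype_fast_path(const SimpleKlass* sub,
                                           const SimpleKlass* super,
                                           int secondary_cache_offset) {
      if (sub == super) return FastPath::kSuccess;        // trivial hit
      const SimpleKlass* displayed =
          *reinterpret_cast<SimpleKlass* const*>(
              reinterpret_cast<const char*>(sub) + super->super_check_offset);
      if (displayed == super) return FastPath::kSuccess;  // display hit
      if (super->super_check_offset != secondary_cache_offset)
        return FastPath::kFailure;                        // decisive miss
      return FastPath::kNeedSlowPath;                     // scan secondaries
    }

This also explains the need_slow_path flag in the hunk above: when k is loaded and its super_check_offset is a constant other than the secondary-cache offset, the fast path alone is decisive.
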
    17.1 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    17.2 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    17.3 @@ -714,38 +714,19 @@
    17.4          //      sub  : G3, argument, destroyed
    17.5          //      super: G1, argument, not changed
    17.6          //      raddr: O7, blown by call
    17.7 -        Label loop, miss;
    17.8 +        Label miss;
    17.9  
   17.10          __ save_frame(0);               // Blow no registers!
   17.11  
   17.12 -        __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
   17.13 -        __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
   17.14 -        __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
   17.15 -        __ clr(L4);                     // Index
   17.16 -        // Load a little early; will load 1 off the end of the array.
   17.17 -        // Ok for now; revisit if we have other uses of this routine.
   17.18 -        __ ld_ptr(L1,0,L2);             // Will load a little early
   17.19 -
   17.20 -        // The scan loop
   17.21 -        __ bind(loop);
   17.22 -        __ add(L1,wordSize,L1); // Bump by OOP size
   17.23 -        __ cmp(L4,L0);
   17.24 -        __ br(Assembler::equal,false,Assembler::pn,miss);
   17.25 -        __ delayed()->inc(L4);  // Bump index
   17.26 -        __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
   17.27 -        __ brx( Assembler::notEqual, false, Assembler::pt, loop );
   17.28 -        __ delayed()->ld_ptr(L1,0,L2); // Will load a little early
   17.29 -
   17.30 -        // Got a hit; report success; set cache
   17.31 -        __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   17.32 +        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);
   17.33  
   17.34          __ mov(1, G3);
   17.35 -        __ ret();                       // Result in G5 is ok; flags set
   17.36 +        __ ret();                       // Result in G3 is 'true'
   17.37          __ delayed()->restore();        // free copy or add can go here
   17.38  
   17.39          __ bind(miss);
   17.40          __ mov(0, G3);
   17.41 -        __ ret();                       // Result in G5 is ok; flags set
   17.42 +        __ ret();                       // Result in G3 is 'false'
   17.43          __ delayed()->restore();        // free copy or add can go here
   17.44        }
   17.45  
    18.1 --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    18.2 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    18.3 @@ -866,65 +866,18 @@
    18.4                                                    Register Rtmp2,
    18.5                                                    Register Rtmp3,
    18.6                                                    Label &ok_is_subtype ) {
    18.7 -  Label not_subtype, loop;
    18.8 +  Label not_subtype;
    18.9  
   18.10    // Profile the not-null value's klass.
   18.11    profile_typecheck(Rsub_klass, Rtmp1);
   18.12  
   18.13 -  // Load the super-klass's check offset into Rtmp1
   18.14 -  ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
   18.15 -  // Load from the sub-klass's super-class display list, or a 1-word cache of
   18.16 -  // the secondary superclass list, or a failing value with a sentinel offset
   18.17 -  // if the super-klass is an interface or exceptionally deep in the Java
   18.18 -  // hierarchy and we have to scan the secondary superclass list the hard way.
   18.19 -  ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
   18.20 -  // See if we get an immediate positive hit
   18.21 -  cmp( Rtmp2, Rsuper_klass );
   18.22 -  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
   18.23 -  // In the delay slot, check for immediate negative hit
   18.24 -  delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   18.25 -  br( Assembler::notEqual, false, Assembler::pt, not_subtype );
   18.26 -  // In the delay slot, check for self
   18.27 -  delayed()->cmp( Rsub_klass, Rsuper_klass );
   18.28 -  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
   18.29 -
   18.30 -  // Now do a linear scan of the secondary super-klass chain.
   18.31 -  delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
   18.32 -
   18.33 -  // compress superclass
   18.34 -  if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
   18.35 -
   18.36 -  // Rtmp2 holds the objArrayOop of secondary supers.
   18.37 -  ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
   18.38 -  // Check for empty secondary super list
   18.39 -  tst(Rtmp1);
   18.40 -
   18.41 -  // Top of search loop
   18.42 -  bind( loop );
   18.43 -  br( Assembler::equal, false, Assembler::pn, not_subtype );
   18.44 -  delayed()->nop();
   18.45 -
   18.46 -  // load next super to check
   18.47 -  if (UseCompressedOops) {
   18.48 -    lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
   18.49 -    // Bump array pointer forward one oop
   18.50 -    add( Rtmp2, 4, Rtmp2 );
   18.51 -  } else {
   18.52 -    ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
   18.53 -    // Bump array pointer forward one oop
   18.54 -    add( Rtmp2, wordSize, Rtmp2);
   18.55 -  }
   18.56 -  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
   18.57 -  cmp( Rtmp3, Rsuper_klass );
   18.58 -  // A miss means we are NOT a subtype and need to keep looping
   18.59 -  brx( Assembler::notEqual, false, Assembler::pt, loop );
   18.60 -  delayed()->deccc( Rtmp1 );    // dec trip counter in delay slot
   18.61 -  // Falling out the bottom means we found a hit; we ARE a subtype
   18.62 -  if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
   18.63 -  br( Assembler::always, false, Assembler::pt, ok_is_subtype );
   18.64 -  // Update the cache
   18.65 -  delayed()->st_ptr( Rsuper_klass, Rsub_klass,
   18.66 -                     sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   18.67 +  check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
   18.68 +                                Rtmp1, Rtmp2,
   18.69 +                                &ok_is_subtype, &not_subtype, NULL);
   18.70 +
   18.71 +  check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
   18.72 +                                Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
   18.73 +                                &ok_is_subtype, NULL);
   18.74  
   18.75    bind(not_subtype);
   18.76    profile_typecheck_failed(Rtmp1);
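
gen_subtype_check now simply chains the two helpers: the fast path either branches to ok_is_subtype/not_subtype or falls through into the slow path, which settles the rest. As a sketch, continuing the SimpleKlass model from the c1 section above (scan_secondary_supers is again the assumed slow-path helper):

    // Sketch of the combined check the interpreter now emits; declarations
    // repeat the earlier sketch so this compiles standalone.
    struct SimpleKlass;
    enum class FastPath { kSuccess, kFailure, kNeedSlowPath };
    FastPath check_klass_subtype_fast_path(const SimpleKlass* sub,
                                           const SimpleKlass* super,
                                           int secondary_cache_offset);
    bool scan_secondary_supers(const SimpleKlass* sub, const SimpleKlass* super);

    bool gen_subtype_check(const SimpleKlass* sub, const SimpleKlass* super,
                           int secondary_cache_offset) {
      switch (check_klass_subtype_fast_path(sub, super, secondary_cache_offset)) {
        case FastPath::kSuccess:      return true;   // ok_is_subtype
        case FastPath::kFailure:      return false;  // not_subtype
        case FastPath::kNeedSlowPath: break;
      }
      return scan_secondary_supers(sub, super);      // slow path decides
    }
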
    19.1 --- a/src/cpu/sparc/vm/sparc.ad	Fri Mar 20 22:08:48 2009 -0400
    19.2 +++ b/src/cpu/sparc/vm/sparc.ad	Mon Mar 23 10:42:20 2009 -0400
    19.3 @@ -547,7 +547,11 @@
    19.4      int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
    19.5      int klass_load_size;
    19.6      if (UseCompressedOops) {
    19.7 -      klass_load_size = 3*BytesPerInstWord; // see MacroAssembler::load_klass()
    19.8 +      assert(Universe::heap() != NULL, "java heap should be initialized");
    19.9 +      if (Universe::narrow_oop_base() == NULL)
   19.10 +        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
   19.11 +      else
   19.12 +        klass_load_size = 3*BytesPerInstWord;
   19.13      } else {
   19.14        klass_load_size = 1*BytesPerInstWord;
   19.15      }
   19.16 @@ -1601,9 +1605,11 @@
   19.17    st->print_cr("\nUEP:");
   19.18  #ifdef    _LP64
   19.19    if (UseCompressedOops) {
   19.20 +    assert(Universe::heap() != NULL, "java heap should be initialized");
   19.21      st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
   19.22      st->print_cr("\tSLL    R_G5,3,R_G5");
   19.23 -    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   19.24 +    if (Universe::narrow_oop_base() != NULL)
   19.25 +      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   19.26    } else {
   19.27      st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
   19.28    }
   19.29 @@ -2502,7 +2508,11 @@
   19.30        __ load_klass(O0, G3_scratch);
   19.31        int klass_load_size;
   19.32        if (UseCompressedOops) {
   19.33 -        klass_load_size = 3*BytesPerInstWord;
   19.34 +        assert(Universe::heap() != NULL, "java heap should be initialized");
   19.35 +        if (Universe::narrow_oop_base() == NULL)
   19.36 +          klass_load_size = 2*BytesPerInstWord;
   19.37 +        else
   19.38 +          klass_load_size = 3*BytesPerInstWord;
   19.39        } else {
   19.40          klass_load_size = 1*BytesPerInstWord;
   19.41        }
   19.42 @@ -9005,6 +9015,33 @@
   19.43    ins_pipe(long_memory_op);
   19.44  %}
   19.45  
   19.46 +
   19.47 +//---------- Population Count Instructions -------------------------------------
   19.48 +
   19.49 +instruct popCountI(iRegI dst, iRegI src) %{
   19.50 +  predicate(UsePopCountInstruction);
   19.51 +  match(Set dst (PopCountI src));
   19.52 +
   19.53 +  format %{ "POPC   $src, $dst" %}
   19.54 +  ins_encode %{
   19.55 +    __ popc($src$$Register, $dst$$Register);
   19.56 +  %}
   19.57 +  ins_pipe(ialu_reg);
   19.58 +%}
   19.59 +
   19.60 +// Note: Long.bitCount(long) returns an int.
   19.61 +instruct popCountL(iRegI dst, iRegL src) %{
   19.62 +  predicate(UsePopCountInstruction);
   19.63 +  match(Set dst (PopCountL src));
   19.64 +
   19.65 +  format %{ "POPC   $src, $dst" %}
   19.66 +  ins_encode %{
   19.67 +    __ popc($src$$Register, $dst$$Register);
   19.68 +  %}
   19.69 +  ins_pipe(ialu_reg);
   19.70 +%}
   19.71 +
   19.72 +
   19.73  // ============================================================================
   19.74  //------------Bytes reverse--------------------------------------------------
   19.75  
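
The two new rules map bit counting onto a single POPC; as the comment notes, Long.bitCount(long) still yields an int, since a 64-bit population count is at most 64. What POPC computes, as a portable sketch:

    #include <cstdint>

    // Portable model of POPC: the number of set bits in the source register.
    // The 64-bit result fits easily in an int (0..64), which is why
    // popCountL can target an iRegI destination.
    int pop_count64(uint64_t x) {
      int n = 0;
      while (x != 0) {
        x &= x - 1;  // clear the lowest set bit (Kernighan's trick)
        ++n;
      }
      return n;
    }
    // e.g. pop_count64(0xF0F0F0F0F0F0F0F0ULL) == 32
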
    20.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    20.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    20.3 @@ -900,19 +900,7 @@
    20.4      __ align(CodeEntryAlignment);
    20.5      StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    20.6      address start = __ pc();
    20.7 -    Label loop, miss;
    20.8 -
    20.9 -    // Compare super with sub directly, since super is not in its own SSA.
   20.10 -    // The compiler used to emit this test, but we fold it in here,
   20.11 -    // to increase overall code density, with no real loss of speed.
   20.12 -    { Label L;
   20.13 -      __ cmp(O1, O2);
   20.14 -      __ brx(Assembler::notEqual, false, Assembler::pt, L);
   20.15 -      __ delayed()->nop();
   20.16 -      __ retl();
   20.17 -      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
   20.18 -      __ bind(L);
   20.19 -    }
   20.20 +    Label miss;
   20.21  
   20.22  #if defined(COMPILER2) && !defined(_LP64)
   20.23      // Do not use a 'save' because it blows the 64-bit O registers.
   20.24 @@ -936,56 +924,12 @@
   20.25      Register L2_super   = L2;
   20.26      Register L3_index   = L3;
   20.27  
   20.28 -#ifdef _LP64
   20.29 -    Register L4_ooptmp  = L4;
   20.30 -
   20.31 -    if (UseCompressedOops) {
   20.32 -      // this must be under UseCompressedOops check, as we rely upon fact
   20.33 -      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
   20.34 -      // on stack, see several lines above
   20.35 -      __ encode_heap_oop(Rsuper, L4_ooptmp);
   20.36 -    }
   20.37 -#endif
   20.38 -
   20.39 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
   20.40 -
   20.41 -    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
   20.42 -    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
   20.43 -    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
   20.44 -    __ clr(L3_index);           // zero index
   20.45 -    // Load a little early; will load 1 off the end of the array.
   20.46 -    // Ok for now; revisit if we have other uses of this routine.
   20.47 -    if (UseCompressedOops) {
   20.48 -      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
   20.49 -    } else {
   20.50 -      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
   20.51 -    }
   20.52 -
   20.53 -    assert(heapOopSize != 0, "heapOopSize should be initialized");
   20.54 -    // The scan loop
   20.55 -    __ BIND(loop);
   20.56 -    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
   20.57 -    __ cmp(L3_index,L0_ary_len);
   20.58 -    __ br(Assembler::equal,false,Assembler::pn,miss);
   20.59 -    __ delayed()->inc(L3_index); // Bump index
   20.60 -
   20.61 -    if (UseCompressedOops) {
   20.62 -#ifdef  _LP64
   20.63 -      __ subcc(L2_super,L4_ooptmp,Rret);   // Check for match; zero in Rret for a hit
   20.64 -      __ br( Assembler::notEqual, false, Assembler::pt, loop );
   20.65 -      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
   20.66 -#else
   20.67 -      ShouldNotReachHere();
   20.68 -#endif
   20.69 -    } else {
   20.70 -      __ subcc(L2_super,Rsuper,Rret);   // Check for match; zero in Rret for a hit
   20.71 -      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
   20.72 -      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
   20.73 -    }
   20.74 -
   20.75 -    // Got a hit; report success; set cache.  Cache load doesn't
   20.76 -    // happen here; for speed it is directly emitted by the compiler.
   20.77 -    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   20.78 +    __ check_klass_subtype_slow_path(Rsub, Rsuper,
   20.79 +                                     L0, L1, L2, L3,
   20.80 +                                     NULL, &miss);
   20.81 +
   20.82 +    // Match falls through here.
   20.83 +    __ addcc(G0,0,Rret);        // set Z flags, Z result
   20.84  
   20.85  #if defined(COMPILER2) && !defined(_LP64)
   20.86      __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
   20.87 @@ -999,7 +943,6 @@
   20.88      __ delayed()->restore();
   20.89  #endif
   20.90  
   20.91 -    // Hit or miss falls through here
   20.92      __ BIND(miss);
   20.93      __ addcc(G0,1,Rret);        // set NZ flags, NZ result
   20.94  
   20.95 @@ -2330,51 +2273,31 @@
   20.96                             Register super_check_offset,
   20.97                             Register super_klass,
   20.98                             Register temp,
   20.99 -                           Label& L_success,
  20.100 -                           Register deccc_hack = noreg) {
  20.101 +                           Label& L_success) {
  20.102      assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
  20.103  
  20.104      BLOCK_COMMENT("type_check:");
  20.105  
  20.106 -    Label L_miss;
  20.107 +    Label L_miss, L_pop_to_miss;
  20.108  
  20.109      assert_clean_int(super_check_offset, temp);
  20.110  
  20.111 -    // maybe decrement caller's trip count:
  20.112 -#define DELAY_SLOT delayed();   \
  20.113 -    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
  20.114 -
  20.115 -    // if the pointers are equal, we are done (e.g., String[] elements)
  20.116 -    __ cmp(sub_klass, super_klass);
  20.117 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  20.118 -    __ DELAY_SLOT;
  20.119 -
  20.120 -    // check the supertype display:
  20.121 -    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
  20.122 -    __ cmp(super_klass,                      temp); // test the super type
  20.123 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  20.124 -    __ DELAY_SLOT;
  20.125 -
  20.126 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  20.127 -                     Klass::secondary_super_cache_offset_in_bytes());
  20.128 -    __ cmp(super_klass, sc_offset);
  20.129 -    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
  20.130 -    __ delayed()->nop();
  20.131 -
  20.132 +    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
  20.133 +                                     &L_success, &L_miss, NULL,
  20.134 +                                     super_check_offset);
  20.135 +
  20.136 +    BLOCK_COMMENT("type_check_slow_path:");
  20.137      __ save_frame(0);
  20.138 -    __ mov(sub_klass->after_save(), O1);
  20.139 -    // mov(super_klass->after_save(), O2); //fill delay slot
  20.140 -    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
  20.141 -    __ call(StubRoutines::Sparc::_partial_subtype_check);
  20.142 -    __ delayed()->mov(super_klass->after_save(), O2);
  20.143 +    __ check_klass_subtype_slow_path(sub_klass->after_save(),
  20.144 +                                     super_klass->after_save(),
  20.145 +                                     L0, L1, L2, L4,
  20.146 +                                     NULL, &L_pop_to_miss);
  20.147 +    __ ba(false, L_success);
  20.148 +    __ delayed()->restore();
  20.149 +
  20.150 +    __ bind(L_pop_to_miss);
  20.151      __ restore();
  20.152  
  20.153 -    // Upon return, the condition codes are already set.
  20.154 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  20.155 -    __ DELAY_SLOT;
  20.156 -
  20.157 -#undef DELAY_SLOT
  20.158 -
  20.159      // Fall through on failure!
  20.160      __ BIND(L_miss);
  20.161    }
  20.162 @@ -2411,7 +2334,7 @@
  20.163      gen_write_ref_array_pre_barrier(O1, O2);
  20.164  
  20.165  #ifdef ASSERT
  20.166 -    // We sometimes save a frame (see partial_subtype_check below).
  20.167 +    // We sometimes save a frame (see generate_type_check below).
  20.168      // If this will cause trouble, let's fail now instead of later.
  20.169      __ save_frame(0);
  20.170      __ restore();
  20.171 @@ -2455,41 +2378,39 @@
  20.172      //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
  20.173      __ align(16);
  20.174  
  20.175 -    __ bind(store_element);
  20.176 -    // deccc(G1_remain);                // decrement the count (hoisted)
  20.177 +    __ BIND(store_element);
  20.178 +    __ deccc(G1_remain);                // decrement the count
  20.179      __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
  20.180      __ inc(O5_offset, heapOopSize);     // step to next offset
  20.181      __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
   20.182      __ delayed()->set(0, O0);           // return 0 on success
  20.183  
  20.184      // ======== loop entry is here ========
  20.185 -    __ bind(load_element);
  20.186 +    __ BIND(load_element);
  20.187      __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
  20.188      __ br_null(G3_oop, true, Assembler::pt, store_element);
  20.189 -    __ delayed()->deccc(G1_remain);     // decrement the count
  20.190 +    __ delayed()->nop();
  20.191  
  20.192      __ load_klass(G3_oop, G4_klass); // query the object klass
  20.193  
  20.194      generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
  20.195                          // branch to this on success:
  20.196 -                        store_element,
  20.197 -                        // decrement this on success:
  20.198 -                        G1_remain);
  20.199 +                        store_element);
  20.200      // ======== end loop ========
  20.201  
  20.202      // It was a real error; we must depend on the caller to finish the job.
  20.203      // Register G1 has number of *remaining* oops, O2 number of *total* oops.
  20.204      // Emit GC store barriers for the oops we have copied (O2 minus G1),
  20.205      // and report their number to the caller.
  20.206 -    __ bind(fail);
  20.207 +    __ BIND(fail);
  20.208      __ subcc(O2_count, G1_remain, O2_count);
  20.209      __ brx(Assembler::zero, false, Assembler::pt, done);
  20.210      __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
  20.211  
  20.212 -    __ bind(do_card_marks);
  20.213 +    __ BIND(do_card_marks);
  20.214      gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
  20.215  
  20.216 -    __ bind(done);
  20.217 +    __ BIND(done);
  20.218      inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
  20.219      __ retl();
   20.220      __ delayed()->nop();             // return value in O0
  20.221 @@ -2942,14 +2863,15 @@
  20.222      StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
  20.223      StubRoutines::_fence_entry               = generate_fence();
  20.224  #endif  // COMPILER2 !=> _LP64
  20.225 -
  20.226 -    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
  20.227    }
  20.228  
  20.229  
  20.230    void generate_all() {
  20.231      // Generates all stubs and initializes the entry points
  20.232  
  20.233 +    // Generate partial_subtype_check first here since its code depends on
  20.234 +    // UseZeroBaseCompressedOops which is defined after heap initialization.
  20.235 +    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
  20.236      // These entry points require SharedInfo::stack0 to be set up in non-core builds
  20.237      StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
  20.238      StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
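
The restructured checkcast-copy loop above moves the count decrement to store_element and lets the per-element type check branch back there on success. Its control flow, as a C++ sketch ('is_assignable' stands in for generate_type_check, and 'oop' is a bare pointer here; the real stub also issues GC barriers, which this sketch omits):

    #include <cstddef>

    // Sketch of the checkcast-copy control flow: store each element after a
    // per-element type check; on the first failure report ~(elements copied),
    // on success report 0, matching the stub's return convention.
    typedef void* oop;

    ptrdiff_t checkcast_copy(oop* from, oop* to, size_t count,
                             bool (*is_assignable)(oop)) {
      size_t remain = count;                      // G1_remain
      while (remain != 0) {
        oop o = from[count - remain];             // load_element
        if (o != nullptr && !is_assignable(o))
          return ~(ptrdiff_t)(count - remain);    // fail: report -1^K
        to[count - remain] = o;                   // store_element
        --remain;                                 // deccc(G1_remain)
      }
      return 0;                                   // success
    }
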
    21.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    21.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    21.3 @@ -1,5 +1,5 @@
    21.4  /*
    21.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    21.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    21.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    21.8   *
    21.9   * This code is free software; you can redistribute it and/or modify it
   21.10 @@ -72,6 +72,9 @@
   21.11          FLAG_SET_ERGO(bool, UseCompressedOops, false);
   21.12        }
   21.13      }
   21.14 +    // 32-bit oops don't make sense for the 64-bit VM on sparc
   21.15 +    // since the 32-bit VM has the same registers and smaller objects.
   21.16 +    Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   21.17  #endif // _LP64
   21.18  #ifdef COMPILER2
   21.19      // Indirect branch is the same cost as direct
   21.20 @@ -89,16 +92,26 @@
   21.21  #endif
   21.22    }
   21.23  
   21.24 +  // Use hardware population count instruction if available.
   21.25 +  if (has_hardware_popc()) {
   21.26 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
   21.27 +      UsePopCountInstruction = true;
   21.28 +    }
   21.29 +  }
   21.30 +
   21.31    char buf[512];
   21.32 -  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s",
   21.33 +  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
   21.34                 (has_v8() ? ", has_v8" : ""),
   21.35                 (has_v9() ? ", has_v9" : ""),
   21.36 +               (has_hardware_popc() ? ", popc" : ""),
   21.37                 (has_vis1() ? ", has_vis1" : ""),
   21.38                 (has_vis2() ? ", has_vis2" : ""),
   21.39                 (is_ultra3() ? ", is_ultra3" : ""),
   21.40                 (is_sun4v() ? ", is_sun4v" : ""),
   21.41                 (is_niagara1() ? ", is_niagara1" : ""),
   21.42 -               (!has_hardware_int_muldiv() ? ", no-muldiv" : ""),
   21.43 +               (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
   21.44 +               (!has_hardware_mul32() ? ", no-mul32" : ""),
   21.45 +               (!has_hardware_div32() ? ", no-div32" : ""),
   21.46                 (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
   21.47  
   21.48    // buf is started with ", " or is empty
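
The popc ergonomics above only flip UsePopCountInstruction when the user left it at its default, so an explicit -XX:-UsePopCountInstruction still wins. The pattern, sketched with stand-ins for the real flags machinery:

    // Sketch of the FLAG_IS_DEFAULT guard above; 'Flag' is a stand-in for
    // HotSpot's globals machinery, not the real type.
    struct Flag { bool value; bool is_default; };

    void ergo_popc(Flag& use_popc, bool has_hardware_popc) {
      if (has_hardware_popc && use_popc.is_default) {
        use_popc.value = true;  // hardware present and user did not decide
      }
    }
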
    22.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Mar 20 22:08:48 2009 -0400
    22.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Mar 23 10:42:20 2009 -0400
    22.3 @@ -1,5 +1,5 @@
    22.4  /*
    22.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    22.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    22.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    22.8   *
    22.9   * This code is free software; you can redistribute it and/or modify it
   22.10 @@ -25,34 +25,38 @@
   22.11  class VM_Version: public Abstract_VM_Version {
   22.12  protected:
   22.13    enum Feature_Flag {
   22.14 -    v8_instructions     = 0,
   22.15 -    hardware_int_muldiv = 1,
   22.16 -    hardware_fsmuld     = 2,
   22.17 -    v9_instructions     = 3,
   22.18 -    vis1_instructions   = 4,
   22.19 -    vis2_instructions   = 5,
   22.20 -    sun4v_instructions  = 6
   22.21 +    v8_instructions    = 0,
   22.22 +    hardware_mul32     = 1,
   22.23 +    hardware_div32     = 2,
   22.24 +    hardware_fsmuld    = 3,
   22.25 +    hardware_popc      = 4,
   22.26 +    v9_instructions    = 5,
   22.27 +    vis1_instructions  = 6,
   22.28 +    vis2_instructions  = 7,
   22.29 +    sun4v_instructions = 8
   22.30    };
   22.31  
   22.32    enum Feature_Flag_Set {
   22.33 -    unknown_m             = 0,
   22.34 -    all_features_m        = -1,
   22.35 +    unknown_m           = 0,
   22.36 +    all_features_m      = -1,
   22.37  
   22.38 -    v8_instructions_m     = 1 << v8_instructions,
   22.39 -    hardware_int_muldiv_m = 1 << hardware_int_muldiv,
   22.40 -    hardware_fsmuld_m     = 1 << hardware_fsmuld,
   22.41 -    v9_instructions_m     = 1 << v9_instructions,
   22.42 -    vis1_instructions_m   = 1 << vis1_instructions,
   22.43 -    vis2_instructions_m   = 1 << vis2_instructions,
   22.44 -    sun4v_m               = 1 << sun4v_instructions,
   22.45 +    v8_instructions_m   = 1 << v8_instructions,
   22.46 +    hardware_mul32_m    = 1 << hardware_mul32,
   22.47 +    hardware_div32_m    = 1 << hardware_div32,
   22.48 +    hardware_fsmuld_m   = 1 << hardware_fsmuld,
   22.49 +    hardware_popc_m     = 1 << hardware_popc,
   22.50 +    v9_instructions_m   = 1 << v9_instructions,
   22.51 +    vis1_instructions_m = 1 << vis1_instructions,
   22.52 +    vis2_instructions_m = 1 << vis2_instructions,
   22.53 +    sun4v_m             = 1 << sun4v_instructions,
   22.54  
   22.55 -    generic_v8_m          = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m,
   22.56 -    generic_v9_m          = generic_v8_m | v9_instructions_m | vis1_instructions_m,
   22.57 -    ultra3_m              = generic_v9_m | vis2_instructions_m,
   22.58 +    generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
   22.59 +    generic_v9_m        = generic_v8_m | v9_instructions_m,
   22.60 +    ultra3_m            = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
   22.61  
   22.62      // Temporary until we have something more accurate
   22.63 -    niagara1_unique_m     = sun4v_m,
   22.64 -    niagara1_m            = generic_v9_m | niagara1_unique_m
   22.65 +    niagara1_unique_m   = sun4v_m,
   22.66 +    niagara1_m          = generic_v9_m | niagara1_unique_m
   22.67    };
   22.68  
   22.69    static int  _features;
   22.70 @@ -62,7 +66,7 @@
   22.71    static int  determine_features();
   22.72    static int  platform_features(int features);
   22.73  
   22.74 -  static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; }
   22.75 +  static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
   22.76  
   22.77    static int maximum_niagara1_processor_count() { return 32; }
   22.78    // Returns true if the platform is in the niagara line and
   22.79 @@ -76,8 +80,10 @@
   22.80    // Instruction support
   22.81    static bool has_v8()                  { return (_features & v8_instructions_m) != 0; }
   22.82    static bool has_v9()                  { return (_features & v9_instructions_m) != 0; }
   22.83 -  static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; }
   22.84 +  static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
   22.85 +  static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
   22.86    static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
   22.87 +  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }
   22.88    static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
   22.89    static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
   22.90  
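
The widened Feature_Flag enum keeps the same bit-per-capability scheme: each *_m mask is 1 shifted by the flag's index, and every has_*() query is a single AND against the cached feature word. A compact sketch of the scheme:

    #include <cstdint>

    // Sketch of the feature-bit scheme: bit index per capability, mask per
    // bit, query by AND against the feature word.
    enum Feature_Flag : int {
      v8_instructions = 0,
      hardware_mul32  = 1,
      hardware_div32  = 2,
      hardware_fsmuld = 3,
      hardware_popc   = 4,
    };

    constexpr uint32_t mask_of(Feature_Flag f) { return 1u << f; }

    bool has_hardware_popc(uint32_t features) {
      return (features & mask_of(hardware_popc)) != 0;
    }
    // e.g. has_hardware_popc(mask_of(hardware_popc) | mask_of(v8_instructions))
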
    23.1 --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    23.2 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    23.3 @@ -221,13 +221,15 @@
    23.4      if (is_vtable_stub) {
    23.5        // ld;ld;ld,jmp,nop
    23.6        const int basic = 5*BytesPerInstWord +
    23.7 -                        // shift;add for load_klass
    23.8 -                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
    23.9 +                        // shift;add for load_klass (only shift with zero heap based)
   23.10 +                        (UseCompressedOops ?
   23.11 +                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
   23.12        return basic + slop;
   23.13      } else {
   23.14        const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
   23.15 -                        // shift;add for load_klass
   23.16 -                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
   23.17 +                        // shift;add for load_klass (only shift with zero heap based)
   23.18 +                        (UseCompressedOops ?
   23.19 +                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
   23.20        return (basic + slop);
   23.21      }
   23.22    }
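
With a zero narrow-oop base, load_klass needs only lduw;sll (no add of the base register), so the size estimate drops by one instruction word. The arithmetic of the vtable-stub branch above, as a sketch (basic size only; the real function also adds slop):

    // Sketch of the size arithmetic above (BytesPerInstWord is 4 on SPARC):
    // ld;ld;ld;jmp;nop plus the compressed-klass decode sequence.
    constexpr int BytesPerInstWord = 4;

    constexpr int vtable_stub_basic_size(bool use_compressed_oops,
                                         bool zero_narrow_oop_base) {
      return 5 * BytesPerInstWord +
             (use_compressed_oops
                  ? (zero_narrow_oop_base ? 1 : 2) * BytesPerInstWord  // shift[,add]
                  : 0);
    }

    static_assert(vtable_stub_basic_size(true, true)  == 24, "lduw;sll only");
    static_assert(vtable_stub_basic_size(true, false) == 28, "lduw;sll;add");
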
    24.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Mar 20 22:08:48 2009 -0400
    24.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Mar 23 10:42:20 2009 -0400
    24.3 @@ -727,7 +727,7 @@
    24.4    }
    24.5  
    24.6  #ifdef _LP64
    24.7 -  assert(false, "fix locate_operand");
    24.8 +  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
    24.9  #else
   24.10    assert(which == imm_operand, "instruction has only an imm field");
   24.11  #endif // LP64
   24.12 @@ -2193,6 +2193,25 @@
   24.13    emit_byte(0x58 | encode);
   24.14  }
   24.15  
   24.16 +void Assembler::popcntl(Register dst, Address src) {
   24.17 +  assert(VM_Version::supports_popcnt(), "must support");
   24.18 +  InstructionMark im(this);
   24.19 +  emit_byte(0xF3);
   24.20 +  prefix(src, dst);
   24.21 +  emit_byte(0x0F);
   24.22 +  emit_byte(0xB8);
   24.23 +  emit_operand(dst, src);
   24.24 +}
   24.25 +
   24.26 +void Assembler::popcntl(Register dst, Register src) {
   24.27 +  assert(VM_Version::supports_popcnt(), "must support");
   24.28 +  emit_byte(0xF3);
   24.29 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
   24.30 +  emit_byte(0x0F);
   24.31 +  emit_byte(0xB8);
   24.32 +  emit_byte(0xC0 | encode);
   24.33 +}
   24.34 +
   24.35  void Assembler::popf() {
   24.36    emit_byte(0x9D);
   24.37  }
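
The new popcntl emitter produces the POPCNT encoding F3 0F B8 /r; for the register-register form the ModRM byte is 0xC0 | (dst << 3) | src, which is exactly what prefix_and_encode plus the final emit_byte build. A standalone sketch that emits and checks the bytes for popcnt eax, ecx:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch of the POPCNT r32, r32 encoding emitted above: F3 0F B8 /r.
    std::vector<uint8_t> emit_popcntl(int dst, int src) {
      return {0xF3, 0x0F, 0xB8,
              static_cast<uint8_t>(0xC0 | (dst << 3) | src)};
    }

    int main() {
      // popcnt eax, ecx  ->  F3 0F B8 C1   (eax = 0, ecx = 1)
      std::vector<uint8_t> code = emit_popcntl(0, 1);
      assert(code[0] == 0xF3 && code[1] == 0x0F &&
             code[2] == 0xB8 && code[3] == 0xC1);
      return 0;
    }

The 64-bit popcntq variant further below differs only in the REX.W prefix that prefixq_and_encode inserts between F3 and 0F B8.
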
   24.38 @@ -3224,12 +3243,6 @@
   24.39    emit_byte(0xF1);
   24.40  }
   24.41  
   24.42 -void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
   24.43 -  InstructionMark im(this);
   24.44 -  int encode = prefix_and_encode(dst->encoding());
   24.45 -  emit_byte(0xB8 | encode);
   24.46 -  emit_data((int)imm32, rspec, format);
   24.47 -}
   24.48  
   24.49  #ifndef _LP64
   24.50  
   24.51 @@ -3249,6 +3262,12 @@
   24.52    emit_data((int)imm32, rspec, 0);
   24.53  }
   24.54  
   24.55 +void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   24.56 +  InstructionMark im(this);
   24.57 +  int encode = prefix_and_encode(dst->encoding());
   24.58 +  emit_byte(0xB8 | encode);
   24.59 +  emit_data((int)imm32, rspec, 0);
   24.60 +}
   24.61  
   24.62  void Assembler::popa() { // 32bit
   24.63    emit_byte(0x61);
   24.64 @@ -3857,6 +3876,37 @@
   24.65    emit_data64(imm64, rspec);
   24.66  }
   24.67  
   24.68 +void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   24.69 +  InstructionMark im(this);
   24.70 +  int encode = prefix_and_encode(dst->encoding());
   24.71 +  emit_byte(0xB8 | encode);
   24.72 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   24.73 +}
   24.74 +
   24.75 +void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   24.76 +  InstructionMark im(this);
   24.77 +  prefix(dst);
   24.78 +  emit_byte(0xC7);
   24.79 +  emit_operand(rax, dst, 4);
   24.80 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   24.81 +}
   24.82 +
   24.83 +void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   24.84 +  InstructionMark im(this);
   24.85 +  int encode = prefix_and_encode(src1->encoding());
   24.86 +  emit_byte(0x81);
   24.87 +  emit_byte(0xF8 | encode);
   24.88 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   24.89 +}
   24.90 +
   24.91 +void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   24.92 +  InstructionMark im(this);
   24.93 +  prefix(src1);
   24.94 +  emit_byte(0x81);
   24.95 +  emit_operand(rdi, src1, 4);  // reg field /7 (rdi) selects CMP
   24.96 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   24.97 +}
   24.98 +
   24.99  void Assembler::movdq(XMMRegister dst, Register src) {
  24.100    // table D-1 says MMX/SSE2
  24.101    NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  24.102 @@ -4049,6 +4099,25 @@
  24.103    addq(rsp, 16 * wordSize);
  24.104  }
  24.105  
  24.106 +void Assembler::popcntq(Register dst, Address src) {
  24.107 +  assert(VM_Version::supports_popcnt(), "must support");
  24.108 +  InstructionMark im(this);
  24.109 +  emit_byte(0xF3);
  24.110 +  prefixq(src, dst);
  24.111 +  emit_byte(0x0F);
  24.112 +  emit_byte(0xB8);
  24.113 +  emit_operand(dst, src);
  24.114 +}
  24.115 +
  24.116 +void Assembler::popcntq(Register dst, Register src) {
  24.117 +  assert(VM_Version::supports_popcnt(), "must support");
  24.118 +  emit_byte(0xF3);
  24.119 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  24.120 +  emit_byte(0x0F);
  24.121 +  emit_byte(0xB8);
  24.122 +  emit_byte(0xC0 | encode);
  24.123 +}
  24.124 +
  24.125  void Assembler::popq(Address dst) {
  24.126    InstructionMark im(this);
  24.127    prefixq(dst);
  24.128 @@ -7217,6 +7286,225 @@
  24.129  }
  24.130  
  24.131  
  24.132 +void MacroAssembler::check_klass_subtype(Register sub_klass,
  24.133 +                           Register super_klass,
  24.134 +                           Register temp_reg,
  24.135 +                           Label& L_success) {
  24.136 +  Label L_failure;
  24.137 +  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  24.138 +  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  24.139 +  bind(L_failure);
  24.140 +}
  24.141 +
  24.142 +
  24.143 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  24.144 +                                                   Register super_klass,
  24.145 +                                                   Register temp_reg,
  24.146 +                                                   Label* L_success,
  24.147 +                                                   Label* L_failure,
  24.148 +                                                   Label* L_slow_path,
  24.149 +                                        RegisterConstant super_check_offset) {
  24.150 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  24.151 +  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  24.152 +  if (super_check_offset.is_register()) {
  24.153 +    assert_different_registers(sub_klass, super_klass,
  24.154 +                               super_check_offset.as_register());
  24.155 +  } else if (must_load_sco) {
  24.156 +    assert(temp_reg != noreg, "supply either a temp or a register offset");
  24.157 +  }
  24.158 +
  24.159 +  Label L_fallthrough;
  24.160 +  int label_nulls = 0;
  24.161 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  24.162 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  24.163 +  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  24.164 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  24.165 +
  24.166 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  24.167 +                   Klass::secondary_super_cache_offset_in_bytes());
  24.168 +  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  24.169 +                    Klass::super_check_offset_offset_in_bytes());
  24.170 +  Address super_check_offset_addr(super_klass, sco_offset);
  24.171 +
  24.172 +  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  24.173 +  // range of a jccb.  If this routine grows larger, reconsider at
  24.174 +  // least some of these.
  24.175 +#define local_jcc(assembler_cond, label)                                \
  24.176 +  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  24.177 +  else                             jcc( assembler_cond, label) /*omit semi*/
  24.178 +
  24.179 +  // Hacked jmp, which may only be used just before L_fallthrough.
  24.180 +#define final_jmp(label)                                                \
  24.181 +  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  24.182 +  else                            jmp(label)                /*omit semi*/
  24.183 +
  24.184 +  // If the pointers are equal, we are done (e.g., String[] elements).
  24.185 +  // This self-check enables sharing of secondary supertype arrays among
  24.186 +  // non-primary types such as array-of-interface.  Otherwise, each such
  24.187 +  // type would need its own customized SSA.
  24.188 +  // We move this check to the front of the fast path because many
  24.189 +  // type checks are in fact trivially successful in this manner,
  24.190 +  // so we get a nicely predicted branch right at the start of the check.
  24.191 +  cmpptr(sub_klass, super_klass);
  24.192 +  local_jcc(Assembler::equal, *L_success);
  24.193 +
  24.194 +  // Check the supertype display:
  24.195 +  if (must_load_sco) {
  24.196 +    // Positive movl does right thing on LP64.
  24.197 +    movl(temp_reg, super_check_offset_addr);
  24.198 +    super_check_offset = RegisterConstant(temp_reg);
  24.199 +  }
  24.200 +  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  24.201 +  cmpptr(super_klass, super_check_addr); // load displayed supertype
  24.202 +
  24.203 +  // This check has worked decisively for primary supers.
  24.204 +  // Secondary supers are sought in the super_cache ('super_cache_addr').
  24.205 +  // (Secondary supers are interfaces and very deeply nested subtypes.)
  24.206 +  // This works in the same check above because of a tricky aliasing
  24.207 +  // between the super_cache and the primary super display elements.
  24.208 +  // (The 'super_check_addr' can address either, as the case requires.)
  24.209 +  // Note that the cache is updated below if it does not help us find
  24.210 +  // what we need immediately.
  24.211 +  // So if it was a primary super, we can just fail immediately.
  24.212 +  // Otherwise, it's the slow path for us (no success at this point).
  24.213 +
  24.214 +  if (super_check_offset.is_register()) {
  24.215 +    local_jcc(Assembler::equal, *L_success);
  24.216 +    cmpl(super_check_offset.as_register(), sc_offset);
  24.217 +    if (L_failure == &L_fallthrough) {
  24.218 +      local_jcc(Assembler::equal, *L_slow_path);
  24.219 +    } else {
  24.220 +      local_jcc(Assembler::notEqual, *L_failure);
  24.221 +      final_jmp(*L_slow_path);
  24.222 +    }
  24.223 +  } else if (super_check_offset.as_constant() == sc_offset) {
  24.224 +    // Need a slow path; fast failure is impossible.
  24.225 +    if (L_slow_path == &L_fallthrough) {
  24.226 +      local_jcc(Assembler::equal, *L_success);
  24.227 +    } else {
  24.228 +      local_jcc(Assembler::notEqual, *L_slow_path);
  24.229 +      final_jmp(*L_success);
  24.230 +    }
  24.231 +  } else {
  24.232 +    // No slow path; it's a fast decision.
  24.233 +    if (L_failure == &L_fallthrough) {
  24.234 +      local_jcc(Assembler::equal, *L_success);
  24.235 +    } else {
  24.236 +      local_jcc(Assembler::notEqual, *L_failure);
  24.237 +      final_jmp(*L_success);
  24.238 +    }
  24.239 +  }
  24.240 +
  24.241 +  bind(L_fallthrough);
  24.242 +
  24.243 +#undef local_jcc
  24.244 +#undef final_jmp
  24.245 +}
  24.246 +
  24.247 +
  24.248 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  24.249 +                                                   Register super_klass,
  24.250 +                                                   Register temp_reg,
  24.251 +                                                   Register temp2_reg,
  24.252 +                                                   Label* L_success,
  24.253 +                                                   Label* L_failure,
  24.254 +                                                   bool set_cond_codes) {
  24.255 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  24.256 +  if (temp2_reg != noreg)
  24.257 +    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  24.258 +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  24.259 +
  24.260 +  Label L_fallthrough;
  24.261 +  int label_nulls = 0;
  24.262 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  24.263 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  24.264 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  24.265 +
  24.266 +  // a couple of useful fields in sub_klass:
  24.267 +  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  24.268 +                   Klass::secondary_supers_offset_in_bytes());
  24.269 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  24.270 +                   Klass::secondary_super_cache_offset_in_bytes());
  24.271 +  Address secondary_supers_addr(sub_klass, ss_offset);
  24.272 +  Address super_cache_addr(     sub_klass, sc_offset);
  24.273 +
  24.274 +  // Do a linear scan of the secondary super-klass chain.
  24.275 +  // This code is rarely used, so simplicity is a virtue here.
  24.276 +  // The repne_scan instruction uses fixed registers, which we must spill.
  24.277 +  // Don't worry too much about pre-existing connections with the input regs.
  24.278 +
  24.279 +  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  24.280 +  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
  24.281 +
  24.282 +  // Get super_klass value into rax (even if it was in rdi or rcx).
  24.283 +  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  24.284 +  if (super_klass != rax || UseCompressedOops) {
  24.285 +    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
  24.286 +    mov(rax, super_klass);
  24.287 +  }
  24.288 +  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  24.289 +  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  24.290 +
  24.291 +#ifndef PRODUCT
  24.292 +  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  24.293 +  ExternalAddress pst_counter_addr((address) pst_counter);
  24.294 +  NOT_LP64(  incrementl(pst_counter_addr) );
  24.295 +  LP64_ONLY( lea(rcx, pst_counter_addr) );
  24.296 +  LP64_ONLY( incrementl(Address(rcx, 0)) );
  24.297 +#endif //PRODUCT
  24.298 +
  24.299 +  // We will consult the secondary-super array.
  24.300 +  movptr(rdi, secondary_supers_addr);
  24.301 +  // Load the array length.  (Positive movl does right thing on LP64.)
  24.302 +  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  24.303 +  // Skip to start of data.
  24.304 +  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  24.305 +
  24.306 +  // Scan RCX words at [RDI] for an occurrence of RAX.
  24.307 +  // Set NZ/Z based on last compare.
  24.308 +#ifdef _LP64
  24.309 +  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  24.310 +  // and we store values in objArrays always encoded, thus we need to encode
  24.311 +  // the value of rax before repne.  Note that rax is dead after the repne.
  24.312 +  if (UseCompressedOops) {
  24.313 +    encode_heap_oop_not_null(rax);
  24.314 +    // The superclass is never null; it would be a basic system error if a null
  24.315 +    // pointer were to sneak in here.  Note that we have already loaded the
  24.316 +    // Klass::super_check_offset from the super_klass in the fast path,
  24.317 +    // so if there is a null in that register, we are already in the afterlife.
  24.318 +    repne_scanl();
  24.319 +  } else
  24.320 +#endif // _LP64
  24.321 +    repne_scan();
  24.322 +
  24.323 +  // Unspill the temp. registers:
  24.324 +  if (pushed_rdi)  pop(rdi);
  24.325 +  if (pushed_rcx)  pop(rcx);
  24.326 +  if (pushed_rax)  pop(rax);
  24.327 +
  24.328 +  if (set_cond_codes) {
  24.329 +    // Special hack for the AD files:  rdi is guaranteed non-zero.
  24.330 +    assert(!pushed_rdi, "rdi must be left non-NULL");
  24.331 +    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  24.332 +  }
  24.333 +
  24.334 +  if (L_failure == &L_fallthrough)
  24.335 +        jccb(Assembler::notEqual, *L_failure);
  24.336 +  else  jcc(Assembler::notEqual, *L_failure);
  24.337 +
  24.338 +  // Success.  Cache the super we found and proceed in triumph.
  24.339 +  movptr(super_cache_addr, super_klass);
  24.340 +
  24.341 +  if (L_success != &L_fallthrough) {
  24.342 +    jmp(*L_success);
  24.343 +  }
  24.344 +
  24.345 +#undef IS_A_TEMP
  24.346 +
  24.347 +  bind(L_fallthrough);
  24.348 +}
  24.349 +
  24.350 +
  24.351  void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  24.352    ucomisd(dst, as_Address(src));
  24.353  }
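
The x86 slow path above spills rax/rcx/rdi, loads the secondary-supers array, and lets repne scas do the linear search, caching a hit in secondary_super_cache. This also fleshes out the scan_secondary_supers helper assumed in the SPARC sketches earlier; modeled in portable C++ (array layout simplified):

    #include <cstddef>

    // Portable model of check_klass_subtype_slow_path's effect: linear scan
    // of sub's secondary-supers array for super; on a hit, update the
    // one-element secondary_super_cache so the fast path succeeds next time.
    struct ScanKlass {                       // simplified stand-in for klassOop
      ScanKlass** secondary_supers;          // array of secondary supertypes
      size_t      secondary_supers_len;      // its length
      ScanKlass*  secondary_super_cache;     // 1-word cache, written on a hit
    };

    bool scan_secondary_supers(ScanKlass* sub, ScanKlass* super) {
      for (size_t i = 0; i < sub->secondary_supers_len; i++) {  // repne_scan
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;  // cache the hit
          return true;
        }
      }
      return false;                            // miss: not a subtype
    }
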
  24.354 @@ -7710,14 +7998,21 @@
  24.355  void MacroAssembler::load_prototype_header(Register dst, Register src) {
  24.356  #ifdef _LP64
  24.357    if (UseCompressedOops) {
  24.358 +    assert (Universe::heap() != NULL, "java heap should be initialized");
  24.359      movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  24.360 -    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  24.361 +    if (Universe::narrow_oop_shift() != 0) {
  24.362 +      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
  24.363 +             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  24.364 +      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  24.365 +    } else {
  24.366 +      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  24.367 +    }
  24.368    } else
  24.369  #endif
  24.370 -    {
  24.371 -      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  24.372 -      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  24.373 -    }
  24.374 +  {
  24.375 +    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  24.376 +    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  24.377 +  }
  24.378  }
  24.379  
  24.380  void MacroAssembler::store_klass(Register dst, Register src) {
  24.381 @@ -7760,11 +8055,20 @@
  24.382  // Algorithm must match oop.inline.hpp encode_heap_oop.
  24.383  void MacroAssembler::encode_heap_oop(Register r) {
  24.384    assert (UseCompressedOops, "should be compressed");
  24.385 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.386 +  if (Universe::narrow_oop_base() == NULL) {
  24.387 +    verify_oop(r, "broken oop in encode_heap_oop");
  24.388 +    if (Universe::narrow_oop_shift() != 0) {
  24.389 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  24.390 +      shrq(r, LogMinObjAlignmentInBytes);
  24.391 +    }
  24.392 +    return;
  24.393 +  }
  24.394  #ifdef ASSERT
  24.395    if (CheckCompressedOops) {
  24.396      Label ok;
  24.397      push(rscratch1); // cmpptr trashes rscratch1
  24.398 -    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  24.399 +    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  24.400      jcc(Assembler::equal, ok);
  24.401      stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
  24.402      bind(ok);
  24.403 @@ -7780,6 +8084,7 @@
  24.404  
  24.405  void MacroAssembler::encode_heap_oop_not_null(Register r) {
  24.406    assert (UseCompressedOops, "should be compressed");
  24.407 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.408  #ifdef ASSERT
  24.409    if (CheckCompressedOops) {
  24.410      Label ok;
  24.411 @@ -7790,12 +8095,18 @@
  24.412    }
  24.413  #endif
  24.414    verify_oop(r, "broken oop in encode_heap_oop_not_null");
  24.415 -  subq(r, r12_heapbase);
  24.416 -  shrq(r, LogMinObjAlignmentInBytes);
  24.417 +  if (Universe::narrow_oop_base() != NULL) {
  24.418 +    subq(r, r12_heapbase);
  24.419 +  }
  24.420 +  if (Universe::narrow_oop_shift() != 0) {
  24.421 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  24.422 +    shrq(r, LogMinObjAlignmentInBytes);
  24.423 +  }
  24.424  }
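Both encode variants reduce to the same arithmetic from oop.inline.hpp: subtract the heap base if one is configured, then shift right by the object-alignment log. A hedged C++ sketch for a known non-null oop:

    #include <cstdint>

    // Sketch: either step disappears when base == 0 or shift == 0, matching
    // the conditional subq/shrq emitted above.
    static inline uint32_t encode_heap_oop_not_null_sketch(uintptr_t oop,
                                                           uintptr_t base,
                                                           int shift) {
      uintptr_t v = oop;
      if (base != 0)  v -= base;    // subq(r, r12_heapbase)
      if (shift != 0) v >>= shift;  // shrq(r, LogMinObjAlignmentInBytes)
      return (uint32_t)v;
    }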
  24.425  
  24.426  void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  24.427    assert (UseCompressedOops, "should be compressed");
  24.428 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.429  #ifdef ASSERT
  24.430    if (CheckCompressedOops) {
  24.431      Label ok;
  24.432 @@ -7809,18 +8120,32 @@
  24.433    if (dst != src) {
  24.434      movq(dst, src);
  24.435    }
  24.436 -  subq(dst, r12_heapbase);
  24.437 -  shrq(dst, LogMinObjAlignmentInBytes);
  24.438 +  if (Universe::narrow_oop_base() != NULL) {
  24.439 +    subq(dst, r12_heapbase);
  24.440 +  }
  24.441 +  if (Universe::narrow_oop_shift() != 0) {
  24.442 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  24.443 +    shrq(dst, LogMinObjAlignmentInBytes);
  24.444 +  }
  24.445  }
  24.446  
  24.447  void  MacroAssembler::decode_heap_oop(Register r) {
  24.448    assert (UseCompressedOops, "should be compressed");
  24.449 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.450 +  if (Universe::narrow_oop_base() == NULL) {
  24.451 +    if (Universe::narrow_oop_shift() != 0) {
  24.452 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  24.453 +      shlq(r, LogMinObjAlignmentInBytes);
  24.454 +    }
  24.455 +    verify_oop(r, "broken oop in decode_heap_oop");
  24.456 +    return;
  24.457 +  }
  24.458  #ifdef ASSERT
  24.459    if (CheckCompressedOops) {
  24.460      Label ok;
  24.461      push(rscratch1);
  24.462      cmpptr(r12_heapbase,
  24.463 -           ExternalAddress((address)Universe::heap_base_addr()));
  24.464 +           ExternalAddress((address)Universe::narrow_oop_base_addr()));
  24.465      jcc(Assembler::equal, ok);
  24.466      stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
  24.467      bind(ok);
  24.468 @@ -7844,32 +8169,76 @@
  24.469  
  24.470  void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  24.471    assert (UseCompressedOops, "should only be used for compressed headers");
  24.472 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.473    // Cannot assert, unverified entry point counts instructions (see .ad file)
  24.474    // vtableStubs also counts instructions in pd_code_size_limit.
  24.475    // Also do not verify_oop as this is called by verify_oop.
  24.476 -  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  24.477 -  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  24.478 +  if (Universe::narrow_oop_base() == NULL) {
  24.479 +    if (Universe::narrow_oop_shift() != 0) {
  24.480 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  24.481 +      shlq(r, LogMinObjAlignmentInBytes);
  24.482 +    }
  24.483 +  } else {
  24.484 +    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
  24.485 +            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  24.486 +    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  24.487 +  }
  24.488  }
  24.489  
  24.490  void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  24.491    assert (UseCompressedOops, "should only be used for compressed headers");
  24.492 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.493    // Cannot assert, unverified entry point counts instructions (see .ad file)
  24.494    // vtableStubs also counts instructions in pd_code_size_limit.
  24.495    // Also do not verify_oop as this is called by verify_oop.
  24.496 -  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  24.497 -  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  24.498 +  if (Universe::narrow_oop_shift() != 0) {
  24.499 +    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
  24.500 +            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  24.501 +    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  24.502 +  } else if (dst != src) {
  24.503 +    movq(dst, src);
  24.504 +  }
  24.505  }
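Decode is the inverse: shift left, then add the base, which the leaq form performs in a single instruction when both steps are non-trivial. A hedged sketch; the null-preserving decode_heap_oop additionally maps a zero narrow value to a NULL oop:

    #include <cstdint>

    // Sketch of decode_heap_oop_not_null: leaq(r, Address(r12_heapbase, r,
    // Address::times_8, 0)) computes base + (narrow << 3) in one instruction.
    static inline uintptr_t decode_not_null_sketch(uint32_t narrow,
                                                   uintptr_t base, int shift) {
      return base + ((uintptr_t)narrow << shift);
    }

    // Null-preserving variant, as in decode_heap_oop:
    static inline uintptr_t decode_sketch(uint32_t narrow,
                                          uintptr_t base, int shift) {
      return (narrow == 0) ? 0 : decode_not_null_sketch(narrow, base, shift);
    }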
  24.506  
  24.507  void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  24.508 -  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  24.509 +  assert (UseCompressedOops, "should only be used for compressed headers");
  24.510 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.511 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  24.512    int oop_index = oop_recorder()->find_index(obj);
  24.513    RelocationHolder rspec = oop_Relocation::spec(oop_index);
  24.514 -  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
  24.515 +  mov_narrow_oop(dst, oop_index, rspec);
  24.516 +}
  24.517 +
  24.518 +void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  24.519 +  assert (UseCompressedOops, "should only be used for compressed headers");
  24.520 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.521 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  24.522 +  int oop_index = oop_recorder()->find_index(obj);
  24.523 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  24.524 +  mov_narrow_oop(dst, oop_index, rspec);
  24.525 +}
  24.526 +
  24.527 +void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  24.528 +  assert (UseCompressedOops, "should only be used for compressed headers");
  24.529 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.530 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  24.531 +  int oop_index = oop_recorder()->find_index(obj);
  24.532 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  24.533 +  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  24.534 +}
  24.535 +
  24.536 +void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  24.537 +  assert (UseCompressedOops, "should only be used for compressed headers");
  24.538 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  24.539 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  24.540 +  int oop_index = oop_recorder()->find_index(obj);
  24.541 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  24.542 +  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  24.543  }
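All four helpers share the same prologue: register the object with the OopRecorder, build an oop relocation from its index, then emit the narrow move or compare. A hedged fragment of how generated code might use them (register, label, and object names are illustrative):

    // Fragment, not from this changeset; assumes 'obj' stays reachable via
    // the OopRecorder so the GC can patch the embedded narrow constant.
    __ set_narrow_oop(rbx, obj);              // rbx = compressed form of obj
    __ cmp_narrow_oop(Address(rsi, 0), obj);  // compare a narrow field to obj
    __ jcc(Assembler::equal, L_match);        // L_match: hypothetical label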
  24.544  
  24.545  void MacroAssembler::reinit_heapbase() {
  24.546    if (UseCompressedOops) {
  24.547 -    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  24.548 +    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  24.549    }
  24.550  }
  24.551  #endif // _LP64
    25.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    25.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    25.3 @@ -578,20 +578,25 @@
    25.4  
    25.5    // These are all easily abused and hence protected
    25.6  
    25.7 -  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
    25.8 -
    25.9    // 32BIT ONLY SECTION
   25.10  #ifndef _LP64
   25.11    // Make these disappear in 64bit mode since they would never be correct
   25.12    void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
   25.13    void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   25.14  
   25.15 +  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   25.16    void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
   25.17  
   25.18    void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
   25.19  #else
   25.20    // 64BIT ONLY SECTION
   25.21    void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
   25.22 +
   25.23 +  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
   25.24 +  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
   25.25 +
   25.26 +  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
   25.27 +  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
   25.28  #endif // _LP64
   25.29  
   25.30    // These are unique in that we are ensured by the caller that the 32bit
   25.31 @@ -1219,6 +1224,14 @@
   25.32    void popq(Address dst);
   25.33  #endif
   25.34  
   25.35 +  void popcntl(Register dst, Address src);
   25.36 +  void popcntl(Register dst, Register src);
   25.37 +
   25.38 +#ifdef _LP64
   25.39 +  void popcntq(Register dst, Address src);
   25.40 +  void popcntq(Register dst, Register src);
   25.41 +#endif
   25.42 +
   25.43    // Prefetches (SSE, SSE2, 3DNOW only)
   25.44  
   25.45    void prefetchnta(Address src);
   25.46 @@ -1647,6 +1660,9 @@
   25.47    void decode_heap_oop_not_null(Register dst, Register src);
   25.48  
   25.49    void set_narrow_oop(Register dst, jobject obj);
   25.50 +  void set_narrow_oop(Address dst, jobject obj);
   25.51 +  void cmp_narrow_oop(Register dst, jobject obj);
   25.52 +  void cmp_narrow_oop(Address dst, jobject obj);
   25.53  
   25.54    // if heap base register is used - reinit it with the correct value
   25.55    void reinit_heapbase();
   25.56 @@ -1791,6 +1807,40 @@
   25.57                                 Register scan_temp,
   25.58                                 Label& no_such_interface);
   25.59  
   25.60 +  // Test sub_klass against super_klass, with fast and slow paths.
   25.61 +
   25.62 +  // The fast path produces a tri-state answer: yes / no / maybe-slow.
   25.63 +  // One of the three labels can be NULL, meaning take the fall-through.
   25.64 +  // If super_check_offset is -1, the value is loaded up from super_klass.
   25.65 +  // No registers are killed, except temp_reg.
   25.66 +  void check_klass_subtype_fast_path(Register sub_klass,
   25.67 +                                     Register super_klass,
   25.68 +                                     Register temp_reg,
   25.69 +                                     Label* L_success,
   25.70 +                                     Label* L_failure,
   25.71 +                                     Label* L_slow_path,
   25.72 +                RegisterConstant super_check_offset = RegisterConstant(-1));
   25.73 +
   25.74 +  // The rest of the type check; must be wired to a corresponding fast path.
   25.75 +  // It does not repeat the fast path logic, so don't use it standalone.
   25.76 +  // The temp_reg and temp2_reg can be noreg, if no temps are available.
   25.77 +  // Updates the sub's secondary super cache as necessary.
   25.78 +  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
   25.79 +  void check_klass_subtype_slow_path(Register sub_klass,
   25.80 +                                     Register super_klass,
   25.81 +                                     Register temp_reg,
   25.82 +                                     Register temp2_reg,
   25.83 +                                     Label* L_success,
   25.84 +                                     Label* L_failure,
   25.85 +                                     bool set_cond_codes = false);
   25.86 +
   25.87 +  // Simplified, combined version, good for typical uses.
   25.88 +  // Falls through on failure.
   25.89 +  void check_klass_subtype(Register sub_klass,
   25.90 +                           Register super_klass,
   25.91 +                           Register temp_reg,
   25.92 +                           Label& L_success);
   25.93 +
   25.94    //----
   25.95    void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
   25.96  
    26.1 --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Mar 20 22:08:48 2009 -0400
    26.2 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Mar 23 10:42:20 2009 -0400
    26.3 @@ -1598,18 +1598,9 @@
    26.4  
    26.5      // get instance klass
    26.6      __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
    26.7 -    // get super_check_offset
    26.8 -    __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
    26.9 -    // See if we get an immediate positive hit
   26.10 -    __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
   26.11 -    __ jcc(Assembler::equal, done);
   26.12 -    // check for immediate negative hit
   26.13 -    __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   26.14 -    __ jcc(Assembler::notEqual, *stub->entry());
   26.15 -    // check for self
   26.16 -    __ cmpptr(klass_RInfo, k_RInfo);
   26.17 -    __ jcc(Assembler::equal, done);
   26.18 -
   26.19 +    // perform the fast part of the checking logic
   26.20 +    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
   26.21 +    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   26.22      __ push(klass_RInfo);
   26.23      __ push(k_RInfo);
   26.24      __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
   26.25 @@ -1735,17 +1726,9 @@
   26.26          }
   26.27          __ bind(done);
   26.28        } else {
   26.29 -        __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
   26.30 -        // See if we get an immediate positive hit
   26.31 -        __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
   26.32 -        __ jcc(Assembler::equal, done);
   26.33 -        // check for immediate negative hit
   26.34 -        __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   26.35 -        __ jcc(Assembler::notEqual, *stub->entry());
   26.36 -        // check for self
   26.37 -        __ cmpptr(klass_RInfo, k_RInfo);
   26.38 -        __ jcc(Assembler::equal, done);
   26.39 -
   26.40 +        // perform the fast part of the checking logic
   26.41 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
   26.42 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   26.43          __ push(klass_RInfo);
   26.44          __ push(k_RInfo);
   26.45          __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
   26.46 @@ -1821,23 +1804,15 @@
   26.47            __ pop(dst);
   26.48            __ jmp(done);
   26.49          }
   26.50 -      } else {
   26.51 -#else
   26.52 -      { // YUCK
   26.53 +      }
   26.54 +        else // next block is unconditional if LP64:
   26.55  #endif // LP64
   26.56 +      {
   26.57          assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
   26.58  
   26.59 -        __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
   26.60 -        // See if we get an immediate positive hit
   26.61 -        __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1));
   26.62 -        __ jcc(Assembler::equal, one);
   26.63 -        // check for immediate negative hit
   26.64 -        __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   26.65 -        __ jcc(Assembler::notEqual, zero);
   26.66 -        // check for self
   26.67 -        __ cmpptr(klass_RInfo, k_RInfo);
   26.68 -        __ jcc(Assembler::equal, one);
   26.69 -
   26.70 +        // perform the fast part of the checking logic
   26.71 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
   26.72 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   26.73          __ push(klass_RInfo);
   26.74          __ push(k_RInfo);
   26.75          __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
    27.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Mar 20 22:08:48 2009 -0400
    27.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Mon Mar 23 10:42:20 2009 -0400
    27.3 @@ -1354,6 +1354,13 @@
    27.4  
    27.5      case slow_subtype_check_id:
    27.6        {
    27.7 +        // Typical calling sequence:
    27.8 +        // __ push(klass_RInfo);  // object klass or other subclass
    27.9 +        // __ push(sup_k_RInfo);  // array element klass or other superclass
   27.10 +        // __ call(slow_subtype_check);
   27.11 +        // Note that the subclass is pushed first, and is therefore deepest.
   27.12 +        // Previous versions of this code reversed the names 'sub' and 'super'.
   27.13 +        // This was operationally harmless but made the code unreadable.
   27.14          enum layout {
   27.15            rax_off, SLOT2(raxH_off)
   27.16            rcx_off, SLOT2(rcxH_off)
   27.17 @@ -1361,9 +1368,10 @@
   27.18            rdi_off, SLOT2(rdiH_off)
   27.19            // saved_rbp_off, SLOT2(saved_rbpH_off)
   27.20            return_off, SLOT2(returnH_off)
   27.21 -          sub_off, SLOT2(subH_off)
   27.22 -          super_off, SLOT2(superH_off)
   27.23 -          framesize
   27.24 +          sup_k_off, SLOT2(sup_kH_off)
   27.25 +          klass_off, SLOT2(klassH_off)
   27.26 +          framesize,
   27.27 +          result_off = klass_off  // deepest argument is also the return value
   27.28          };
   27.29  
   27.30          __ set_info("slow_subtype_check", dont_gc_arguments);
   27.31 @@ -1373,19 +1381,14 @@
   27.32          __ push(rax);
   27.33  
   27.34          // This is called by pushing args and not with C abi
   27.35 -        __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super
   27.36 -        __ movptr(rax, Address(rsp, (sub_off  ) * VMRegImpl::stack_slot_size)); // sub
   27.37 -
   27.38 -        __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
   27.39 -        // since size is postive movl does right thing on 64bit
   27.40 -        __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   27.41 -        __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   27.42 +        __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
   27.43 +        __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
   27.44  
   27.45          Label miss;
   27.46 -        __ repne_scan();
   27.47 -        __ jcc(Assembler::notEqual, miss);
   27.48 -        __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax);
   27.49 -        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result
   27.50 +        __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
   27.51 +
   27.52 +        // fallthrough on success:
   27.53 +        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
   27.54          __ pop(rax);
   27.55          __ pop(rcx);
   27.56          __ pop(rsi);
   27.57 @@ -1393,7 +1396,7 @@
   27.58          __ ret(0);
   27.59  
   27.60          __ bind(miss);
   27.61 -        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
   27.62 +        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
   27.63          __ pop(rax);
   27.64          __ pop(rcx);
   27.65          __ pop(rsi);
    28.1 --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Mar 20 22:08:48 2009 -0400
    28.2 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Mon Mar 23 10:42:20 2009 -0400
    28.3 @@ -219,47 +219,16 @@
    28.4    // Resets EDI to locals.  Register sub_klass cannot be any of the above.
    28.5  void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
    28.6    assert( Rsub_klass != rax, "rax, holds superklass" );
    28.7 -  assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" );
    28.8 -  assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" );
    28.9 -  Label not_subtype, loop;
   28.10 +  assert( Rsub_klass != rcx, "used as a temp" );
   28.11 +  assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
   28.12  
   28.13    // Profile the not-null value's klass.
   28.14 -  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
   28.15 +  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
   28.16  
   28.17 -  // Load the super-klass's check offset into ECX
   28.18 -  movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) );
   28.19 -  // Load from the sub-klass's super-class display list, or a 1-word cache of
   28.20 -  // the secondary superclass list, or a failing value with a sentinel offset
   28.21 -  // if the super-klass is an interface or exceptionally deep in the Java
   28.22 -  // hierarchy and we have to scan the secondary superclass list the hard way.
   28.23 -  // See if we get an immediate positive hit
   28.24 -  cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
   28.25 -  jcc( Assembler::equal,ok_is_subtype );
   28.26 +  // Do the check.
   28.27 +  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
   28.28  
   28.29 -  // Check for immediate negative hit
   28.30 -  cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   28.31 -  jcc( Assembler::notEqual, not_subtype );
   28.32 -  // Check for self
   28.33 -  cmpptr( Rsub_klass, rax );
   28.34 -  jcc( Assembler::equal, ok_is_subtype );
   28.35 -
   28.36 -  // Now do a linear scan of the secondary super-klass chain.
   28.37 -  movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
   28.38 -  // EDI holds the objArrayOop of secondary supers.
   28.39 -  movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
   28.40 -  // Skip to start of data; also clear Z flag incase ECX is zero
   28.41 -  addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
   28.42 -  // Scan ECX words at [EDI] for occurance of EAX
   28.43 -  // Set NZ/Z based on last compare
   28.44 -  repne_scan();
   28.45 -  restore_locals();           // Restore EDI; Must not blow flags
   28.46 -  // Not equal?
   28.47 -  jcc( Assembler::notEqual, not_subtype );
   28.48 -  // Must be equal but missed in cache.  Update cache.
   28.49 -  movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
   28.50 -  jmp( ok_is_subtype );
   28.51 -
   28.52 -  bind(not_subtype);
   28.53 +  // Profile the failure of the check.
   28.54    profile_typecheck_failed(rcx); // blows rcx
   28.55  }
   28.56  
    29.1 --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Mar 20 22:08:48 2009 -0400
    29.2 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Mon Mar 23 10:42:20 2009 -0400
    29.3 @@ -232,65 +232,13 @@
    29.4    assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
    29.5    assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
    29.6  
    29.7 -  Label not_subtype, not_subtype_pop, loop;
    29.8 +  // Profile the not-null value's klass.
    29.9 +  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
   29.10  
   29.11 -  // Profile the not-null value's klass.
   29.12 -  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
   29.13 +  // Do the check.
   29.14 +  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
   29.15  
   29.16 -  // Load the super-klass's check offset into rcx
   29.17 -  movl(rcx, Address(rax, sizeof(oopDesc) +
   29.18 -                    Klass::super_check_offset_offset_in_bytes()));
   29.19 -  // Load from the sub-klass's super-class display list, or a 1-word
   29.20 -  // cache of the secondary superclass list, or a failing value with a
   29.21 -  // sentinel offset if the super-klass is an interface or
   29.22 -  // exceptionally deep in the Java hierarchy and we have to scan the
   29.23 -  // secondary superclass list the hard way.  See if we get an
   29.24 -  // immediate positive hit
   29.25 -  cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
   29.26 -  jcc(Assembler::equal,ok_is_subtype);
   29.27 -
   29.28 -  // Check for immediate negative hit
   29.29 -  cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   29.30 -  jcc( Assembler::notEqual, not_subtype );
   29.31 -  // Check for self
   29.32 -  cmpptr(Rsub_klass, rax);
   29.33 -  jcc(Assembler::equal, ok_is_subtype);
   29.34 -
   29.35 -  // Now do a linear scan of the secondary super-klass chain.
   29.36 -  movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
   29.37 -                      Klass::secondary_supers_offset_in_bytes()));
   29.38 -  // rdi holds the objArrayOop of secondary supers.
   29.39 -  // Load the array length
   29.40 -  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   29.41 -  // Skip to start of data; also clear Z flag incase rcx is zero
   29.42 -  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   29.43 -  // Scan rcx words at [rdi] for occurance of rax
   29.44 -  // Set NZ/Z based on last compare
   29.45 -
   29.46 -  // this part is kind tricky, as values in supers array could be 32 or 64 bit wide
   29.47 -  // and we store values in objArrays always encoded, thus we need to encode value
   29.48 -  // before repne
   29.49 -  if (UseCompressedOops) {
   29.50 -    push(rax);
   29.51 -    encode_heap_oop(rax);
   29.52 -    repne_scanl();
   29.53 -    // Not equal?
   29.54 -    jcc(Assembler::notEqual, not_subtype_pop);
   29.55 -    // restore heap oop here for movq
   29.56 -    pop(rax);
   29.57 -  } else {
   29.58 -    repne_scan();
   29.59 -    jcc(Assembler::notEqual, not_subtype);
   29.60 -  }
   29.61 -  // Must be equal but missed in cache.  Update cache.
   29.62 -  movptr(Address(Rsub_klass, sizeof(oopDesc) +
   29.63 -               Klass::secondary_super_cache_offset_in_bytes()), rax);
   29.64 -  jmp(ok_is_subtype);
   29.65 -
   29.66 -  bind(not_subtype_pop);
   29.67 -  // restore heap oop here for miss
   29.68 -  if (UseCompressedOops) pop(rax);
   29.69 -  bind(not_subtype);
   29.70 +  // Profile the failure of the check.
   29.71    profile_typecheck_failed(rcx); // blows rcx
   29.72  }
   29.73  
    30.1 --- a/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Fri Mar 20 22:08:48 2009 -0400
    30.2 +++ b/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Mon Mar 23 10:42:20 2009 -0400
    30.3 @@ -349,7 +349,7 @@
    30.4  
    30.5      if (_num_args < Argument::n_float_register_parameters_c-1) {
    30.6        *_reg_args++ = from_obj;
    30.7 -      *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float
    30.8 +      *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float
    30.9        _num_args++;
   30.10      } else {
   30.11        *_to++ = from_obj;
   30.12 @@ -364,7 +364,7 @@
   30.13  
   30.14      if (_num_args < Argument::n_float_register_parameters_c-1) {
   30.15        *_reg_args++ = from_obj;
   30.16 -      *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double
   30.17 +      *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double
   30.18        _num_args++;
   30.19      } else {
   30.20        *_to++ = from_obj;
    31.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Mar 20 22:08:48 2009 -0400
    31.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Mar 23 10:42:20 2009 -0400
    31.3 @@ -1310,81 +1310,51 @@
    31.4                             Address& super_check_offset_addr,
    31.5                             Address& super_klass_addr,
    31.6                             Register temp,
    31.7 -                           Label* L_success_ptr, Label* L_failure_ptr) {
    31.8 +                           Label* L_success, Label* L_failure) {
    31.9      BLOCK_COMMENT("type_check:");
   31.10  
   31.11      Label L_fallthrough;
   31.12 -    bool fall_through_on_success = (L_success_ptr == NULL);
   31.13 -    if (fall_through_on_success) {
   31.14 -      L_success_ptr = &L_fallthrough;
   31.15 -    } else {
   31.16 -      L_failure_ptr = &L_fallthrough;
   31.17 -    }
   31.18 -    Label& L_success = *L_success_ptr;
   31.19 -    Label& L_failure = *L_failure_ptr;
   31.20 +#define LOCAL_JCC(assembler_con, label_ptr)                             \
   31.21 +    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
   31.22 +    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
   31.23  
   31.24 +    // The following is a strange variation of the fast path which requires
   31.25 +    // one less register, because needed values are on the argument stack.
   31.26 +    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
   31.27 +    //                                  L_success, L_failure, NULL);
   31.28      assert_different_registers(sub_klass, temp);
   31.29  
   31.30 -    // a couple of useful fields in sub_klass:
   31.31 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
   31.32 -                     Klass::secondary_supers_offset_in_bytes());
   31.33      int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   31.34                       Klass::secondary_super_cache_offset_in_bytes());
   31.35 -    Address secondary_supers_addr(sub_klass, ss_offset);
   31.36 -    Address super_cache_addr(     sub_klass, sc_offset);
   31.37  
   31.38      // if the pointers are equal, we are done (e.g., String[] elements)
   31.39      __ cmpptr(sub_klass, super_klass_addr);
   31.40 -    __ jcc(Assembler::equal, L_success);
   31.41 +    LOCAL_JCC(Assembler::equal, L_success);
   31.42  
   31.43      // check the supertype display:
   31.44      __ movl2ptr(temp, super_check_offset_addr);
   31.45      Address super_check_addr(sub_klass, temp, Address::times_1, 0);
   31.46      __ movptr(temp, super_check_addr); // load displayed supertype
   31.47      __ cmpptr(temp, super_klass_addr); // test the super type
   31.48 -    __ jcc(Assembler::equal, L_success);
   31.49 +    LOCAL_JCC(Assembler::equal, L_success);
   31.50  
   31.51      // if it was a primary super, we can just fail immediately
   31.52      __ cmpl(super_check_offset_addr, sc_offset);
   31.53 -    __ jcc(Assembler::notEqual, L_failure);
   31.54 +    LOCAL_JCC(Assembler::notEqual, L_failure);
   31.55  
   31.56 -    // Now do a linear scan of the secondary super-klass chain.
   31.57 -    // This code is rarely used, so simplicity is a virtue here.
   31.58 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   31.59 -    {
   31.60 -      // The repne_scan instruction uses fixed registers, which we must spill.
   31.61 -      // (We need a couple more temps in any case.)
   31.62 -      __ push(rax);
   31.63 -      __ push(rcx);
   31.64 -      __ push(rdi);
   31.65 -      assert_different_registers(sub_klass, rax, rcx, rdi);
   31.66 +    // The repne_scan instruction uses fixed registers, which will get spilled.
   31.67 +    // We happen to know this works best when super_klass is in rax.
   31.68 +    Register super_klass = temp;
   31.69 +    __ movptr(super_klass, super_klass_addr);
   31.70 +    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
   31.71 +                                     L_success, L_failure);
   31.72  
   31.73 -      __ movptr(rdi, secondary_supers_addr);
   31.74 -      // Load the array length.
   31.75 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   31.76 -      // Skip to start of data.
   31.77 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   31.78 -      // Scan rcx words at [edi] for occurance of rax,
   31.79 -      // Set NZ/Z based on last compare
   31.80 -      __ movptr(rax, super_klass_addr);
   31.81 -      __ repne_scan();
   31.82 +    __ bind(L_fallthrough);
   31.83  
   31.84 -      // Unspill the temp. registers:
   31.85 -      __ pop(rdi);
   31.86 -      __ pop(rcx);
   31.87 -      __ pop(rax);
   31.88 -    }
   31.89 -    __ jcc(Assembler::notEqual, L_failure);
   31.90 +    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
   31.91 +    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
   31.92  
   31.93 -    // Success.  Cache the super we found and proceed in triumph.
   31.94 -    __ movptr(temp, super_klass_addr); // note: rax, is dead
   31.95 -    __ movptr(super_cache_addr, temp);
   31.96 -
   31.97 -    if (!fall_through_on_success)
   31.98 -      __ jmp(L_success);
   31.99 -
  31.100 -    // Fall through on failure!
  31.101 -    __ bind(L_fallthrough);
  31.102 +#undef LOCAL_JCC
  31.103    }
  31.104  
  31.105    //
    32.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Mar 20 22:08:48 2009 -0400
    32.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Mar 23 10:42:20 2009 -0400
    32.3 @@ -2091,66 +2091,9 @@
    32.4  
    32.5      Label L_miss;
    32.6  
    32.7 -    // a couple of useful fields in sub_klass:
    32.8 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
    32.9 -                     Klass::secondary_supers_offset_in_bytes());
   32.10 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   32.11 -                     Klass::secondary_super_cache_offset_in_bytes());
   32.12 -    Address secondary_supers_addr(sub_klass, ss_offset);
   32.13 -    Address super_cache_addr(     sub_klass, sc_offset);
   32.14 -
   32.15 -    // if the pointers are equal, we are done (e.g., String[] elements)
   32.16 -    __ cmpptr(super_klass, sub_klass);
   32.17 -    __ jcc(Assembler::equal, L_success);
   32.18 -
   32.19 -    // check the supertype display:
   32.20 -    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
   32.21 -    __ cmpptr(super_klass, super_check_addr); // test the super type
   32.22 -    __ jcc(Assembler::equal, L_success);
   32.23 -
   32.24 -    // if it was a primary super, we can just fail immediately
   32.25 -    __ cmpl(super_check_offset, sc_offset);
   32.26 -    __ jcc(Assembler::notEqual, L_miss);
   32.27 -
   32.28 -    // Now do a linear scan of the secondary super-klass chain.
   32.29 -    // The repne_scan instruction uses fixed registers, which we must spill.
   32.30 -    // (We need a couple more temps in any case.)
   32.31 -    // This code is rarely used, so simplicity is a virtue here.
   32.32 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   32.33 -    {
   32.34 -      __ push(rax);
   32.35 -      __ push(rcx);
   32.36 -      __ push(rdi);
   32.37 -      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
   32.38 -
   32.39 -      __ movptr(rdi, secondary_supers_addr);
   32.40 -      // Load the array length.
   32.41 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   32.42 -      // Skip to start of data.
   32.43 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   32.44 -      // Scan rcx words at [rdi] for occurance of rax
   32.45 -      // Set NZ/Z based on last compare
   32.46 -      __ movptr(rax, super_klass);
   32.47 -      if (UseCompressedOops) {
   32.48 -        // Compare against compressed form.  Don't need to uncompress because
   32.49 -        // looks like orig rax is restored in popq below.
   32.50 -        __ encode_heap_oop(rax);
   32.51 -        __ repne_scanl();
   32.52 -      } else {
   32.53 -        __ repne_scan();
   32.54 -      }
   32.55 -
   32.56 -      // Unspill the temp. registers:
   32.57 -      __ pop(rdi);
   32.58 -      __ pop(rcx);
   32.59 -      __ pop(rax);
   32.60 -
   32.61 -      __ jcc(Assembler::notEqual, L_miss);
   32.62 -    }
   32.63 -
   32.64 -    // Success.  Cache the super we found and proceed in triumph.
   32.65 -    __ movptr(super_cache_addr, super_klass); // note: rax is dead
   32.66 -    __ jmp(L_success);
   32.67 +    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
   32.68 +                                     super_check_offset);
   32.69 +    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
   32.70  
   32.71      // Fall through on failure!
   32.72      __ BIND(L_miss);
    33.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Fri Mar 20 22:08:48 2009 -0400
    33.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Mon Mar 23 10:42:20 2009 -0400
    33.3 @@ -284,7 +284,7 @@
    33.4    }
    33.5  
    33.6    char buf[256];
    33.7 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    33.8 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    33.9                 cores_per_cpu(), threads_per_core(),
   33.10                 cpu_family(), _model, _stepping,
   33.11                 (supports_cmov() ? ", cmov" : ""),
   33.12 @@ -297,6 +297,7 @@
   33.13                 (supports_ssse3()? ", ssse3": ""),
   33.14                 (supports_sse4_1() ? ", sse4.1" : ""),
   33.15                 (supports_sse4_2() ? ", sse4.2" : ""),
   33.16 +               (supports_popcnt() ? ", popcnt" : ""),
   33.17                 (supports_mmx_ext() ? ", mmxext" : ""),
   33.18                 (supports_3dnow()   ? ", 3dnow"  : ""),
   33.19                 (supports_3dnow2()  ? ", 3dnowext" : ""),
   33.20 @@ -410,6 +411,13 @@
   33.21      }
   33.22    }
   33.23  
   33.24 +  // Use population count instruction if available.
   33.25 +  if (supports_popcnt()) {
   33.26 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
   33.27 +      UsePopCountInstruction = true;
   33.28 +    }
   33.29 +  }
   33.30 +
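Because the block is guarded by FLAG_IS_DEFAULT, an explicit command-line setting always wins; for example, popcount can still be disabled on capable hardware:

    java -XX:-UsePopCountInstruction ...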
   33.31    assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   33.32    assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
   33.33  
    34.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    34.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    34.3 @@ -70,7 +70,9 @@
    34.4                 dca      : 1,
    34.5                 sse4_1   : 1,
    34.6                 sse4_2   : 1,
    34.7 -                        : 11;
    34.8 +                        : 2,
    34.9 +               popcnt   : 1,
   34.10 +                        : 8;
   34.11      } bits;
   34.12    };
   34.13  
   34.14 @@ -179,7 +181,8 @@
   34.15       CPU_SSSE3  = (1 << 9),
   34.16       CPU_SSE4A  = (1 << 10),
   34.17       CPU_SSE4_1 = (1 << 11),
   34.18 -     CPU_SSE4_2 = (1 << 12)
   34.19 +     CPU_SSE4_2 = (1 << 12),
   34.20 +     CPU_POPCNT = (1 << 13)
   34.21     } cpuFeatureFlags;
   34.22  
   34.23    // cpuid information block.  All info derived from executing cpuid with
   34.24 @@ -290,6 +293,8 @@
   34.25        result |= CPU_SSE4_1;
   34.26      if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
   34.27        result |= CPU_SSE4_2;
   34.28 +    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
   34.29 +      result |= CPU_POPCNT;
   34.30      return result;
   34.31    }
   34.32  
   34.33 @@ -379,6 +384,7 @@
   34.34    static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   34.35    static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   34.36    static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   34.37 +  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
   34.38    //
   34.39    // AMD features
   34.40    //
    35.1 --- a/src/cpu/x86/vm/x86_32.ad	Fri Mar 20 22:08:48 2009 -0400
    35.2 +++ b/src/cpu/x86/vm/x86_32.ad	Mon Mar 23 10:42:20 2009 -0400
    35.3 @@ -1483,16 +1483,20 @@
    35.4    // main source block for now.  In future, we can generalize this by
    35.5    // adding a syntax that specifies the sizes of fields in an order,
    35.6    // so that the adlc can build the emit functions automagically
    35.7 -  enc_class OpcP %{             // Emit opcode
    35.8 -    emit_opcode(cbuf,$primary);
    35.9 -  %}
   35.10 -
   35.11 -  enc_class OpcS %{             // Emit opcode
   35.12 -    emit_opcode(cbuf,$secondary);
   35.13 -  %}
   35.14 -
   35.15 -  enc_class Opcode(immI d8 ) %{ // Emit opcode
   35.16 -    emit_opcode(cbuf,$d8$$constant);
   35.17 +
   35.18 +  // Emit primary opcode
   35.19 +  enc_class OpcP %{
   35.20 +    emit_opcode(cbuf, $primary);
   35.21 +  %}
   35.22 +
   35.23 +  // Emit secondary opcode
   35.24 +  enc_class OpcS %{
   35.25 +    emit_opcode(cbuf, $secondary);
   35.26 +  %}
   35.27 +
   35.28 +  // Emit opcode directly
   35.29 +  enc_class Opcode(immI d8) %{
   35.30 +    emit_opcode(cbuf, $d8$$constant);
   35.31    %}
   35.32  
   35.33    enc_class SizePrefix %{
   35.34 @@ -1688,26 +1692,15 @@
   35.35      Register Reax = as_Register(EAX_enc); // super class
   35.36      Register Recx = as_Register(ECX_enc); // killed
   35.37      Register Resi = as_Register(ESI_enc); // sub class
   35.38 -    Label hit, miss;
   35.39 +    Label miss;
   35.40  
   35.41      MacroAssembler _masm(&cbuf);
   35.42 -    // Compare super with sub directly, since super is not in its own SSA.
   35.43 -    // The compiler used to emit this test, but we fold it in here,
   35.44 -    // to allow platform-specific tweaking on sparc.
   35.45 -    __ cmpptr(Reax, Resi);
   35.46 -    __ jcc(Assembler::equal, hit);
   35.47 -#ifndef PRODUCT
   35.48 -    __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
   35.49 -#endif //PRODUCT
   35.50 -    __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
   35.51 -    __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
   35.52 -    __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   35.53 -    __ repne_scan();
   35.54 -    __ jcc(Assembler::notEqual, miss);
   35.55 -    __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
   35.56 -    __ bind(hit);
   35.57 -    if( $primary )
   35.58 -      __ xorptr(Redi,Redi);
   35.59 +    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
   35.60 +                                     NULL, &miss,
   35.61 +                                     /*set_cond_codes:*/ true);
   35.62 +    if ($primary) {
   35.63 +      __ xorptr(Redi, Redi);
   35.64 +    }
   35.65      __ bind(miss);
   35.66    %}
   35.67  
   35.68 @@ -6387,6 +6380,67 @@
   35.69  %}
   35.70  
   35.71  
   35.72 +//---------- Population Count Instructions -------------------------------------
   35.73 +
   35.74 +instruct popCountI(eRegI dst, eRegI src) %{
   35.75 +  predicate(UsePopCountInstruction);
   35.76 +  match(Set dst (PopCountI src));
   35.77 +
   35.78 +  format %{ "POPCNT $dst, $src" %}
   35.79 +  ins_encode %{
   35.80 +    __ popcntl($dst$$Register, $src$$Register);
   35.81 +  %}
   35.82 +  ins_pipe(ialu_reg);
   35.83 +%}
   35.84 +
   35.85 +instruct popCountI_mem(eRegI dst, memory mem) %{
   35.86 +  predicate(UsePopCountInstruction);
   35.87 +  match(Set dst (PopCountI (LoadI mem)));
   35.88 +
   35.89 +  format %{ "POPCNT $dst, $mem" %}
   35.90 +  ins_encode %{
   35.91 +    __ popcntl($dst$$Register, $mem$$Address);
   35.92 +  %}
   35.93 +  ins_pipe(ialu_reg);
   35.94 +%}
   35.95 +
   35.96 +// Note: Long.bitCount(long) returns an int.
   35.97 +instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
   35.98 +  predicate(UsePopCountInstruction);
   35.99 +  match(Set dst (PopCountL src));
  35.100 +  effect(KILL cr, TEMP tmp, TEMP dst);
  35.101 +
  35.102 +  format %{ "POPCNT $dst, $src.lo\n\t"
  35.103 +            "POPCNT $tmp, $src.hi\n\t"
  35.104 +            "ADD    $dst, $tmp" %}
  35.105 +  ins_encode %{
  35.106 +    __ popcntl($dst$$Register, $src$$Register);
  35.107 +    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
  35.108 +    __ addl($dst$$Register, $tmp$$Register);
  35.109 +  %}
  35.110 +  ins_pipe(ialu_reg);
  35.111 +%}
  35.112 +
  35.113 +// Note: Long.bitCount(long) returns an int.
  35.114 +instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  35.115 +  predicate(UsePopCountInstruction);
  35.116 +  match(Set dst (PopCountL (LoadL mem)));
  35.117 +  effect(KILL cr, TEMP tmp, TEMP dst);
  35.118 +
  35.119 +  format %{ "POPCNT $dst, $mem\n\t"
  35.120 +            "POPCNT $tmp, $mem+4\n\t"
  35.121 +            "ADD    $dst, $tmp" %}
  35.122 +  ins_encode %{
  35.123 +    //__ popcntl($dst$$Register, $mem$$Address$$first);
  35.124 +    //__ popcntl($tmp$$Register, $mem$$Address$$second);
  35.125 +    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
  35.126 +    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
  35.127 +    __ addl($dst$$Register, $tmp$$Register);
  35.128 +  %}
  35.129 +  ins_pipe(ialu_reg);
  35.130 +%}
  35.131 +
  35.132 +
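Both long variants rely on the identity that a 64-bit population count equals the sum of the counts of its 32-bit halves. A hedged C++ sketch of the same computation, using a GCC builtin purely for illustration:

    #include <cstdint>

    // What the two POPCNTs plus the ADD compute for a 64-bit value.
    static inline int popcount64_split(uint64_t x) {
      return __builtin_popcount((uint32_t)x)           // POPCNT dst, lo half
           + __builtin_popcount((uint32_t)(x >> 32));  // POPCNT tmp, hi; ADD
    }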
  35.133  //----------Load/Store/Move Instructions---------------------------------------
  35.134  //----------Load Instructions--------------------------------------------------
  35.135  // Load Byte (8bit signed)
  35.136 @@ -12501,15 +12555,12 @@
  35.137    effect( KILL rcx, KILL cr );
  35.138  
  35.139    ins_cost(1100);  // slightly larger than the next version
  35.140 -  format %{ "CMPL   EAX,ESI\n\t"
  35.141 -            "JEQ,s  hit\n\t"
  35.142 -            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  35.143 +  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  35.144              "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
  35.145              "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
  35.146              "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
  35.147              "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
  35.148              "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
  35.149 -     "hit:\n\t"
  35.150              "XOR    $result,$result\t\t Hit: EDI zero\n\t"
  35.151       "miss:\t" %}
  35.152  
  35.153 @@ -12523,9 +12574,7 @@
  35.154    effect( KILL rcx, KILL result );
  35.155  
  35.156    ins_cost(1000);
  35.157 -  format %{ "CMPL   EAX,ESI\n\t"
  35.158 -            "JEQ,s  miss\t# Actually a hit; we are done.\n\t"
  35.159 -            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  35.160 +  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  35.161              "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
  35.162              "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
  35.163              "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
    36.1 --- a/src/cpu/x86/vm/x86_64.ad	Fri Mar 20 22:08:48 2009 -0400
    36.2 +++ b/src/cpu/x86/vm/x86_64.ad	Mon Mar 23 10:42:20 2009 -0400
    36.3 @@ -326,7 +326,6 @@
    36.4                           R9,  R9_H,
    36.5                           R10, R10_H,
    36.6                           R11, R11_H,
    36.7 -                         R12, R12_H,
    36.8                           R13, R13_H,
    36.9                           R14, R14_H);
   36.10  
   36.11 @@ -340,7 +339,6 @@
   36.12                           R9,  R9_H,
   36.13                           R10, R10_H,
   36.14                           R11, R11_H,
   36.15 -                         R12, R12_H,
   36.16                           R13, R13_H,
   36.17                           R14, R14_H);
   36.18  
   36.19 @@ -354,7 +352,6 @@
   36.20                               R9,  R9_H,
   36.21                               R10, R10_H,
   36.22                               R11, R11_H,
   36.23 -                             R12, R12_H,
   36.24                               R13, R13_H,
   36.25                               R14, R14_H);
   36.26  
   36.27 @@ -444,9 +441,6 @@
   36.28  // Singleton class for RDX long register
   36.29  reg_class long_rdx_reg(RDX, RDX_H);
   36.30  
   36.31 -// Singleton class for R12 long register
   36.32 -reg_class long_r12_reg(R12, R12_H);
   36.33 -
   36.34  // Class for all int registers (except RSP)
   36.35  reg_class int_reg(RAX,
   36.36                    RDX,
   36.37 @@ -1842,7 +1836,9 @@
   36.38  {
   36.39    if (UseCompressedOops) {
   36.40      st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
   36.41 -    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
   36.42 +    if (Universe::narrow_oop_shift() != 0) {
   36.43 +      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
   36.44 +    }
   36.45      st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
   36.46    } else {
   36.47      st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
   36.48 @@ -1891,7 +1887,11 @@
   36.49  uint MachUEPNode::size(PhaseRegAlloc* ra_) const
   36.50  {
   36.51    if (UseCompressedOops) {
   36.52 -    return OptoBreakpoint ? 19 : 20;
   36.53 +    if (Universe::narrow_oop_shift() == 0) {
   36.54 +      return OptoBreakpoint ? 15 : 16;
   36.55 +    } else {
   36.56 +      return OptoBreakpoint ? 19 : 20;
   36.57 +    }
   36.58    } else {
   36.59      return OptoBreakpoint ? 11 : 12;
   36.60    }
   36.61 @@ -2575,45 +2575,13 @@
   36.62      Register Rrax = as_Register(RAX_enc); // super class
   36.63      Register Rrcx = as_Register(RCX_enc); // killed
   36.64      Register Rrsi = as_Register(RSI_enc); // sub class
   36.65 -    Label hit, miss, cmiss;
   36.66 +    Label miss;
   36.67 +    const bool set_cond_codes = true;
   36.68  
   36.69      MacroAssembler _masm(&cbuf);
   36.70 -    // Compare super with sub directly, since super is not in its own SSA.
   36.71 -    // The compiler used to emit this test, but we fold it in here,
   36.72 -    // to allow platform-specific tweaking on sparc.
   36.73 -    __ cmpptr(Rrax, Rrsi);
   36.74 -    __ jcc(Assembler::equal, hit);
   36.75 -#ifndef PRODUCT
   36.76 -    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
   36.77 -    __ incrementl(Address(Rrcx, 0));
   36.78 -#endif //PRODUCT
   36.79 -    __ movptr(Rrdi, Address(Rrsi, 
   36.80 -                          sizeof(oopDesc) + 
   36.81 -                          Klass::secondary_supers_offset_in_bytes()));
   36.82 -    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
   36.83 -    __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   36.84 -    if (UseCompressedOops) {
   36.85 -      __ encode_heap_oop(Rrax);
   36.86 -      __ repne_scanl();
   36.87 -      __ jcc(Assembler::notEqual, cmiss);
   36.88 -      __ decode_heap_oop(Rrax);
   36.89 -      __ movptr(Address(Rrsi,
   36.90 -                      sizeof(oopDesc) +
   36.91 -                      Klass::secondary_super_cache_offset_in_bytes()),
   36.92 -              Rrax);
   36.93 -      __ jmp(hit);
   36.94 -      __ bind(cmiss);
   36.95 -      __ decode_heap_oop(Rrax);
   36.96 -      __ jmp(miss);
   36.97 -    } else {
   36.98 -      __ repne_scan();
   36.99 -      __ jcc(Assembler::notEqual, miss);
  36.100 -      __ movptr(Address(Rrsi,
  36.101 -                      sizeof(oopDesc) +
  36.102 -                      Klass::secondary_super_cache_offset_in_bytes()),
  36.103 -              Rrax);
  36.104 -    }
  36.105 -    __ bind(hit);
  36.106 +    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
  36.107 +                                     NULL, &miss,
  36.108 +                                     /*set_cond_codes:*/ true);
  36.109      if ($primary) {
  36.110        __ xorptr(Rrdi, Rrdi);
  36.111      }
  36.112 @@ -4906,15 +4874,6 @@
  36.113    interface(REG_INTER);
  36.114  %}
  36.115  
  36.116 -
  36.117 -operand r12RegL() %{
  36.118 -  constraint(ALLOC_IN_RC(long_r12_reg));
  36.119 -  match(RegL);
  36.120 -
  36.121 -  format %{ %}
  36.122 -  interface(REG_INTER);
  36.123 -%}
  36.124 -
  36.125  operand rRegN() %{
  36.126    constraint(ALLOC_IN_RC(int_reg));
  36.127    match(RegN);
  36.128 @@ -5289,21 +5248,6 @@
  36.129    %}
  36.130  %}
  36.131  
  36.132 -// Indirect Narrow Oop Plus Offset Operand
  36.133 -operand indNarrowOopOffset(rRegN src, immL32 off) %{
  36.134 -  constraint(ALLOC_IN_RC(ptr_reg));
  36.135 -  match(AddP (DecodeN src) off);
  36.136 -
  36.137 -  op_cost(10);
  36.138 -  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
  36.139 -  interface(MEMORY_INTER) %{
  36.140 -    base(0xc); // R12
  36.141 -    index($src);
  36.142 -    scale(0x3);
  36.143 -    disp($off);
  36.144 -  %}
  36.145 -%}
  36.146 -
  36.147  // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
  36.148  operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
  36.149  %{
  36.150 @@ -5321,6 +5265,158 @@
  36.151    %}
  36.152  %}
  36.153  
  36.154 +// Indirect Narrow Oop Plus Offset Operand
  36.155 +// Note: x86 doesn't support "scale * index + offset" addressing without a
  36.156 +// base register, so we can't free r12 even when Universe::narrow_oop_base() == NULL.
  36.157 +operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  36.158 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  36.159 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.160 +  match(AddP (DecodeN reg) off);
  36.161 +
  36.162 +  op_cost(10);
  36.163 +  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  36.164 +  interface(MEMORY_INTER) %{
  36.165 +    base(0xc); // R12
  36.166 +    index($reg);
  36.167 +    scale(0x3);
  36.168 +    disp($off);
  36.169 +  %}
  36.170 +%}
  36.171 +
  36.172 +// Indirect Memory Operand
  36.173 +operand indirectNarrow(rRegN reg)
  36.174 +%{
  36.175 +  predicate(Universe::narrow_oop_shift() == 0);
  36.176 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.177 +  match(DecodeN reg);
  36.178 +
  36.179 +  format %{ "[$reg]" %}
  36.180 +  interface(MEMORY_INTER) %{
  36.181 +    base($reg);
  36.182 +    index(0x4);
  36.183 +    scale(0x0);
  36.184 +    disp(0x0);
  36.185 +  %}
  36.186 +%}
  36.187 +
  36.188 +// Indirect Memory Plus Short Offset Operand
  36.189 +operand indOffset8Narrow(rRegN reg, immL8 off)
  36.190 +%{
  36.191 +  predicate(Universe::narrow_oop_shift() == 0);
  36.192 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.193 +  match(AddP (DecodeN reg) off);
  36.194 +
  36.195 +  format %{ "[$reg + $off (8-bit)]" %}
  36.196 +  interface(MEMORY_INTER) %{
  36.197 +    base($reg);
  36.198 +    index(0x4);
  36.199 +    scale(0x0);
  36.200 +    disp($off);
  36.201 +  %}
  36.202 +%}
  36.203 +
  36.204 +// Indirect Memory Plus Long Offset Operand
  36.205 +operand indOffset32Narrow(rRegN reg, immL32 off)
  36.206 +%{
  36.207 +  predicate(Universe::narrow_oop_shift() == 0);
  36.208 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.209 +  match(AddP (DecodeN reg) off);
  36.210 +
  36.211 +  format %{ "[$reg + $off (32-bit)]" %}
  36.212 +  interface(MEMORY_INTER) %{
  36.213 +    base($reg);
  36.214 +    index(0x4);
  36.215 +    scale(0x0);
  36.216 +    disp($off);
  36.217 +  %}
  36.218 +%}
  36.219 +
  36.220 +// Indirect Memory Plus Index Register Plus Offset Operand
  36.221 +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
  36.222 +%{
  36.223 +  predicate(Universe::narrow_oop_shift() == 0);
  36.224 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.225 +  match(AddP (AddP (DecodeN reg) lreg) off);
  36.226 +
  36.227 +  op_cost(10);
  36.228 +  format %{"[$reg + $off + $lreg]" %}
  36.229 +  interface(MEMORY_INTER) %{
  36.230 +    base($reg);
  36.231 +    index($lreg);
  36.232 +    scale(0x0);
  36.233 +    disp($off);
  36.234 +  %}
  36.235 +%}
  36.236 +
  36.237 +// Indirect Memory Plus Index Register Operand
  36.238 +operand indIndexNarrow(rRegN reg, rRegL lreg)
  36.239 +%{
  36.240 +  predicate(Universe::narrow_oop_shift() == 0);
  36.241 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.242 +  match(AddP (DecodeN reg) lreg);
  36.243 +
  36.244 +  op_cost(10);
  36.245 +  format %{"[$reg + $lreg]" %}
  36.246 +  interface(MEMORY_INTER) %{
  36.247 +    base($reg);
  36.248 +    index($lreg);
  36.249 +    scale(0x0);
  36.250 +    disp(0x0);
  36.251 +  %}
  36.252 +%}
  36.253 +
  36.254 +// Indirect Memory Times Scale Plus Index Register
  36.255 +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
  36.256 +%{
  36.257 +  predicate(Universe::narrow_oop_shift() == 0);
  36.258 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.259 +  match(AddP (DecodeN reg) (LShiftL lreg scale));
  36.260 +
  36.261 +  op_cost(10);
  36.262 +  format %{"[$reg + $lreg << $scale]" %}
  36.263 +  interface(MEMORY_INTER) %{
  36.264 +    base($reg);
  36.265 +    index($lreg);
  36.266 +    scale($scale);
  36.267 +    disp(0x0);
  36.268 +  %}
  36.269 +%}
  36.270 +
  36.271 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
  36.272 +operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
  36.273 +%{
  36.274 +  predicate(Universe::narrow_oop_shift() == 0);
  36.275 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.276 +  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  36.277 +
  36.278 +  op_cost(10);
  36.279 +  format %{"[$reg + $off + $lreg << $scale]" %}
  36.280 +  interface(MEMORY_INTER) %{
  36.281 +    base($reg);
  36.282 +    index($lreg);
  36.283 +    scale($scale);
  36.284 +    disp($off);
  36.285 +  %}
  36.286 +%}
  36.287 +
  36.288 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
  36.289 +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
  36.290 +%{
  36.291 +  constraint(ALLOC_IN_RC(ptr_reg));
  36.292 +  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  36.293 +  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
  36.294 +
  36.295 +  op_cost(10);
  36.296 +  format %{"[$reg + $off + $idx << $scale]" %}
  36.297 +  interface(MEMORY_INTER) %{
  36.298 +    base($reg);
  36.299 +    index($idx);
  36.300 +    scale($scale);
  36.301 +    disp($off);
  36.302 +  %}
  36.303 +%}
  36.304 +
  36.305 +
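
The added operands split compressed-oop addressing into two cases: indCompressedOopOffset
keeps R12 as the heap base when the oop shift is non-zero, while the *Narrow operands use
the 32-bit compressed value directly as the base register once Universe::narrow_oop_shift()
is zero (zero-based compressed oops). A minimal C sketch of the two effective-address
computations, not HotSpot code:

    #include <stdint.h>
    /* [R12 + $reg << 3 + $off]: the base register holds the heap base, shift is 3. */
    static uintptr_t addr_with_base(uintptr_t r12_heapbase, uint32_t narrow, int32_t off) {
      return r12_heapbase + ((uintptr_t)narrow << 3) + off;
    }
    /* [$reg + $off]: with a zero base and zero shift, the narrow oop is the address. */
    static uintptr_t addr_zero_based(uint32_t narrow, int32_t off) {
      return (uintptr_t)narrow + off;
    }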
  36.306  //----------Special Memory Operands--------------------------------------------
  36.307  // Stack Slot Operand - This operand is used for loading and storing temporary
  36.308  //                      values on the stack where a match requires a value to
  36.309 @@ -5488,7 +5584,10 @@
  36.310  
  36.311  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
  36.312                 indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
  36.313 -               indNarrowOopOffset);
  36.314 +               indCompressedOopOffset,
  36.315 +               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
  36.316 +               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
  36.317 +               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
  36.318  
  36.319  //----------PIPELINE-----------------------------------------------------------
  36.320  // Rules which define the behavior of the target architectures pipeline.
  36.321 @@ -6234,9 +6333,7 @@
  36.322     ins_cost(125); // XXX
  36.323     format %{ "movl    $dst, $mem\t# compressed ptr" %}
  36.324     ins_encode %{
  36.325 -     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  36.326 -     Register dst = as_Register($dst$$reg);
  36.327 -     __ movl(dst, addr);
  36.328 +     __ movl($dst$$Register, $mem$$Address);
  36.329     %}
  36.330     ins_pipe(ialu_reg_mem); // XXX
  36.331  %}
  36.332 @@ -6262,9 +6359,7 @@
  36.333    ins_cost(125); // XXX
  36.334    format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  36.335    ins_encode %{
  36.336 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  36.337 -    Register dst = as_Register($dst$$reg);
  36.338 -    __ movl(dst, addr);
  36.339 +    __ movl($dst$$Register, $mem$$Address);
  36.340    %}
  36.341    ins_pipe(ialu_reg_mem); // XXX
  36.342  %}
  36.343 @@ -6418,6 +6513,102 @@
  36.344    ins_pipe(ialu_reg_reg_fat);
  36.345  %}
  36.346  
  36.347 +instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
  36.348 +%{
  36.349 +  match(Set dst mem);
  36.350 +
  36.351 +  ins_cost(110);
  36.352 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  36.353 +  opcode(0x8D);
  36.354 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.355 +  ins_pipe(ialu_reg_reg_fat);
  36.356 +%}
  36.357 +
  36.358 +// Load Effective Address using a narrow (32-bit) oop
  36.359 +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
  36.360 +%{
  36.361 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  36.362 +  match(Set dst mem);
  36.363 +
  36.364 +  ins_cost(110);
  36.365 +  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  36.366 +  opcode(0x8D);
  36.367 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.368 +  ins_pipe(ialu_reg_reg_fat);
  36.369 +%}
  36.370 +
  36.371 +instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
  36.372 +%{
  36.373 +  predicate(Universe::narrow_oop_shift() == 0);
  36.374 +  match(Set dst mem);
  36.375 +
  36.376 +  ins_cost(110); // XXX
  36.377 +  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  36.378 +  opcode(0x8D);
  36.379 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.380 +  ins_pipe(ialu_reg_reg_fat);
  36.381 +%}
  36.382 +
  36.383 +instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
  36.384 +%{
  36.385 +  predicate(Universe::narrow_oop_shift() == 0);
  36.386 +  match(Set dst mem);
  36.387 +
  36.388 +  ins_cost(110);
  36.389 +  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  36.390 +  opcode(0x8D);
  36.391 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.392 +  ins_pipe(ialu_reg_reg_fat);
  36.393 +%}
  36.394 +
  36.395 +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
  36.396 +%{
  36.397 +  predicate(Universe::narrow_oop_shift() == 0);
  36.398 +  match(Set dst mem);
  36.399 +
  36.400 +  ins_cost(110);
  36.401 +  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  36.402 +  opcode(0x8D);
  36.403 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.404 +  ins_pipe(ialu_reg_reg_fat);
  36.405 +%}
  36.406 +
  36.407 +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
  36.408 +%{
  36.409 +  predicate(Universe::narrow_oop_shift() == 0);
  36.410 +  match(Set dst mem);
  36.411 +
  36.412 +  ins_cost(110);
  36.413 +  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  36.414 +  opcode(0x8D);
  36.415 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.416 +  ins_pipe(ialu_reg_reg_fat);
  36.417 +%}
  36.418 +
  36.419 +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
  36.420 +%{
  36.421 +  predicate(Universe::narrow_oop_shift() == 0);
  36.422 +  match(Set dst mem);
  36.423 +
  36.424 +  ins_cost(110);
  36.425 +  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  36.426 +  opcode(0x8D);
  36.427 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.428 +  ins_pipe(ialu_reg_reg_fat);
  36.429 +%}
  36.430 +
  36.431 +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
  36.432 +%{
  36.433 +  predicate(Universe::narrow_oop_shift() == 0);
  36.434 +  match(Set dst mem);
  36.435 +
  36.436 +  ins_cost(110);
  36.437 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  36.438 +  opcode(0x8D);
  36.439 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  36.440 +  ins_pipe(ialu_reg_reg_fat);
  36.441 +%}
  36.442 +
  36.443  instruct loadConI(rRegI dst, immI src)
  36.444  %{
  36.445    match(Set dst src);
  36.446 @@ -6528,8 +6719,7 @@
  36.447    effect(KILL cr);
  36.448    format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
  36.449    ins_encode %{
  36.450 -    Register dst = $dst$$Register;
  36.451 -    __ xorq(dst, dst);
  36.452 +    __ xorq($dst$$Register, $dst$$Register);
  36.453    %}
  36.454    ins_pipe(ialu_reg);
  36.455  %}
  36.456 @@ -6541,11 +6731,10 @@
  36.457    format %{ "movl    $dst, $src\t# compressed ptr" %}
  36.458    ins_encode %{
  36.459      address con = (address)$src$$constant;
  36.460 -    Register dst = $dst$$Register;
  36.461      if (con == NULL) {
  36.462        ShouldNotReachHere();
  36.463      } else {
  36.464 -      __ set_narrow_oop(dst, (jobject)$src$$constant);
  36.465 +      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
  36.466      }
  36.467    %}
  36.468    ins_pipe(ialu_reg_fat); // XXX
  36.469 @@ -6794,12 +6983,25 @@
  36.470    ins_pipe(ialu_mem_reg);
  36.471  %}
  36.472  
  36.473 +instruct storeImmP0(memory mem, immP0 zero)
  36.474 +%{
  36.475 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.476 +  match(Set mem (StoreP mem zero));
  36.477 +
  36.478 +  ins_cost(125); // XXX
  36.479 +  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  36.480 +  ins_encode %{
  36.481 +    __ movq($mem$$Address, r12);
  36.482 +  %}
  36.483 +  ins_pipe(ialu_mem_reg);
  36.484 +%}
  36.485 +
  36.486  // Store NULL Pointer, mark word, or other simple pointer constant.
  36.487  instruct storeImmP(memory mem, immP31 src)
  36.488  %{
  36.489    match(Set mem (StoreP mem src));
  36.490  
  36.491 -  ins_cost(125); // XXX
  36.492 +  ins_cost(150); // XXX
  36.493    format %{ "movq    $mem, $src\t# ptr" %}
  36.494    opcode(0xC7); /* C7 /0 */
  36.495    ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  36.496 @@ -6814,14 +7016,55 @@
  36.497    ins_cost(125); // XXX
  36.498    format %{ "movl    $mem, $src\t# compressed ptr" %}
  36.499    ins_encode %{
  36.500 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  36.501 -    Register src = as_Register($src$$reg);
  36.502 -    __ movl(addr, src);
  36.503 +    __ movl($mem$$Address, $src$$Register);
  36.504    %}
  36.505    ins_pipe(ialu_mem_reg);
  36.506  %}
  36.507  
  36.508 +instruct storeImmN0(memory mem, immN0 zero)
  36.509 +%{
  36.510 +  predicate(Universe::narrow_oop_base() == NULL);
  36.511 +  match(Set mem (StoreN mem zero));
  36.512 +
  36.513 +  ins_cost(125); // XXX
  36.514 +  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  36.515 +  ins_encode %{
  36.516 +    __ movl($mem$$Address, r12);
  36.517 +  %}
  36.518 +  ins_pipe(ialu_mem_reg);
  36.519 +%}
  36.520 +
  36.521 +instruct storeImmN(memory mem, immN src)
  36.522 +%{
  36.523 +  match(Set mem (StoreN mem src));
  36.524 +
  36.525 +  ins_cost(150); // XXX
  36.526 +  format %{ "movl    $mem, $src\t# compressed ptr" %}
  36.527 +  ins_encode %{
  36.528 +    address con = (address)$src$$constant;
  36.529 +    if (con == NULL) {
  36.530 +      __ movl($mem$$Address, (int32_t)0);
  36.531 +    } else {
  36.532 +      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
  36.533 +    }
  36.534 +  %}
  36.535 +  ins_pipe(ialu_mem_imm);
  36.536 +%}
  36.537 +
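
With a NULL narrow-oop base, R12 holds zero for the lifetime of compiled code, so the
storeImm*0 rules here and below encode "store zero" as a register-to-memory move from R12
rather than an immediate store, dropping the immediate bytes from the encoding (hence the
lower ins_cost on the R12 forms). A C sketch of the idea, with the asm in comments:

    #include <stdint.h>
    /* Assumes UseCompressedOops with Universe::narrow_oop_base() == NULL. */
    static void store_zero(uint32_t* mem, uintptr_t r12 /* always 0 here */) {
      *mem = (uint32_t)r12;  /* movl [mem], r12: shorter than movl [mem], 0 */
    }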
  36.538  // Store Integer Immediate
  36.539 +instruct storeImmI0(memory mem, immI0 zero)
  36.540 +%{
  36.541 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.542 +  match(Set mem (StoreI mem zero));
  36.543 +
  36.544 +  ins_cost(125); // XXX
  36.545 +  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  36.546 +  ins_encode %{
  36.547 +    __ movl($mem$$Address, r12);
  36.548 +  %}
  36.549 +  ins_pipe(ialu_mem_reg);
  36.550 +%}
  36.551 +
  36.552  instruct storeImmI(memory mem, immI src)
  36.553  %{
  36.554    match(Set mem (StoreI mem src));
  36.555 @@ -6834,6 +7077,19 @@
  36.556  %}
  36.557  
  36.558  // Store Long Immediate
  36.559 +instruct storeImmL0(memory mem, immL0 zero)
  36.560 +%{
  36.561 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.562 +  match(Set mem (StoreL mem zero));
  36.563 +
  36.564 +  ins_cost(125); // XXX
  36.565 +  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  36.566 +  ins_encode %{
  36.567 +    __ movq($mem$$Address, r12);
  36.568 +  %}
  36.569 +  ins_pipe(ialu_mem_reg);
  36.570 +%}
  36.571 +
  36.572  instruct storeImmL(memory mem, immL32 src)
  36.573  %{
  36.574    match(Set mem (StoreL mem src));
  36.575 @@ -6846,6 +7102,19 @@
  36.576  %}
  36.577  
  36.578  // Store Short/Char Immediate
  36.579 +instruct storeImmC0(memory mem, immI0 zero)
  36.580 +%{
  36.581 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.582 +  match(Set mem (StoreC mem zero));
  36.583 +
  36.584 +  ins_cost(125); // XXX
  36.585 +  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  36.586 +  ins_encode %{
  36.587 +    __ movw($mem$$Address, r12);
  36.588 +  %}
  36.589 +  ins_pipe(ialu_mem_reg);
  36.590 +%}
  36.591 +
  36.592  instruct storeImmI16(memory mem, immI16 src)
  36.593  %{
  36.594    predicate(UseStoreImmI16);
  36.595 @@ -6859,6 +7128,19 @@
  36.596  %}
  36.597  
  36.598  // Store Byte Immediate
  36.599 +instruct storeImmB0(memory mem, immI0 zero)
  36.600 +%{
  36.601 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.602 +  match(Set mem (StoreB mem zero));
  36.603 +
  36.604 +  ins_cost(125); // XXX
  36.605 +  format %{ "movb    $mem, R12\t# byte (R12_heapbase==0)" %}
  36.606 +  ins_encode %{
  36.607 +    __ movb($mem$$Address, r12);
  36.608 +  %}
  36.609 +  ins_pipe(ialu_mem_reg);
  36.610 +%}
  36.611 +
  36.612  instruct storeImmB(memory mem, immI8 src)
  36.613  %{
  36.614    match(Set mem (StoreB mem src));
  36.615 @@ -6898,6 +7180,19 @@
  36.616  %}
  36.617  
  36.618  // Store CMS card-mark Immediate
  36.619 +instruct storeImmCM0_reg(memory mem, immI0 zero)
  36.620 +%{
  36.621 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.622 +  match(Set mem (StoreCM mem zero));
  36.623 +
  36.624 +  ins_cost(125); // XXX
  36.625 +  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  36.626 +  ins_encode %{
  36.627 +    __ movb($mem$$Address, r12);
  36.628 +  %}
  36.629 +  ins_pipe(ialu_mem_reg);
  36.630 +%}
  36.631 +
  36.632  instruct storeImmCM0(memory mem, immI0 src)
  36.633  %{
  36.634    match(Set mem (StoreCM mem src));
  36.635 @@ -6931,6 +7226,19 @@
  36.636  %}
  36.637  
  36.638  // Store immediate Float value (it is faster than store from XMM register)
  36.639 +instruct storeF0(memory mem, immF0 zero)
  36.640 +%{
  36.641 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.642 +  match(Set mem (StoreF mem zero));
  36.643 +
  36.644 +  ins_cost(25); // XXX
  36.645 +  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  36.646 +  ins_encode %{
  36.647 +    __ movl($mem$$Address, r12);
  36.648 +  %}
  36.649 +  ins_pipe(ialu_mem_reg);
  36.650 +%}
  36.651 +
  36.652  instruct storeF_imm(memory mem, immF src)
  36.653  %{
  36.654    match(Set mem (StoreF mem src));
  36.655 @@ -6957,6 +7265,7 @@
  36.656  // Store immediate double 0.0 (it is faster than store from XMM register)
  36.657  instruct storeD0_imm(memory mem, immD0 src)
  36.658  %{
  36.659 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  36.660    match(Set mem (StoreD mem src));
  36.661  
  36.662    ins_cost(50);
  36.663 @@ -6966,6 +7275,19 @@
  36.664    ins_pipe(ialu_mem_imm);
  36.665  %}
  36.666  
  36.667 +instruct storeD0(memory mem, immD0 zero)
  36.668 +%{
  36.669 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.670 +  match(Set mem (StoreD mem zero));
  36.671 +
  36.672 +  ins_cost(25); // XXX
  36.673 +  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  36.674 +  ins_encode %{
  36.675 +    __ movq($mem$$Address, r12);
  36.676 +  %}
  36.677 +  ins_pipe(ialu_mem_reg);
  36.678 +%}
  36.679 +
  36.680  instruct storeSSI(stackSlotI dst, rRegI src)
  36.681  %{
  36.682    match(Set dst src);
  36.683 @@ -7077,6 +7399,56 @@
  36.684    ins_pipe( ialu_mem_reg );
  36.685  %}
  36.686  
  36.687 +
  36.688 +//---------- Population Count Instructions -------------------------------------
  36.689 +
  36.690 +instruct popCountI(rRegI dst, rRegI src) %{
  36.691 +  predicate(UsePopCountInstruction);
  36.692 +  match(Set dst (PopCountI src));
  36.693 +
  36.694 +  format %{ "popcnt  $dst, $src" %}
  36.695 +  ins_encode %{
  36.696 +    __ popcntl($dst$$Register, $src$$Register);
  36.697 +  %}
  36.698 +  ins_pipe(ialu_reg);
  36.699 +%}
  36.700 +
  36.701 +instruct popCountI_mem(rRegI dst, memory mem) %{
  36.702 +  predicate(UsePopCountInstruction);
  36.703 +  match(Set dst (PopCountI (LoadI mem)));
  36.704 +
  36.705 +  format %{ "popcnt  $dst, $mem" %}
  36.706 +  ins_encode %{
  36.707 +    __ popcntl($dst$$Register, $mem$$Address);
  36.708 +  %}
  36.709 +  ins_pipe(ialu_reg);
  36.710 +%}
  36.711 +
  36.712 +// Note: Long.bitCount(long) returns an int.
  36.713 +instruct popCountL(rRegI dst, rRegL src) %{
  36.714 +  predicate(UsePopCountInstruction);
  36.715 +  match(Set dst (PopCountL src));
  36.716 +
  36.717 +  format %{ "popcnt  $dst, $src" %}
  36.718 +  ins_encode %{
  36.719 +    __ popcntq($dst$$Register, $src$$Register);
  36.720 +  %}
  36.721 +  ins_pipe(ialu_reg);
  36.722 +%}
  36.723 +
  36.724 +// Note: Long.bitCount(long) returns an int.
  36.725 +instruct popCountL_mem(rRegI dst, memory mem) %{
  36.726 +  predicate(UsePopCountInstruction);
  36.727 +  match(Set dst (PopCountL (LoadL mem)));
  36.728 +
  36.729 +  format %{ "popcnt  $dst, $mem" %}
  36.730 +  ins_encode %{
  36.731 +    __ popcntq($dst$$Register, $mem$$Address);
  36.732 +  %}
  36.733 +  ins_pipe(ialu_reg);
  36.734 +%}
  36.735 +
  36.736 +
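
These rules match the new PopCountI/PopCountL ideal nodes, generated for Integer.bitCount
and Long.bitCount via the intrinsics added to vmSymbols.hpp later in this changeset, onto
the popcnt instruction when UsePopCountInstruction is set. A small C sketch of the
semantics popcnt implements:

    #include <stdint.h>
    /* Kernighan's method: each iteration clears the lowest set bit. */
    static int popcount32(uint32_t x) {
      int n = 0;
      while (x != 0) { x &= x - 1; n++; }
      return n;
    }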
  36.737  //----------MemBar Instructions-----------------------------------------------
  36.738  // Memory barrier flavors
  36.739  
  36.740 @@ -7192,9 +7564,7 @@
  36.741    effect(KILL cr);
  36.742    format %{ "encode_heap_oop_not_null $dst,$src" %}
  36.743    ins_encode %{
  36.744 -    Register s = $src$$Register;
  36.745 -    Register d = $dst$$Register;
  36.746 -    __ encode_heap_oop_not_null(d, s);
  36.747 +    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  36.748    %}
  36.749    ins_pipe(ialu_reg_long);
  36.750  %}
  36.751 @@ -7224,7 +7594,11 @@
  36.752    ins_encode %{
  36.753      Register s = $src$$Register;
  36.754      Register d = $dst$$Register;
  36.755 -    __ decode_heap_oop_not_null(d, s);
  36.756 +    if (s != d) {
  36.757 +      __ decode_heap_oop_not_null(d, s);
  36.758 +    } else {
  36.759 +      __ decode_heap_oop_not_null(d);
  36.760 +    }
  36.761    %}
  36.762    ins_pipe(ialu_reg_long);
  36.763  %}
  36.764 @@ -11389,8 +11763,9 @@
  36.765  
  36.766  // This will generate a signed flags result. This should be OK since
  36.767  // any compare to a zero should be eq/neq.
  36.768 -instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
  36.769 -%{
  36.770 +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
  36.771 +%{
  36.772 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  36.773    match(Set cr (CmpP (LoadP op) zero));
  36.774  
  36.775    ins_cost(500); // XXX
  36.776 @@ -11401,13 +11776,24 @@
  36.777    ins_pipe(ialu_cr_reg_imm);
  36.778  %}
  36.779  
  36.780 +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
  36.781 +%{
  36.782 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  36.783 +  match(Set cr (CmpP (LoadP mem) zero));
  36.784 +
  36.785 +  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  36.786 +  ins_encode %{
  36.787 +    __ cmpq(r12, $mem$$Address);
  36.788 +  %}
  36.789 +  ins_pipe(ialu_cr_reg_mem);
  36.790 +%}
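
With a zero heap base, the null test on a loaded pointer likewise becomes a plain compare
against R12, needing neither an immediate nor a decode of the loaded value. A sketch,
assuming R12 == 0:

    #include <stdint.h>
    /* cmpq r12, [mem]: ZF is set exactly when the stored pointer is NULL. */
    static int is_null_ptr(const uintptr_t* mem, uintptr_t r12 /* always 0 here */) {
      return *mem == r12;
    }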
  36.791  
  36.792  instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
  36.793  %{
  36.794    match(Set cr (CmpN op1 op2));
  36.795  
  36.796    format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  36.797 -  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
  36.798 +  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  36.799    ins_pipe(ialu_cr_reg_reg);
  36.800  %}
  36.801  
  36.802 @@ -11415,11 +11801,30 @@
  36.803  %{
  36.804    match(Set cr (CmpN src (LoadN mem)));
  36.805  
  36.806 -  ins_cost(500); // XXX
  36.807 -  format %{ "cmpl    $src, mem\t# compressed ptr" %}
  36.808 +  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  36.809    ins_encode %{
  36.810 -    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  36.811 -    __ cmpl(as_Register($src$$reg), adr);
  36.812 +    __ cmpl($src$$Register, $mem$$Address);
  36.813 +  %}
  36.814 +  ins_pipe(ialu_cr_reg_mem);
  36.815 +%}
  36.816 +
  36.817 +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  36.818 +  match(Set cr (CmpN op1 op2));
  36.819 +
  36.820 +  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  36.821 +  ins_encode %{
  36.822 +    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  36.823 +  %}
  36.824 +  ins_pipe(ialu_cr_reg_imm);
  36.825 +%}
  36.826 +
  36.827 +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
  36.828 +%{
  36.829 +  match(Set cr (CmpN src (LoadN mem)));
  36.830 +
  36.831 +  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  36.832 +  ins_encode %{
  36.833 +    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  36.834    %}
  36.835    ins_pipe(ialu_cr_reg_mem);
  36.836  %}
  36.837 @@ -11432,15 +11837,27 @@
  36.838    ins_pipe(ialu_cr_reg_imm);
  36.839  %}
  36.840  
  36.841 -instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
  36.842 -%{
  36.843 +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
  36.844 +%{
  36.845 +  predicate(Universe::narrow_oop_base() != NULL);
  36.846    match(Set cr (CmpN (LoadN mem) zero));
  36.847  
  36.848    ins_cost(500); // XXX
  36.849    format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  36.850    ins_encode %{
  36.851 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  36.852 -    __ cmpl(addr, (int)0xFFFFFFFF);
  36.853 +    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  36.854 +  %}
  36.855 +  ins_pipe(ialu_cr_reg_mem);
  36.856 +%}
  36.857 +
  36.858 +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
  36.859 +%{
  36.860 +  predicate(Universe::narrow_oop_base() == NULL);
  36.861 +  match(Set cr (CmpN (LoadN mem) zero));
  36.862 +
  36.863 +  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  36.864 +  ins_encode %{
  36.865 +    __ cmpl(r12, $mem$$Address);
  36.866    %}
  36.867    ins_pipe(ialu_cr_reg_mem);
  36.868  %}
  36.869 @@ -11472,7 +11889,6 @@
  36.870  %{
  36.871    match(Set cr (CmpL op1 (LoadL op2)));
  36.872  
  36.873 -  ins_cost(500); // XXX
  36.874    format %{ "cmpq    $op1, $op2" %}
  36.875    opcode(0x3B); /* Opcode 3B /r */
  36.876    ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  36.877 @@ -11733,15 +12149,12 @@
  36.878    effect(KILL rcx, KILL cr);
  36.879  
  36.880    ins_cost(1100);  // slightly larger than the next version
  36.881 -  format %{ "cmpq    rax, rsi\n\t"
  36.882 -            "jeq,s   hit\n\t"
  36.883 -            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  36.884 +  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  36.885              "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
  36.886             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
  36.887              "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
  36.888              "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
  36.889              "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
  36.890 -    "hit:\n\t"
  36.891             "xorq    $result, $result\t\t# Hit: rdi zero\n\t"
  36.892      "miss:\t" %}
  36.893  
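
The rewritten format strings drop the leading rax/rsi equality check and its "hit" label;
what remains documents the repne scasq scan over the secondary supers array. A C sketch of
that scan, with names chosen for illustration only:

    #include <stdint.h>
    /* Linear scan; a hit zeroes the result register and updates the super cache. */
    static int scan_secondary_supers(uintptr_t super, const uintptr_t* supers, int len) {
      int i;
      for (i = 0; i < len; i++) {
        if (supers[i] == super) return 1;  /* hit: rdi ends up zero   */
      }
      return 0;                            /* miss: rdi left non-zero */
    }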
  36.894 @@ -11756,13 +12169,10 @@
  36.895                                       rdi_RegP result)
  36.896  %{
  36.897    match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  36.898 -  predicate(!UseCompressedOops); // decoding oop kills condition codes
  36.899    effect(KILL rcx, KILL result);
  36.900  
  36.901    ins_cost(1000);
  36.902 -  format %{ "cmpq    rax, rsi\n\t"
  36.903 -            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
  36.904 -            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  36.905 +  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  36.906              "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
  36.907             "addq    rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
  36.908              "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
    37.1 --- a/src/os/linux/vm/os_linux.cpp	Fri Mar 20 22:08:48 2009 -0400
    37.2 +++ b/src/os/linux/vm/os_linux.cpp	Mon Mar 23 10:42:20 2009 -0400
    37.3 @@ -2582,7 +2582,7 @@
    37.4  #define SHM_HUGETLB 04000
    37.5  #endif
    37.6  
    37.7 -char* os::reserve_memory_special(size_t bytes) {
    37.8 +char* os::reserve_memory_special(size_t bytes, char* req_addr) {
    37.9    assert(UseLargePages, "only for large pages");
   37.10  
   37.11    key_t key = IPC_PRIVATE;
    38.1 --- a/src/os/solaris/dtrace/generateJvmOffsets.cpp	Fri Mar 20 22:08:48 2009 -0400
    38.2 +++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp	Mon Mar 23 10:42:20 2009 -0400
    38.3 @@ -249,6 +249,10 @@
    38.4  
    38.5    printf("\n");
    38.6  
    38.7 +  GEN_OFFS(NarrowOopStruct, _base);
    38.8 +  GEN_OFFS(NarrowOopStruct, _shift);
    38.9 +  printf("\n");
   38.10 +
   38.11    GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header));
   38.12    GEN_SIZE(oopDesc);
   38.13    GEN_SIZE(constantPoolOopDesc);
    39.1 --- a/src/os/solaris/dtrace/jhelper.d	Fri Mar 20 22:08:48 2009 -0400
    39.2 +++ b/src/os/solaris/dtrace/jhelper.d	Mon Mar 23 10:42:20 2009 -0400
    39.3 @@ -46,7 +46,10 @@
    39.4  extern pointer __1cJCodeCacheF_heap_;
    39.5  extern pointer __1cIUniverseP_methodKlassObj_;
    39.6  extern pointer __1cIUniverseO_collectedHeap_;
    39.7 -extern pointer __1cIUniverseK_heap_base_;
    39.8 +extern pointer __1cIUniverseL_narrow_oop_;
    39.9 +#ifdef _LP64
   39.10 +extern pointer UseCompressedOops;
   39.11 +#endif
   39.12  
   39.13  extern pointer __1cHnmethodG__vtbl_;
   39.14  extern pointer __1cKBufferBlobG__vtbl_;
   39.15 @@ -56,6 +59,7 @@
   39.16  #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t))
   39.17  #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t))
   39.18  #define copyin_int32(ADDR)  *(int32_t*)  copyin((pointer) (ADDR), sizeof(int32_t))
   39.19 +#define copyin_uint8(ADDR)  *(uint8_t*)  copyin((pointer) (ADDR), sizeof(uint8_t))
   39.20  
   39.21  #define SAME(x) x
   39.22  #define copyin_offset(JVM_CONST)  JVM_CONST = \
   39.23 @@ -132,6 +136,9 @@
   39.24    copyin_offset(SIZE_oopDesc);
   39.25    copyin_offset(SIZE_constantPoolOopDesc);
   39.26  
   39.27 +  copyin_offset(OFFSET_NarrowOopStruct_base);
   39.28 +  copyin_offset(OFFSET_NarrowOopStruct_shift);
   39.29 +
   39.30    /*
   39.31     * The PC to translate is in arg0.
   39.32     */
   39.33 @@ -151,9 +158,19 @@
   39.34  
   39.35    this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_);
   39.36    this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_);
   39.37 -  this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_);
   39.38  
   39.39    /* Reading volatile values */
   39.40 +#ifdef _LP64
   39.41 +  this->Use_Compressed_Oops  = copyin_uint8(&``UseCompressedOops);
   39.42 +#else
   39.43 +  this->Use_Compressed_Oops  = 0;
   39.44 +#endif
   39.45 +
   39.46 +  this->Universe_narrow_oop_base  = copyin_ptr(&``__1cIUniverseL_narrow_oop_ +
   39.47 +                                               OFFSET_NarrowOopStruct_base);
   39.48 +  this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ +
   39.49 +                                                 OFFSET_NarrowOopStruct_shift);
   39.50 +
   39.51    this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + 
   39.52        OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low);
   39.53  
   39.54 @@ -295,7 +312,7 @@
   39.55  
   39.56  dtrace:helper:ustack:
   39.57  /!this->done && this->vtbl == this->BufferBlob_vtbl &&
   39.58 -this->Universe_heap_base == NULL &&
   39.59 +this->Use_Compressed_Oops == 0 &&
   39.60  this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
   39.61  {
   39.62    MARK_LINE;
   39.63 @@ -306,7 +323,7 @@
   39.64  
   39.65  dtrace:helper:ustack:
   39.66  /!this->done && this->vtbl == this->BufferBlob_vtbl &&
   39.67 -this->Universe_heap_base != NULL &&
   39.68 +this->Use_Compressed_Oops != 0 &&
   39.69  this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
   39.70  {
   39.71    MARK_LINE;
   39.72 @@ -314,8 +331,8 @@
   39.73     * Read compressed pointer and  decode heap oop, same as oop.inline.hpp
    39.74     * Read compressed pointer and decode heap oop, same as oop.inline.hpp
   39.75    this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata);
   39.76 -  this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base +
   39.77 -                ((uintptr_t)this->cklass << 3));
   39.78 +  this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base +
   39.79 +                ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift));
   39.80    this->methodOop = this->klass == this->Universe_methodKlassOop;
   39.81    this->done = !this->methodOop;
   39.82  }
    40.1 --- a/src/os/solaris/dtrace/libjvm_db.c	Fri Mar 20 22:08:48 2009 -0400
    40.2 +++ b/src/os/solaris/dtrace/libjvm_db.c	Mon Mar 23 10:42:20 2009 -0400
    40.3 @@ -146,13 +146,17 @@
    40.4    uint64_t BufferBlob_vtbl;
    40.5    uint64_t RuntimeStub_vtbl;
    40.6  
    40.7 +  uint64_t Use_Compressed_Oops_address;
    40.8    uint64_t Universe_methodKlassObj_address;
    40.9 +  uint64_t Universe_narrow_oop_base_address;
   40.10 +  uint64_t Universe_narrow_oop_shift_address;
   40.11    uint64_t CodeCache_heap_address;
   40.12 -  uint64_t Universe_heap_base_address;
   40.13  
   40.14    /* Volatiles */
   40.15 +  uint8_t  Use_Compressed_Oops;
   40.16    uint64_t Universe_methodKlassObj;
   40.17 -  uint64_t Universe_heap_base;
   40.18 +  uint64_t Universe_narrow_oop_base;
   40.19 +  uint32_t Universe_narrow_oop_shift;
   40.20    uint64_t CodeCache_low;
   40.21    uint64_t CodeCache_high;
   40.22    uint64_t CodeCache_segmap_low;
   40.23 @@ -279,8 +283,11 @@
   40.24        if (strcmp("_methodKlassObj", vmp->fieldName) == 0) {
   40.25          J->Universe_methodKlassObj_address = vmp->address;
   40.26        }
   40.27 -      if (strcmp("_heap_base", vmp->fieldName) == 0) {
   40.28 -        J->Universe_heap_base_address = vmp->address;
   40.29 +      if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) {
   40.30 +        J->Universe_narrow_oop_base_address = vmp->address;
   40.31 +      }
   40.32 +      if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) {
   40.33 +        J->Universe_narrow_oop_shift_address = vmp->address;
   40.34        }
   40.35      }
   40.36      CHECK_FAIL(err);
   40.37 @@ -298,14 +305,39 @@
   40.38    return -1;
   40.39  }
   40.40  
   40.41 +static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   40.42 +  psaddr_t sym_addr;
   40.43 +  int err;
   40.44 +
   40.45 +  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   40.46 +  if (err != PS_OK) goto fail;
   40.47 +  *valuep = sym_addr;
   40.48 +  return PS_OK;
   40.49 +
   40.50 + fail:
   40.51 +  return err;
   40.52 +}
   40.53 +
   40.54  static int read_volatiles(jvm_agent_t* J) {
   40.55    uint64_t ptr;
   40.56    int err;
   40.57  
   40.58 +  err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address);
   40.59 +  if (err == PS_OK) {
   40.60 +    err = ps_pread(J->P,  J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t));
   40.61 +    CHECK_FAIL(err);
   40.62 +  } else {
   40.63 +    J->Use_Compressed_Oops = 0;
   40.64 +  }
   40.65 +
   40.66    err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj);
   40.67    CHECK_FAIL(err);
   40.68 -  err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base);
   40.69 +
   40.70 +  err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base);
   40.71    CHECK_FAIL(err);
   40.72 +  err = ps_pread(J->P,  J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t));
   40.73 +  CHECK_FAIL(err);
   40.74 +
   40.75    err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory +
   40.76                       OFFSET_VirtualSpace_low, &J->CodeCache_low);
   40.77    CHECK_FAIL(err);
   40.78 @@ -374,19 +406,6 @@
   40.79    return -1;
   40.80  }
   40.81  
   40.82 -static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   40.83 -  psaddr_t sym_addr;
   40.84 -  int err;
   40.85 -
   40.86 -  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   40.87 -  if (err != PS_OK) goto fail;
   40.88 -  *valuep = sym_addr;
   40.89 -  return PS_OK;
   40.90 -
   40.91 - fail:
   40.92 -  return err;
   40.93 -}
   40.94 -
   40.95  static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   40.96    psaddr_t sym_addr;
   40.97    int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   40.98 @@ -458,14 +477,14 @@
   40.99  static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) {
  40.100    uint64_t klass;
  40.101    int err;
  40.102 -  // If heap_base is nonnull, this was a compressed oop.
  40.103 -  if (J->Universe_heap_base != NULL) {
  40.104 +  // If UseCompressedOops, this was a compressed oop.
  40.105 +  if (J->Use_Compressed_Oops != 0) {
  40.106      uint32_t cklass;
  40.107      err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata,
  40.108            &cklass);
  40.109      // decode heap oop, same as oop.inline.hpp
  40.110 -    klass = (uint64_t)((uintptr_t)J->Universe_heap_base +
  40.111 -            ((uintptr_t)cklass << 3));
  40.112 +    klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base +
  40.113 +            ((uintptr_t)cklass << J->Universe_narrow_oop_shift));
  40.114    } else {
  40.115      err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass);
  40.116    }
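
Both the DTrace helper and libjvm_db now read the base and shift from Universe::_narrow_oop
instead of assuming a non-null heap base and a fixed shift of 3. The decode itself keeps
the shape used in oop.inline.hpp:

    #include <stdint.h>
    /* Decode a compressed (narrow) oop into a full address. */
    static uint64_t decode_narrow_oop(uint64_t base, uint32_t shift, uint32_t narrow) {
      return base + ((uint64_t)narrow << shift);
    }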
    41.1 --- a/src/os/solaris/vm/os_solaris.cpp	Fri Mar 20 22:08:48 2009 -0400
    41.2 +++ b/src/os/solaris/vm/os_solaris.cpp	Mon Mar 23 10:42:20 2009 -0400
    41.3 @@ -3220,7 +3220,7 @@
    41.4    return true;
    41.5  }
    41.6  
    41.7 -char* os::reserve_memory_special(size_t bytes) {
    41.8 +char* os::reserve_memory_special(size_t bytes, char* addr) {
    41.9    assert(UseLargePages && UseISM, "only for ISM large pages");
   41.10  
   41.11    size_t size = bytes;
   41.12 @@ -4451,6 +4451,9 @@
   41.13  int_fnP_thread_t os::Solaris::_thr_suspend_mutator;
   41.14  int_fnP_thread_t os::Solaris::_thr_continue_mutator;
   41.15  
   41.16 +// (Static) wrapper for getisax(2) call.
   41.17 +os::Solaris::getisax_func_t os::Solaris::_getisax = 0;
   41.18 +
   41.19  // (Static) wrappers for the liblgrp API
   41.20  os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home;
   41.21  os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init;
   41.22 @@ -4465,16 +4468,19 @@
   41.23  // (Static) wrapper for meminfo() call.
   41.24  os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0;
   41.25  
   41.26 -static address resolve_symbol(const char *name) {
   41.27 -  address addr;
   41.28 -
   41.29 -  addr = (address) dlsym(RTLD_DEFAULT, name);
   41.30 +static address resolve_symbol_lazy(const char* name) {
   41.31 +  address addr = (address) dlsym(RTLD_DEFAULT, name);
   41.32    if(addr == NULL) {
   41.33      // RTLD_DEFAULT was not defined on some early versions of 2.5.1
   41.34      addr = (address) dlsym(RTLD_NEXT, name);
   41.35 -    if(addr == NULL) {
   41.36 -      fatal(dlerror());
   41.37 -    }
   41.38 +  }
   41.39 +  return addr;
   41.40 +}
   41.41 +
   41.42 +static address resolve_symbol(const char* name) {
   41.43 +  address addr = resolve_symbol_lazy(name);
   41.44 +  if(addr == NULL) {
   41.45 +    fatal(dlerror());
   41.46    }
   41.47    return addr;
   41.48  }
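
resolve_symbol_lazy factors the dlsym lookup, including the RTLD_NEXT fallback for early
2.5.1 systems, out of resolve_symbol so optional symbols such as getisax and meminfo can
be probed without tripping fatal(). A condensed sketch:

    #include <dlfcn.h>
    #include <stddef.h>
    /* NULL means "symbol absent", which is acceptable for optional features. */
    static void* lookup_lazy(const char* name) {
      void* p = dlsym(RTLD_DEFAULT, name);
      return (p != NULL) ? p : dlsym(RTLD_NEXT, name);
    }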
   41.49 @@ -4673,15 +4679,26 @@
   41.50  }
   41.51  
   41.52  void os::Solaris::misc_sym_init() {
   41.53 -  address func = (address)dlsym(RTLD_DEFAULT, "meminfo");
   41.54 -  if(func == NULL) {
   41.55 -    func = (address) dlsym(RTLD_NEXT, "meminfo");
   41.56 -  }
   41.57 +  address func;
   41.58 +
   41.59 +  // getisax
   41.60 +  func = resolve_symbol_lazy("getisax");
   41.61 +  if (func != NULL) {
   41.62 +    os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func);
   41.63 +  }
   41.64 +
   41.65 +  // meminfo
   41.66 +  func = resolve_symbol_lazy("meminfo");
   41.67    if (func != NULL) {
   41.68      os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func));
   41.69    }
   41.70  }
   41.71  
   41.72 +uint_t os::Solaris::getisax(uint32_t* array, uint_t n) {
   41.73 +  assert(_getisax != NULL, "_getisax not set");
   41.74 +  return _getisax(array, n);
   41.75 +}
   41.76 +
   41.77  // Symbol doesn't exist in Solaris 8 pset.h
   41.78  #ifndef PS_MYID
   41.79  #define PS_MYID -3
   41.80 @@ -4716,6 +4733,10 @@
   41.81  
   41.82    Solaris::initialize_system_info();
   41.83  
   41.84 +  // Initialize misc. symbols as soon as possible, so we can use them
   41.85 +  // if we need them.
   41.86 +  Solaris::misc_sym_init();
   41.87 +
   41.88    int fd = open("/dev/zero", O_RDWR);
   41.89    if (fd < 0) {
   41.90      fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno));
   41.91 @@ -4857,7 +4878,6 @@
   41.92      }
   41.93    }
   41.94  
   41.95 -  Solaris::misc_sym_init();
   41.96    Solaris::signal_sets_init();
   41.97    Solaris::init_signal_mem();
   41.98    Solaris::install_signal_handlers();
    42.1 --- a/src/os/solaris/vm/os_solaris.hpp	Fri Mar 20 22:08:48 2009 -0400
    42.2 +++ b/src/os/solaris/vm/os_solaris.hpp	Mon Mar 23 10:42:20 2009 -0400
    42.3 @@ -1,5 +1,5 @@
    42.4  /*
    42.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    42.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    42.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    42.8   *
    42.9   * This code is free software; you can redistribute it and/or modify it
   42.10 @@ -72,6 +72,8 @@
   42.11          LGRP_VIEW_OS            /* what's available to operating system */
   42.12    } lgrp_view_t;
   42.13  
   42.14 +  typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n);
   42.15 +
   42.16    typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id);
   42.17    typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view);
   42.18    typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie);
   42.19 @@ -87,6 +89,8 @@
   42.20                                  const uint_t  info_req[],  int info_count,
   42.21                                  uint64_t  outdata[], uint_t validity[]);
   42.22  
   42.23 +  static getisax_func_t _getisax;
   42.24 +
   42.25    static lgrp_home_func_t _lgrp_home;
   42.26    static lgrp_init_func_t _lgrp_init;
   42.27    static lgrp_fini_func_t _lgrp_fini;
   42.28 @@ -283,6 +287,9 @@
   42.29    }
   42.30    static lgrp_cookie_t lgrp_cookie()                 { return _lgrp_cookie; }
   42.31  
   42.32 +  static bool supports_getisax()                     { return _getisax != NULL; }
   42.33 +  static uint_t getisax(uint32_t* array, uint_t n);
   42.34 +
   42.35    static void set_meminfo(meminfo_func_t func)       { _meminfo = func; }
   42.36    static int meminfo (const uint64_t inaddr[],   int addr_count,
   42.37                       const uint_t  info_req[],  int info_count,
    43.1 --- a/src/os/windows/vm/os_windows.cpp	Fri Mar 20 22:08:48 2009 -0400
    43.2 +++ b/src/os/windows/vm/os_windows.cpp	Mon Mar 23 10:42:20 2009 -0400
    43.3 @@ -2595,7 +2595,7 @@
    43.4    return true;
    43.5  }
    43.6  
    43.7 -char* os::reserve_memory_special(size_t bytes) {
    43.8 +char* os::reserve_memory_special(size_t bytes, char* addr) {
    43.9  
   43.10    if (UseLargePagesIndividualAllocation) {
   43.11      if (TracePageSizes && Verbose) {
   43.12 @@ -2615,7 +2615,7 @@
   43.13          "use -XX:-UseLargePagesIndividualAllocation to turn off");
   43.14        return NULL;
   43.15      }
   43.16 -    p_buf = (char *) VirtualAlloc(NULL,
   43.17 +    p_buf = (char *) VirtualAlloc(addr,
   43.18                                   size_of_reserve,  // size of Reserve
   43.19                                   MEM_RESERVE,
   43.20                                   PAGE_EXECUTE_READWRITE);
    44.1 --- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Fri Mar 20 22:08:48 2009 -0400
    44.2 +++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Mon Mar 23 10:42:20 2009 -0400
    44.3 @@ -30,5 +30,7 @@
    44.4  define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
    44.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    44.6  
    44.7 +// Only used on 64 bit platforms
    44.8 +define_pd_global(uintx, HeapBaseMinAddress,      4*G);
    44.9  // Only used on 64 bit Windows platforms
   44.10  define_pd_global(bool, UseVectoredExceptions, false);
    45.1 --- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    45.2 +++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    45.3 @@ -43,5 +43,7 @@
    45.4  
    45.5  define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
    45.6  
    45.7 +// Only used on 64 bit platforms
    45.8 +define_pd_global(uintx, HeapBaseMinAddress,      2*G);
    45.9  // Only used on 64 bit Windows platforms
   45.10  define_pd_global(bool, UseVectoredExceptions,    false);
    46.1 --- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Fri Mar 20 22:08:48 2009 -0400
    46.2 +++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Mon Mar 23 10:42:20 2009 -0400
    46.3 @@ -30,5 +30,9 @@
    46.4  define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
    46.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    46.6  
    46.7 +// Only used on 64 bit platforms
    46.8 +define_pd_global(uintx, HeapBaseMinAddress,      4*G);
    46.9  // Only used on 64 bit Windows platforms
   46.10  define_pd_global(bool, UseVectoredExceptions,    false);
   46.11 +
   46.12 +
    47.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
    47.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
    47.3 @@ -1,5 +1,5 @@
    47.4  /*
    47.5 - * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
    47.6 + * Copyright 2006-2009 Sun Microsystems, Inc.  All Rights Reserved.
    47.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    47.8   *
    47.9   * This code is free software; you can redistribute it and/or modify it
   47.10 @@ -25,58 +25,107 @@
   47.11  # include "incls/_precompiled.incl"
   47.12  # include "incls/_vm_version_solaris_sparc.cpp.incl"
   47.13  
   47.14 +# include <sys/auxv.h>
   47.15 +# include <sys/auxv_SPARC.h>
   47.16  # include <sys/systeminfo.h>
   47.17  
   47.18 +// We need to keep these here as long as we have to build on Solaris
   47.19 +// versions before 10.
   47.20 +#ifndef SI_ARCHITECTURE_32
   47.21 +#define SI_ARCHITECTURE_32      516     /* basic 32-bit SI_ARCHITECTURE */
   47.22 +#endif
   47.23 +
   47.24 +#ifndef SI_ARCHITECTURE_64
   47.25 +#define SI_ARCHITECTURE_64      517     /* basic 64-bit SI_ARCHITECTURE */
   47.26 +#endif
   47.27 +
   47.28 +static void do_sysinfo(int si, const char* string, int* features, int mask) {
   47.29 +  char   tmp;
   47.30 +  size_t bufsize = sysinfo(si, &tmp, 1);
   47.31 +
   47.32 +  // All SI defines used below must be supported.
   47.33 +  guarantee(bufsize != -1, "must be supported");
   47.34 +
   47.35 +  char* buf = (char*) malloc(bufsize);
   47.36 +
   47.37 +  if (buf == NULL)
   47.38 +    return;
   47.39 +
   47.40 +  if (sysinfo(si, buf, bufsize) == bufsize) {
   47.41 +    // Compare the string.
   47.42 +    if (strcmp(buf, string) == 0) {
   47.43 +      *features |= mask;
   47.44 +    }
   47.45 +  }
   47.46 +
   47.47 +  free(buf);
   47.48 +}
   47.49 +
   47.50  int VM_Version::platform_features(int features) {
   47.51 -  // We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...).
   47.52 -  // This isn't the best of all possible ways because there's not enough
   47.53 -  // detail in the isa list it returns, but it's a bit less arcane than
   47.54 -  // generating assembly code and an illegal instruction handler.  We used
   47.55 -  // to generate a getpsr trap, but that's even more arcane.
   47.56 -  //
   47.57 -  // Another possibility would be to use sysinfo(SI_PLATFORM, ...), but
   47.58 -  // that would require more knowledge here than is wise.
   47.59 +  // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
   47.60 +  // supported on Solaris 10 and later.
   47.61 +  if (os::Solaris::supports_getisax()) {
   47.62 +#ifndef PRODUCT
   47.63 +    if (PrintMiscellaneous && Verbose)
   47.64 +      tty->print_cr("getisax(2) supported.");
   47.65 +#endif
   47.66  
   47.67 -  // isalist spec via 'man isalist' as of 01-Aug-2001
   47.68 +    // Check 32-bit architecture.
   47.69 +    do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
   47.70  
   47.71 -  char   tmp;
   47.72 -  size_t bufsize  = sysinfo(SI_ISALIST, &tmp, 1);
   47.73 -  char*  buf      = (char*)malloc(bufsize);
   47.74 +    // Check 64-bit architecture.
   47.75 +    do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
   47.76  
   47.77 -  if (buf != NULL) {
   47.78 -    if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
   47.79 -      // Figure out what kind of sparc we have
   47.80 -      char *sparc_string = strstr(buf, "sparc");
   47.81 -      if (sparc_string != NULL) {            features |= v8_instructions_m;
   47.82 -        if (sparc_string[5] == 'v') {
   47.83 -          if (sparc_string[6] == '8') {
   47.84 -            if (sparc_string[7] == '-')      features |= hardware_int_muldiv_m;
   47.85 -            else if (sparc_string[7] == 'p') features |= generic_v9_m;
   47.86 -            else                      features |= generic_v8_m;
   47.87 -          } else if (sparc_string[6] == '9') features |= generic_v9_m;
   47.88 +    // Extract valid instruction set extensions.
   47.89 +    uint_t av;
   47.90 +    uint_t avn = os::Solaris::getisax(&av, 1);
   47.91 +    assert(avn == 1, "should only return one av");
   47.92 +
   47.93 +    if (av & AV_SPARC_MUL32)  features |= hardware_mul32_m;
   47.94 +    if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
   47.95 +    if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
   47.96 +    if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
   47.97 +    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
   47.98 +    if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
   47.99 +    if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
  47.100 +  } else {
  47.101 +    // getisax(2) failed, use the old legacy code.
  47.102 +#ifndef PRODUCT
  47.103 +    if (PrintMiscellaneous && Verbose)
  47.104 +      tty->print_cr("getisax(2) not supported.");
  47.105 +#endif
  47.106 +
  47.107 +    char   tmp;
  47.108 +    size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1);
  47.109 +    char*  buf     = (char*) malloc(bufsize);
  47.110 +
  47.111 +    if (buf != NULL) {
  47.112 +      if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
  47.113 +        // Figure out what kind of sparc we have
  47.114 +        char *sparc_string = strstr(buf, "sparc");
  47.115 +        if (sparc_string != NULL) {              features |= v8_instructions_m;
  47.116 +          if (sparc_string[5] == 'v') {
  47.117 +            if (sparc_string[6] == '8') {
  47.118 +              if (sparc_string[7] == '-') {      features |= hardware_mul32_m;
  47.119 +                                                 features |= hardware_div32_m;
  47.120 +              } else if (sparc_string[7] == 'p') features |= generic_v9_m;
  47.121 +              else                               features |= generic_v8_m;
  47.122 +            } else if (sparc_string[6] == '9')   features |= generic_v9_m;
  47.123 +          }
  47.124 +        }
  47.125 +
  47.126 +        // Check for visualization instructions
  47.127 +        char *vis = strstr(buf, "vis");
  47.128 +        if (vis != NULL) {                       features |= vis1_instructions_m;
  47.129 +          if (vis[3] == '2')                     features |= vis2_instructions_m;
  47.130          }
  47.131        }
  47.132 -
  47.133 -      // Check for visualization instructions
  47.134 -      char *vis = strstr(buf, "vis");
  47.135 -      if (vis != NULL) {              features |= vis1_instructions_m;
  47.136 -        if (vis[3] == '2')            features |= vis2_instructions_m;
  47.137 -      }
  47.138 +      free(buf);
  47.139      }
  47.140 -    free(buf);
  47.141    }
  47.142  
  47.143 -  bufsize = sysinfo(SI_MACHINE, &tmp, 1);
  47.144 -  buf     = (char*)malloc(bufsize);
  47.145 -
  47.146 -  if (buf != NULL) {
  47.147 -    if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) {
  47.148 -      if (strstr(buf, "sun4v") != NULL) {
  47.149 -        features |= sun4v_m;
  47.150 -      }
  47.151 -    }
  47.152 -    free(buf);
  47.153 -  }
  47.154 +  // Determine the machine type.
  47.155 +  do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
  47.156  
  47.157    return features;
  47.158  }
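
On Solaris 10 and later the feature probe is a single getisax(2) call that fills a vector
of AV_SPARC_* bits; the SI_ISALIST string parsing survives only as the pre-10 fallback. A
minimal sketch of one such probe:

    #include <sys/types.h>
    #include <sys/auxv.h>
    #include <sys/auxv_SPARC.h>
    /* One 32-bit element is enough for the bits tested above. */
    static int has_vis1(void) {
      uint32_t av = 0;
      (void) getisax(&av, 1);
      return (av & AV_SPARC_VIS) != 0;
    }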
    48.1 --- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    48.2 +++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    48.3 @@ -46,5 +46,7 @@
    48.4  
    48.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    48.6  
    48.7 +// Only used on 64 bit platforms
    48.8 +define_pd_global(uintx, HeapBaseMinAddress,      256*M);
    48.9  // Only used on 64 bit Windows platforms
   48.10  define_pd_global(bool, UseVectoredExceptions,    false);
    49.1 --- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    49.2 +++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    49.3 @@ -45,5 +45,7 @@
    49.4  
    49.5  define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
    49.6  
    49.7 +// Only used on 64 bit platforms
    49.8 +define_pd_global(uintx, HeapBaseMinAddress,      2*G);
    49.9  // Only used on 64 bit Windows platforms
   49.10  define_pd_global(bool, UseVectoredExceptions,    false);
    50.1 --- a/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Fri Mar 20 22:08:48 2009 -0400
    50.2 +++ b/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Mon Mar 23 10:42:20 2009 -0400
    50.3 @@ -68,6 +68,9 @@
    50.4      PVOID HandlerData;
    50.5  } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
    50.6  
    50.7 +#if _MSC_VER < 1500
    50.8 +
    50.9 +/* Not needed for VS2008 compiler, comes from winnt.h. */
   50.10  typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
   50.11      IN PEXCEPTION_RECORD ExceptionRecord,
   50.12      IN ULONG64 EstablisherFrame,
   50.13 @@ -75,4 +78,6 @@
   50.14      IN OUT PDISPATCHER_CONTEXT DispatcherContext
   50.15  );
   50.16  
   50.17 +#endif
   50.18 +
   50.19  #endif // AMD64
    51.1 --- a/src/share/vm/adlc/adlc.hpp	Fri Mar 20 22:08:48 2009 -0400
    51.2 +++ b/src/share/vm/adlc/adlc.hpp	Mon Mar 23 10:42:20 2009 -0400
    51.3 @@ -44,7 +44,7 @@
    51.4  #error "Something is wrong with the detection of MSC_VER in the makefiles"
    51.5  #endif
    51.6  
    51.7 -#if _MSC_VER >= 1400 && !defined(_WIN64)
    51.8 +#if _MSC_VER >= 1400
    51.9  #define strdup _strdup
   51.10  #endif
   51.11  
    52.1 --- a/src/share/vm/asm/assembler.cpp	Fri Mar 20 22:08:48 2009 -0400
    52.2 +++ b/src/share/vm/asm/assembler.cpp	Mon Mar 23 10:42:20 2009 -0400
    52.3 @@ -321,16 +321,19 @@
    52.4  bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
    52.5    // Exception handler checks the nmethod's implicit null checks table
    52.6    // only when this method returns false.
    52.7 -  if (UseCompressedOops) {
    52.8 +#ifdef _LP64
    52.9 +  if (UseCompressedOops && Universe::narrow_oop_base() != NULL) {
   52.10 +    assert (Universe::heap() != NULL, "java heap should be initialized");
   52.11      // The first page after heap_base is unmapped and
   52.12      // the 'offset' is equal to [heap_base + offset] for
   52.13      // narrow oop implicit null checks.
   52.14 -    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
   52.15 -    if ((uintptr_t)offset >= heap_base) {
   52.16 +    uintptr_t base = (uintptr_t)Universe::narrow_oop_base();
   52.17 +    if ((uintptr_t)offset >= base) {
   52.18        // Normalize offset for the next check.
   52.19 -      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
   52.20 +      offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1));
   52.21      }
   52.22    }
   52.23 +#endif
   52.24    return offset < 0 || os::vm_page_size() <= offset;
   52.25  }
   52.26  
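
With a non-zero narrow-oop base, a null compressed oop decodes to the base itself, so a
fault at [base + offset] still encodes an implicit null check; the base is therefore
subtracted out before the usual page-size test. A sketch of the whole predicate:

    #include <stdint.h>
    #include <stddef.h>
    /* Mirrors the LP64 compressed-oops path above; page is os::vm_page_size(). */
    static int needs_explicit_check(intptr_t offset, uintptr_t base, size_t page) {
      if (base != 0 && (uintptr_t)offset >= base)
        offset = (intptr_t)((uintptr_t)offset - base);  /* normalize */
      return offset < 0 || (intptr_t)page <= offset;
    }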
    53.1 --- a/src/share/vm/ci/ciMethodBlocks.cpp	Fri Mar 20 22:08:48 2009 -0400
    53.2 +++ b/src/share/vm/ci/ciMethodBlocks.cpp	Mon Mar 23 10:42:20 2009 -0400
    53.3 @@ -284,6 +284,11 @@
    53.4        //
    53.5        int ex_start = handler->start();
    53.6        int ex_end = handler->limit();
    53.7 +      // ensure a block at the start of exception range and start of following code
    53.8 +      (void) make_block_at(ex_start);
    53.9 +      if (ex_end < _code_size)
   53.10 +        (void) make_block_at(ex_end);
   53.11 +
   53.12        if (eb->is_handler()) {
   53.13          // Extend old handler exception range to cover additional range.
   53.14          int old_ex_start = eb->ex_start_bci();
   53.15 @@ -295,10 +300,6 @@
   53.16          eb->clear_exception_handler(); // Reset exception information
   53.17        }
   53.18        eb->set_exception_range(ex_start, ex_end);
   53.19 -      // ensure a block at the start of exception range and start of following code
   53.20 -      (void) make_block_at(ex_start);
   53.21 -      if (ex_end < _code_size)
   53.22 -        (void) make_block_at(ex_end);
   53.23      }
   53.24    }
   53.25  
    54.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Fri Mar 20 22:08:48 2009 -0400
    54.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Mon Mar 23 10:42:20 2009 -0400
    54.3 @@ -284,6 +284,7 @@
    54.4    template(value_name,                                "value")                                    \
    54.5    template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
    54.6    template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
    54.7 +  template(bitCount_name,                             "bitCount")                                 \
    54.8                                                                                                    \
    54.9    /* non-intrinsic name/signature pairs: */                                                       \
   54.10    template(register_method_name,                      "register")                                 \
   54.11 @@ -304,6 +305,7 @@
   54.12    template(double_long_signature,                     "(D)J")                                     \
   54.13    template(double_double_signature,                   "(D)D")                                     \
   54.14    template(int_float_signature,                       "(I)F")                                     \
   54.15 +  template(long_int_signature,                        "(J)I")                                     \
   54.16    template(long_long_signature,                       "(J)J")                                     \
   54.17    template(long_double_signature,                     "(J)D")                                     \
   54.18    template(byte_signature,                            "B")                                        \
   54.19 @@ -507,6 +509,10 @@
   54.20     do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   54.21    do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
   54.22     do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
   54.23 +                                                                                                                        \
   54.24 +  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
   54.25 +  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
   54.26 +                                                                                                                        \
   54.27    do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
   54.28     do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   54.29    do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
   54.30 @@ -696,7 +702,6 @@
   54.31    do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   54.32    do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   54.33    do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
   54.34 -  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   54.35    do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
   54.36        do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
   54.37        do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
   54.38 @@ -713,7 +718,7 @@
   54.39    do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   54.40    do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   54.41    do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
   54.42 -  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
   54.43 +  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   54.44    do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   54.45    do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   54.46    do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
    55.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Mar 20 22:08:48 2009 -0400
    55.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Mon Mar 23 10:42:20 2009 -0400
    55.3 @@ -145,14 +145,9 @@
    55.4    if (G1RSBarrierUseQueue) {
    55.5      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
    55.6      dcqs.abandon_logs();
    55.7 -    if (_cg1rThread->do_traversal()) {
    55.8 -      _pya = PYA_restart;
    55.9 -    } else {
   55.10 -      _cg1rThread->set_do_traversal(true);
   55.11 -      // Reset the post-yield actions.
   55.12 -      _pya = PYA_continue;
   55.13 -      _last_pya = PYA_continue;
   55.14 -    }
   55.15 +    // Reset the post-yield actions.
   55.16 +    _pya = PYA_continue;
   55.17 +    _last_pya = PYA_continue;
   55.18    } else {
   55.19      _pya = PYA_restart;
   55.20    }
    56.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Mar 20 22:08:48 2009 -0400
    56.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Mar 23 10:42:20 2009 -0400
    56.3 @@ -107,7 +107,7 @@
    56.4  #ifndef PRODUCT
    56.5  bool CMBitMapRO::covers(ReservedSpace rs) const {
    56.6    // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
    56.7 -  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
    56.8 +  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
    56.9           "size inconsistency");
   56.10    return _bmStartWord == (HeapWord*)(rs.base()) &&
   56.11           _bmWordSize  == rs.size()>>LogHeapWordSize;
   56.12 @@ -1232,7 +1232,16 @@
   56.13      if (!_final && _regions_done == 0)
   56.14        _start_vtime_sec = os::elapsedVTime();
   56.15  
   56.16 -    if (hr->continuesHumongous()) return false;
   56.17 +    if (hr->continuesHumongous()) {
   56.18 +      HeapRegion* hum_start = hr->humongous_start_region();
   56.19 +      // If the head region of the humongous region has been determined
   56.20 +      // to be alive, then all the tail regions should be marked
    56.21 +      // as well.
   56.22 +      if (_region_bm->at(hum_start->hrs_index())) {
   56.23 +        _region_bm->par_at_put(hr->hrs_index(), 1);
   56.24 +      }
   56.25 +      return false;
   56.26 +    }
   56.27  
   56.28      HeapWord* nextTop = hr->next_top_at_mark_start();
   56.29      HeapWord* start   = hr->top_at_conc_mark_count();
    57.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Mar 20 22:08:48 2009 -0400
    57.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Mar 23 10:42:20 2009 -0400
    57.3 @@ -786,6 +786,12 @@
    57.4    }
    57.5  }
    57.6  
    57.7 +void G1CollectedHeap::abandon_gc_alloc_regions() {
     57.8 +  // first, make sure that the GC alloc region list is empty (it should be!)
    57.9 +  assert(_gc_alloc_region_list == NULL, "invariant");
   57.10 +  release_gc_alloc_regions(true /* totally */);
   57.11 +}
   57.12 +
   57.13  class PostMCRemSetClearClosure: public HeapRegionClosure {
   57.14    ModRefBarrierSet* _mr_bs;
   57.15  public:
   57.16 @@ -914,6 +920,7 @@
   57.17  
   57.18      // Make sure we'll choose a new allocation region afterwards.
   57.19      abandon_cur_alloc_region();
   57.20 +    abandon_gc_alloc_regions();
   57.21      assert(_cur_alloc_region == NULL, "Invariant.");
   57.22      g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS();
   57.23      tear_down_region_lists();
   57.24 @@ -954,6 +961,7 @@
   57.25      if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
   57.26        HandleMark hm;  // Discard invalid handles created during verification
   57.27        gclog_or_tty->print(" VerifyAfterGC:");
   57.28 +      prepare_for_verify();
   57.29        Universe::verify(false);
   57.30      }
   57.31      NOT_PRODUCT(ref_processor()->verify_no_references_recorded());
   57.32 @@ -1306,7 +1314,7 @@
   57.33  }
   57.34  
   57.35  void G1CollectedHeap::shrink(size_t shrink_bytes) {
   57.36 -  release_gc_alloc_regions();
   57.37 +  release_gc_alloc_regions(true /* totally */);
   57.38    tear_down_region_lists();  // We will rebuild them in a moment.
   57.39    shrink_helper(shrink_bytes);
   57.40    rebuild_region_lists();
   57.41 @@ -1345,8 +1353,7 @@
   57.42    _gc_time_stamp(0),
   57.43    _surviving_young_words(NULL),
   57.44    _in_cset_fast_test(NULL),
   57.45 -  _in_cset_fast_test_base(NULL)
   57.46 -{
   57.47 +  _in_cset_fast_test_base(NULL) {
   57.48    _g1h = this; // To catch bugs.
   57.49    if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
   57.50      vm_exit_during_initialization("Failed necessary allocation.");
   57.51 @@ -1371,9 +1378,19 @@
   57.52    }
   57.53  
   57.54    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
   57.55 -    _gc_alloc_regions[ap]       = NULL;
   57.56 -    _gc_alloc_region_counts[ap] = 0;
   57.57 -  }
   57.58 +    _gc_alloc_regions[ap]          = NULL;
   57.59 +    _gc_alloc_region_counts[ap]    = 0;
   57.60 +    _retained_gc_alloc_regions[ap] = NULL;
   57.61 +    // by default, we do not retain a GC alloc region for each ap;
   57.62 +    // we'll override this, when appropriate, below
   57.63 +    _retain_gc_alloc_region[ap]    = false;
   57.64 +  }
   57.65 +
   57.66 +  // We will try to remember the last half-full tenured region we
   57.67 +  // allocated to at the end of a collection so that we can re-use it
   57.68 +  // during the next collection.
   57.69 +  _retain_gc_alloc_region[GCAllocForTenured]  = true;
   57.70 +
   57.71    guarantee(_task_queues != NULL, "task_queues allocation failure.");
   57.72  }
   57.73  
   57.74 @@ -1405,9 +1422,34 @@
   57.75    // Reserve the maximum.
   57.76    PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
   57.77    // Includes the perm-gen.
   57.78 +
   57.79 +  const size_t total_reserved = max_byte_size + pgs->max_size();
   57.80 +  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
   57.81 +
   57.82    ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
   57.83                          HeapRegion::GrainBytes,
   57.84 -                        false /*ism*/);
   57.85 +                        false /*ism*/, addr);
   57.86 +
   57.87 +  if (UseCompressedOops) {
   57.88 +    if (addr != NULL && !heap_rs.is_reserved()) {
   57.89 +      // Failed to reserve at specified address - the requested memory
   57.90 +      // region is taken already, for example, by 'java' launcher.
    57.91 +      // Try again to reserve the heap higher.
   57.92 +      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   57.93 +      ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes,
   57.94 +                             false /*ism*/, addr);
   57.95 +      if (addr != NULL && !heap_rs0.is_reserved()) {
   57.96 +        // Failed to reserve at specified address again - give up.
   57.97 +        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   57.98 +        assert(addr == NULL, "");
   57.99 +        ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes,
  57.100 +                               false /*ism*/, addr);
  57.101 +        heap_rs = heap_rs1;
  57.102 +      } else {
  57.103 +        heap_rs = heap_rs0;
  57.104 +      }
  57.105 +    }
  57.106 +  }
  57.107  
  57.108    if (!heap_rs.is_reserved()) {
  57.109      vm_exit_during_initialization("Could not reserve enough space for object heap");
  57.110 @@ -2119,15 +2161,7 @@
  57.111    bool doHeapRegion(HeapRegion* r) {
  57.112      guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue,
  57.113                "Should be unclaimed at verify points.");
  57.114 -    if (r->isHumongous()) {
  57.115 -      if (r->startsHumongous()) {
  57.116 -        // Verify the single H object.
  57.117 -        oop(r->bottom())->verify();
  57.118 -        size_t word_sz = oop(r->bottom())->size();
  57.119 -        guarantee(r->top() == r->bottom() + word_sz,
  57.120 -                  "Only one object in a humongous region");
  57.121 -      }
  57.122 -    } else {
  57.123 +    if (!r->continuesHumongous()) {
  57.124        VerifyObjsInRegionClosure not_dead_yet_cl(r);
  57.125        r->verify(_allow_dirty);
  57.126        r->object_iterate(&not_dead_yet_cl);
  57.127 @@ -2179,6 +2213,7 @@
  57.128      _g1h(g1h), _allow_dirty(allow_dirty) { }
  57.129  
  57.130    void work(int worker_i) {
  57.131 +    HandleMark hm;
  57.132      VerifyRegionClosure blk(_allow_dirty, true);
  57.133      _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
  57.134                                            HeapRegion::ParVerifyClaimValue);
  57.135 @@ -2644,7 +2679,7 @@
  57.136          popular_region->set_popular_pending(false);
  57.137        }
  57.138  
  57.139 -      release_gc_alloc_regions();
  57.140 +      release_gc_alloc_regions(false /* totally */);
  57.141  
  57.142        cleanup_surviving_young_words();
  57.143  
  57.144 @@ -2697,6 +2732,7 @@
  57.145      if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
  57.146        HandleMark hm;  // Discard invalid handles created during verification
  57.147        gclog_or_tty->print(" VerifyAfterGC:");
  57.148 +      prepare_for_verify();
  57.149        Universe::verify(false);
  57.150      }
  57.151  
  57.152 @@ -2735,6 +2771,10 @@
  57.153  
  57.154  void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) {
  57.155    assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose");
  57.156 +  // make sure we don't call set_gc_alloc_region() multiple times on
  57.157 +  // the same region
  57.158 +  assert(r == NULL || !r->is_gc_alloc_region(),
  57.159 +         "shouldn't already be a GC alloc region");
  57.160    HeapWord* original_top = NULL;
  57.161    if (r != NULL)
  57.162      original_top = r->top();
  57.163 @@ -2824,6 +2864,12 @@
  57.164    while (_gc_alloc_region_list != NULL) {
  57.165      HeapRegion* r = _gc_alloc_region_list;
  57.166      assert(r->is_gc_alloc_region(), "Invariant.");
  57.167 +    // We need HeapRegion::oops_on_card_seq_iterate_careful() to work on
  57.168 +    // newly allocated data in order to be able to apply deferred updates
   57.169 +    // before the GC is done, for verification purposes (i.e. to allow
   57.170 +    // G1HRRSFlushLogBuffersOnVerify). It's a safe thing to do after the
  57.171 +    // collection.
  57.172 +    r->ContiguousSpace::set_saved_mark();
  57.173      _gc_alloc_region_list = r->next_gc_alloc_region();
  57.174      r->set_next_gc_alloc_region(NULL);
  57.175      r->set_is_gc_alloc_region(false);
  57.176 @@ -2851,23 +2897,55 @@
  57.177  }
  57.178  
  57.179  void G1CollectedHeap::get_gc_alloc_regions() {
   57.180 +  // First, let's check that the GC alloc region list is empty (it should be)
  57.181 +  assert(_gc_alloc_region_list == NULL, "invariant");
  57.182 +
  57.183    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
  57.184 +    assert(_gc_alloc_regions[ap] == NULL, "invariant");
  57.185 +
  57.186      // Create new GC alloc regions.
  57.187 -    HeapRegion* alloc_region = _gc_alloc_regions[ap];
  57.188 -    // Clear this alloc region, so that in case it turns out to be
  57.189 -    // unacceptable, we end up with no allocation region, rather than a bad
  57.190 -    // one.
  57.191 -    _gc_alloc_regions[ap] = NULL;
  57.192 -    if (alloc_region == NULL || alloc_region->in_collection_set()) {
  57.193 -      // Can't re-use old one.  Allocate a new one.
  57.194 +    HeapRegion* alloc_region = _retained_gc_alloc_regions[ap];
  57.195 +    _retained_gc_alloc_regions[ap] = NULL;
  57.196 +
  57.197 +    if (alloc_region != NULL) {
  57.198 +      assert(_retain_gc_alloc_region[ap], "only way to retain a GC region");
  57.199 +
  57.200 +      // let's make sure that the GC alloc region is not tagged as such
  57.201 +      // outside a GC operation
  57.202 +      assert(!alloc_region->is_gc_alloc_region(), "sanity");
  57.203 +
  57.204 +      if (alloc_region->in_collection_set() ||
  57.205 +          alloc_region->top() == alloc_region->end() ||
  57.206 +          alloc_region->top() == alloc_region->bottom()) {
  57.207 +        // we will discard the current GC alloc region if it's in the
  57.208 +        // collection set (it can happen!), if it's already full (no
  57.209 +        // point in using it), or if it's empty (this means that it
  57.210 +        // was emptied during a cleanup and it should be on the free
  57.211 +        // list now).
  57.212 +
  57.213 +        alloc_region = NULL;
  57.214 +      }
  57.215 +    }
  57.216 +
  57.217 +    if (alloc_region == NULL) {
  57.218 +      // we will get a new GC alloc region
  57.219        alloc_region = newAllocRegionWithExpansion(ap, 0);
  57.220      }
  57.221 +
  57.222      if (alloc_region != NULL) {
  57.223 +      assert(_gc_alloc_regions[ap] == NULL, "pre-condition");
  57.224        set_gc_alloc_region(ap, alloc_region);
  57.225      }
  57.226 +
  57.227 +    assert(_gc_alloc_regions[ap] == NULL ||
  57.228 +           _gc_alloc_regions[ap]->is_gc_alloc_region(),
  57.229 +           "the GC alloc region should be tagged as such");
  57.230 +    assert(_gc_alloc_regions[ap] == NULL ||
  57.231 +           _gc_alloc_regions[ap] == _gc_alloc_region_list,
  57.232 +           "the GC alloc region should be the same as the GC alloc list head");
  57.233    }
  57.234    // Set alternative regions for allocation purposes that have reached
  57.235 -  // thier limit.
  57.236 +  // their limit.
  57.237    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
  57.238      GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap);
  57.239      if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) {
  57.240 @@ -2877,27 +2955,55 @@
  57.241    assert(check_gc_alloc_regions(), "alloc regions messed up");
  57.242  }
  57.243  
  57.244 -void G1CollectedHeap::release_gc_alloc_regions() {
  57.245 +void G1CollectedHeap::release_gc_alloc_regions(bool totally) {
  57.246    // We keep a separate list of all regions that have been alloc regions in
  57.247 -  // the current collection pause.  Forget that now.
  57.248 +  // the current collection pause. Forget that now. This method will
  57.249 +  // untag the GC alloc regions and tear down the GC alloc region
  57.250 +  // list. It's desirable that no regions are tagged as GC alloc
  57.251 +  // outside GCs.
  57.252    forget_alloc_region_list();
  57.253  
  57.254    // The current alloc regions contain objs that have survived
  57.255    // collection. Make them no longer GC alloc regions.
  57.256    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
  57.257      HeapRegion* r = _gc_alloc_regions[ap];
  57.258 -    if (r != NULL && r->is_empty()) {
  57.259 -      {
  57.260 +    _retained_gc_alloc_regions[ap] = NULL;
  57.261 +
  57.262 +    if (r != NULL) {
  57.263 +      // we retain nothing on _gc_alloc_regions between GCs
  57.264 +      set_gc_alloc_region(ap, NULL);
  57.265 +      _gc_alloc_region_counts[ap] = 0;
  57.266 +
  57.267 +      if (r->is_empty()) {
  57.268 +        // we didn't actually allocate anything in it; let's just put
  57.269 +        // it on the free list
  57.270          MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
  57.271          r->set_zero_fill_complete();
  57.272          put_free_region_on_list_locked(r);
  57.273 +      } else if (_retain_gc_alloc_region[ap] && !totally) {
  57.274 +        // retain it so that we can use it at the beginning of the next GC
  57.275 +        _retained_gc_alloc_regions[ap] = r;
  57.276        }
  57.277      }
  57.278 -    // set_gc_alloc_region will also NULLify all aliases to the region
  57.279 -    set_gc_alloc_region(ap, NULL);
  57.280 -    _gc_alloc_region_counts[ap] = 0;
  57.281 -  }
  57.282 -}
  57.283 +  }
  57.284 +}
  57.285 +
  57.286 +#ifndef PRODUCT
  57.287 +// Useful for debugging
  57.288 +
  57.289 +void G1CollectedHeap::print_gc_alloc_regions() {
  57.290 +  gclog_or_tty->print_cr("GC alloc regions");
  57.291 +  for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
  57.292 +    HeapRegion* r = _gc_alloc_regions[ap];
  57.293 +    if (r == NULL) {
  57.294 +      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT, ap, NULL);
  57.295 +    } else {
  57.296 +      gclog_or_tty->print_cr("  %2d : "PTR_FORMAT" "SIZE_FORMAT,
  57.297 +                             ap, r->bottom(), r->used());
  57.298 +    }
  57.299 +  }
  57.300 +}
  57.301 +#endif // PRODUCT
  57.302  
  57.303  void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) {
  57.304    _drain_in_progress = false;
  57.305 @@ -3658,7 +3764,9 @@
  57.306    CardTableModRefBS* ctbs()                      { return _ct_bs; }
  57.307  
  57.308    void immediate_rs_update(HeapRegion* from, oop* p, int tid) {
  57.309 -    _g1_rem->par_write_ref(from, p, tid);
  57.310 +    if (!from->is_survivor()) {
  57.311 +      _g1_rem->par_write_ref(from, p, tid);
  57.312 +    }
  57.313    }
  57.314  
  57.315    void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
    58.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Mar 20 22:08:48 2009 -0400
    58.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Mar 23 10:42:20 2009 -0400
    58.3 @@ -172,7 +172,6 @@
    58.4      NumAPIs = HeapRegion::MaxAge
    58.5    };
    58.6  
    58.7 -
    58.8    // The one and only G1CollectedHeap, so static functions can find it.
    58.9    static G1CollectedHeap* _g1h;
   58.10  
   58.11 @@ -217,11 +216,20 @@
   58.12  
   58.13    // Postcondition: cur_alloc_region == NULL.
   58.14    void abandon_cur_alloc_region();
   58.15 +  void abandon_gc_alloc_regions();
   58.16  
   58.17    // The to-space memory regions into which objects are being copied during
   58.18    // a GC.
   58.19    HeapRegion* _gc_alloc_regions[GCAllocPurposeCount];
   58.20    size_t _gc_alloc_region_counts[GCAllocPurposeCount];
   58.21 +  // These are the regions, one per GCAllocPurpose, that are half-full
   58.22 +  // at the end of a collection and that we want to reuse during the
   58.23 +  // next collection.
   58.24 +  HeapRegion* _retained_gc_alloc_regions[GCAllocPurposeCount];
   58.25 +  // This specifies whether we will keep the last half-full region at
   58.26 +  // the end of a collection so that it can be reused during the next
   58.27 +  // collection (this is specified per GCAllocPurpose)
   58.28 +  bool _retain_gc_alloc_region[GCAllocPurposeCount];
   58.29  
   58.30    // A list of the regions that have been set to be alloc regions in the
   58.31    // current collection.
   58.32 @@ -589,8 +597,21 @@
   58.33  
   58.34    // Ensure that the relevant gc_alloc regions are set.
   58.35    void get_gc_alloc_regions();
   58.36 -  // We're done with GC alloc regions; release them, as appropriate.
   58.37 -  void release_gc_alloc_regions();
   58.38 +  // We're done with GC alloc regions. We are going to tear down the
   58.39 +  // gc alloc list and remove the gc alloc tag from all the regions on
   58.40 +  // that list. However, we will also retain the last (i.e., the one
   58.41 +  // that is half-full) GC alloc region, per GCAllocPurpose, for
   58.42 +  // possible reuse during the next collection, provided
   58.43 +  // _retain_gc_alloc_region[] indicates that it should be the
   58.44 +  // case. Said regions are kept in the _retained_gc_alloc_regions[]
   58.45 +  // array. If the parameter totally is set, we will not retain any
   58.46 +  // regions, irrespective of what _retain_gc_alloc_region[]
   58.47 +  // indicates.
   58.48 +  void release_gc_alloc_regions(bool totally);
   58.49 +#ifndef PRODUCT
   58.50 +  // Useful for debugging.
   58.51 +  void print_gc_alloc_regions();
   58.52 +#endif // !PRODUCT
   58.53  
   58.54    // ("Weak") Reference processing support
   58.55    ReferenceProcessor* _ref_processor;
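Taken together with the g1CollectedHeap.cpp changes above, the retention protocol is small enough to model in a few lines. A toy sketch (assumed names, not G1 code): at the end of a pause each purpose's region is untagged and then either freed (empty), retained (half-full, retention enabled for that purpose, and not a "total" release), or simply dropped.

    #include <cstddef>

    enum Purpose { ForSurvived = 0, ForTenured = 1, PurposeCount = 2 };

    struct Region { size_t used; };    // toy stand-in for HeapRegion

    struct GCAllocRegions {
      Region* current [PurposeCount];
      Region* retained[PurposeCount];
      bool    retain  [PurposeCount];  // true only for ForTenured here

      void release(bool totally) {
        for (int ap = 0; ap < PurposeCount; ++ap) {
          Region* r    = current[ap];
          current [ap] = NULL;         // nothing stays current between GCs
          retained[ap] = NULL;
          if (r == NULL) continue;
          if (r->used == 0) {
            // empty: would go back on the free list
          } else if (retain[ap] && !totally) {
            retained[ap] = r;          // reused by the next get_gc_alloc_regions()
          }
        }
      }
    };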
    59.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Mar 20 22:08:48 2009 -0400
    59.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Mar 23 10:42:20 2009 -0400
    59.3 @@ -1087,6 +1087,7 @@
    59.4  
    59.5    assert(_g1->used_regions() == _g1->recalculate_used_regions(),
    59.6           "sanity");
    59.7 +  assert(_g1->used() == _g1->recalculate_used(), "sanity");
    59.8  
    59.9    double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0;
   59.10    _all_stop_world_times_ms->add(s_w_t_ms);
    60.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Mar 20 22:08:48 2009 -0400
    60.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Mar 23 10:42:20 2009 -0400
    60.3 @@ -502,14 +502,17 @@
    60.4    }
    60.5  
    60.6    if (ParallelGCThreads > 0) {
    60.7 -    // This is a temporary change to serialize the update and scanning
    60.8 -    // of remembered sets. There are some race conditions when this is
    60.9 -    // done in parallel and they are causing failures. When we resolve
   60.10 -    // said race conditions, we'll revert back to parallel remembered
   60.11 -    // set updating and scanning. See CRs 6677707 and 6677708.
   60.12 -    if (worker_i == 0) {
   60.13 +    // The two flags below were introduced temporarily to serialize
   60.14 +    // the updating and scanning of remembered sets. There are some
   60.15 +    // race conditions when these two operations are done in parallel
   60.16 +    // and they are causing failures. When we resolve said race
    60.17 +    // conditions, we'll revert to parallel remembered set
   60.18 +    // updating and scanning. See CRs 6677707 and 6677708.
   60.19 +    if (G1EnableParallelRSetUpdating || (worker_i == 0)) {
   60.20        updateRS(worker_i);
   60.21        scanNewRefsRS(oc, worker_i);
   60.22 +    }
   60.23 +    if (G1EnableParallelRSetScanning || (worker_i == 0)) {
   60.24        scanRS(oc, worker_i);
   60.25      }
   60.26    } else {
   60.27 @@ -716,8 +719,7 @@
   60.28    bool doHeapRegion(HeapRegion* r) {
   60.29      if (!r->in_collection_set() &&
   60.30          !r->continuesHumongous() &&
   60.31 -        !r->is_young() &&
   60.32 -        !r->is_survivor()) {
   60.33 +        !r->is_young()) {
   60.34        _update_rs_oop_cl.set_from(r);
   60.35        UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
   60.36  
   60.37 @@ -854,7 +856,7 @@
   60.38    // before all the cards on the region are dirtied. This is unlikely,
   60.39    // and it doesn't happen often, but it can happen. So, the extra
   60.40    // check below filters out those cards.
   60.41 -  if (r->is_young() || r->is_survivor()) {
   60.42 +  if (r->is_young()) {
   60.43      return;
   60.44    }
   60.45    // While we are processing RSet buffers during the collection, we
   60.46 @@ -1025,7 +1027,9 @@
   60.47    }
   60.48  }
   60.49  void HRInto_G1RemSet::prepare_for_verify() {
   60.50 -  if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) {
   60.51 +  if (G1HRRSFlushLogBuffersOnVerify &&
    60.52 +      (VerifyBeforeGC || VerifyAfterGC) &&
    60.53 +      !_g1->full_collection()) {
   60.54      cleanupHRRS();
   60.55      _g1->set_refine_cte_cl_concurrency(false);
   60.56      if (SafepointSynchronize::is_at_safepoint()) {
   60.57 @@ -1036,5 +1040,7 @@
   60.58      _cg1r->set_use_cache(false);
   60.59      updateRS(0);
   60.60      _cg1r->set_use_cache(cg1r_use_cache);
   60.61 +
   60.62 +    assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   60.63    }
   60.64  }
    61.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Mar 20 22:08:48 2009 -0400
    61.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Mar 23 10:42:20 2009 -0400
    61.3 @@ -295,6 +295,14 @@
    61.4                                                                              \
    61.5    product(uintx, G1FixedSurvivorSpaceSize, 0,                               \
    61.6            "If non-0 is the size of the G1 survivor space, "                 \
    61.7 -          "otherwise SurvivorRatio is used to determine the size")
    61.8 +          "otherwise SurvivorRatio is used to determine the size")          \
    61.9 +                                                                            \
   61.10 +  experimental(bool, G1EnableParallelRSetUpdating, false,                   \
   61.11 +          "Enables the parallelization of remembered set updating "         \
   61.12 +          "during evacuation pauses")                                       \
   61.13 +                                                                            \
   61.14 +  experimental(bool, G1EnableParallelRSetScanning, false,                   \
   61.15 +          "Enables the parallelization of remembered set scanning "         \
   61.16 +          "during evacuation pauses")
   61.17  
   61.18  G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
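Both new flags are declared with experimental(), and both default to false, so worker 0 keeps doing the serialized work unless they are turned on explicitly. Assuming the usual gating of experimental options (and remembering that G1 itself still sits behind the same gate at this point), enabling them would presumably look like:

    java -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC \
         -XX:+G1EnableParallelRSetUpdating -XX:+G1EnableParallelRSetScanning ...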
    62.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Mar 20 22:08:48 2009 -0400
    62.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Mar 23 10:42:20 2009 -0400
    62.3 @@ -508,7 +508,7 @@
    62.4    typedef PosParPRT* PosParPRTPtr;
    62.5    if (_max_fine_entries == 0) {
    62.6      assert(_mod_max_fine_entries_mask == 0, "Both or none.");
    62.7 -    _max_fine_entries = (1 << G1LogRSRegionEntries);
    62.8 +    _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
    62.9      _mod_max_fine_entries_mask = _max_fine_entries - 1;
   62.10  #if SAMPLE_FOR_EVICTION
   62.11      assert(_fine_eviction_sample_size == 0
    63.1 --- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Fri Mar 20 22:08:48 2009 -0400
    63.2 +++ b/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Mon Mar 23 10:42:20 2009 -0400
    63.3 @@ -63,9 +63,8 @@
    63.4    // return NULL.
    63.5    HeapWord* allocate(size_t word_sz) {
    63.6      HeapWord* res = _top;
    63.7 -    HeapWord* new_top = _top + word_sz;
    63.8 -    if (new_top <= _end) {
    63.9 -      _top = new_top;
   63.10 +    if (pointer_delta(_end, _top) >= word_sz) {
   63.11 +      _top = _top + word_sz;
   63.12        return res;
   63.13      } else {
   63.14        return NULL;
   63.15 @@ -75,10 +74,9 @@
   63.16    // Undo the last allocation in the buffer, which is required to be of the
   63.17    // "obj" of the given "word_sz".
   63.18    void undo_allocation(HeapWord* obj, size_t word_sz) {
   63.19 -    assert(_top - word_sz >= _bottom
   63.20 -           && _top - word_sz == obj,
   63.21 -           "Bad undo_allocation");
   63.22 -    _top = _top - word_sz;
   63.23 +    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
   63.24 +    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
   63.25 +    _top = obj;
   63.26    }
   63.27  
   63.28    // The total (word) size of the buffer, including both allocated and
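The rewrite above is about overflow safety: computing _top + word_sz can wrap past the top of the address space, making the old new_top <= _end comparison succeed spuriously, whereas the unsigned distance from _top to _end cannot wrap for a well-formed buffer. A minimal illustration with toy stand-ins for HeapWord and pointer_delta (not the HotSpot definitions):

    #include <cstddef>

    typedef unsigned long HeapWord;   // toy stand-in

    // Unsigned distance in words; well-defined whenever left >= right.
    static size_t ptr_delta(const HeapWord* left, const HeapWord* right) {
      return (size_t)(left - right);
    }

    static HeapWord* allocate(HeapWord*& top, HeapWord* end, size_t word_sz) {
      HeapWord* res = top;
      if (ptr_delta(end, top) >= word_sz) {  // no 'top + word_sz' to overflow
        top += word_sz;
        return res;
      }
      return NULL;
    }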
    64.1 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri Mar 20 22:08:48 2009 -0400
    64.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Mar 23 10:42:20 2009 -0400
    64.3 @@ -104,12 +104,38 @@
    64.4                    og_min_size, og_max_size,
    64.5                    yg_min_size, yg_max_size);
    64.6  
    64.7 +  const size_t total_reserved = pg_max_size + og_max_size + yg_max_size;
    64.8 +  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
    64.9 +
   64.10    // The main part of the heap (old gen + young gen) can often use a larger page
   64.11    // size than is needed or wanted for the perm gen.  Use the "compound
   64.12    // alignment" ReservedSpace ctor to avoid having to use the same page size for
   64.13    // all gens.
   64.14 +
   64.15    ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size,
   64.16 -                            og_align);
   64.17 +                            og_align, addr);
   64.18 +
   64.19 +  if (UseCompressedOops) {
   64.20 +    if (addr != NULL && !heap_rs.is_reserved()) {
   64.21 +      // Failed to reserve at specified address - the requested memory
   64.22 +      // region is taken already, for example, by 'java' launcher.
    64.23 +      // Try again to reserve the heap higher.
   64.24 +      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   64.25 +      ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size,
   64.26 +                                 og_align, addr);
   64.27 +      if (addr != NULL && !heap_rs0.is_reserved()) {
   64.28 +        // Failed to reserve at specified address again - give up.
   64.29 +        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   64.30 +        assert(addr == NULL, "");
   64.31 +        ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size,
   64.32 +                                   og_align, addr);
   64.33 +        heap_rs = heap_rs1;
   64.34 +      } else {
   64.35 +        heap_rs = heap_rs0;
   64.36 +      }
   64.37 +    }
   64.38 +  }
   64.39 +
   64.40    os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
   64.41                         heap_rs.base(), pg_max_size);
   64.42    os::trace_page_sizes("ps main", og_min_size + yg_min_size,
    65.1 --- a/src/share/vm/includeDB_core	Fri Mar 20 22:08:48 2009 -0400
    65.2 +++ b/src/share/vm/includeDB_core	Mon Mar 23 10:42:20 2009 -0400
    65.3 @@ -4598,6 +4598,7 @@
    65.4  vm_version_<arch>.hpp                   globals_extension.hpp
    65.5  vm_version_<arch>.hpp                   vm_version.hpp
    65.6  
    65.7 +vm_version_<os_arch>.cpp                os.hpp
    65.8  vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
    65.9  
   65.10  vmreg.cpp                               assembler.hpp
    66.1 --- a/src/share/vm/memory/blockOffsetTable.hpp	Fri Mar 20 22:08:48 2009 -0400
    66.2 +++ b/src/share/vm/memory/blockOffsetTable.hpp	Mon Mar 23 10:42:20 2009 -0400
    66.3 @@ -235,7 +235,7 @@
    66.4    };
    66.5  
    66.6    static size_t power_to_cards_back(uint i) {
    66.7 -    return 1 << (LogBase * i);
    66.8 +    return (size_t)(1 << (LogBase * i));
    66.9    }
   66.10    static size_t power_to_words_back(uint i) {
   66.11      return power_to_cards_back(i) * N_words;
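Note that this cast, like the heapRegionRemSet.cpp one above, converts only the result of the shift; the shift itself is still evaluated in 32-bit int, which is fine while LogBase * i stays below 31 but would need the operand widened first for larger exponents. A widened-first variant, for comparison (illustrative, not the committed code):

    #include <cstddef>

    static size_t power_to_cards_back_wide(unsigned i, unsigned log_base) {
      // Widen before shifting: defined for exponents up to 8*sizeof(size_t)-1.
      return (size_t)1 << (log_base * i);
    }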
    67.1 --- a/src/share/vm/memory/genCollectedHeap.cpp	Fri Mar 20 22:08:48 2009 -0400
    67.2 +++ b/src/share/vm/memory/genCollectedHeap.cpp	Mon Mar 23 10:42:20 2009 -0400
    67.3 @@ -218,6 +218,31 @@
    67.4      heap_address -= total_reserved;
    67.5    } else {
    67.6      heap_address = NULL;  // any address will do.
    67.7 +    if (UseCompressedOops) {
    67.8 +      heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
    67.9 +      *_total_reserved = total_reserved;
   67.10 +      *_n_covered_regions = n_covered_regions;
   67.11 +      *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   67.12 +                                   UseLargePages, heap_address);
   67.13 +
   67.14 +      if (heap_address != NULL && !heap_rs->is_reserved()) {
   67.15 +        // Failed to reserve at specified address - the requested memory
   67.16 +        // region is taken already, for example, by 'java' launcher.
    67.17 +        // Try again to reserve the heap higher.
   67.18 +        heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   67.19 +        *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   67.20 +                                     UseLargePages, heap_address);
   67.21 +
   67.22 +        if (heap_address != NULL && !heap_rs->is_reserved()) {
   67.23 +          // Failed to reserve at specified address again - give up.
   67.24 +          heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   67.25 +          assert(heap_address == NULL, "");
   67.26 +          *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   67.27 +                                       UseLargePages, heap_address);
   67.28 +        }
   67.29 +      }
   67.30 +      return heap_address;
   67.31 +    }
   67.32    }
   67.33  
   67.34    *_total_reserved = total_reserved;
    68.1 --- a/src/share/vm/memory/universe.cpp	Fri Mar 20 22:08:48 2009 -0400
    68.2 +++ b/src/share/vm/memory/universe.cpp	Mon Mar 23 10:42:20 2009 -0400
    68.3 @@ -99,7 +99,8 @@
    68.4  size_t          Universe::_heap_used_at_last_gc = 0;
    68.5  
    68.6  CollectedHeap*  Universe::_collectedHeap = NULL;
    68.7 -address         Universe::_heap_base = NULL;
    68.8 +
    68.9 +NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true };
   68.10  
   68.11  
   68.12  void Universe::basic_type_classes_do(void f(klassOop)) {
   68.13 @@ -729,6 +730,53 @@
   68.14    return JNI_OK;
   68.15  }
   68.16  
   68.17 +// Choose the heap base address and oop encoding mode
   68.18 +// when compressed oops are used:
    68.19 +// Unscaled  - Use 32-bit oops without encoding when
   68.20 +//     NarrowOopHeapBaseMin + heap_size < 4Gb
   68.21 +// ZeroBased - Use zero based compressed oops with encoding when
   68.22 +//     NarrowOopHeapBaseMin + heap_size < 32Gb
   68.23 +// HeapBased - Use compressed oops with heap base + encoding.
   68.24 +
   68.25 +// 4Gb
   68.26 +static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
   68.27 +// 32Gb
   68.28 +static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
   68.29 +
   68.30 +char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
   68.31 +#ifdef _LP64
   68.32 +  if (UseCompressedOops) {
   68.33 +    assert(mode == UnscaledNarrowOop  ||
   68.34 +           mode == ZeroBasedNarrowOop ||
   68.35 +           mode == HeapBasedNarrowOop, "mode is invalid");
   68.36 +
   68.37 +    const size_t total_size = heap_size + HeapBaseMinAddress;
   68.38 +    if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) {
   68.39 +      if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) &&
   68.40 +          (Universe::narrow_oop_shift() == 0)) {
    68.41 +        // Use 32-bit oops without encoding and
   68.42 +        // place heap's top on the 4Gb boundary
   68.43 +        return (char*)(NarrowOopHeapMax - heap_size);
   68.44 +      } else {
   68.45 +        // Can't reserve with NarrowOopShift == 0
   68.46 +        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   68.47 +        if (mode == UnscaledNarrowOop ||
    68.48 +            (mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax)) {
   68.49 +          // Use zero based compressed oops with encoding and
   68.50 +          // place heap's top on the 32Gb boundary in case
   68.51 +          // total_size > 4Gb or failed to reserve below 4Gb.
   68.52 +          return (char*)(OopEncodingHeapMax - heap_size);
   68.53 +        }
   68.54 +      }
   68.55 +    } else {
   68.56 +      // Can't reserve below 32Gb.
   68.57 +      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   68.58 +    }
   68.59 +  }
   68.60 +#endif
   68.61 +  return NULL; // also return NULL (don't care) for 32-bit VM
   68.62 +}
   68.63 +
   68.64  jint Universe::initialize_heap() {
   68.65  
   68.66    if (UseParallelGC) {
   68.67 @@ -773,6 +821,8 @@
   68.68    if (status != JNI_OK) {
   68.69      return status;
   68.70    }
   68.71 +
   68.72 +#ifdef _LP64
   68.73    if (UseCompressedOops) {
   68.74      // Subtract a page because something can get allocated at heap base.
   68.75      // This also makes implicit null checking work, because the
   68.76 @@ -780,8 +830,49 @@
   68.77      // See needs_explicit_null_check.
   68.78      // Only set the heap base for compressed oops because it indicates
   68.79      // compressed oops for pstack code.
   68.80 -    Universe::_heap_base = Universe::heap()->base() - os::vm_page_size();
   68.81 +    if (PrintCompressedOopsMode) {
   68.82 +      tty->cr();
   68.83 +      tty->print("heap address: "PTR_FORMAT, Universe::heap()->base());
   68.84 +    }
   68.85 +    if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) {
   68.86 +      // Can't reserve heap below 32Gb.
   68.87 +      Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size());
   68.88 +      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   68.89 +      if (PrintCompressedOopsMode) {
   68.90 +        tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base());
   68.91 +      }
   68.92 +    } else {
   68.93 +      Universe::set_narrow_oop_base(0);
   68.94 +      if (PrintCompressedOopsMode) {
   68.95 +        tty->print(", zero based Compressed Oops");
   68.96 +      }
   68.97 +#ifdef _WIN64
   68.98 +      if (!Universe::narrow_oop_use_implicit_null_checks()) {
   68.99 +        // Don't need guard page for implicit checks in indexed addressing
  68.100 +        // mode with zero based Compressed Oops.
  68.101 +        Universe::set_narrow_oop_use_implicit_null_checks(true);
  68.102 +      }
  68.103 +#endif //  _WIN64
   68.104 +      if ((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) {
  68.105 +        // Can't reserve heap below 4Gb.
  68.106 +        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
  68.107 +      } else {
  68.108 +        assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop");
  68.109 +        if (PrintCompressedOopsMode) {
  68.110 +          tty->print(", 32-bits Oops");
  68.111 +        }
  68.112 +      }
  68.113 +    }
  68.114 +    if (PrintCompressedOopsMode) {
  68.115 +      tty->cr();
  68.116 +      tty->cr();
  68.117 +    }
  68.118    }
  68.119 +  assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) ||
  68.120 +         Universe::narrow_oop_base() == NULL, "invalid value");
  68.121 +  assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
  68.122 +         Universe::narrow_oop_shift() == 0, "invalid value");
  68.123 +#endif
  68.124  
  68.125    // We will never reach the CATCH below since Exceptions::_throw will cause
  68.126    // the VM to exit if an exception is thrown during initialization
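The placement policy in preferred_heap_base() boils down to comparing heap_size + HeapBaseMinAddress against two limits. A self-contained toy version (assuming a 2Gb HeapBaseMinAddress, the common default, and 8-byte object alignment, i.e. a shift of 3) that reports which boundary the heap top gets pushed against:

    #include <cstdint>
    #include <cstdio>

    static const uint64_t kNarrowOopHeapMax   = (uint64_t)1 << 32;       //  4Gb
    static const uint64_t kOopEncodingHeapMax = kNarrowOopHeapMax << 3;  // 32Gb

    // Returns the preferred heap *top*, or 0 when any placement will do.
    static uint64_t preferred_top(uint64_t heap_size, uint64_t min_addr) {
      const uint64_t total = heap_size + min_addr;
      if (total <= kNarrowOopHeapMax)   return kNarrowOopHeapMax;    // unscaled
      if (total <= kOopEncodingHeapMax) return kOopEncodingHeapMax;  // zero based
      return 0;                                                      // heap based
    }

    int main() {
      const uint64_t min_addr = (uint64_t)2 << 30;  // 2Gb
      // 2Gb heap: fits under 4Gb, so 32-bit oops with no shift and no base.
      printf("%#llx\n", (unsigned long long)preferred_top((uint64_t)2  << 30, min_addr));
      // 20Gb heap: fits under 32Gb, so zero-based oops with a shift of 3.
      printf("%#llx\n", (unsigned long long)preferred_top((uint64_t)20 << 30, min_addr));
      return 0;
    }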
    69.1 --- a/src/share/vm/memory/universe.hpp	Fri Mar 20 22:08:48 2009 -0400
    69.2 +++ b/src/share/vm/memory/universe.hpp	Mon Mar 23 10:42:20 2009 -0400
    69.3 @@ -90,6 +90,19 @@
    69.4    methodOop get_methodOop();
    69.5  };
    69.6  
    69.7 +// For UseCompressedOops.
    69.8 +struct NarrowOopStruct {
    69.9 +  // Base address for oop-within-java-object materialization.
   69.10 +  // NULL if using wide oops or zero based narrow oops.
   69.11 +  address _base;
   69.12 +  // Number of shift bits for encoding/decoding narrow oops.
   69.13 +  // 0 if using wide oops or zero based unscaled narrow oops,
   69.14 +  // LogMinObjAlignmentInBytes otherwise.
   69.15 +  int     _shift;
   69.16 +  // Generate code with implicit null checks for narrow oops.
   69.17 +  bool    _use_implicit_null_checks;
   69.18 +};
   69.19 +
   69.20  
   69.21  class Universe: AllStatic {
   69.22    // Ugh.  Universe is much too friendly.
   69.23 @@ -181,9 +194,9 @@
   69.24  
   69.25    // The particular choice of collected heap.
   69.26    static CollectedHeap* _collectedHeap;
   69.27 -  // Base address for oop-within-java-object materialization.
   69.28 -  // NULL if using wide oops.  Doubles as heap oop null value.
   69.29 -  static address        _heap_base;
   69.30 +
   69.31 +  // For UseCompressedOops.
   69.32 +  static struct NarrowOopStruct _narrow_oop;
   69.33  
   69.34    // array of dummy objects used with +FullGCAlot
   69.35    debug_only(static objArrayOop _fullgc_alot_dummy_array;)
   69.36 @@ -328,8 +341,25 @@
   69.37    static CollectedHeap* heap() { return _collectedHeap; }
   69.38  
   69.39    // For UseCompressedOops
   69.40 -  static address heap_base()       { return _heap_base; }
   69.41 -  static address* heap_base_addr() { return &_heap_base; }
   69.42 +  static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
   69.43 +  static address  narrow_oop_base()                   { return  _narrow_oop._base; }
   69.44 +  static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
   69.45 +  static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
   69.46 +  static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
   69.47 +  static bool     narrow_oop_use_implicit_null_checks()             { return  _narrow_oop._use_implicit_null_checks; }
   69.48 +  static void     set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; }
   69.49 +  // Narrow Oop encoding mode:
    69.50 +  // 0 - Use 32-bit oops without encoding when
   69.51 +  //     NarrowOopHeapBaseMin + heap_size < 4Gb
   69.52 +  // 1 - Use zero based compressed oops with encoding when
   69.53 +  //     NarrowOopHeapBaseMin + heap_size < 32Gb
   69.54 +  // 2 - Use compressed oops with heap base + encoding.
   69.55 +  enum NARROW_OOP_MODE {
   69.56 +    UnscaledNarrowOop  = 0,
   69.57 +    ZeroBasedNarrowOop = 1,
   69.58 +    HeapBasedNarrowOop = 2
   69.59 +  };
   69.60 +  static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode);
   69.61  
   69.62    // Historic gc information
   69.63    static size_t get_heap_capacity_at_last_gc()         { return _heap_capacity_at_last_gc; }
    70.1 --- a/src/share/vm/oops/oop.inline.hpp	Fri Mar 20 22:08:48 2009 -0400
    70.2 +++ b/src/share/vm/oops/oop.inline.hpp	Mon Mar 23 10:42:20 2009 -0400
    70.3 @@ -148,10 +148,11 @@
    70.4  
    70.5  inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
    70.6    assert(!is_null(v), "oop value can never be zero");
    70.7 -  address heap_base = Universe::heap_base();
    70.8 -  uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1));
    70.9 +  address base = Universe::narrow_oop_base();
   70.10 +  int    shift = Universe::narrow_oop_shift();
   70.11 +  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
   70.12    assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
   70.13 -  uint64_t result = pd >> LogMinObjAlignmentInBytes;
   70.14 +  uint64_t result = pd >> shift;
   70.15    assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
   70.16    return (narrowOop)result;
   70.17  }
   70.18 @@ -162,8 +163,9 @@
   70.19  
   70.20  inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
   70.21    assert(!is_null(v), "narrow oop value can never be zero");
   70.22 -  address heap_base = Universe::heap_base();
   70.23 -  return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes));
   70.24 +  address base = Universe::narrow_oop_base();
   70.25 +  int    shift = Universe::narrow_oop_shift();
   70.26 +  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
   70.27  }
   70.28  
   70.29  inline oop oopDesc::decode_heap_oop(narrowOop v) {
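Stripped of the oop types, the encode/decode pair above is just base-relative scaled arithmetic; a compact standalone restatement (plain integers, illustrative only):

    #include <cassert>
    #include <cstdint>

    static uintptr_t g_base;   // 0 for zero-based and unscaled modes
    static int       g_shift;  // 0 for unscaled, log2(alignment) otherwise

    static uint32_t encode(uintptr_t p) {            // p != 0
      uint64_t pd = (uint64_t)(p - g_base);
      uint64_t result = pd >> g_shift;
      assert((result >> 32) == 0 && "narrow oop overflow");
      return (uint32_t)result;
    }

    static uintptr_t decode(uint32_t v) {            // v != 0
      return g_base + ((uintptr_t)v << g_shift);
    }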
    71.1 --- a/src/share/vm/opto/addnode.cpp	Fri Mar 20 22:08:48 2009 -0400
    71.2 +++ b/src/share/vm/opto/addnode.cpp	Mon Mar 23 10:42:20 2009 -0400
    71.3 @@ -756,7 +756,13 @@
    71.4        if ( eti == NULL ) {
    71.5          // there must be one pointer among the operands
    71.6          guarantee(tptr == NULL, "must be only one pointer operand");
    71.7 -        tptr = et->isa_oopptr();
    71.8 +        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
     71.9 +          // a 32-bit narrow oop can be the base of address expressions
   71.10 +          tptr = et->make_ptr()->isa_oopptr();
   71.11 +        } else {
   71.12 +          // only regular oops are expected here
   71.13 +          tptr = et->isa_oopptr();
   71.14 +        }
   71.15          guarantee(tptr != NULL, "non-int operand must be pointer");
   71.16          if (tptr->higher_equal(tp->add_offset(tptr->offset())))
   71.17            tp = tptr; // Set more precise type for bailout
    72.1 --- a/src/share/vm/opto/classes.hpp	Fri Mar 20 22:08:48 2009 -0400
    72.2 +++ b/src/share/vm/opto/classes.hpp	Mon Mar 23 10:42:20 2009 -0400
    72.3 @@ -184,6 +184,8 @@
    72.4  macro(Parm)
    72.5  macro(PartialSubtypeCheck)
    72.6  macro(Phi)
    72.7 +macro(PopCountI)
    72.8 +macro(PopCountL)
    72.9  macro(PowD)
   72.10  macro(PrefetchRead)
   72.11  macro(PrefetchWrite)
    73.1 --- a/src/share/vm/opto/compile.cpp	Fri Mar 20 22:08:48 2009 -0400
    73.2 +++ b/src/share/vm/opto/compile.cpp	Mon Mar 23 10:42:20 2009 -0400
    73.3 @@ -2081,7 +2081,7 @@
    73.4  
    73.5  #ifdef _LP64
    73.6    case Op_CastPP:
    73.7 -    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
    73.8 +    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
    73.9        Compile* C = Compile::current();
   73.10        Node* in1 = n->in(1);
   73.11        const Type* t = n->bottom_type();
   73.12 @@ -2136,7 +2136,7 @@
   73.13          new_in2 = in2->in(1);
   73.14        } else if (in2->Opcode() == Op_ConP) {
   73.15          const Type* t = in2->bottom_type();
   73.16 -        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
   73.17 +        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
   73.18            new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
   73.19            //
   73.20            // This transformation together with CastPP transformation above
    74.1 --- a/src/share/vm/opto/connode.cpp	Fri Mar 20 22:08:48 2009 -0400
    74.2 +++ b/src/share/vm/opto/connode.cpp	Mon Mar 23 10:42:20 2009 -0400
    74.3 @@ -433,7 +433,7 @@
    74.4  // If not converting int->oop, throw away cast after constant propagation
    74.5  Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
    74.6    const Type *t = ccp->type(in(1));
    74.7 -  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
    74.8 +  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
    74.9      return NULL; // do not transform raw pointers or narrow oops
   74.10    }
   74.11    return ConstraintCastNode::Ideal_DU_postCCP(ccp);
    75.1 --- a/src/share/vm/opto/connode.hpp	Fri Mar 20 22:08:48 2009 -0400
    75.2 +++ b/src/share/vm/opto/connode.hpp	Mon Mar 23 10:42:20 2009 -0400
    75.3 @@ -1,5 +1,5 @@
    75.4  /*
    75.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    75.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    75.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    75.8   *
    75.9   * This code is free software; you can redistribute it and/or modify it
   75.10 @@ -635,3 +635,23 @@
   75.11    virtual uint ideal_reg() const { return Op_RegL; }
   75.12    virtual const Type* Value( PhaseTransform *phase ) const;
   75.13  };
   75.14 +
   75.15 +//---------- PopCountINode -----------------------------------------------------
   75.16 +// Population count (bit count) of an integer.
   75.17 +class PopCountINode : public Node {
   75.18 +public:
   75.19 +  PopCountINode(Node* in1) : Node(0, in1) {}
   75.20 +  virtual int Opcode() const;
   75.21 +  const Type* bottom_type() const { return TypeInt::INT; }
   75.22 +  virtual uint ideal_reg() const { return Op_RegI; }
   75.23 +};
   75.24 +
   75.25 +//---------- PopCountLNode -----------------------------------------------------
   75.26 +// Population count (bit count) of a long.
   75.27 +class PopCountLNode : public Node {
   75.28 +public:
   75.29 +  PopCountLNode(Node* in1) : Node(0, in1) {}
   75.30 +  virtual int Opcode() const;
   75.31 +  const Type* bottom_type() const { return TypeInt::INT; }
   75.32 +  virtual uint ideal_reg() const { return Op_RegI; }
   75.33 +};
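For reference, what PopCountI/PopCountL compute (the plain Integer.bitCount/Long.bitCount semantics, used when UsePopCountInstruction matches them to a hardware popcount) is the classic SWAR bit count; a self-contained C++ rendering, not HotSpot code:

    #include <cstdint>

    static int pop_count_long(uint64_t x) {          // PopCountL semantics
      x = x - ((x >> 1) & 0x5555555555555555ull);
      x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull);
      x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0full;
      return (int)((x * 0x0101010101010101ull) >> 56);
    }

    static int pop_count_int(uint32_t x) {           // PopCountI semantics
      return pop_count_long(x);                      // zero-extend and reuse
    }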
    76.1 --- a/src/share/vm/opto/graphKit.cpp	Fri Mar 20 22:08:48 2009 -0400
    76.2 +++ b/src/share/vm/opto/graphKit.cpp	Mon Mar 23 10:42:20 2009 -0400
    76.3 @@ -2277,7 +2277,7 @@
    76.4    r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
    76.5    set_control(                _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
    76.6  
    76.7 -  // Check for self.  Very rare to get here, but its taken 1/3 the time.
    76.8 +  // Check for self.  Very rare to get here, but it is taken 1/3 the time.
    76.9    // No performance impact (too rare) but allows sharing of secondary arrays
   76.10    // which has some footprint reduction.
   76.11    Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
   76.12 @@ -2286,11 +2286,27 @@
   76.13    r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
   76.14    set_control(               _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
   76.15  
   76.16 +  // -- Roads not taken here: --
   76.17 +  // We could also have chosen to perform the self-check at the beginning
   76.18 +  // of this code sequence, as the assembler does.  This would not pay off
   76.19 +  // the same way, since the optimizer, unlike the assembler, can perform
   76.20 +  // static type analysis to fold away many successful self-checks.
   76.21 +  // Non-foldable self checks work better here in second position, because
   76.22 +  // the initial primary superclass check subsumes a self-check for most
   76.23 +  // types.  An exception would be a secondary type like array-of-interface,
   76.24 +  // which does not appear in its own primary supertype display.
   76.25 +  // Finally, we could have chosen to move the self-check into the
   76.26 +  // PartialSubtypeCheckNode, and from there out-of-line in a platform
   76.27 +  // dependent manner.  But it is worthwhile to have the check here,
    76.28 +  // where it can perhaps be optimized.  The cost in code space is
   76.29 +  // small (register compare, branch).
   76.30 +
   76.31    // Now do a linear scan of the secondary super-klass array.  Again, no real
   76.32    // performance impact (too rare) but it's gotta be done.
   76.33 -  // (The stub also contains the self-check of subklass == superklass.
   76.34    // Since the code is rarely used, there is no penalty for moving it
   76.35 -  // out of line, and it can only improve I-cache density.)
   76.36 +  // out of line, and it can only improve I-cache density.
   76.37 +  // The decision to inline or out-of-line this final check is platform
   76.38 +  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
   76.39    Node* psc = _gvn.transform(
   76.40      new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
   76.41  
    77.1 --- a/src/share/vm/opto/lcm.cpp	Fri Mar 20 22:08:48 2009 -0400
    77.2 +++ b/src/share/vm/opto/lcm.cpp	Mon Mar 23 10:42:20 2009 -0400
    77.3 @@ -158,7 +158,14 @@
    77.4            continue;             // Give up if offset is beyond page size
     77.5          // cannot reason about it; probably not an implicit null exception
    77.6        } else {
    77.7 -        const TypePtr* tptr = base->bottom_type()->is_ptr();
    77.8 +        const TypePtr* tptr;
    77.9 +        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
    77.10 +          // a 32-bit narrow oop can be the base of address expressions
   77.11 +          tptr = base->bottom_type()->make_ptr();
   77.12 +        } else {
   77.13 +          // only regular oops are expected here
   77.14 +          tptr = base->bottom_type()->is_ptr();
   77.15 +        }
   77.16          // Give up if offset is not a compile-time constant
   77.17          if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
   77.18            continue;
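
For context on the special case above: compressed oops decode as base + (narrow << shift). A minimal Java sketch of that arithmetic, using the narrowOopBase/narrowOopShift names this changeset introduces:

    // Decode a compressed (narrow) oop into a raw address.  When the shift
    // is 0, the 32-bit narrow value enters address arithmetic unscaled, so a
    // narrow oop can legitimately appear as the base of an address
    // expression; that is the case the code above must now accept.
    static long decodeNarrowOop(int narrow, long narrowOopBase, int narrowOopShift) {
        return narrowOopBase + ((narrow & 0xFFFFFFFFL) << narrowOopShift);
    }
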
    78.1 --- a/src/share/vm/opto/library_call.cpp	Fri Mar 20 22:08:48 2009 -0400
    78.2 +++ b/src/share/vm/opto/library_call.cpp	Mon Mar 23 10:42:20 2009 -0400
    78.3 @@ -1,5 +1,5 @@
    78.4  /*
    78.5 - * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
    78.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
    78.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    78.8   *
    78.9   * This code is free software; you can redistribute it and/or modify it
   78.10 @@ -221,6 +221,7 @@
   78.11    bool inline_unsafe_CAS(BasicType type);
   78.12    bool inline_unsafe_ordered_store(BasicType type);
   78.13    bool inline_fp_conversions(vmIntrinsics::ID id);
   78.14 +  bool inline_bitCount(vmIntrinsics::ID id);
   78.15    bool inline_reverseBytes(vmIntrinsics::ID id);
   78.16  };
   78.17  
   78.18 @@ -314,6 +315,11 @@
   78.19      if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
   78.20      break;
   78.21  
   78.22 +  case vmIntrinsics::_bitCount_i:
   78.23 +  case vmIntrinsics::_bitCount_l:
   78.24 +    if (!UsePopCountInstruction)  return NULL;
   78.25 +    break;
   78.26 +
   78.27   default:
   78.28      break;
   78.29    }
   78.30 @@ -617,6 +623,10 @@
   78.31    case vmIntrinsics::_longBitsToDouble:
   78.32      return inline_fp_conversions(intrinsic_id());
   78.33  
   78.34 +  case vmIntrinsics::_bitCount_i:
   78.35 +  case vmIntrinsics::_bitCount_l:
   78.36 +    return inline_bitCount(intrinsic_id());
   78.37 +
   78.38    case vmIntrinsics::_reverseBytes_i:
   78.39    case vmIntrinsics::_reverseBytes_l:
   78.40      return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
   78.41 @@ -1714,6 +1724,27 @@
   78.42    }
   78.43  }
   78.44  
   78.45 +//----------------------------inline_bitCount_int/long-----------------------
   78.46 +// inline int Integer.bitCount(int)
   78.47 +// inline int Long.bitCount(long)
   78.48 +bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
   78.49 +  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
   78.50 +  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
   78.51 +  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
   78.52 +  _sp += arg_size();  // restore stack pointer
   78.53 +  switch (id) {
   78.54 +  case vmIntrinsics::_bitCount_i:
   78.55 +    push(_gvn.transform(new (C, 2) PopCountINode(pop())));
   78.56 +    break;
   78.57 +  case vmIntrinsics::_bitCount_l:
   78.58 +    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
   78.59 +    break;
   78.60 +  default:
   78.61 +    ShouldNotReachHere();
   78.62 +  }
   78.63 +  return true;
   78.64 +}
   78.65 +
   78.66  //----------------------------inline_reverseBytes_int/long-------------------
   78.67  // inline Integer.reverseBytes(int)
   78.68  // inline Long.reverseBytes(long)
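
Nothing changes at the Java call sites: when -XX:+UsePopCountInstruction is on and the matcher has a rule for the node, C2 rewrites the calls during compilation. A minimal usage sketch:

    // These ordinary library calls become PopCountI / PopCountL nodes when
    // the intrinsic above kicks in; otherwise the Java implementations run.
    int intBits  = Integer.bitCount(0x12345678);
    int longBits = Long.bitCount(0x12345678abcdefL);
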
    79.1 --- a/src/share/vm/opto/matcher.cpp	Fri Mar 20 22:08:48 2009 -0400
    79.2 +++ b/src/share/vm/opto/matcher.cpp	Mon Mar 23 10:42:20 2009 -0400
    79.3 @@ -1481,8 +1481,13 @@
    79.4        const Type* mach_at = mach->adr_type();
     79.5        // A DecodeN node consumed by an address may have a different type
     79.6        // than its input. Don't compare types in that case.
    79.7 -      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
    79.8 -          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
    79.9 +      if (m->adr_type() != mach_at &&
   79.10 +          (m->in(MemNode::Address)->is_DecodeN() ||
   79.11 +           m->in(MemNode::Address)->is_AddP() &&
   79.12 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
   79.13 +           m->in(MemNode::Address)->is_AddP() &&
   79.14 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
   79.15 +           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
   79.16          mach_at = m->adr_type();
   79.17        }
   79.18        if (m->adr_type() != mach_at) {
    80.1 --- a/src/share/vm/runtime/arguments.cpp	Fri Mar 20 22:08:48 2009 -0400
    80.2 +++ b/src/share/vm/runtime/arguments.cpp	Mon Mar 23 10:42:20 2009 -0400
    80.3 @@ -1211,7 +1211,9 @@
    80.4      if (UseLargePages && UseCompressedOops) {
     80.5        // Cannot allocate guard pages for implicit checks in indexed addressing
     80.6        // mode when large pages are specified on Windows.
    80.7 -      FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false);
     80.8 +      // This flag could be switched ON if the narrow oop base address is set to 0,
    80.9 +      // see code in Universe::initialize_heap().
   80.10 +      Universe::set_narrow_oop_use_implicit_null_checks(false);
   80.11      }
   80.12  #endif //  _WIN64
   80.13    } else {
    81.1 --- a/src/share/vm/runtime/globals.hpp	Fri Mar 20 22:08:48 2009 -0400
    81.2 +++ b/src/share/vm/runtime/globals.hpp	Mon Mar 23 10:42:20 2009 -0400
    81.3 @@ -303,11 +303,14 @@
    81.4              "Use 32-bit object references in 64-bit VM. "                   \
     81.5              "lp64_product means flag is always constant in 32-bit VM")      \
    81.6                                                                              \
    81.7 -  lp64_product(bool, CheckCompressedOops, trueInDebug,                      \
    81.8 -            "generate checks in encoding/decoding code")                    \
    81.9 -                                                                            \
   81.10 -  product(bool, UseImplicitNullCheckForNarrowOop, true,                     \
   81.11 -            "generate implicit null check in indexed addressing mode.")     \
   81.12 +  notproduct(bool, CheckCompressedOops, true,                               \
   81.13 +            "generate checks in encoding/decoding code in debug VM")        \
   81.14 +                                                                            \
   81.15 +  product_pd(uintx, HeapBaseMinAddress,                                     \
    81.16 +            "OS-specific low limit for heap base address")                  \
   81.17 +                                                                            \
   81.18 +  diagnostic(bool, PrintCompressedOopsMode, false,                          \
   81.19 +            "Print compressed oops base address and encoding mode")         \
   81.20                                                                              \
   81.21    /* UseMembar is theoretically a temp flag used for memory barrier         \
   81.22     * removal testing.  It was supposed to be removed before FCS but has     \
   81.23 @@ -2169,6 +2172,9 @@
   81.24    diagnostic(bool, PrintIntrinsics, false,                                  \
   81.25            "prints attempted and successful inlining of intrinsics")         \
   81.26                                                                              \
   81.27 +  product(bool, UsePopCountInstruction, false,                              \
   81.28 +          "Use population count instruction")                               \
   81.29 +                                                                            \
   81.30    diagnostic(ccstrlist, DisableIntrinsic, "",                               \
   81.31            "do not expand intrinsics whose (internal) names appear here")    \
   81.32                                                                              \
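
Taken together: -XX:+UsePopCountInstruction opts into the new bitCount intrinsic, -XX:HeapBaseMinAddress=<address> (with a platform-defined default) bounds where the compressed-oop heap may be placed, and -XX:+PrintCompressedOopsMode reports the chosen base address and encoding mode; being a diagnostic flag, the latter also requires -XX:+UnlockDiagnosticVMOptions.
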
    82.1 --- a/src/share/vm/runtime/os.hpp	Fri Mar 20 22:08:48 2009 -0400
    82.2 +++ b/src/share/vm/runtime/os.hpp	Mon Mar 23 10:42:20 2009 -0400
    82.3 @@ -243,7 +243,7 @@
    82.4  
    82.5    static char*  non_memory_address_word();
    82.6    // reserve, commit and pin the entire memory region
    82.7 -  static char*  reserve_memory_special(size_t size);
    82.8 +  static char*  reserve_memory_special(size_t size, char* addr = NULL);
    82.9    static bool   release_memory_special(char* addr, size_t bytes);
   82.10    static bool   large_page_init();
   82.11    static size_t large_page_size();
    83.1 --- a/src/share/vm/runtime/virtualspace.cpp	Fri Mar 20 22:08:48 2009 -0400
    83.2 +++ b/src/share/vm/runtime/virtualspace.cpp	Mon Mar 23 10:42:20 2009 -0400
    83.3 @@ -109,6 +109,7 @@
    83.4                               const size_t prefix_align,
    83.5                               const size_t suffix_size,
    83.6                               const size_t suffix_align,
    83.7 +                             char* requested_address,
    83.8                               const size_t noaccess_prefix)
    83.9  {
   83.10    assert(prefix_size != 0, "sanity");
   83.11 @@ -131,7 +132,7 @@
   83.12    const bool try_reserve_special = UseLargePages &&
   83.13      prefix_align == os::large_page_size();
   83.14    if (!os::can_commit_large_page_memory() && try_reserve_special) {
   83.15 -    initialize(size, prefix_align, true, NULL, noaccess_prefix);
   83.16 +    initialize(size, prefix_align, true, requested_address, noaccess_prefix);
   83.17      return;
   83.18    }
   83.19  
   83.20 @@ -146,7 +147,13 @@
   83.21           noaccess_prefix == prefix_align, "noaccess prefix wrong");
   83.22  
   83.23    // Optimistically try to reserve the exact size needed.
   83.24 -  char* addr = os::reserve_memory(size, NULL, prefix_align);
   83.25 +  char* addr;
   83.26 +  if (requested_address != 0) {
   83.27 +    addr = os::attempt_reserve_memory_at(size,
   83.28 +                                         requested_address-noaccess_prefix);
   83.29 +  } else {
   83.30 +    addr = os::reserve_memory(size, NULL, prefix_align);
   83.31 +  }
   83.32    if (addr == NULL) return;
   83.33  
   83.34    // Check whether the result has the needed alignment (unlikely unless
   83.35 @@ -206,12 +213,8 @@
   83.36    char* base = NULL;
   83.37  
   83.38    if (special) {
   83.39 -    // It's not hard to implement reserve_memory_special() such that it can
   83.40 -    // allocate at fixed address, but there seems no use of this feature
   83.41 -    // for now, so it's not implemented.
   83.42 -    assert(requested_address == NULL, "not implemented");
   83.43  
   83.44 -    base = os::reserve_memory_special(size);
   83.45 +    base = os::reserve_memory_special(size, requested_address);
   83.46  
   83.47      if (base != NULL) {
   83.48        // Check alignment constraints
   83.49 @@ -372,7 +375,8 @@
   83.50                                       bool large, char* requested_address) :
   83.51    ReservedSpace(size, alignment, large,
   83.52                  requested_address,
   83.53 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
   83.54 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
   83.55 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
   83.56                    lcm(os::vm_page_size(), alignment) : 0) {
   83.57    // Only reserved space for the java heap should have a noaccess_prefix
   83.58    // if using compressed oops.
   83.59 @@ -382,9 +386,12 @@
   83.60  ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
   83.61                                       const size_t prefix_align,
   83.62                                       const size_t suffix_size,
   83.63 -                                     const size_t suffix_align) :
   83.64 +                                     const size_t suffix_align,
   83.65 +                                     char* requested_address) :
   83.66    ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
   83.67 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
   83.68 +                requested_address,
   83.69 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
   83.70 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
   83.71                    lcm(os::vm_page_size(), prefix_align) : 0) {
   83.72    protect_noaccess_prefix(prefix_size+suffix_size);
   83.73  }
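
Threading requested_address through these constructors is what lets the VM ask for a specific heap base; as the arguments.cpp comment above notes, when the narrow oop base lands at address 0 the decode needs no add, and implicit null checks can be switched back on.
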
    84.1 --- a/src/share/vm/runtime/virtualspace.hpp	Fri Mar 20 22:08:48 2009 -0400
    84.2 +++ b/src/share/vm/runtime/virtualspace.hpp	Mon Mar 23 10:42:20 2009 -0400
    84.3 @@ -73,7 +73,8 @@
    84.4                  const size_t noaccess_prefix = 0);
    84.5    ReservedSpace(const size_t prefix_size, const size_t prefix_align,
    84.6                  const size_t suffix_size, const size_t suffix_align,
    84.7 -                const size_t noaccess_prefix);
    84.8 +                char* requested_address,
    84.9 +                const size_t noaccess_prefix = 0);
   84.10  
   84.11    // Accessors
   84.12    char*  base()      const { return _base;      }
   84.13 @@ -121,7 +122,8 @@
   84.14    ReservedHeapSpace(size_t size, size_t forced_base_alignment,
   84.15                      bool large, char* requested_address);
   84.16    ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
   84.17 -                    const size_t suffix_size, const size_t suffix_align);
   84.18 +                    const size_t suffix_size, const size_t suffix_align,
   84.19 +                    char* requested_address);
   84.20  };
   84.21  
    84.22  // VirtualSpace is a data structure for committing a previously reserved address range in smaller chunks.
    85.1 --- a/src/share/vm/runtime/vmStructs.cpp	Fri Mar 20 22:08:48 2009 -0400
    85.2 +++ b/src/share/vm/runtime/vmStructs.cpp	Mon Mar 23 10:42:20 2009 -0400
    85.3 @@ -263,7 +263,9 @@
    85.4       static_field(Universe,                    _bootstrapping,                                bool)                                  \
    85.5       static_field(Universe,                    _fully_initialized,                            bool)                                  \
    85.6       static_field(Universe,                    _verify_count,                                 int)                                   \
    85.7 -     static_field(Universe,                    _heap_base,                                    address)                                   \
    85.8 +     static_field(Universe,                    _narrow_oop._base,                             address)                               \
    85.9 +     static_field(Universe,                    _narrow_oop._shift,                            int)                                   \
   85.10 +     static_field(Universe,                    _narrow_oop._use_implicit_null_checks,         bool)                                  \
   85.11                                                                                                                                       \
   85.12    /**********************************************************************************/                                               \
   85.13    /* Generation and Space hierarchies                                               */                                               \
    86.1 --- a/src/share/vm/runtime/vm_version.cpp	Fri Mar 20 22:08:48 2009 -0400
    86.2 +++ b/src/share/vm/runtime/vm_version.cpp	Mon Mar 23 10:42:20 2009 -0400
    86.3 @@ -163,9 +163,11 @@
    86.4        #elif _MSC_VER == 1200
    86.5          #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
    86.6        #elif _MSC_VER == 1310
    86.7 -        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1"
    86.8 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
    86.9        #elif _MSC_VER == 1400
   86.10 -        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0"
   86.11 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
   86.12 +      #elif _MSC_VER == 1500
   86.13 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
   86.14        #else
   86.15          #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
   86.16        #endif
    87.1 --- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Fri Mar 20 22:08:48 2009 -0400
    87.2 +++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Mon Mar 23 10:42:20 2009 -0400
    87.3 @@ -162,7 +162,7 @@
    87.4  }
    87.5  
    87.6  // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
    87.7 -#if _MSC_VER >= 1400 && !defined(_WIN64)
    87.8 +#if _MSC_VER >= 1400
    87.9  #define open _open
   87.10  #define close _close
   87.11  #define read  _read
    88.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    88.2 +++ b/test/compiler/6378821/Test6378821.java	Mon Mar 23 10:42:20 2009 -0400
    88.3 @@ -0,0 +1,75 @@
    88.4 +/*
    88.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
    88.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    88.7 + *
    88.8 + * This code is free software; you can redistribute it and/or modify it
    88.9 + * under the terms of the GNU General Public License version 2 only, as
   88.10 + * published by the Free Software Foundation.
   88.11 + *
   88.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   88.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   88.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   88.15 + * version 2 for more details (a copy is included in the LICENSE file that
   88.16 + * accompanied this code).
   88.17 + *
   88.18 + * You should have received a copy of the GNU General Public License version
   88.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   88.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   88.21 + *
   88.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   88.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
   88.24 + * have any questions.
   88.25 + */
   88.26 +
   88.27 +/**
   88.28 + * @test
   88.29 + * @bug 6378821
    88.30 + * @summary where available, bitCount() should use POPC on SPARC processors and AMD family 10h processors
   88.31 + *
   88.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
   88.33 + */
   88.34 +
   88.35 +public class Test6378821 {
   88.36 +    static final int[]  ia = new int[]  { 0x12345678 };
   88.37 +    static final long[] la = new long[] { 0x12345678abcdefL };
   88.38 +
   88.39 +    public static void main(String [] args) {
   88.40 +        // Resolve the class and the method.
   88.41 +        Integer.bitCount(1);
   88.42 +        Long.bitCount(1);
   88.43 +
   88.44 +        sub(ia[0]);
   88.45 +        sub(la[0]);
   88.46 +        sub(ia);
   88.47 +        sub(la);
   88.48 +    }
   88.49 +
   88.50 +    static void check(int i, int expected, int result) {
   88.51 +        if (result != expected) {
   88.52 +            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
   88.53 +        }
   88.54 +    }
   88.55 +
   88.56 +    static void check(long l, int expected, int result) {
   88.57 +        if (result != expected) {
   88.58 +            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
   88.59 +        }
   88.60 +    }
   88.61 +
   88.62 +    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }
   88.63 +    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
   88.64 +    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
   88.65 +    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
   88.66 +
   88.67 +    static int fint (int i)     { return Integer.bitCount(i); }
   88.68 +    static int fcomp(int i)     { return Integer.bitCount(i); }
   88.69 +
   88.70 +    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
   88.71 +    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
   88.72 +
   88.73 +    static int fint (long l)    { return Long.bitCount(l); }
   88.74 +    static int fcomp(long l)    { return Long.bitCount(l); }
   88.75 +
   88.76 +    static int fint (long[] la) { return Long.bitCount(la[0]); }
   88.77 +    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
   88.78 +}
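
Per the @run tag, only Test6378821.fcomp is compiled (-Xcomp with -XX:CompileOnly=Test6378821.fcomp), so each compiled fcomp result is checked against its interpreted fint twin; on hardware with a population-count instruction, adding -XX:+UsePopCountInstruction to the command line exercises the new intrinsic path.
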
