Mon, 23 Mar 2009 10:42:20 -0400
Merge
agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java
src/share/vm/classfile/vmSymbols.hpp
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Mon Mar 23 10:42:20 2009 -0400
@@ -118,9 +118,9 @@
   public long getJIntSize();
   public long getJLongSize();
   public long getJShortSize();
-  public long getHeapBase();
   public long getHeapOopSize();
-  public long getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase();
+  public int  getNarrowOopShift();

   public ReadResult readBytesFromProcess(long address, long numBytes)
     throws DebuggerException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Mon Mar 23 10:42:20 2009 -0400
@@ -56,8 +56,8 @@
   // heap data.
   protected long oopSize;
   protected long heapOopSize;
-  protected long heapBase;                  // heap base for compressed oops.
-  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
+  protected long narrowOopBase;  // heap base for compressed oops.
+  protected int  narrowOopShift; // shift to decode compressed oops.
   // Should be initialized if desired by calling initCache()
   private PageCache cache;

@@ -159,10 +159,10 @@
     javaPrimitiveTypesConfigured = true;
   }

-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
-    this.heapBase = heapBase;
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
     this.heapOopSize = heapOopSize;
-    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
+    this.narrowOopBase = narrowOopBase;
+    this.narrowOopShift = narrowOopShift;
   }

   /** May be called by subclasses if desired to initialize the page
@@ -459,7 +459,7 @@
     long value = readCInteger(address, getHeapOopSize(), true);
     if (value != 0) {
       // See oop.inline.hpp decode_heap_oop
-      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
+      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
     }
     return value;
   }
@@ -545,10 +545,10 @@
     return heapOopSize;
   }

-  public long getHeapBase() {
-    return heapBase;
+  public long getNarrowOopBase() {
+    return narrowOopBase;
   }
-  public long getLogMinObjAlignmentInBytes() {
-    return logMinObjAlignmentInBytes;
+  public int getNarrowOopShift() {
+    return narrowOopShift;
   }
 }
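For orientation, the decode step the SA now performs (see readCompOopAddressValue above) follows oop.inline.hpp decode_heap_oop: a narrow oop is widened by narrowOopShift and then rebased on narrowOopBase. A minimal standalone Java sketch; the base and shift values below are illustrative stand-ins, not values read from a real VM:

    public class NarrowOopDecodeDemo {
        // Hypothetical heap constants for a VM with a non-zero narrow oop base.
        static final long NARROW_OOP_BASE  = 0x0000000700000000L;
        static final int  NARROW_OOP_SHIFT = 3; // log2 of object alignment

        // Mirrors DebuggerBase.readCompOopAddressValue: zero stays zero,
        // everything else is shifted and then rebased.
        static long decode(long narrowOop) {
            if (narrowOop == 0) return 0;
            return NARROW_OOP_BASE + (narrowOop << NARROW_OOP_SHIFT);
        }

        public static void main(String[] args) {
            long narrow = 0x12345678L;
            System.out.printf("narrow 0x%x -> oop 0x%x%n", narrow, decode(narrow));
        }
    }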
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Mon Mar 23 10:42:20 2009 -0400
@@ -42,5 +42,5 @@
                                              long jintSize,
                                              long jlongSize,
                                              long jshortSize);
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Mon Mar 23 10:42:20 2009 -0400
@@ -65,9 +65,10 @@
   public long getJIntSize() throws RemoteException;
   public long getJLongSize() throws RemoteException;
   public long getJShortSize() throws RemoteException;
-  public long getHeapBase() throws RemoteException;
   public long getHeapOopSize() throws RemoteException;
-  public long getLogMinObjAlignmentInBytes() throws RemoteException;
+  public long getNarrowOopBase() throws RemoteException;
+  public int  getNarrowOopShift() throws RemoteException;
+
   public boolean areThreadsEqual(long addrOrId1, boolean isAddress1,
                                  long addrOrId2, boolean isAddress2) throws RemoteException;
   public int getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Mon Mar 23 10:42:20 2009 -0400
@@ -85,9 +85,9 @@
       jlongSize = remoteDebugger.getJLongSize();
       jshortSize = remoteDebugger.getJShortSize();
       javaPrimitiveTypesConfigured = true;
-      heapBase = remoteDebugger.getHeapBase();
+      narrowOopBase = remoteDebugger.getNarrowOopBase();
+      narrowOopShift = remoteDebugger.getNarrowOopShift();
       heapOopSize = remoteDebugger.getHeapOopSize();
-      logMinObjAlignmentInBytes = remoteDebugger.getLogMinObjAlignmentInBytes();
     }
     catch (RemoteException e) {
       throw new DebuggerException(e);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Mon Mar 23 10:42:20 2009 -0400
@@ -114,17 +114,18 @@
     return debugger.getJShortSize();
   }

-  public long getHeapBase() throws RemoteException {
-    return debugger.getHeapBase();
-  }
-
   public long getHeapOopSize() throws RemoteException {
     return debugger.getHeapOopSize();
   }

-  public long getLogMinObjAlignmentInBytes() throws RemoteException {
-    return debugger.getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase() throws RemoteException {
+    return debugger.getNarrowOopBase();
   }
+
+  public int getNarrowOopShift() throws RemoteException {
+    return debugger.getNarrowOopShift();
+  }
+
   public boolean areThreadsEqual(long addrOrId1, boolean isAddress1,
                                  long addrOrId2, boolean isAddress2) throws RemoteException {
     ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Mon Mar 23 10:42:20 2009 -0400
@@ -53,7 +53,8 @@
   // system obj array klass object
   private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;

-  private static AddressField heapBaseField;
+  private static AddressField narrowOopBaseField;
+  private static CIntegerField narrowOopShiftField;

   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -86,7 +87,8 @@

     systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");

-    heapBaseField = type.getAddressField("_heap_base");
+    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
+    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
   }

   public Universe() {
@@ -100,14 +102,18 @@
     }
   }

-  public static long getHeapBase() {
-    if (heapBaseField.getValue() == null) {
+  public static long getNarrowOopBase() {
+    if (narrowOopBaseField.getValue() == null) {
       return 0;
     } else {
-      return heapBaseField.getValue().minus(null);
+      return narrowOopBaseField.getValue().minus(null);
     }
   }

+  public static int getNarrowOopShift() {
+    return (int)narrowOopShiftField.getValue();
+  }
+
   /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
   public boolean isIn(Address p) {
     return heap().isIn(p);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Mar 20 22:08:48 2009 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Mon Mar 23 10:42:20 2009 -0400
@@ -342,13 +342,12 @@
       throw new RuntimeException("Attempt to initialize VM twice");
     }
     soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
-
+    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
+                          Universe.getNarrowOopShift());
     for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
       ((Observer) iter.next()).update(null, null);
     }

-    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
-                          soleInstance.logMinObjAlignmentInBytes);
   }

   /** This is used by the debugging system */
--- a/make/jprt.properties	Fri Mar 20 22:08:48 2009 -0400
+++ b/make/jprt.properties	Mon Mar 23 10:42:20 2009 -0400
@@ -19,12 +19,12 @@
 # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 # CA 95054 USA or visit www.sun.com if you need additional information or
 # have any questions.
-# 
+#
 #

 # Properties for jprt

-# All build result bundles are full jdks, so the 64bit testing does not 
+# All build result bundles are full jdks, so the 64bit testing does not
 # need the 32bit sibling bundle installed.
 # Note: If the hotspot/make/Makefile changed to only bundle the 64bit files
 # when bundling 64bit, and stripped out the 64bit files from any 32bit
@@ -89,60 +89,52 @@
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_2, \
-    ${jprt.my.solaris.sparc}-product-{c1|c2}-runThese_Xcomp_3, \
     ${jprt.my.solaris.sparc}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_default_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_CMS_2, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_default, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_SerialGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_CMS, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_2, \
-    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-scimark_3
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_G1, \
+    ${jprt.my.solaris.sparc}-{product|fastdebug}-{c1|c2}-jbb_ParOldGC

 jprt.my.solaris.sparcv9.test.targets= \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jvm98, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark, \
     ${jprt.my.solaris.sparcv9}-product-c2-runThese, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_2, \
-    ${jprt.my.solaris.sparcv9}-product-c2-runThese_Xcomp_3, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_default_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_CMS_2, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_CMS, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_G1, \
+    ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-jbb_ParOldGC

 jprt.my.solaris.x64.test.targets= \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jvm98, \
@@ -154,73 +146,80 @@
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_default_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_SerialGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC_2, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_CMS_2, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_SerialGC, \
     ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
-    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-jbb_CMS
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GCOld_ParOldGC

 jprt.my.solaris.i586.test.targets= \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.solaris.i586}-product-{c1|c2}-runThese_Xcomp, \
-    ${jprt.my.solaris.i586}-product-c2-runThese_Xcomp_2, \
-    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp_2, \
+    ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xcomp, \
     ${jprt.my.solaris.i586}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_default, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_SerialGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_ParallelGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_ParNewGC, \
     ${jprt.my.solaris.i586}-product-c1-GCBasher_CMS, \
+    ${jprt.my.solaris.i586}-product-c1-GCBasher_G1, \
+    ${jprt.my.solaris.i586}-product-c1-GCBasher_ParOldGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_default, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_SerialGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParallelGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParNewGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_CMS, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_G1, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-GCBasher_ParOldGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_default, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_SerialGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_ParallelGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_ParNewGC, \
     ${jprt.my.solaris.i586}-product-c1-GCOld_CMS, \
+    ${jprt.my.solaris.i586}-product-c1-GCOld_G1, \
+    ${jprt.my.solaris.i586}-product-c1-GCOld_ParOldGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_default, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParallelGC, \
     ${jprt.my.solaris.i586}-fastdebug-c2-jbb_CMS, \
-    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_2, \
-    ${jprt.my.solaris.i586}-{product|fastdebug}-{c1|c2}-scimark_3
+    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_G1, \
+    ${jprt.my.solaris.i586}-fastdebug-c2-jbb_ParOldGC

 jprt.my.linux.i586.test.targets = \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-scimark, \
     ${jprt.my.linux.i586}-product-c1-runThese_Xcomp, \
-    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_2, \
-    ${jprt.my.linux.i586}-product-c1-runThese_Xcomp_3, \
     ${jprt.my.linux.i586}-fastdebug-c1-runThese_Xshare, \
     ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp, \
-    ${jprt.my.linux.i586}-fastdebug-c2-runThese_Xcomp_2, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_default, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_SerialGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_default, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_G1, \
+    ${jprt.my.linux.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_default, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParallelGC, \
     ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_CMS, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.linux.i586}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_G1, \
+    ${jprt.my.linux.i586}-{product|fastdebug}-c1-jbb_ParOldGC

 jprt.my.linux.x64.test.targets = \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jvm98, \
@@ -230,15 +229,19 @@
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParallelGC, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.linux.x64}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_G1, \
+    ${jprt.my.linux.x64}-{product|fastdebug}-c2-jbb_ParOldGC

 jprt.my.windows.i586.test.targets = \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jvm98, \
@@ -251,16 +254,20 @@
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParallelGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParNewGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_CMS, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_G1, \
+    ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-GCBasher_ParOldGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_default, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_SerialGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParallelGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParNewGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_CMS, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_G1, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-GCOld_ParOldGC, \
     ${jprt.my.windows.i586}-{product|fastdebug}-{c1|c2}-jbb_default, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParallelGC, \
     ${jprt.my.windows.i586}-product-{c1|c2}-jbb_CMS, \
-    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_2, \
-    ${jprt.my.windows.i586}-product-{c1|c2}-scimark_3
+    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_G1, \
+    ${jprt.my.windows.i586}-product-{c1|c2}-jbb_ParOldGC

 jprt.my.windows.x64.test.targets = \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jvm98, \
@@ -272,16 +279,20 @@
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_CMS, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_G1, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_default, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParallelGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParNewGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_CMS, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_G1, \
+    ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_ParOldGC, \
     ${jprt.my.windows.x64}-{product|fastdebug}-c2-jbb_default, \
     ${jprt.my.windows.x64}-product-c2-jbb_CMS, \
     ${jprt.my.windows.x64}-product-c2-jbb_ParallelGC, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_2, \
-    ${jprt.my.windows.x64}-{product|fastdebug}-c2-scimark_3
+    ${jprt.my.windows.x64}-product-c2-jbb_G1, \
+    ${jprt.my.windows.x64}-product-c2-jbb_ParOldGC

 # The complete list of test targets for jprt
--- a/make/windows/get_msc_ver.sh	Fri Mar 20 22:08:48 2009 -0400
+++ b/make/windows/get_msc_ver.sh	Mon Mar 23 10:42:20 2009 -0400
@@ -29,6 +29,7 @@
 # cl version 13.10.3077 returns "MSC_VER=1310"
 # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
 # cl version 14.00.40310.41 returns "MSC_VER=1400"
+# cl version 15.00.21022.8 returns "MSC_VER=1500"

 # Note that we currently do not have a way to set HotSpotMksHome in
 # the batch build, but so far this has not seemed to be a problem. The
--- a/make/windows/makefiles/compile.make	Fri Mar 20 22:08:48 2009 -0400
+++ b/make/windows/makefiles/compile.make	Mon Mar 23 10:42:20 2009 -0400
@@ -170,11 +170,9 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif

 !if "$(COMPILER_NAME)" == "VS2008"
 PRODUCT_OPT_OPTION = /O2 /Oy-
@@ -185,11 +183,9 @@
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif

 # Compile for space above time.
 !if "$(Variant)" == "kernel"
--- a/make/windows/makefiles/sa.make	Fri Mar 20 22:08:48 2009 -0400
+++ b/make/windows/makefiles/sa.make	Mon Mar 23 10:42:20 2009 -0400
@@ -89,9 +89,11 @@
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 !elseif "$(BUILDARCH)" == "amd64"
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+!if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line,
 # otherwise we get missing __security_check_cookie externals at link time.
 SA_LINK_FLAGS = bufferoverflowU.lib
+!endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif
--- a/make/windows/makefiles/sanity.make	Fri Mar 20 22:08:48 2009 -0400
+++ b/make/windows/makefiles/sanity.make	Mon Mar 23 10:42:20 2009 -0400
@@ -27,9 +27,9 @@
 all: checkCL checkLink

 checkCL:
-	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
+	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)). Use FORCE_MSC_VER to override automatic detection.

 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
+	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
 	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)). Use FORCE_LINK_VER to override automatic detection.
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
@@ -2767,6 +2767,268 @@
 }


+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Label L_failure, L_pop_to_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                &L_success, &L_failure, NULL);
+  Register sub_2 = sub_klass;
+  Register sup_2 = super_klass;
+  if (!sub_2->is_global())  sub_2 = L0;
+  if (!sup_2->is_global())  sup_2 = L1;
+
+  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);
+
+  // on success:
+  restore();
+  ba(false, L_success);
+  delayed()->nop();
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                                   RegisterConstant super_check_offset,
+                                                   Register instanceof_hack) {
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+
+  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // Support for the instanceof hack, which uses delay slots to
+  // set a destination register to zero or one.
+  bool do_bool_sets = (instanceof_hack != noreg);
+#define BOOL_SET(bool_value)                            \
+  if (do_bool_sets && bool_value >= 0)                  \
+    set(bool_value, instanceof_hack)
+#define DELAYED_BOOL_SET(bool_value)                    \
+  if (do_bool_sets && bool_value >= 0)                  \
+    delayed()->set(bool_value, instanceof_hack);        \
+  else delayed()->nop()
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label, bool_value)                   \
+  if (&(label) == &L_fallthrough) {                     \
+    BOOL_SET(bool_value);                               \
+  } else {                                              \
+    ba((do_bool_sets && bool_value >= 0), label);       \
+    DELAYED_BOOL_SET(bool_value);                       \
+  }
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+  DELAYED_BOOL_SET(1);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterConstant(temp2_reg);
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+    delayed(); if (do_bool_sets)  BOOL_SET(1);
+    // if !do_bool_sets, sneak the next cmp into the delay slot:
+    cmp(super_check_offset.as_register(), sc_offset);
+
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+      BOOL_SET(0);  // fallthrough on failure
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success, 1);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+      BOOL_SET(0);
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_success, 1);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef BOOL_SET
+#undef DELAYED_BOOL_SET
+#undef FINAL_JUMP
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,
+                                                   Register scan_temp,
+                                                   Register scratch_reg,
+                                                   Register coop_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  // Compress superclass if necessary.
+  Register search_key = super_klass;
+  bool decode_super_klass = false;
+  if (UseCompressedOops) {
+    if (coop_reg != noreg) {
+      encode_heap_oop_not_null(super_klass, coop_reg);
+      search_key = coop_reg;
+    } else {
+      encode_heap_oop_not_null(super_klass);
+      decode_super_klass = true;  // scarce temps!
+    }
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+  }
+
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);
+  delayed()->add(scan_temp, heapOopSize, scan_temp);
+  assert(heapOopSize != 0, "heapOopSize should be initialized");
+
+  // Skip the array header in all array accesses.
+  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  if (UseCompressedOops) {
+    // Don't use load_heap_oop; we don't want to decode the element.
+    lduw(scan_temp, elem_offset, scratch_reg);
+  } else {
+    ld_ptr(scan_temp, elem_offset, scratch_reg);
+  }
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp);  // decrement trip counter in delay slot
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+  if (decode_super_klass)  decode_heap_oop(super_klass);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(false, *L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
+
+
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4316,7 +4578,13 @@

 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+    return;
+  }
   Label done;
   if (src == dst) {
     // optimize for frequent case src == dst
@@ -4338,26 +4606,39 @@

 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(r);
-  sub(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    sub(r, G6_heapbase, r);
   srlx(r, LogMinObjAlignmentInBytes, r);
 }

 void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
-  sub(src, G6_heapbase, dst);
-  srlx(dst, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
+    sub(src, G6_heapbase, dst);
+    srlx(dst, LogMinObjAlignmentInBytes, dst);
+  }
 }

 // Same algorithm as oops.inline.hpp decode_heap_oop.
 void MacroAssembler::decode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
-  Label done;
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  bpr(rc_nz, true, Assembler::pt, dst, done);
-  delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
-  bind(done);
+  if (Universe::narrow_oop_base() != NULL) {
+    Label done;
+    bpr(rc_nz, true, Assembler::pt, dst, done);
+    delayed() -> add(dst, G6_heapbase, dst); // annulled if not taken
+    bind(done);
+  }
   verify_oop(dst);
 }

@@ -4366,8 +4647,11 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(r, LogMinObjAlignmentInBytes, r);
-  add(r, G6_heapbase, r);
+  if (Universe::narrow_oop_base() != NULL)
+    add(r, G6_heapbase, r);
 }

 void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
@@ -4375,14 +4659,17 @@
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
-  add(dst, G6_heapbase, dst);
+  if (Universe::narrow_oop_base() != NULL)
+    add(dst, G6_heapbase, dst);
 }

 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
     // call indirectly to solve generation ordering problem
-    Address base(G6_heapbase, (address)Universe::heap_base_addr());
+    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
     load_ptr_contents(base, G6_heapbase);
   }
 }
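The common thread in the encode/decode changes above: with a zero narrow oop base (Universe::narrow_oop_base() == NULL), compression degenerates to a plain shift, so the sub/add of G6_heapbase is emitted only when the base is non-zero. A hedged Java model of the two modes follows; the class and field names are inventions for illustration, not VM API:

    public class CompressedOopModel {
        final long base;  // 0 models the zero-based case (narrow_oop_base == NULL)
        final int shift;  // models LogMinObjAlignmentInBytes

        CompressedOopModel(long base, int shift) { this.base = base; this.shift = shift; }

        // encode_heap_oop_not_null: sub(r, G6_heapbase, r) is skipped when base == 0
        long encodeNotNull(long oop) {
            long biased = (base == 0) ? oop : oop - base;
            return biased >>> shift;                       // srlx
        }

        // decode_heap_oop_not_null: add(dst, G6_heapbase, dst) is skipped when base == 0
        long decodeNotNull(long narrow) {
            long widened = narrow << shift;                // sllx
            return (base == 0) ? widened : widened + base;
        }
    }

In the zero-based mode the null check in decode_heap_oop also disappears, since shifting zero yields zero and there is no base to add back.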
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Mar 20 22:08:48 2009 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Mar 23 10:42:20 2009 -0400
@@ -2327,6 +2327,46 @@
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);

+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg and temp2_reg.
+  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                RegisterConstant super_check_offset = RegisterConstant(-1),
+                Register instanceof_hack = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg can be noreg, if no temps are available.
+  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Register temp3_reg,
+                                     Register temp4_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Register temp2_reg,
+                           Label& L_success);
+
+
   // Stack overflow checking

   // Note: this clobbers G3_scratch
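To make the fast-path/slow-path contract concrete, here is a hedged Java rendering of the check the two routines implement together. The field names (primaryDisplay, secondarySupers, superCheckSlot) are illustrative stand-ins for the Klass fields; the real code works with byte offsets and a delay-slot-heavy SPARC encoding:

    // Illustrative model only; not the VM's actual data layout or API.
    class KlassModel {
        KlassModel[] primaryDisplay;      // primary supers, indexed by depth
        KlassModel[] secondarySupers;     // interfaces and very deep supertypes
        KlassModel   secondarySuperCache; // one-element cache probed by the fast path
        int          superCheckSlot;      // display slot to probe; -1 models
                                          // "super_check_offset points at the cache"

        boolean isSubtypeOf(KlassModel superK) {
            if (this == superK) return true;              // self check comes first
            // Fast path: a single probe, aliasing the display and the cache.
            KlassModel probed = (superK.superCheckSlot >= 0)
                    ? primaryDisplay[superK.superCheckSlot]
                    : secondarySuperCache;
            if (probed == superK) return true;            // immediate positive hit
            if (superK.superCheckSlot >= 0) return false; // primary super: miss is decisive
            // Slow path: linear scan of the secondary supers, caching any hit.
            for (KlassModel s : secondarySupers) {
                if (s == superK) { secondarySuperCache = superK; return true; }
            }
            return false;
        }
    }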
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
@@ -2393,23 +2393,11 @@

     // get instance klass
     load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
-    // get super_check_offset
-    load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-    // See if we get an immediate positive hit
-    __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
-    __ cmp(k_RInfo, O7);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-    // check for immediate negative hit
-    __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-    __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-    __ delayed()->nop();
-    // check for self
-    __ cmp(klass_RInfo, k_RInfo);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
-
-    // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+    // perform the fast part of the checking logic
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
+
+    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
     __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
     __ delayed()->nop();
     __ cmp(G3, 0);
@@ -2493,58 +2481,30 @@
       __ delayed()->nop();
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          // check for self
-          __ delayed()->cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          __ delayed()->nop();
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ cmp(G3, 0);
-          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
-        }
-        __ bind(done);
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         stub->entry(), NULL,
+                                         RegisterConstant(k->super_check_offset()));
       } else {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-        // check for immediate negative hit
-        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-        // check for self
-        __ delayed()->cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
+                                         &done, stub->entry(), NULL);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ cmp(G3, 0);
         __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
         __ delayed()->nop();
-        __ bind(done);
       }
-
+      __ bind(done);
     }
     __ mov(obj, dst);
   } else if (code == lir_instanceof) {
@@ -2582,58 +2542,32 @@
       __ set(0, dst);
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          __ set(0, dst);
-          __ bind(done);
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          // check for self
-          __ cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ mov(G3, dst);
-          __ bind(done);
-        }
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         (need_slow_path ? &done : NULL), NULL,
+                                         RegisterConstant(k->super_check_offset()),
+                                         dst);
       } else {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-        // check for immediate negative hit
-        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, true, Assembler::pt, done);
-        __ delayed()->set(0, dst);
-        // check for self
-        __ cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
+                                         &done, &done, NULL,
+                                         RegisterConstant(-1),
+                                         dst);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ mov(G3, dst);
-        __ bind(done);
       }
+      __ bind(done);
     }
   } else {
     ShouldNotReachHere();
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Mar 20 22:08:48 2009 -0400
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Mon Mar 23 10:42:20 2009 -0400
@@ -714,38 +714,19 @@
         // sub  : G3, argument, destroyed
         // super: G1, argument, not changed
         // raddr: O7, blown by call
-        Label loop, miss;
+        Label miss;

         __ save_frame(0);               // Blow no registers!

-        __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-        __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
-        __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
-        __ clr(L4);                     // Index
-        // Load a little early; will load 1 off the end of the array.
-        // Ok for now; revisit if we have other uses of this routine.
-        __ ld_ptr(L1,0,L2);             // Will load a little early
-
-        // The scan loop
-        __ bind(loop);
-        __ add(L1,wordSize,L1);         // Bump by OOP size
-        __ cmp(L4,L0);
-        __ br(Assembler::equal,false,Assembler::pn,miss);
-        __ delayed()->inc(L4);          // Bump index
-        __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
-        __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-        __ delayed()->ld_ptr(L1,0,L2);  // Will load a little early
-
-        // Got a hit; report success; set cache
-        __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);

         __ mov(1, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'true'
         __ delayed()->restore();        // free copy or add can go here

         __ bind(miss);
         __ mov(0, G3);
-        __ ret();                       // Result in G5 is ok; flags set
+        __ ret();                       // Result in G5 is 'false'
         __ delayed()->restore();        // free copy or add can go here
       }
18.1 --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Fri Mar 20 22:08:48 2009 -0400 18.2 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Mon Mar 23 10:42:20 2009 -0400 18.3 @@ -866,65 +866,18 @@ 18.4 Register Rtmp2, 18.5 Register Rtmp3, 18.6 Label &ok_is_subtype ) { 18.7 - Label not_subtype, loop; 18.8 + Label not_subtype; 18.9 18.10 // Profile the not-null value's klass. 18.11 profile_typecheck(Rsub_klass, Rtmp1); 18.12 18.13 - // Load the super-klass's check offset into Rtmp1 18.14 - ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 ); 18.15 - // Load from the sub-klass's super-class display list, or a 1-word cache of 18.16 - // the secondary superclass list, or a failing value with a sentinel offset 18.17 - // if the super-klass is an interface or exceptionally deep in the Java 18.18 - // hierarchy and we have to scan the secondary superclass list the hard way. 18.19 - ld_ptr( Rsub_klass, Rtmp1, Rtmp2 ); 18.20 - // See if we get an immediate positive hit 18.21 - cmp( Rtmp2, Rsuper_klass ); 18.22 - brx( Assembler::equal, false, Assembler::pt, ok_is_subtype ); 18.23 - // In the delay slot, check for immediate negative hit 18.24 - delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); 18.25 - br( Assembler::notEqual, false, Assembler::pt, not_subtype ); 18.26 - // In the delay slot, check for self 18.27 - delayed()->cmp( Rsub_klass, Rsuper_klass ); 18.28 - brx( Assembler::equal, false, Assembler::pt, ok_is_subtype ); 18.29 - 18.30 - // Now do a linear scan of the secondary super-klass chain. 18.31 - delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 ); 18.32 - 18.33 - // compress superclass 18.34 - if (UseCompressedOops) encode_heap_oop(Rsuper_klass); 18.35 - 18.36 - // Rtmp2 holds the objArrayOop of secondary supers. 
18.37 - ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length 18.38 - // Check for empty secondary super list 18.39 - tst(Rtmp1); 18.40 - 18.41 - // Top of search loop 18.42 - bind( loop ); 18.43 - br( Assembler::equal, false, Assembler::pn, not_subtype ); 18.44 - delayed()->nop(); 18.45 - 18.46 - // load next super to check 18.47 - if (UseCompressedOops) { 18.48 - lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3); 18.49 - // Bump array pointer forward one oop 18.50 - add( Rtmp2, 4, Rtmp2 ); 18.51 - } else { 18.52 - ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3); 18.53 - // Bump array pointer forward one oop 18.54 - add( Rtmp2, wordSize, Rtmp2); 18.55 - } 18.56 - // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list 18.57 - cmp( Rtmp3, Rsuper_klass ); 18.58 - // A miss means we are NOT a subtype and need to keep looping 18.59 - brx( Assembler::notEqual, false, Assembler::pt, loop ); 18.60 - delayed()->deccc( Rtmp1 ); // dec trip counter in delay slot 18.61 - // Falling out the bottom means we found a hit; we ARE a subtype 18.62 - if (UseCompressedOops) decode_heap_oop(Rsuper_klass); 18.63 - br( Assembler::always, false, Assembler::pt, ok_is_subtype ); 18.64 - // Update the cache 18.65 - delayed()->st_ptr( Rsuper_klass, Rsub_klass, 18.66 - sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); 18.67 + check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass, 18.68 + Rtmp1, Rtmp2, 18.69 + &ok_is_subtype, ¬_subtype, NULL); 18.70 + 18.71 + check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass, 18.72 + Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg, 18.73 + &ok_is_subtype, NULL); 18.74 18.75 bind(not_subtype); 18.76 profile_typecheck_failed(Rtmp1);
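Both the deleted inline sequence and the new helpers implement the same three-way decision (the fast path's tri-state answer is documented with the x86 declarations further below). A compact C++ sketch, with the field access and the cache offset written out explicitly for illustration only:

    // Tri-state fast path: yes / no / maybe-take-the-slow-path.
    enum FastResult { FAST_YES, FAST_NO, FAST_MAYBE_SLOW };
    static FastResult fast_subtype_check(Klass* sub, Klass* super,
                                         int secondary_cache_off) {
      if (sub == super) return FAST_YES;              // trivial self check
      int off = super->super_check_offset();
      Klass* displayed = *(Klass**)((char*)sub + off);
      if (displayed == super) return FAST_YES;        // display or cache hit
      // A miss through the secondary_super_cache slot is inconclusive;
      // only then must the secondary-supers array be scanned.
      return (off == secondary_cache_off) ? FAST_MAYBE_SLOW : FAST_NO;
    }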
19.1 --- a/src/cpu/sparc/vm/sparc.ad Fri Mar 20 22:08:48 2009 -0400 19.2 +++ b/src/cpu/sparc/vm/sparc.ad Mon Mar 23 10:42:20 2009 -0400 19.3 @@ -547,7 +547,11 @@ 19.4 int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); 19.5 int klass_load_size; 19.6 if (UseCompressedOops) { 19.7 - klass_load_size = 3*BytesPerInstWord; // see MacroAssembler::load_klass() 19.8 + assert(Universe::heap() != NULL, "java heap should be initialized"); 19.9 + if (Universe::narrow_oop_base() == NULL) 19.10 + klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass() 19.11 + else 19.12 + klass_load_size = 3*BytesPerInstWord; 19.13 } else { 19.14 klass_load_size = 1*BytesPerInstWord; 19.15 } 19.16 @@ -1601,9 +1605,11 @@ 19.17 st->print_cr("\nUEP:"); 19.18 #ifdef _LP64 19.19 if (UseCompressedOops) { 19.20 + assert(Universe::heap() != NULL, "java heap should be initialized"); 19.21 st->print_cr("\tLDUW [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass"); 19.22 st->print_cr("\tSLL R_G5,3,R_G5"); 19.23 - st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5"); 19.24 + if (Universe::narrow_oop_base() != NULL) 19.25 + st->print_cr("\tADD R_G5,R_G6_heap_base,R_G5"); 19.26 } else { 19.27 st->print_cr("\tLDX [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check"); 19.28 } 19.29 @@ -2502,7 +2508,11 @@ 19.30 __ load_klass(O0, G3_scratch); 19.31 int klass_load_size; 19.32 if (UseCompressedOops) { 19.33 - klass_load_size = 3*BytesPerInstWord; 19.34 + assert(Universe::heap() != NULL, "java heap should be initialized"); 19.35 + if (Universe::narrow_oop_base() == NULL) 19.36 + klass_load_size = 2*BytesPerInstWord; 19.37 + else 19.38 + klass_load_size = 3*BytesPerInstWord; 19.39 } else { 19.40 klass_load_size = 1*BytesPerInstWord; 19.41 } 19.42 @@ -9005,6 +9015,33 @@ 19.43 ins_pipe(long_memory_op); 19.44 %} 19.45 19.46 + 19.47 +//---------- Population Count Instructions ------------------------------------- 19.48 + 19.49 +instruct popCountI(iRegI dst, iRegI src) %{ 19.50 + predicate(UsePopCountInstruction); 19.51 + match(Set dst (PopCountI src)); 19.52 + 19.53 + format %{ "POPC $src, $dst" %} 19.54 + ins_encode %{ 19.55 + __ popc($src$$Register, $dst$$Register); 19.56 + %} 19.57 + ins_pipe(ialu_reg); 19.58 +%} 19.59 + 19.60 +// Note: Long.bitCount(long) returns an int. 19.61 +instruct popCountL(iRegI dst, iRegL src) %{ 19.62 + predicate(UsePopCountInstruction); 19.63 + match(Set dst (PopCountL src)); 19.64 + 19.65 + format %{ "POPC $src, $dst" %} 19.66 + ins_encode %{ 19.67 + __ popc($src$$Register, $dst$$Register); 19.68 + %} 19.69 + ins_pipe(ialu_reg); 19.70 +%} 19.71 + 19.72 + 19.73 // ============================================================================ 19.74 //------------Bytes reverse-------------------------------------------------- 19.75
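The new popCountI/popCountL rules select the SPARC POPC instruction when UsePopCountInstruction is on (note the diff's reminder that Long.bitCount(long) returns an int, hence the iRegI destination in both rules). The node's semantics are an ordinary population count; a portable sketch of what it computes:

    // Portable equivalent of POPC (Kernighan's bit-clearing loop).
    static int pop_count(uint64_t x) {
      int n = 0;
      while (x != 0) {
        x &= x - 1;   // clear the lowest set bit
        n++;
      }
      return n;
    }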
20.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Mar 20 22:08:48 2009 -0400 20.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Mon Mar 23 10:42:20 2009 -0400 20.3 @@ -900,19 +900,7 @@ 20.4 __ align(CodeEntryAlignment); 20.5 StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); 20.6 address start = __ pc(); 20.7 - Label loop, miss; 20.8 - 20.9 - // Compare super with sub directly, since super is not in its own SSA. 20.10 - // The compiler used to emit this test, but we fold it in here, 20.11 - // to increase overall code density, with no real loss of speed. 20.12 - { Label L; 20.13 - __ cmp(O1, O2); 20.14 - __ brx(Assembler::notEqual, false, Assembler::pt, L); 20.15 - __ delayed()->nop(); 20.16 - __ retl(); 20.17 - __ delayed()->addcc(G0,0,O0); // set Z flags, zero result 20.18 - __ bind(L); 20.19 - } 20.20 + Label miss; 20.21 20.22 #if defined(COMPILER2) && !defined(_LP64) 20.23 // Do not use a 'save' because it blows the 64-bit O registers. 20.24 @@ -936,56 +924,12 @@ 20.25 Register L2_super = L2; 20.26 Register L3_index = L3; 20.27 20.28 -#ifdef _LP64 20.29 - Register L4_ooptmp = L4; 20.30 - 20.31 - if (UseCompressedOops) { 20.32 - // this must be under UseCompressedOops check, as we rely upon fact 20.33 - // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save 20.34 - // on stack, see several lines above 20.35 - __ encode_heap_oop(Rsuper, L4_ooptmp); 20.36 - } 20.37 -#endif 20.38 - 20.39 - inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1); 20.40 - 20.41 - __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 ); 20.42 - __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len); 20.43 - __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr); 20.44 - __ clr(L3_index); // zero index 20.45 - // Load a little early; will load 1 off the end of the array. 20.46 - // Ok for now; revisit if we have other uses of this routine. 20.47 - if (UseCompressedOops) { 20.48 - __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early 20.49 - } else { 20.50 - __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early 20.51 - } 20.52 - 20.53 - assert(heapOopSize != 0, "heapOopSize should be initialized"); 20.54 - // The scan loop 20.55 - __ BIND(loop); 20.56 - __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size 20.57 - __ cmp(L3_index,L0_ary_len); 20.58 - __ br(Assembler::equal,false,Assembler::pn,miss); 20.59 - __ delayed()->inc(L3_index); // Bump index 20.60 - 20.61 - if (UseCompressedOops) { 20.62 -#ifdef _LP64 20.63 - __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit 20.64 - __ br( Assembler::notEqual, false, Assembler::pt, loop ); 20.65 - __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early 20.66 -#else 20.67 - ShouldNotReachHere(); 20.68 -#endif 20.69 - } else { 20.70 - __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit 20.71 - __ brx( Assembler::notEqual, false, Assembler::pt, loop ); 20.72 - __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early 20.73 - } 20.74 - 20.75 - // Got a hit; report success; set cache. Cache load doesn't 20.76 - // happen here; for speed it is directly emitted by the compiler. 20.77 - __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); 20.78 + __ check_klass_subtype_slow_path(Rsub, Rsuper, 20.79 + L0, L1, L2, L3, 20.80 + NULL, &miss); 20.81 + 20.82 + // Match falls through here. 
20.83 + __ addcc(G0,0,Rret); // set Z flags, Z result 20.84 20.85 #if defined(COMPILER2) && !defined(_LP64) 20.86 __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); 20.87 @@ -999,7 +943,6 @@ 20.88 __ delayed()->restore(); 20.89 #endif 20.90 20.91 - // Hit or miss falls through here 20.92 __ BIND(miss); 20.93 __ addcc(G0,1,Rret); // set NZ flags, NZ result 20.94 20.95 @@ -2330,51 +2273,31 @@ 20.96 Register super_check_offset, 20.97 Register super_klass, 20.98 Register temp, 20.99 - Label& L_success, 20.100 - Register deccc_hack = noreg) { 20.101 + Label& L_success) { 20.102 assert_different_registers(sub_klass, super_check_offset, super_klass, temp); 20.103 20.104 BLOCK_COMMENT("type_check:"); 20.105 20.106 - Label L_miss; 20.107 + Label L_miss, L_pop_to_miss; 20.108 20.109 assert_clean_int(super_check_offset, temp); 20.110 20.111 - // maybe decrement caller's trip count: 20.112 -#define DELAY_SLOT delayed(); \ 20.113 - { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); } 20.114 - 20.115 - // if the pointers are equal, we are done (e.g., String[] elements) 20.116 - __ cmp(sub_klass, super_klass); 20.117 - __ brx(Assembler::equal, true, Assembler::pt, L_success); 20.118 - __ DELAY_SLOT; 20.119 - 20.120 - // check the supertype display: 20.121 - __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type 20.122 - __ cmp(super_klass, temp); // test the super type 20.123 - __ brx(Assembler::equal, true, Assembler::pt, L_success); 20.124 - __ DELAY_SLOT; 20.125 - 20.126 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 20.127 - Klass::secondary_super_cache_offset_in_bytes()); 20.128 - __ cmp(super_klass, sc_offset); 20.129 - __ brx(Assembler::notEqual, true, Assembler::pt, L_miss); 20.130 - __ delayed()->nop(); 20.131 - 20.132 + __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, 20.133 + &L_success, &L_miss, NULL, 20.134 + super_check_offset); 20.135 + 20.136 + BLOCK_COMMENT("type_check_slow_path:"); 20.137 __ save_frame(0); 20.138 - __ mov(sub_klass->after_save(), O1); 20.139 - // mov(super_klass->after_save(), O2); //fill delay slot 20.140 - assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation"); 20.141 - __ call(StubRoutines::Sparc::_partial_subtype_check); 20.142 - __ delayed()->mov(super_klass->after_save(), O2); 20.143 + __ check_klass_subtype_slow_path(sub_klass->after_save(), 20.144 + super_klass->after_save(), 20.145 + L0, L1, L2, L4, 20.146 + NULL, &L_pop_to_miss); 20.147 + __ ba(false, L_success); 20.148 + __ delayed()->restore(); 20.149 + 20.150 + __ bind(L_pop_to_miss); 20.151 __ restore(); 20.152 20.153 - // Upon return, the condition codes are already set. 20.154 - __ brx(Assembler::equal, true, Assembler::pt, L_success); 20.155 - __ DELAY_SLOT; 20.156 - 20.157 -#undef DELAY_SLOT 20.158 - 20.159 // Fall through on failure! 20.160 __ BIND(L_miss); 20.161 } 20.162 @@ -2411,7 +2334,7 @@ 20.163 gen_write_ref_array_pre_barrier(O1, O2); 20.164 20.165 #ifdef ASSERT 20.166 - // We sometimes save a frame (see partial_subtype_check below). 20.167 + // We sometimes save a frame (see generate_type_check below). 20.168 // If this will cause trouble, let's fail now instead of later. 
20.169 __ save_frame(0); 20.170 __ restore(); 20.171 @@ -2455,41 +2378,39 @@ 20.172 // G3, G4, G5 --- current oop, oop.klass, oop.klass.super 20.173 __ align(16); 20.174 20.175 - __ bind(store_element); 20.176 - // deccc(G1_remain); // decrement the count (hoisted) 20.177 + __ BIND(store_element); 20.178 + __ deccc(G1_remain); // decrement the count 20.179 __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop 20.180 __ inc(O5_offset, heapOopSize); // step to next offset 20.181 __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); 20.182 __ delayed()->set(0, O0); // return -1 on success 20.183 20.184 // ======== loop entry is here ======== 20.185 - __ bind(load_element); 20.186 + __ BIND(load_element); 20.187 __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop 20.188 __ br_null(G3_oop, true, Assembler::pt, store_element); 20.189 - __ delayed()->deccc(G1_remain); // decrement the count 20.190 + __ delayed()->nop(); 20.191 20.192 __ load_klass(G3_oop, G4_klass); // query the object klass 20.193 20.194 generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, 20.195 // branch to this on success: 20.196 - store_element, 20.197 - // decrement this on success: 20.198 - G1_remain); 20.199 + store_element); 20.200 // ======== end loop ======== 20.201 20.202 // It was a real error; we must depend on the caller to finish the job. 20.203 // Register G1 has number of *remaining* oops, O2 number of *total* oops. 20.204 // Emit GC store barriers for the oops we have copied (O2 minus G1), 20.205 // and report their number to the caller. 20.206 - __ bind(fail); 20.207 + __ BIND(fail); 20.208 __ subcc(O2_count, G1_remain, O2_count); 20.209 __ brx(Assembler::zero, false, Assembler::pt, done); 20.210 __ delayed()->not1(O2_count, O0); // report (-1^K) to caller 20.211 20.212 - __ bind(do_card_marks); 20.213 + __ BIND(do_card_marks); 20.214 gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] 20.215 20.216 - __ bind(done); 20.217 + __ BIND(done); 20.218 inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); 20.219 __ retl(); 20.220 __ delayed()->nop(); // return value in 00 20.221 @@ -2942,14 +2863,15 @@ 20.222 StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry; 20.223 StubRoutines::_fence_entry = generate_fence(); 20.224 #endif // COMPILER2 !=> _LP64 20.225 - 20.226 - StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); 20.227 } 20.228 20.229 20.230 void generate_all() { 20.231 // Generates all stubs and initializes the entry points 20.232 20.233 + // Generate partial_subtype_check first here since its code depends on 20.234 + // UseZeroBaseCompressedOops which is defined after heap initialization. 20.235 + StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check(); 20.236 // These entry points require SharedInfo::stack0 to be set up in non-core builds 20.237 StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); 20.238 StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);
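The loop restructuring above (deccc moved out of the branch delay slot into store_element) does not change the checkcast arraycopy contract. In rough C-style pseudocode, with the subtype helper assumed for illustration: the stub copies until an element fails the element-type check, returns -1 on complete success, and otherwise returns -1^K, i.e. ~K, where K elements were already copied, so the caller can emit their barriers and finish the job:

    // Illustrative contract only; the real stub works in registers.
    static int checkcast_copy(oop* from, oop* to, int count, Klass* bound) {
      for (int copied = 0; copied < count; copied++) {
        oop el = from[copied];
        if (el != NULL && !element_is_subtype(el, bound))  // assumed helper
          return ~copied;   // report (-1 ^ copied) to the caller
        to[copied] = el;    // store; card marks follow for the GC
      }
      return -1;            // everything copied
    }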
21.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Mar 20 22:08:48 2009 -0400 21.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Mar 23 10:42:20 2009 -0400 21.3 @@ -1,5 +1,5 @@ 21.4 /* 21.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 21.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 21.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 21.8 * 21.9 * This code is free software; you can redistribute it and/or modify it 21.10 @@ -72,6 +72,9 @@ 21.11 FLAG_SET_ERGO(bool, UseCompressedOops, false); 21.12 } 21.13 } 21.14 + // 32-bit oops don't make sense for the 64-bit VM on sparc 21.15 + // since the 32-bit VM has the same registers and smaller objects. 21.16 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 21.17 #endif // _LP64 21.18 #ifdef COMPILER2 21.19 // Indirect branch is the same cost as direct 21.20 @@ -89,16 +92,26 @@ 21.21 #endif 21.22 } 21.23 21.24 + // Use hardware population count instruction if available. 21.25 + if (has_hardware_popc()) { 21.26 + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 21.27 + UsePopCountInstruction = true; 21.28 + } 21.29 + } 21.30 + 21.31 char buf[512]; 21.32 - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", 21.33 + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s", 21.34 (has_v8() ? ", has_v8" : ""), 21.35 (has_v9() ? ", has_v9" : ""), 21.36 + (has_hardware_popc() ? ", popc" : ""), 21.37 (has_vis1() ? ", has_vis1" : ""), 21.38 (has_vis2() ? ", has_vis2" : ""), 21.39 (is_ultra3() ? ", is_ultra3" : ""), 21.40 (is_sun4v() ? ", is_sun4v" : ""), 21.41 (is_niagara1() ? ", is_niagara1" : ""), 21.42 - (!has_hardware_int_muldiv() ? ", no-muldiv" : ""), 21.43 + (is_niagara1_plus() ? ", is_niagara1_plus" : ""), 21.44 + (!has_hardware_mul32() ? ", no-mul32" : ""), 21.45 + (!has_hardware_div32() ? ", no-div32" : ""), 21.46 (!has_hardware_fsmuld() ? ", no-fsmuld" : "")); 21.47 21.48 // buf is started with ", " or is empty
22.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Mar 20 22:08:48 2009 -0400 22.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Mar 23 10:42:20 2009 -0400 22.3 @@ -1,5 +1,5 @@ 22.4 /* 22.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 22.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 22.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 22.8 * 22.9 * This code is free software; you can redistribute it and/or modify it 22.10 @@ -25,34 +25,38 @@ 22.11 class VM_Version: public Abstract_VM_Version { 22.12 protected: 22.13 enum Feature_Flag { 22.14 - v8_instructions = 0, 22.15 - hardware_int_muldiv = 1, 22.16 - hardware_fsmuld = 2, 22.17 - v9_instructions = 3, 22.18 - vis1_instructions = 4, 22.19 - vis2_instructions = 5, 22.20 - sun4v_instructions = 6 22.21 + v8_instructions = 0, 22.22 + hardware_mul32 = 1, 22.23 + hardware_div32 = 2, 22.24 + hardware_fsmuld = 3, 22.25 + hardware_popc = 4, 22.26 + v9_instructions = 5, 22.27 + vis1_instructions = 6, 22.28 + vis2_instructions = 7, 22.29 + sun4v_instructions = 8 22.30 }; 22.31 22.32 enum Feature_Flag_Set { 22.33 - unknown_m = 0, 22.34 - all_features_m = -1, 22.35 + unknown_m = 0, 22.36 + all_features_m = -1, 22.37 22.38 - v8_instructions_m = 1 << v8_instructions, 22.39 - hardware_int_muldiv_m = 1 << hardware_int_muldiv, 22.40 - hardware_fsmuld_m = 1 << hardware_fsmuld, 22.41 - v9_instructions_m = 1 << v9_instructions, 22.42 - vis1_instructions_m = 1 << vis1_instructions, 22.43 - vis2_instructions_m = 1 << vis2_instructions, 22.44 - sun4v_m = 1 << sun4v_instructions, 22.45 + v8_instructions_m = 1 << v8_instructions, 22.46 + hardware_mul32_m = 1 << hardware_mul32, 22.47 + hardware_div32_m = 1 << hardware_div32, 22.48 + hardware_fsmuld_m = 1 << hardware_fsmuld, 22.49 + hardware_popc_m = 1 << hardware_popc, 22.50 + v9_instructions_m = 1 << v9_instructions, 22.51 + vis1_instructions_m = 1 << vis1_instructions, 22.52 + vis2_instructions_m = 1 << vis2_instructions, 22.53 + sun4v_m = 1 << sun4v_instructions, 22.54 22.55 - generic_v8_m = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m, 22.56 - generic_v9_m = generic_v8_m | v9_instructions_m | vis1_instructions_m, 22.57 - ultra3_m = generic_v9_m | vis2_instructions_m, 22.58 + generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, 22.59 + generic_v9_m = generic_v8_m | v9_instructions_m, 22.60 + ultra3_m = generic_v9_m | vis1_instructions_m | vis2_instructions_m, 22.61 22.62 // Temporary until we have something more accurate 22.63 - niagara1_unique_m = sun4v_m, 22.64 - niagara1_m = generic_v9_m | niagara1_unique_m 22.65 + niagara1_unique_m = sun4v_m, 22.66 + niagara1_m = generic_v9_m | niagara1_unique_m 22.67 }; 22.68 22.69 static int _features; 22.70 @@ -62,7 +66,7 @@ 22.71 static int determine_features(); 22.72 static int platform_features(int features); 22.73 22.74 - static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; } 22.75 + static bool is_niagara1(int features) { return (features & sun4v_m) != 0; } 22.76 22.77 static int maximum_niagara1_processor_count() { return 32; } 22.78 // Returns true if the platform is in the niagara line and 22.79 @@ -76,8 +80,10 @@ 22.80 // Instruction support 22.81 static bool has_v8() { return (_features & v8_instructions_m) != 0; } 22.82 static bool has_v9() { return (_features & v9_instructions_m) != 0; } 22.83 - static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; } 22.84 + static bool 
has_hardware_mul32() { return (_features & hardware_mul32_m) != 0; } 22.85 + static bool has_hardware_div32() { return (_features & hardware_div32_m) != 0; } 22.86 static bool has_hardware_fsmuld() { return (_features & hardware_fsmuld_m) != 0; } 22.87 + static bool has_hardware_popc() { return (_features & hardware_popc_m) != 0; } 22.88 static bool has_vis1() { return (_features & vis1_instructions_m) != 0; } 22.89 static bool has_vis2() { return (_features & vis2_instructions_m) != 0; } 22.90
23.1 --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp Fri Mar 20 22:08:48 2009 -0400 23.2 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp Mon Mar 23 10:42:20 2009 -0400 23.3 @@ -221,13 +221,15 @@ 23.4 if (is_vtable_stub) { 23.5 // ld;ld;ld,jmp,nop 23.6 const int basic = 5*BytesPerInstWord + 23.7 - // shift;add for load_klass 23.8 - (UseCompressedOops ? 2*BytesPerInstWord : 0); 23.9 + // shift;add for load_klass (only a shift with a zero-based heap) 23.10 + (UseCompressedOops ? 23.11 + ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0); 23.12 return basic + slop; 23.13 } else { 23.14 const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord + 23.15 - // shift;add for load_klass 23.16 - (UseCompressedOops ? 2*BytesPerInstWord : 0); 23.17 + // shift;add for load_klass (only a shift with a zero-based heap) 23.18 + (UseCompressedOops ? 23.19 + ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0); 23.20 return (basic + slop); 23.21 } 23.22 }
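The sizing arithmetic above mirrors what load_klass emits to decode a compressed klass pointer: with a zero narrow-oop base only the shift remains, otherwise a shift plus an add of the heap base. The same choice, written as a hypothetical helper:

    // Hypothetical helper equivalent to the inline expression above.
    static int load_klass_extra_bytes() {
      if (!UseCompressedOops) return 0;
      return (Universe::narrow_oop_base() == NULL)
             ? 1 * BytesPerInstWord    // zero-based heap: shift only
             : 2 * BytesPerInstWord;   // shift plus add of the heap base
    }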
24.1 --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Mar 20 22:08:48 2009 -0400 24.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp Mon Mar 23 10:42:20 2009 -0400 24.3 @@ -727,7 +727,7 @@ 24.4 } 24.5 24.6 #ifdef _LP64 24.7 - assert(false, "fix locate_operand"); 24.8 + assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 24.9 #else 24.10 assert(which == imm_operand, "instruction has only an imm field"); 24.11 #endif // LP64 24.12 @@ -2193,6 +2193,25 @@ 24.13 emit_byte(0x58 | encode); 24.14 } 24.15 24.16 +void Assembler::popcntl(Register dst, Address src) { 24.17 + assert(VM_Version::supports_popcnt(), "must support"); 24.18 + InstructionMark im(this); 24.19 + emit_byte(0xF3); 24.20 + prefix(src, dst); 24.21 + emit_byte(0x0F); 24.22 + emit_byte(0xB8); 24.23 + emit_operand(dst, src); 24.24 +} 24.25 + 24.26 +void Assembler::popcntl(Register dst, Register src) { 24.27 + assert(VM_Version::supports_popcnt(), "must support"); 24.28 + emit_byte(0xF3); 24.29 + int encode = prefix_and_encode(dst->encoding(), src->encoding()); 24.30 + emit_byte(0x0F); 24.31 + emit_byte(0xB8); 24.32 + emit_byte(0xC0 | encode); 24.33 +} 24.34 + 24.35 void Assembler::popf() { 24.36 emit_byte(0x9D); 24.37 } 24.38 @@ -3224,12 +3243,6 @@ 24.39 emit_byte(0xF1); 24.40 } 24.41 24.42 -void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) { 24.43 - InstructionMark im(this); 24.44 - int encode = prefix_and_encode(dst->encoding()); 24.45 - emit_byte(0xB8 | encode); 24.46 - emit_data((int)imm32, rspec, format); 24.47 -} 24.48 24.49 #ifndef _LP64 24.50 24.51 @@ -3249,6 +3262,12 @@ 24.52 emit_data((int)imm32, rspec, 0); 24.53 } 24.54 24.55 +void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 24.56 + InstructionMark im(this); 24.57 + int encode = prefix_and_encode(dst->encoding()); 24.58 + emit_byte(0xB8 | encode); 24.59 + emit_data((int)imm32, rspec, 0); 24.60 +} 24.61 24.62 void Assembler::popa() { // 32bit 24.63 emit_byte(0x61); 24.64 @@ -3857,6 +3876,37 @@ 24.65 emit_data64(imm64, rspec); 24.66 } 24.67 24.68 +void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 24.69 + InstructionMark im(this); 24.70 + int encode = prefix_and_encode(dst->encoding()); 24.71 + emit_byte(0xB8 | encode); 24.72 + emit_data((int)imm32, rspec, narrow_oop_operand); 24.73 +} 24.74 + 24.75 +void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 24.76 + InstructionMark im(this); 24.77 + prefix(dst); 24.78 + emit_byte(0xC7); 24.79 + emit_operand(rax, dst, 4); 24.80 + emit_data((int)imm32, rspec, narrow_oop_operand); 24.81 +} 24.82 + 24.83 +void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 24.84 + InstructionMark im(this); 24.85 + int encode = prefix_and_encode(src1->encoding()); 24.86 + emit_byte(0x81); 24.87 + emit_byte(0xF8 | encode); 24.88 + emit_data((int)imm32, rspec, narrow_oop_operand); 24.89 +} 24.90 + 24.91 +void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 24.92 + InstructionMark im(this); 24.93 + prefix(src1); 24.94 + emit_byte(0x81); 24.95 + emit_operand(rax, src1, 4); 24.96 + emit_data((int)imm32, rspec, narrow_oop_operand); 24.97 +} 24.98 + 24.99 void Assembler::movdq(XMMRegister dst, Register src) { 24.100 // table D-1 says MMX/SSE2 24.101 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 24.102 @@ -4049,6 +4099,25 @@ 24.103 addq(rsp, 16 * 
wordSize); 24.104 } 24.105 24.106 +void Assembler::popcntq(Register dst, Address src) { 24.107 + assert(VM_Version::supports_popcnt(), "must support"); 24.108 + InstructionMark im(this); 24.109 + emit_byte(0xF3); 24.110 + prefixq(src, dst); 24.111 + emit_byte(0x0F); 24.112 + emit_byte(0xB8); 24.113 + emit_operand(dst, src); 24.114 +} 24.115 + 24.116 +void Assembler::popcntq(Register dst, Register src) { 24.117 + assert(VM_Version::supports_popcnt(), "must support"); 24.118 + emit_byte(0xF3); 24.119 + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 24.120 + emit_byte(0x0F); 24.121 + emit_byte(0xB8); 24.122 + emit_byte(0xC0 | encode); 24.123 +} 24.124 + 24.125 void Assembler::popq(Address dst) { 24.126 InstructionMark im(this); 24.127 prefixq(dst); 24.128 @@ -7217,6 +7286,225 @@ 24.129 } 24.130 24.131 24.132 +void MacroAssembler::check_klass_subtype(Register sub_klass, 24.133 + Register super_klass, 24.134 + Register temp_reg, 24.135 + Label& L_success) { 24.136 + Label L_failure; 24.137 + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 24.138 + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 24.139 + bind(L_failure); 24.140 +} 24.141 + 24.142 + 24.143 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 24.144 + Register super_klass, 24.145 + Register temp_reg, 24.146 + Label* L_success, 24.147 + Label* L_failure, 24.148 + Label* L_slow_path, 24.149 + RegisterConstant super_check_offset) { 24.150 + assert_different_registers(sub_klass, super_klass, temp_reg); 24.151 + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 24.152 + if (super_check_offset.is_register()) { 24.153 + assert_different_registers(sub_klass, super_klass, 24.154 + super_check_offset.as_register()); 24.155 + } else if (must_load_sco) { 24.156 + assert(temp_reg != noreg, "supply either a temp or a register offset"); 24.157 + } 24.158 + 24.159 + Label L_fallthrough; 24.160 + int label_nulls = 0; 24.161 + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 24.162 + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 24.163 + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 24.164 + assert(label_nulls <= 1, "at most one NULL in the batch"); 24.165 + 24.166 + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 24.167 + Klass::secondary_super_cache_offset_in_bytes()); 24.168 + int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 24.169 + Klass::super_check_offset_offset_in_bytes()); 24.170 + Address super_check_offset_addr(super_klass, sco_offset); 24.171 + 24.172 + // Hacked jcc, which "knows" that L_fallthrough, at least, is in 24.173 + // range of a jccb. If this routine grows larger, reconsider at 24.174 + // least some of these. 24.175 +#define local_jcc(assembler_cond, label) \ 24.176 + if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 24.177 + else jcc( assembler_cond, label) /*omit semi*/ 24.178 + 24.179 + // Hacked jmp, which may only be used just before L_fallthrough. 24.180 +#define final_jmp(label) \ 24.181 + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 24.182 + else jmp(label) /*omit semi*/ 24.183 + 24.184 + // If the pointers are equal, we are done (e.g., String[] elements). 24.185 + // This self-check enables sharing of secondary supertype arrays among 24.186 + // non-primary types such as array-of-interface. 
Otherwise, each such 24.187 + // type would need its own customized SSA. 24.188 + // We move this check to the front of the fast path because many 24.189 + // type checks are in fact trivially successful in this manner, 24.190 + // so we get a nicely predicted branch right at the start of the check. 24.191 + cmpptr(sub_klass, super_klass); 24.192 + local_jcc(Assembler::equal, *L_success); 24.193 + 24.194 + // Check the supertype display: 24.195 + if (must_load_sco) { 24.196 + // Positive movl does right thing on LP64. 24.197 + movl(temp_reg, super_check_offset_addr); 24.198 + super_check_offset = RegisterConstant(temp_reg); 24.199 + } 24.200 + Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 24.201 + cmpptr(super_klass, super_check_addr); // load displayed supertype 24.202 + 24.203 + // This check has worked decisively for primary supers. 24.204 + // Secondary supers are sought in the super_cache ('super_cache_addr'). 24.205 + // (Secondary supers are interfaces and very deeply nested subtypes.) 24.206 + // This works in the same check above because of a tricky aliasing 24.207 + // between the super_cache and the primary super display elements. 24.208 + // (The 'super_check_addr' can address either, as the case requires.) 24.209 + // Note that the cache is updated below if it does not help us find 24.210 + // what we need immediately. 24.211 + // So if it was a primary super, we can just fail immediately. 24.212 + // Otherwise, it's the slow path for us (no success at this point). 24.213 + 24.214 + if (super_check_offset.is_register()) { 24.215 + local_jcc(Assembler::equal, *L_success); 24.216 + cmpl(super_check_offset.as_register(), sc_offset); 24.217 + if (L_failure == &L_fallthrough) { 24.218 + local_jcc(Assembler::equal, *L_slow_path); 24.219 + } else { 24.220 + local_jcc(Assembler::notEqual, *L_failure); 24.221 + final_jmp(*L_slow_path); 24.222 + } 24.223 + } else if (super_check_offset.as_constant() == sc_offset) { 24.224 + // Need a slow path; fast failure is impossible. 24.225 + if (L_slow_path == &L_fallthrough) { 24.226 + local_jcc(Assembler::equal, *L_success); 24.227 + } else { 24.228 + local_jcc(Assembler::notEqual, *L_slow_path); 24.229 + final_jmp(*L_success); 24.230 + } 24.231 + } else { 24.232 + // No slow path; it's a fast decision. 
24.233 + if (L_failure == &L_fallthrough) { 24.234 + local_jcc(Assembler::equal, *L_success); 24.235 + } else { 24.236 + local_jcc(Assembler::notEqual, *L_failure); 24.237 + final_jmp(*L_success); 24.238 + } 24.239 + } 24.240 + 24.241 + bind(L_fallthrough); 24.242 + 24.243 +#undef local_jcc 24.244 +#undef final_jmp 24.245 +} 24.246 + 24.247 + 24.248 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 24.249 + Register super_klass, 24.250 + Register temp_reg, 24.251 + Register temp2_reg, 24.252 + Label* L_success, 24.253 + Label* L_failure, 24.254 + bool set_cond_codes) { 24.255 + assert_different_registers(sub_klass, super_klass, temp_reg); 24.256 + if (temp2_reg != noreg) 24.257 + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 24.258 +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 24.259 + 24.260 + Label L_fallthrough; 24.261 + int label_nulls = 0; 24.262 + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 24.263 + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 24.264 + assert(label_nulls <= 1, "at most one NULL in the batch"); 24.265 + 24.266 + // a couple of useful fields in sub_klass: 24.267 + int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 24.268 + Klass::secondary_supers_offset_in_bytes()); 24.269 + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 24.270 + Klass::secondary_super_cache_offset_in_bytes()); 24.271 + Address secondary_supers_addr(sub_klass, ss_offset); 24.272 + Address super_cache_addr( sub_klass, sc_offset); 24.273 + 24.274 + // Do a linear scan of the secondary super-klass chain. 24.275 + // This code is rarely used, so simplicity is a virtue here. 24.276 + // The repne_scan instruction uses fixed registers, which we must spill. 24.277 + // Don't worry too much about pre-existing connections with the input regs. 24.278 + 24.279 + assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 24.280 + assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 24.281 + 24.282 + // Get super_klass value into rax (even if it was in rdi or rcx). 24.283 + bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 24.284 + if (super_klass != rax || UseCompressedOops) { 24.285 + if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 24.286 + mov(rax, super_klass); 24.287 + } 24.288 + if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 24.289 + if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 24.290 + 24.291 +#ifndef PRODUCT 24.292 + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 24.293 + ExternalAddress pst_counter_addr((address) pst_counter); 24.294 + NOT_LP64( incrementl(pst_counter_addr) ); 24.295 + LP64_ONLY( lea(rcx, pst_counter_addr) ); 24.296 + LP64_ONLY( incrementl(Address(rcx, 0)) ); 24.297 +#endif //PRODUCT 24.298 + 24.299 + // We will consult the secondary-super array. 24.300 + movptr(rdi, secondary_supers_addr); 24.301 + // Load the array length. (Positive movl does right thing on LP64.) 24.302 + movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 24.303 + // Skip to start of data. 24.304 + addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 24.305 + 24.306 + // Scan RCX words at [RDI] for an occurrence of RAX. 24.307 + // Set NZ/Z based on last compare. 
24.308 +#ifdef _LP64 24.309 + // This part is tricky, as values in supers array could be 32 or 64 bit wide 24.310 + // and we store values in objArrays always encoded, thus we need to encode 24.311 + // the value of rax before repne. Note that rax is dead after the repne. 24.312 + if (UseCompressedOops) { 24.313 + encode_heap_oop_not_null(rax); 24.314 + // The superclass is never null; it would be a basic system error if a null 24.315 + // pointer were to sneak in here. Note that we have already loaded the 24.316 + // Klass::super_check_offset from the super_klass in the fast path, 24.317 + // so if there is a null in that register, we are already in the afterlife. 24.318 + repne_scanl(); 24.319 + } else 24.320 +#endif // _LP64 24.321 + repne_scan(); 24.322 + 24.323 + // Unspill the temp. registers: 24.324 + if (pushed_rdi) pop(rdi); 24.325 + if (pushed_rcx) pop(rcx); 24.326 + if (pushed_rax) pop(rax); 24.327 + 24.328 + if (set_cond_codes) { 24.329 + // Special hack for the AD files: rdi is guaranteed non-zero. 24.330 + assert(!pushed_rdi, "rdi must be left non-NULL"); 24.331 + // Also, the condition codes are properly set Z/NZ on succeed/failure. 24.332 + } 24.333 + 24.334 + if (L_failure == &L_fallthrough) 24.335 + jccb(Assembler::notEqual, *L_failure); 24.336 + else jcc(Assembler::notEqual, *L_failure); 24.337 + 24.338 + // Success. Cache the super we found and proceed in triumph. 24.339 + movptr(super_cache_addr, super_klass); 24.340 + 24.341 + if (L_success != &L_fallthrough) { 24.342 + jmp(*L_success); 24.343 + } 24.344 + 24.345 +#undef IS_A_TEMP 24.346 + 24.347 + bind(L_fallthrough); 24.348 +} 24.349 + 24.350 + 24.351 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 24.352 ucomisd(dst, as_Address(src)); 24.353 } 24.354 @@ -7710,14 +7998,21 @@ 24.355 void MacroAssembler::load_prototype_header(Register dst, Register src) { 24.356 #ifdef _LP64 24.357 if (UseCompressedOops) { 24.358 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.359 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 24.360 - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 24.361 + if (Universe::narrow_oop_shift() != 0) { 24.362 + assert(Address::times_8 == LogMinObjAlignmentInBytes && 24.363 + Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); 24.364 + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 24.365 + } else { 24.366 + movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 24.367 + } 24.368 } else 24.369 #endif 24.370 - { 24.371 - movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 24.372 - movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 24.373 - } 24.374 + { 24.375 + movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 24.376 + movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 24.377 + } 24.378 } 24.379 24.380 void MacroAssembler::store_klass(Register dst, Register src) { 24.381 @@ -7760,11 +8055,20 @@ 24.382 // Algorithm must match oop.inline.hpp encode_heap_oop. 
24.383 void MacroAssembler::encode_heap_oop(Register r) { 24.384 assert (UseCompressedOops, "should be compressed"); 24.385 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.386 + if (Universe::narrow_oop_base() == NULL) { 24.387 + verify_oop(r, "broken oop in encode_heap_oop"); 24.388 + if (Universe::narrow_oop_shift() != 0) { 24.389 + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 24.390 + shrq(r, LogMinObjAlignmentInBytes); 24.391 + } 24.392 + return; 24.393 + } 24.394 #ifdef ASSERT 24.395 if (CheckCompressedOops) { 24.396 Label ok; 24.397 push(rscratch1); // cmpptr trashes rscratch1 24.398 - cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); 24.399 + cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 24.400 jcc(Assembler::equal, ok); 24.401 stop("MacroAssembler::encode_heap_oop: heap base corrupted?"); 24.402 bind(ok); 24.403 @@ -7780,6 +8084,7 @@ 24.404 24.405 void MacroAssembler::encode_heap_oop_not_null(Register r) { 24.406 assert (UseCompressedOops, "should be compressed"); 24.407 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.408 #ifdef ASSERT 24.409 if (CheckCompressedOops) { 24.410 Label ok; 24.411 @@ -7790,12 +8095,18 @@ 24.412 } 24.413 #endif 24.414 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 24.415 - subq(r, r12_heapbase); 24.416 - shrq(r, LogMinObjAlignmentInBytes); 24.417 + if (Universe::narrow_oop_base() != NULL) { 24.418 + subq(r, r12_heapbase); 24.419 + } 24.420 + if (Universe::narrow_oop_shift() != 0) { 24.421 + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 24.422 + shrq(r, LogMinObjAlignmentInBytes); 24.423 + } 24.424 } 24.425 24.426 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 24.427 assert (UseCompressedOops, "should be compressed"); 24.428 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.429 #ifdef ASSERT 24.430 if (CheckCompressedOops) { 24.431 Label ok; 24.432 @@ -7809,18 +8120,32 @@ 24.433 if (dst != src) { 24.434 movq(dst, src); 24.435 } 24.436 - subq(dst, r12_heapbase); 24.437 - shrq(dst, LogMinObjAlignmentInBytes); 24.438 + if (Universe::narrow_oop_base() != NULL) { 24.439 + subq(dst, r12_heapbase); 24.440 + } 24.441 + if (Universe::narrow_oop_shift() != 0) { 24.442 + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 24.443 + shrq(dst, LogMinObjAlignmentInBytes); 24.444 + } 24.445 } 24.446 24.447 void MacroAssembler::decode_heap_oop(Register r) { 24.448 assert (UseCompressedOops, "should be compressed"); 24.449 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.450 + if (Universe::narrow_oop_base() == NULL) { 24.451 + if (Universe::narrow_oop_shift() != 0) { 24.452 + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 24.453 + shlq(r, LogMinObjAlignmentInBytes); 24.454 + } 24.455 + verify_oop(r, "broken oop in decode_heap_oop"); 24.456 + return; 24.457 + } 24.458 #ifdef ASSERT 24.459 if (CheckCompressedOops) { 24.460 Label ok; 24.461 push(rscratch1); 24.462 cmpptr(r12_heapbase, 24.463 - ExternalAddress((address)Universe::heap_base_addr())); 24.464 + ExternalAddress((address)Universe::narrow_oop_base_addr())); 24.465 jcc(Assembler::equal, ok); 24.466 stop("MacroAssembler::decode_heap_oop: heap base corrupted?"); 24.467 bind(ok); 24.468 @@ -7844,32 +8169,76 @@ 24.469 24.470 void 
MacroAssembler::decode_heap_oop_not_null(Register r) { 24.471 assert (UseCompressedOops, "should only be used for compressed headers"); 24.472 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.473 // Cannot assert, unverified entry point counts instructions (see .ad file) 24.474 // vtableStubs also counts instructions in pd_code_size_limit. 24.475 // Also do not verify_oop as this is called by verify_oop. 24.476 - assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); 24.477 - leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); 24.478 + if (Universe::narrow_oop_base() == NULL) { 24.479 + if (Universe::narrow_oop_shift() != 0) { 24.480 + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 24.481 + shlq(r, LogMinObjAlignmentInBytes); 24.482 + } 24.483 + } else { 24.484 + assert (Address::times_8 == LogMinObjAlignmentInBytes && 24.485 + Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); 24.486 + leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); 24.487 + } 24.488 } 24.489 24.490 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 24.491 assert (UseCompressedOops, "should only be used for compressed headers"); 24.492 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.493 // Cannot assert, unverified entry point counts instructions (see .ad file) 24.494 // vtableStubs also counts instructions in pd_code_size_limit. 24.495 // Also do not verify_oop as this is called by verify_oop. 24.496 - assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); 24.497 - leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 24.498 + if (Universe::narrow_oop_shift() != 0) { 24.499 + assert (Address::times_8 == LogMinObjAlignmentInBytes && 24.500 + Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); 24.501 + leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 24.502 + } else if (dst != src) { 24.503 + movq(dst, src); 24.504 + } 24.505 } 24.506 24.507 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 24.508 - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); 24.509 + assert (UseCompressedOops, "should only be used for compressed headers"); 24.510 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.511 + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 24.512 int oop_index = oop_recorder()->find_index(obj); 24.513 RelocationHolder rspec = oop_Relocation::spec(oop_index); 24.514 - mov_literal32(dst, oop_index, rspec, narrow_oop_operand); 24.515 + mov_narrow_oop(dst, oop_index, rspec); 24.516 +} 24.517 + 24.518 +void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 24.519 + assert (UseCompressedOops, "should only be used for compressed headers"); 24.520 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.521 + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 24.522 + int oop_index = oop_recorder()->find_index(obj); 24.523 + RelocationHolder rspec = oop_Relocation::spec(oop_index); 24.524 + mov_narrow_oop(dst, oop_index, rspec); 24.525 +} 24.526 + 24.527 +void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 24.528 + assert (UseCompressedOops, "should only be used for compressed headers"); 24.529 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.530 + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 24.531 + int 
oop_index = oop_recorder()->find_index(obj); 24.532 + RelocationHolder rspec = oop_Relocation::spec(oop_index); 24.533 + Assembler::cmp_narrow_oop(dst, oop_index, rspec); 24.534 +} 24.535 + 24.536 +void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 24.537 + assert (UseCompressedOops, "should only be used for compressed headers"); 24.538 + assert (Universe::heap() != NULL, "java heap should be initialized"); 24.539 + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 24.540 + int oop_index = oop_recorder()->find_index(obj); 24.541 + RelocationHolder rspec = oop_Relocation::spec(oop_index); 24.542 + Assembler::cmp_narrow_oop(dst, oop_index, rspec); 24.543 } 24.544 24.545 void MacroAssembler::reinit_heapbase() { 24.546 if (UseCompressedOops) { 24.547 - movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); 24.548 + movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 24.549 } 24.550 } 24.551 #endif // _LP64
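All of the encode/decode variants above compute one pair of formulas, specialized by whether Universe::narrow_oop_base() and Universe::narrow_oop_shift() are zero; when either is zero, the corresponding sub/add or shift simply disappears, which is exactly the specialization the new code performs. As a sketch with base and shift passed in explicitly:

    // Compressed-oop arithmetic (see subq/shrq and shlq/leaq above).
    static uint32_t encode_oop(uintptr_t oop, uintptr_t base, int shift) {
      return (uint32_t)((oop - base) >> shift);
    }
    static uintptr_t decode_oop(uint32_t narrow, uintptr_t base, int shift) {
      return base + ((uintptr_t)narrow << shift);
    }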
25.1 --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Mar 20 22:08:48 2009 -0400 25.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp Mon Mar 23 10:42:20 2009 -0400 25.3 @@ -578,20 +578,25 @@ 25.4 25.5 // These are all easily abused and hence protected 25.6 25.7 - void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0); 25.8 - 25.9 // 32BIT ONLY SECTION 25.10 #ifndef _LP64 25.11 // Make these disappear in 64bit mode since they would never be correct 25.12 void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 25.13 void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 25.14 25.15 + void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 25.16 void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 25.17 25.18 void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY 25.19 #else 25.20 // 64BIT ONLY SECTION 25.21 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY 25.22 + 25.23 + void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec); 25.24 + void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec); 25.25 + 25.26 + void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); 25.27 + void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec); 25.28 #endif // _LP64 25.29 25.30 // These are unique in that we are ensured by the caller that the 32bit 25.31 @@ -1219,6 +1224,14 @@ 25.32 void popq(Address dst); 25.33 #endif 25.34 25.35 + void popcntl(Register dst, Address src); 25.36 + void popcntl(Register dst, Register src); 25.37 + 25.38 +#ifdef _LP64 25.39 + void popcntq(Register dst, Address src); 25.40 + void popcntq(Register dst, Register src); 25.41 +#endif 25.42 + 25.43 // Prefetches (SSE, SSE2, 3DNOW only) 25.44 25.45 void prefetchnta(Address src); 25.46 @@ -1647,6 +1660,9 @@ 25.47 void decode_heap_oop_not_null(Register dst, Register src); 25.48 25.49 void set_narrow_oop(Register dst, jobject obj); 25.50 + void set_narrow_oop(Address dst, jobject obj); 25.51 + void cmp_narrow_oop(Register dst, jobject obj); 25.52 + void cmp_narrow_oop(Address dst, jobject obj); 25.53 25.54 // if heap base register is used - reinit it with the correct value 25.55 void reinit_heapbase(); 25.56 @@ -1791,6 +1807,40 @@ 25.57 Register scan_temp, 25.58 Label& no_such_interface); 25.59 25.60 + // Test sub_klass against super_klass, with fast and slow paths. 25.61 + 25.62 + // The fast path produces a tri-state answer: yes / no / maybe-slow. 25.63 + // One of the three labels can be NULL, meaning take the fall-through. 25.64 + // If super_check_offset is -1, the value is loaded up from super_klass. 25.65 + // No registers are killed, except temp_reg. 25.66 + void check_klass_subtype_fast_path(Register sub_klass, 25.67 + Register super_klass, 25.68 + Register temp_reg, 25.69 + Label* L_success, 25.70 + Label* L_failure, 25.71 + Label* L_slow_path, 25.72 + RegisterConstant super_check_offset = RegisterConstant(-1)); 25.73 + 25.74 + // The rest of the type check; must be wired to a corresponding fast path. 25.75 + // It does not repeat the fast path logic, so don't use it standalone. 25.76 + // The temp_reg and temp2_reg can be noreg, if no temps are available. 25.77 + // Updates the sub's secondary super cache as necessary. 
25.78 + // If set_cond_codes, condition codes will be Z on success, NZ on failure. 25.79 + void check_klass_subtype_slow_path(Register sub_klass, 25.80 + Register super_klass, 25.81 + Register temp_reg, 25.82 + Register temp2_reg, 25.83 + Label* L_success, 25.84 + Label* L_failure, 25.85 + bool set_cond_codes = false); 25.86 + 25.87 + // Simplified, combined version, good for typical uses. 25.88 + // Falls through on failure. 25.89 + void check_klass_subtype(Register sub_klass, 25.90 + Register super_klass, 25.91 + Register temp_reg, 25.92 + Label& L_success); 25.93 + 25.94 //---- 25.95 void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 25.96
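A typical use of the combined entry point, per the comments above; the register choices here are illustrative, following the interpreter callers further below:

    Label L_ok;
    __ check_klass_subtype(Rsub_klass, rax, rcx, L_ok);
    // ... reached on failure (the combined form falls through) ...
    __ bind(L_ok);
    // ... success continues here ...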
26.1 --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Mar 20 22:08:48 2009 -0400 26.2 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Mar 23 10:42:20 2009 -0400 26.3 @@ -1598,18 +1598,9 @@ 26.4 26.5 // get instance klass 26.6 __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); 26.7 - // get super_check_offset 26.8 - __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); 26.9 - // See if we get an immediate positive hit 26.10 - __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1)); 26.11 - __ jcc(Assembler::equal, done); 26.12 - // check for immediate negative hit 26.13 - __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); 26.14 - __ jcc(Assembler::notEqual, *stub->entry()); 26.15 - // check for self 26.16 - __ cmpptr(klass_RInfo, k_RInfo); 26.17 - __ jcc(Assembler::equal, done); 26.18 - 26.19 + // perform the fast part of the checking logic 26.20 + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL); 26.21 + // call out-of-line instance of __ check_klass_subtype_slow_path(...): 26.22 __ push(klass_RInfo); 26.23 __ push(k_RInfo); 26.24 __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); 26.25 @@ -1735,17 +1726,9 @@ 26.26 } 26.27 __ bind(done); 26.28 } else { 26.29 - __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); 26.30 - // See if we get an immediate positive hit 26.31 - __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1)); 26.32 - __ jcc(Assembler::equal, done); 26.33 - // check for immediate negative hit 26.34 - __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); 26.35 - __ jcc(Assembler::notEqual, *stub->entry()); 26.36 - // check for self 26.37 - __ cmpptr(klass_RInfo, k_RInfo); 26.38 - __ jcc(Assembler::equal, done); 26.39 - 26.40 + // perform the fast part of the checking logic 26.41 + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL); 26.42 + // call out-of-line instance of __ check_klass_subtype_slow_path(...): 26.43 __ push(klass_RInfo); 26.44 __ push(k_RInfo); 26.45 __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); 26.46 @@ -1821,23 +1804,15 @@ 26.47 __ pop(dst); 26.48 __ jmp(done); 26.49 } 26.50 - } else { 26.51 -#else 26.52 - { // YUCK 26.53 + } 26.54 + else // next block is unconditional if LP64: 26.55 #endif // LP64 26.56 + { 26.57 assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers"); 26.58 26.59 - __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); 26.60 - // See if we get an immediate positive hit 26.61 - __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1)); 26.62 - __ jcc(Assembler::equal, one); 26.63 - // check for immediate negative hit 26.64 - __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); 26.65 - __ jcc(Assembler::notEqual, zero); 26.66 - // check for self 26.67 - __ cmpptr(klass_RInfo, k_RInfo); 26.68 - __ jcc(Assembler::equal, one); 26.69 - 26.70 + // perform the fast part of the checking logic 26.71 + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL); 26.72 + // call out-of-line instance of __ check_klass_subtype_slow_path(...): 26.73 __ push(klass_RInfo); 26.74 __ push(k_RInfo); 26.75 __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
27.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Mar 20 22:08:48 2009 -0400 27.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Mon Mar 23 10:42:20 2009 -0400 27.3 @@ -1354,6 +1354,13 @@ 27.4 27.5 case slow_subtype_check_id: 27.6 { 27.7 + // Typical calling sequence: 27.8 + // __ push(klass_RInfo); // object klass or other subclass 27.9 + // __ push(sup_k_RInfo); // array element klass or other superclass 27.10 + // __ call(slow_subtype_check); 27.11 + // Note that the subclass is pushed first, and is therefore deepest. 27.12 + // Previous versions of this code reversed the names 'sub' and 'super'. 27.13 + // This was operationally harmless but made the code unreadable. 27.14 enum layout { 27.15 rax_off, SLOT2(raxH_off) 27.16 rcx_off, SLOT2(rcxH_off) 27.17 @@ -1361,9 +1368,10 @@ 27.18 rdi_off, SLOT2(rdiH_off) 27.19 // saved_rbp_off, SLOT2(saved_rbpH_off) 27.20 return_off, SLOT2(returnH_off) 27.21 - sub_off, SLOT2(subH_off) 27.22 - super_off, SLOT2(superH_off) 27.23 - framesize 27.24 + sup_k_off, SLOT2(sup_kH_off) 27.25 + klass_off, SLOT2(superH_off) 27.26 + framesize, 27.27 + result_off = klass_off // deepest argument is also the return value 27.28 }; 27.29 27.30 __ set_info("slow_subtype_check", dont_gc_arguments); 27.31 @@ -1373,19 +1381,14 @@ 27.32 __ push(rax); 27.33 27.34 // This is called by pushing args and not with C abi 27.35 - __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super 27.36 - __ movptr(rax, Address(rsp, (sub_off ) * VMRegImpl::stack_slot_size)); // sub 27.37 - 27.38 - __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())); 27.39 - // since size is postive movl does right thing on 64bit 27.40 - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 27.41 - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 27.42 + __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass 27.43 + __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass 27.44 27.45 Label miss; 27.46 - __ repne_scan(); 27.47 - __ jcc(Assembler::notEqual, miss); 27.48 - __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax); 27.49 - __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result 27.50 + __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss); 27.51 + 27.52 + // fallthrough on success: 27.53 + __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result 27.54 __ pop(rax); 27.55 __ pop(rcx); 27.56 __ pop(rsi); 27.57 @@ -1393,7 +1396,7 @@ 27.58 __ ret(0); 27.59 27.60 __ bind(miss); 27.61 - __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result 27.62 + __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result 27.63 __ pop(rax); 27.64 __ pop(rcx); 27.65 __ pop(rsi);
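On the caller side (compare c1_LIRAssembler_x86.cpp above), the protocol implied by result_off == klass_off looks roughly like this; a sketch, with the pops shown only to make the slot reuse explicit:

    __ push(klass_RInfo);   // subclass: pushed first, so deepest slot
    __ push(k_RInfo);       // superclass
    __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
    __ pop(k_RInfo);        // discard the superclass slot
    __ pop(klass_RInfo);    // deepest slot now holds the result: 1 or 0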
28.1 --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp Fri Mar 20 22:08:48 2009 -0400 28.2 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp Mon Mar 23 10:42:20 2009 -0400 28.3 @@ -219,47 +219,16 @@ 28.4 // Resets EDI to locals. Register sub_klass cannot be any of the above. 28.5 void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) { 28.6 assert( Rsub_klass != rax, "rax, holds superklass" ); 28.7 - assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" ); 28.8 - assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" ); 28.9 - Label not_subtype, loop; 28.10 + assert( Rsub_klass != rcx, "used as a temp" ); 28.11 + assert( Rsub_klass != rdi, "used as a temp, restored from locals" ); 28.12 28.13 // Profile the not-null value's klass. 28.14 - profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi 28.15 + profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi 28.16 28.17 - // Load the super-klass's check offset into ECX 28.18 - movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) ); 28.19 - // Load from the sub-klass's super-class display list, or a 1-word cache of 28.20 - // the secondary superclass list, or a failing value with a sentinel offset 28.21 - // if the super-klass is an interface or exceptionally deep in the Java 28.22 - // hierarchy and we have to scan the secondary superclass list the hard way. 28.23 - // See if we get an immediate positive hit 28.24 - cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) ); 28.25 - jcc( Assembler::equal,ok_is_subtype ); 28.26 + // Do the check. 28.27 + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx 28.28 28.29 - // Check for immediate negative hit 28.30 - cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); 28.31 - jcc( Assembler::notEqual, not_subtype ); 28.32 - // Check for self 28.33 - cmpptr( Rsub_klass, rax ); 28.34 - jcc( Assembler::equal, ok_is_subtype ); 28.35 - 28.36 - // Now do a linear scan of the secondary super-klass chain. 28.37 - movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) ); 28.38 - // EDI holds the objArrayOop of secondary supers. 28.39 - movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length 28.40 - // Skip to start of data; also clear Z flag incase ECX is zero 28.41 - addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) ); 28.42 - // Scan ECX words at [EDI] for occurance of EAX 28.43 - // Set NZ/Z based on last compare 28.44 - repne_scan(); 28.45 - restore_locals(); // Restore EDI; Must not blow flags 28.46 - // Not equal? 28.47 - jcc( Assembler::notEqual, not_subtype ); 28.48 - // Must be equal but missed in cache. Update cache. 28.49 - movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax ); 28.50 - jmp( ok_is_subtype ); 28.51 - 28.52 - bind(not_subtype); 28.53 + // Profile the failure of the check. 28.54 profile_typecheck_failed(rcx); // blows rcx 28.55 } 28.56
29.1 --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Fri Mar 20 22:08:48 2009 -0400 29.2 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Mon Mar 23 10:42:20 2009 -0400 29.3 @@ -232,65 +232,13 @@ 29.4 assert(Rsub_klass != rcx, "rcx holds 2ndary super array length"); 29.5 assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr"); 29.6 29.7 - Label not_subtype, not_subtype_pop, loop; 29.8 + // Profile the not-null value's klass. 29.9 + profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi 29.10 29.11 - // Profile the not-null value's klass. 29.12 - profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi 29.13 + // Do the check. 29.14 + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx 29.15 29.16 - // Load the super-klass's check offset into rcx 29.17 - movl(rcx, Address(rax, sizeof(oopDesc) + 29.18 - Klass::super_check_offset_offset_in_bytes())); 29.19 - // Load from the sub-klass's super-class display list, or a 1-word 29.20 - // cache of the secondary superclass list, or a failing value with a 29.21 - // sentinel offset if the super-klass is an interface or 29.22 - // exceptionally deep in the Java hierarchy and we have to scan the 29.23 - // secondary superclass list the hard way. See if we get an 29.24 - // immediate positive hit 29.25 - cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1)); 29.26 - jcc(Assembler::equal,ok_is_subtype); 29.27 - 29.28 - // Check for immediate negative hit 29.29 - cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); 29.30 - jcc( Assembler::notEqual, not_subtype ); 29.31 - // Check for self 29.32 - cmpptr(Rsub_klass, rax); 29.33 - jcc(Assembler::equal, ok_is_subtype); 29.34 - 29.35 - // Now do a linear scan of the secondary super-klass chain. 29.36 - movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) + 29.37 - Klass::secondary_supers_offset_in_bytes())); 29.38 - // rdi holds the objArrayOop of secondary supers. 29.39 - // Load the array length 29.40 - movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 29.41 - // Skip to start of data; also clear Z flag incase rcx is zero 29.42 - addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 29.43 - // Scan rcx words at [rdi] for occurance of rax 29.44 - // Set NZ/Z based on last compare 29.45 - 29.46 - // this part is kind tricky, as values in supers array could be 32 or 64 bit wide 29.47 - // and we store values in objArrays always encoded, thus we need to encode value 29.48 - // before repne 29.49 - if (UseCompressedOops) { 29.50 - push(rax); 29.51 - encode_heap_oop(rax); 29.52 - repne_scanl(); 29.53 - // Not equal? 29.54 - jcc(Assembler::notEqual, not_subtype_pop); 29.55 - // restore heap oop here for movq 29.56 - pop(rax); 29.57 - } else { 29.58 - repne_scan(); 29.59 - jcc(Assembler::notEqual, not_subtype); 29.60 - } 29.61 - // Must be equal but missed in cache. Update cache. 29.62 - movptr(Address(Rsub_klass, sizeof(oopDesc) + 29.63 - Klass::secondary_super_cache_offset_in_bytes()), rax); 29.64 - jmp(ok_is_subtype); 29.65 - 29.66 - bind(not_subtype_pop); 29.67 - // restore heap oop here for miss 29.68 - if (UseCompressedOops) pop(rax); 29.69 - bind(not_subtype); 29.70 + // Profile the failure of the check. 29.71 profile_typecheck_failed(rcx); // blows rcx 29.72 } 29.73
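
Both interpreter ports now defer to check_klass_subtype(), which factors out the two-level test the deleted code spelled out inline: a constant-offset probe of the supertype display (or of the one-word secondary-super cache), and only on an inconclusive probe a linear scan of the secondary supers that refills the cache. A sketch of that logic in plain C++; the accessors are illustrative stand-ins, not the real Klass declarations:

    struct Klass;  // opaque here
    int    super_check_offset(Klass* super);       // stand-in accessors
    int    secondary_super_cache_offset();
    int    secondary_supers_length(Klass* k);
    Klass* secondary_super_at(Klass* k, int i);
    void   set_secondary_super_cache(Klass* k, Klass* super);

    bool check_klass_subtype_sketch(Klass* sub, Klass* super) {
      if (sub == super) return true;                            // immediate hit
      int off = super_check_offset(super);
      if (*(Klass**)((char*)sub + off) == super) return true;   // display or cache hit
      if (off != secondary_super_cache_offset()) return false;  // primary super: miss is final
      for (int i = 0; i < secondary_supers_length(sub); i++) {  // slow path: linear scan
        if (secondary_super_at(sub, i) == super) {
          set_secondary_super_cache(sub, super);                // refill the 1-word cache
          return true;
        }
      }
      return false;
    }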
30.1 --- a/src/cpu/x86/vm/interpreterRT_x86_64.cpp Fri Mar 20 22:08:48 2009 -0400 30.2 +++ b/src/cpu/x86/vm/interpreterRT_x86_64.cpp Mon Mar 23 10:42:20 2009 -0400 30.3 @@ -349,7 +349,7 @@ 30.4 30.5 if (_num_args < Argument::n_float_register_parameters_c-1) { 30.6 *_reg_args++ = from_obj; 30.7 - *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float 30.8 + *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float 30.9 _num_args++; 30.10 } else { 30.11 *_to++ = from_obj; 30.12 @@ -364,7 +364,7 @@ 30.13 30.14 if (_num_args < Argument::n_float_register_parameters_c-1) { 30.15 *_reg_args++ = from_obj; 30.16 - *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double 30.17 + *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double 30.18 _num_args++; 30.19 } else { 30.20 *_to++ = from_obj;
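
The fix here casts the shifted mask before it is or-ed into the word that _fp_identifiers points at. The encoding itself is unchanged: two bits per outgoing register argument, binary 01 for float and 11 for double. A sketch, assuming exactly that encoding:

    #include <stdint.h>
    // Two bits per register argument: 01 = float, 11 = double, 00 = int/ptr.
    // Sketch only; the real code writes through _fp_identifiers as above.
    void mark_fp_arg(intptr_t* fp_identifiers, int arg_index, bool is_double) {
      *fp_identifiers |= (intptr_t)(is_double ? 0x3 : 0x1) << (arg_index * 2);
    }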
31.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Mar 20 22:08:48 2009 -0400 31.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Mon Mar 23 10:42:20 2009 -0400 31.3 @@ -1310,81 +1310,51 @@ 31.4 Address& super_check_offset_addr, 31.5 Address& super_klass_addr, 31.6 Register temp, 31.7 - Label* L_success_ptr, Label* L_failure_ptr) { 31.8 + Label* L_success, Label* L_failure) { 31.9 BLOCK_COMMENT("type_check:"); 31.10 31.11 Label L_fallthrough; 31.12 - bool fall_through_on_success = (L_success_ptr == NULL); 31.13 - if (fall_through_on_success) { 31.14 - L_success_ptr = &L_fallthrough; 31.15 - } else { 31.16 - L_failure_ptr = &L_fallthrough; 31.17 - } 31.18 - Label& L_success = *L_success_ptr; 31.19 - Label& L_failure = *L_failure_ptr; 31.20 +#define LOCAL_JCC(assembler_con, label_ptr) \ 31.21 + if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ 31.22 + else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ 31.23 31.24 + // The following is a strange variation of the fast path which requires 31.25 + // one less register, because needed values are on the argument stack. 31.26 + // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, 31.27 + // L_success, L_failure, NULL); 31.28 assert_different_registers(sub_klass, temp); 31.29 31.30 - // a couple of useful fields in sub_klass: 31.31 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 31.32 - Klass::secondary_supers_offset_in_bytes()); 31.33 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 31.34 Klass::secondary_super_cache_offset_in_bytes()); 31.35 - Address secondary_supers_addr(sub_klass, ss_offset); 31.36 - Address super_cache_addr( sub_klass, sc_offset); 31.37 31.38 // if the pointers are equal, we are done (e.g., String[] elements) 31.39 __ cmpptr(sub_klass, super_klass_addr); 31.40 - __ jcc(Assembler::equal, L_success); 31.41 + LOCAL_JCC(Assembler::equal, L_success); 31.42 31.43 // check the supertype display: 31.44 __ movl2ptr(temp, super_check_offset_addr); 31.45 Address super_check_addr(sub_klass, temp, Address::times_1, 0); 31.46 __ movptr(temp, super_check_addr); // load displayed supertype 31.47 __ cmpptr(temp, super_klass_addr); // test the super type 31.48 - __ jcc(Assembler::equal, L_success); 31.49 + LOCAL_JCC(Assembler::equal, L_success); 31.50 31.51 // if it was a primary super, we can just fail immediately 31.52 __ cmpl(super_check_offset_addr, sc_offset); 31.53 - __ jcc(Assembler::notEqual, L_failure); 31.54 + LOCAL_JCC(Assembler::notEqual, L_failure); 31.55 31.56 - // Now do a linear scan of the secondary super-klass chain. 31.57 - // This code is rarely used, so simplicity is a virtue here. 31.58 - inc_counter_np(SharedRuntime::_partial_subtype_ctr); 31.59 - { 31.60 - // The repne_scan instruction uses fixed registers, which we must spill. 31.61 - // (We need a couple more temps in any case.) 31.62 - __ push(rax); 31.63 - __ push(rcx); 31.64 - __ push(rdi); 31.65 - assert_different_registers(sub_klass, rax, rcx, rdi); 31.66 + // The repne_scan instruction uses fixed registers, which will get spilled. 31.67 + // We happen to know this works best when super_klass is in rax. 31.68 + Register super_klass = temp; 31.69 + __ movptr(super_klass, super_klass_addr); 31.70 + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, 31.71 + L_success, L_failure); 31.72 31.73 - __ movptr(rdi, secondary_supers_addr); 31.74 - // Load the array length. 31.75 - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 31.76 - // Skip to start of data. 
31.77 - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 31.78 - // Scan rcx words at [edi] for occurance of rax, 31.79 - // Set NZ/Z based on last compare 31.80 - __ movptr(rax, super_klass_addr); 31.81 - __ repne_scan(); 31.82 + __ bind(L_fallthrough); 31.83 31.84 - // Unspill the temp. registers: 31.85 - __ pop(rdi); 31.86 - __ pop(rcx); 31.87 - __ pop(rax); 31.88 - } 31.89 - __ jcc(Assembler::notEqual, L_failure); 31.90 + if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } 31.91 + if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } 31.92 31.93 - // Success. Cache the super we found and proceed in triumph. 31.94 - __ movptr(temp, super_klass_addr); // note: rax, is dead 31.95 - __ movptr(super_cache_addr, temp); 31.96 - 31.97 - if (!fall_through_on_success) 31.98 - __ jmp(L_success); 31.99 - 31.100 - // Fall through on failure! 31.101 - __ bind(L_fallthrough); 31.102 +#undef LOCAL_JCC 31.103 } 31.104 31.105 //
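
The 32-bit stub keeps its stack-relative fast path but funnels every conditional exit through LOCAL_JCC, whose convention is that a NULL label pointer means "fall through here". Sketched as a function rather than a macro:

    // NULL label pointer == fall through; otherwise jump to the caller's label.
    // Sketch of what LOCAL_JCC expands to.
    void local_jcc(MacroAssembler* masm, Assembler::Condition cc,
                   Label* user_label, Label& fallthrough) {
      masm->jcc(cc, (user_label != NULL) ? *user_label : fallthrough);
    }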
32.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Mar 20 22:08:48 2009 -0400 32.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Mar 23 10:42:20 2009 -0400 32.3 @@ -2091,66 +2091,9 @@ 32.4 32.5 Label L_miss; 32.6 32.7 - // a couple of useful fields in sub_klass: 32.8 - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 32.9 - Klass::secondary_supers_offset_in_bytes()); 32.10 - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 32.11 - Klass::secondary_super_cache_offset_in_bytes()); 32.12 - Address secondary_supers_addr(sub_klass, ss_offset); 32.13 - Address super_cache_addr( sub_klass, sc_offset); 32.14 - 32.15 - // if the pointers are equal, we are done (e.g., String[] elements) 32.16 - __ cmpptr(super_klass, sub_klass); 32.17 - __ jcc(Assembler::equal, L_success); 32.18 - 32.19 - // check the supertype display: 32.20 - Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 32.21 - __ cmpptr(super_klass, super_check_addr); // test the super type 32.22 - __ jcc(Assembler::equal, L_success); 32.23 - 32.24 - // if it was a primary super, we can just fail immediately 32.25 - __ cmpl(super_check_offset, sc_offset); 32.26 - __ jcc(Assembler::notEqual, L_miss); 32.27 - 32.28 - // Now do a linear scan of the secondary super-klass chain. 32.29 - // The repne_scan instruction uses fixed registers, which we must spill. 32.30 - // (We need a couple more temps in any case.) 32.31 - // This code is rarely used, so simplicity is a virtue here. 32.32 - inc_counter_np(SharedRuntime::_partial_subtype_ctr); 32.33 - { 32.34 - __ push(rax); 32.35 - __ push(rcx); 32.36 - __ push(rdi); 32.37 - assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); 32.38 - 32.39 - __ movptr(rdi, secondary_supers_addr); 32.40 - // Load the array length. 32.41 - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 32.42 - // Skip to start of data. 32.43 - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 32.44 - // Scan rcx words at [rdi] for occurance of rax 32.45 - // Set NZ/Z based on last compare 32.46 - __ movptr(rax, super_klass); 32.47 - if (UseCompressedOops) { 32.48 - // Compare against compressed form. Don't need to uncompress because 32.49 - // looks like orig rax is restored in popq below. 32.50 - __ encode_heap_oop(rax); 32.51 - __ repne_scanl(); 32.52 - } else { 32.53 - __ repne_scan(); 32.54 - } 32.55 - 32.56 - // Unspill the temp. registers: 32.57 - __ pop(rdi); 32.58 - __ pop(rcx); 32.59 - __ pop(rax); 32.60 - 32.61 - __ jcc(Assembler::notEqual, L_miss); 32.62 - } 32.63 - 32.64 - // Success. Cache the super we found and proceed in triumph. 32.65 - __ movptr(super_cache_addr, super_klass); // note: rax is dead 32.66 - __ jmp(L_success); 32.67 + __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, 32.68 + super_check_offset); 32.69 + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); 32.70 32.71 // Fall through on failure! 32.72 __ BIND(L_miss);
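
On 64-bit the whole inline scan collapses into the fast-path/slow-path pair, chained by the same NULL-means-fall-through convention: the fast path jumps to L_success or L_miss when the probe is definitive and otherwise falls into the slow path, whose own miss falls through to L_miss. A sketch of that flow; the tri-state return value is an illustration, not the helpers' real signatures:

    struct Klass;
    int  fast_path(Klass* sub, Klass* super);   // stand-in: 1 hit, 0 miss, -1 inconclusive
    bool slow_path(Klass* sub, Klass* super);   // stand-in: scans secondary supers

    bool subtype_check_flow(Klass* sub, Klass* super) {
      int r = fast_path(sub, super);   // display / cache probe
      if (r >= 0) return r != 0;       // definitive hit or miss
      return slow_path(sub, super);    // linear scan, refilling the cache
    }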
33.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp Fri Mar 20 22:08:48 2009 -0400 33.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Mon Mar 23 10:42:20 2009 -0400 33.3 @@ -284,7 +284,7 @@ 33.4 } 33.5 33.6 char buf[256]; 33.7 - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 33.8 + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 33.9 cores_per_cpu(), threads_per_core(), 33.10 cpu_family(), _model, _stepping, 33.11 (supports_cmov() ? ", cmov" : ""), 33.12 @@ -297,6 +297,7 @@ 33.13 (supports_ssse3()? ", ssse3": ""), 33.14 (supports_sse4_1() ? ", sse4.1" : ""), 33.15 (supports_sse4_2() ? ", sse4.2" : ""), 33.16 + (supports_popcnt() ? ", popcnt" : ""), 33.17 (supports_mmx_ext() ? ", mmxext" : ""), 33.18 (supports_3dnow() ? ", 3dnow" : ""), 33.19 (supports_3dnow2() ? ", 3dnowext" : ""), 33.20 @@ -410,6 +411,13 @@ 33.21 } 33.22 } 33.23 33.24 + // Use population count instruction if available. 33.25 + if (supports_popcnt()) { 33.26 + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 33.27 + UsePopCountInstruction = true; 33.28 + } 33.29 + } 33.30 + 33.31 assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); 33.32 assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); 33.33
34.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp Fri Mar 20 22:08:48 2009 -0400 34.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Mon Mar 23 10:42:20 2009 -0400 34.3 @@ -70,7 +70,9 @@ 34.4 dca : 1, 34.5 sse4_1 : 1, 34.6 sse4_2 : 1, 34.7 - : 11; 34.8 + : 2, 34.9 + popcnt : 1, 34.10 + : 8; 34.11 } bits; 34.12 }; 34.13 34.14 @@ -179,7 +181,8 @@ 34.15 CPU_SSSE3 = (1 << 9), 34.16 CPU_SSE4A = (1 << 10), 34.17 CPU_SSE4_1 = (1 << 11), 34.18 - CPU_SSE4_2 = (1 << 12) 34.19 + CPU_SSE4_2 = (1 << 12), 34.20 + CPU_POPCNT = (1 << 13) 34.21 } cpuFeatureFlags; 34.22 34.23 // cpuid information block. All info derived from executing cpuid with 34.24 @@ -290,6 +293,8 @@ 34.25 result |= CPU_SSE4_1; 34.26 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 34.27 result |= CPU_SSE4_2; 34.28 + if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 34.29 + result |= CPU_POPCNT; 34.30 return result; 34.31 } 34.32 34.33 @@ -379,6 +384,7 @@ 34.34 static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } 34.35 static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } 34.36 static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } 34.37 + static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } 34.38 // 34.39 // AMD features 34.40 //
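
The new bitfield lands exactly where CPUID puts it: in leaf 1's ECX, sse4_2 is bit 20, bits 21-22 are skipped by the ": 2" padding, and POPCNT is bit 23. A self-contained sketch of the same detection using the GCC/Clang cpuid header (an assumption; HotSpot issues the cpuid instruction itself):

    #include <cpuid.h>  // GCC/Clang intrinsic header; sketch, not HotSpot code

    static bool cpu_supports_popcnt() {
      unsigned eax, ebx, ecx, edx;
      if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return false;
      return (ecx & (1u << 23)) != 0;   // CPUID.(EAX=1):ECX bit 23 = POPCNT
    }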
35.1 --- a/src/cpu/x86/vm/x86_32.ad Fri Mar 20 22:08:48 2009 -0400 35.2 +++ b/src/cpu/x86/vm/x86_32.ad Mon Mar 23 10:42:20 2009 -0400 35.3 @@ -1483,16 +1483,20 @@ 35.4 // main source block for now. In future, we can generalize this by 35.5 // adding a syntax that specifies the sizes of fields in an order, 35.6 // so that the adlc can build the emit functions automagically 35.7 - enc_class OpcP %{ // Emit opcode 35.8 - emit_opcode(cbuf,$primary); 35.9 - %} 35.10 - 35.11 - enc_class OpcS %{ // Emit opcode 35.12 - emit_opcode(cbuf,$secondary); 35.13 - %} 35.14 - 35.15 - enc_class Opcode(immI d8 ) %{ // Emit opcode 35.16 - emit_opcode(cbuf,$d8$$constant); 35.17 + 35.18 + // Emit primary opcode 35.19 + enc_class OpcP %{ 35.20 + emit_opcode(cbuf, $primary); 35.21 + %} 35.22 + 35.23 + // Emit secondary opcode 35.24 + enc_class OpcS %{ 35.25 + emit_opcode(cbuf, $secondary); 35.26 + %} 35.27 + 35.28 + // Emit opcode directly 35.29 + enc_class Opcode(immI d8) %{ 35.30 + emit_opcode(cbuf, $d8$$constant); 35.31 %} 35.32 35.33 enc_class SizePrefix %{ 35.34 @@ -1688,26 +1692,15 @@ 35.35 Register Reax = as_Register(EAX_enc); // super class 35.36 Register Recx = as_Register(ECX_enc); // killed 35.37 Register Resi = as_Register(ESI_enc); // sub class 35.38 - Label hit, miss; 35.39 + Label miss; 35.40 35.41 MacroAssembler _masm(&cbuf); 35.42 - // Compare super with sub directly, since super is not in its own SSA. 35.43 - // The compiler used to emit this test, but we fold it in here, 35.44 - // to allow platform-specific tweaking on sparc. 35.45 - __ cmpptr(Reax, Resi); 35.46 - __ jcc(Assembler::equal, hit); 35.47 -#ifndef PRODUCT 35.48 - __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 35.49 -#endif //PRODUCT 35.50 - __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())); 35.51 - __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes())); 35.52 - __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 35.53 - __ repne_scan(); 35.54 - __ jcc(Assembler::notEqual, miss); 35.55 - __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax); 35.56 - __ bind(hit); 35.57 - if( $primary ) 35.58 - __ xorptr(Redi,Redi); 35.59 + __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 35.60 + NULL, &miss, 35.61 + /*set_cond_codes:*/ true); 35.62 + if ($primary) { 35.63 + __ xorptr(Redi, Redi); 35.64 + } 35.65 __ bind(miss); 35.66 %} 35.67 35.68 @@ -6387,6 +6380,67 @@ 35.69 %} 35.70 35.71 35.72 +//---------- Population Count Instructions ------------------------------------- 35.73 + 35.74 +instruct popCountI(eRegI dst, eRegI src) %{ 35.75 + predicate(UsePopCountInstruction); 35.76 + match(Set dst (PopCountI src)); 35.77 + 35.78 + format %{ "POPCNT $dst, $src" %} 35.79 + ins_encode %{ 35.80 + __ popcntl($dst$$Register, $src$$Register); 35.81 + %} 35.82 + ins_pipe(ialu_reg); 35.83 +%} 35.84 + 35.85 +instruct popCountI_mem(eRegI dst, memory mem) %{ 35.86 + predicate(UsePopCountInstruction); 35.87 + match(Set dst (PopCountI (LoadI mem))); 35.88 + 35.89 + format %{ "POPCNT $dst, $mem" %} 35.90 + ins_encode %{ 35.91 + __ popcntl($dst$$Register, $mem$$Address); 35.92 + %} 35.93 + ins_pipe(ialu_reg); 35.94 +%} 35.95 + 35.96 +// Note: Long.bitCount(long) returns an int. 
35.97 +instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{ 35.98 + predicate(UsePopCountInstruction); 35.99 + match(Set dst (PopCountL src)); 35.100 + effect(KILL cr, TEMP tmp, TEMP dst); 35.101 + 35.102 + format %{ "POPCNT $dst, $src.lo\n\t" 35.103 + "POPCNT $tmp, $src.hi\n\t" 35.104 + "ADD $dst, $tmp" %} 35.105 + ins_encode %{ 35.106 + __ popcntl($dst$$Register, $src$$Register); 35.107 + __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 35.108 + __ addl($dst$$Register, $tmp$$Register); 35.109 + %} 35.110 + ins_pipe(ialu_reg); 35.111 +%} 35.112 + 35.113 +// Note: Long.bitCount(long) returns an int. 35.114 +instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{ 35.115 + predicate(UsePopCountInstruction); 35.116 + match(Set dst (PopCountL (LoadL mem))); 35.117 + effect(KILL cr, TEMP tmp, TEMP dst); 35.118 + 35.119 + format %{ "POPCNT $dst, $mem\n\t" 35.120 + "POPCNT $tmp, $mem+4\n\t" 35.121 + "ADD $dst, $tmp" %} 35.122 + ins_encode %{ 35.123 + //__ popcntl($dst$$Register, $mem$$Address$$first); 35.124 + //__ popcntl($tmp$$Register, $mem$$Address$$second); 35.125 + __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false)); 35.126 + __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false)); 35.127 + __ addl($dst$$Register, $tmp$$Register); 35.128 + %} 35.129 + ins_pipe(ialu_reg); 35.130 +%} 35.131 + 35.132 + 35.133 //----------Load/Store/Move Instructions--------------------------------------- 35.134 //----------Load Instructions-------------------------------------------------- 35.135 // Load Byte (8bit signed) 35.136 @@ -12501,15 +12555,12 @@ 35.137 effect( KILL rcx, KILL cr ); 35.138 35.139 ins_cost(1100); // slightly larger than the next version 35.140 - format %{ "CMPL EAX,ESI\n\t" 35.141 - "JEQ,s hit\n\t" 35.142 - "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 35.143 + format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 35.144 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" 35.145 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 35.146 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 35.147 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 35.148 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 35.149 - "hit:\n\t" 35.150 "XOR $result,$result\t\t Hit: EDI zero\n\t" 35.151 "miss:\t" %} 35.152 35.153 @@ -12523,9 +12574,7 @@ 35.154 effect( KILL rcx, KILL result ); 35.155 35.156 ins_cost(1000); 35.157 - format %{ "CMPL EAX,ESI\n\t" 35.158 - "JEQ,s miss\t# Actually a hit; we are done.\n\t" 35.159 - "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 35.160 + format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 35.161 "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" 35.162 "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 35.163 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
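
The 32-bit popCountL rules above compute a 64-bit population count as two 32-bit POPCNTs plus an ADD; since Long.bitCount(long) returns an int, no widening of the sum is needed. The equivalent in plain C++:

    // Sketch of what popCountL computes on ia32: POPCNT low half,
    // POPCNT high half, add the two counts.
    static int popcount64_on_ia32(unsigned long long v) {
      unsigned lo = (unsigned) v;
      unsigned hi = (unsigned)(v >> 32);
      return __builtin_popcount(lo) + __builtin_popcount(hi);
    }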
36.1 --- a/src/cpu/x86/vm/x86_64.ad Fri Mar 20 22:08:48 2009 -0400 36.2 +++ b/src/cpu/x86/vm/x86_64.ad Mon Mar 23 10:42:20 2009 -0400 36.3 @@ -326,7 +326,6 @@ 36.4 R9, R9_H, 36.5 R10, R10_H, 36.6 R11, R11_H, 36.7 - R12, R12_H, 36.8 R13, R13_H, 36.9 R14, R14_H); 36.10 36.11 @@ -340,7 +339,6 @@ 36.12 R9, R9_H, 36.13 R10, R10_H, 36.14 R11, R11_H, 36.15 - R12, R12_H, 36.16 R13, R13_H, 36.17 R14, R14_H); 36.18 36.19 @@ -354,7 +352,6 @@ 36.20 R9, R9_H, 36.21 R10, R10_H, 36.22 R11, R11_H, 36.23 - R12, R12_H, 36.24 R13, R13_H, 36.25 R14, R14_H); 36.26 36.27 @@ -444,9 +441,6 @@ 36.28 // Singleton class for RDX long register 36.29 reg_class long_rdx_reg(RDX, RDX_H); 36.30 36.31 -// Singleton class for R12 long register 36.32 -reg_class long_r12_reg(R12, R12_H); 36.33 - 36.34 // Class for all int registers (except RSP) 36.35 reg_class int_reg(RAX, 36.36 RDX, 36.37 @@ -1842,7 +1836,9 @@ 36.38 { 36.39 if (UseCompressedOops) { 36.40 st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes()); 36.41 - st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); 36.42 + if (Universe::narrow_oop_shift() != 0) { 36.43 + st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); 36.44 + } 36.45 st->print_cr("cmpq rax, rscratch1\t # Inline cache check"); 36.46 } else { 36.47 st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t" 36.48 @@ -1891,7 +1887,11 @@ 36.49 uint MachUEPNode::size(PhaseRegAlloc* ra_) const 36.50 { 36.51 if (UseCompressedOops) { 36.52 - return OptoBreakpoint ? 19 : 20; 36.53 + if (Universe::narrow_oop_shift() == 0) { 36.54 + return OptoBreakpoint ? 15 : 16; 36.55 + } else { 36.56 + return OptoBreakpoint ? 19 : 20; 36.57 + } 36.58 } else { 36.59 return OptoBreakpoint ? 11 : 12; 36.60 } 36.61 @@ -2575,45 +2575,13 @@ 36.62 Register Rrax = as_Register(RAX_enc); // super class 36.63 Register Rrcx = as_Register(RCX_enc); // killed 36.64 Register Rrsi = as_Register(RSI_enc); // sub class 36.65 - Label hit, miss, cmiss; 36.66 + Label miss; 36.67 + const bool set_cond_codes = true; 36.68 36.69 MacroAssembler _masm(&cbuf); 36.70 - // Compare super with sub directly, since super is not in its own SSA. 36.71 - // The compiler used to emit this test, but we fold it in here, 36.72 - // to allow platform-specific tweaking on sparc. 
36.73 - __ cmpptr(Rrax, Rrsi); 36.74 - __ jcc(Assembler::equal, hit); 36.75 -#ifndef PRODUCT 36.76 - __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); 36.77 - __ incrementl(Address(Rrcx, 0)); 36.78 -#endif //PRODUCT 36.79 - __ movptr(Rrdi, Address(Rrsi, 36.80 - sizeof(oopDesc) + 36.81 - Klass::secondary_supers_offset_in_bytes())); 36.82 - __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes())); 36.83 - __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 36.84 - if (UseCompressedOops) { 36.85 - __ encode_heap_oop(Rrax); 36.86 - __ repne_scanl(); 36.87 - __ jcc(Assembler::notEqual, cmiss); 36.88 - __ decode_heap_oop(Rrax); 36.89 - __ movptr(Address(Rrsi, 36.90 - sizeof(oopDesc) + 36.91 - Klass::secondary_super_cache_offset_in_bytes()), 36.92 - Rrax); 36.93 - __ jmp(hit); 36.94 - __ bind(cmiss); 36.95 - __ decode_heap_oop(Rrax); 36.96 - __ jmp(miss); 36.97 - } else { 36.98 - __ repne_scan(); 36.99 - __ jcc(Assembler::notEqual, miss); 36.100 - __ movptr(Address(Rrsi, 36.101 - sizeof(oopDesc) + 36.102 - Klass::secondary_super_cache_offset_in_bytes()), 36.103 - Rrax); 36.104 - } 36.105 - __ bind(hit); 36.106 + __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi, 36.107 + NULL, &miss, 36.108 + /*set_cond_codes:*/ true); 36.109 if ($primary) { 36.110 __ xorptr(Rrdi, Rrdi); 36.111 } 36.112 @@ -4906,15 +4874,6 @@ 36.113 interface(REG_INTER); 36.114 %} 36.115 36.116 - 36.117 -operand r12RegL() %{ 36.118 - constraint(ALLOC_IN_RC(long_r12_reg)); 36.119 - match(RegL); 36.120 - 36.121 - format %{ %} 36.122 - interface(REG_INTER); 36.123 -%} 36.124 - 36.125 operand rRegN() %{ 36.126 constraint(ALLOC_IN_RC(int_reg)); 36.127 match(RegN); 36.128 @@ -5289,21 +5248,6 @@ 36.129 %} 36.130 %} 36.131 36.132 -// Indirect Narrow Oop Plus Offset Operand 36.133 -operand indNarrowOopOffset(rRegN src, immL32 off) %{ 36.134 - constraint(ALLOC_IN_RC(ptr_reg)); 36.135 - match(AddP (DecodeN src) off); 36.136 - 36.137 - op_cost(10); 36.138 - format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %} 36.139 - interface(MEMORY_INTER) %{ 36.140 - base(0xc); // R12 36.141 - index($src); 36.142 - scale(0x3); 36.143 - disp($off); 36.144 - %} 36.145 -%} 36.146 - 36.147 // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand 36.148 operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) 36.149 %{ 36.150 @@ -5321,6 +5265,158 @@ 36.151 %} 36.152 %} 36.153 36.154 +// Indirect Narrow Oop Plus Offset Operand 36.155 +// Note: x86 architecture doesn't support "scale * index + offset" without a base 36.156 +// we can't free r12 even with Universe::narrow_oop_base() == NULL. 
36.157 +operand indCompressedOopOffset(rRegN reg, immL32 off) %{ 36.158 + predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); 36.159 + constraint(ALLOC_IN_RC(ptr_reg)); 36.160 + match(AddP (DecodeN reg) off); 36.161 + 36.162 + op_cost(10); 36.163 + format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %} 36.164 + interface(MEMORY_INTER) %{ 36.165 + base(0xc); // R12 36.166 + index($reg); 36.167 + scale(0x3); 36.168 + disp($off); 36.169 + %} 36.170 +%} 36.171 + 36.172 +// Indirect Memory Operand 36.173 +operand indirectNarrow(rRegN reg) 36.174 +%{ 36.175 + predicate(Universe::narrow_oop_shift() == 0); 36.176 + constraint(ALLOC_IN_RC(ptr_reg)); 36.177 + match(DecodeN reg); 36.178 + 36.179 + format %{ "[$reg]" %} 36.180 + interface(MEMORY_INTER) %{ 36.181 + base($reg); 36.182 + index(0x4); 36.183 + scale(0x0); 36.184 + disp(0x0); 36.185 + %} 36.186 +%} 36.187 + 36.188 +// Indirect Memory Plus Short Offset Operand 36.189 +operand indOffset8Narrow(rRegN reg, immL8 off) 36.190 +%{ 36.191 + predicate(Universe::narrow_oop_shift() == 0); 36.192 + constraint(ALLOC_IN_RC(ptr_reg)); 36.193 + match(AddP (DecodeN reg) off); 36.194 + 36.195 + format %{ "[$reg + $off (8-bit)]" %} 36.196 + interface(MEMORY_INTER) %{ 36.197 + base($reg); 36.198 + index(0x4); 36.199 + scale(0x0); 36.200 + disp($off); 36.201 + %} 36.202 +%} 36.203 + 36.204 +// Indirect Memory Plus Long Offset Operand 36.205 +operand indOffset32Narrow(rRegN reg, immL32 off) 36.206 +%{ 36.207 + predicate(Universe::narrow_oop_shift() == 0); 36.208 + constraint(ALLOC_IN_RC(ptr_reg)); 36.209 + match(AddP (DecodeN reg) off); 36.210 + 36.211 + format %{ "[$reg + $off (32-bit)]" %} 36.212 + interface(MEMORY_INTER) %{ 36.213 + base($reg); 36.214 + index(0x4); 36.215 + scale(0x0); 36.216 + disp($off); 36.217 + %} 36.218 +%} 36.219 + 36.220 +// Indirect Memory Plus Index Register Plus Offset Operand 36.221 +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off) 36.222 +%{ 36.223 + predicate(Universe::narrow_oop_shift() == 0); 36.224 + constraint(ALLOC_IN_RC(ptr_reg)); 36.225 + match(AddP (AddP (DecodeN reg) lreg) off); 36.226 + 36.227 + op_cost(10); 36.228 + format %{"[$reg + $off + $lreg]" %} 36.229 + interface(MEMORY_INTER) %{ 36.230 + base($reg); 36.231 + index($lreg); 36.232 + scale(0x0); 36.233 + disp($off); 36.234 + %} 36.235 +%} 36.236 + 36.237 +// Indirect Memory Plus Index Register Plus Offset Operand 36.238 +operand indIndexNarrow(rRegN reg, rRegL lreg) 36.239 +%{ 36.240 + predicate(Universe::narrow_oop_shift() == 0); 36.241 + constraint(ALLOC_IN_RC(ptr_reg)); 36.242 + match(AddP (DecodeN reg) lreg); 36.243 + 36.244 + op_cost(10); 36.245 + format %{"[$reg + $lreg]" %} 36.246 + interface(MEMORY_INTER) %{ 36.247 + base($reg); 36.248 + index($lreg); 36.249 + scale(0x0); 36.250 + disp(0x0); 36.251 + %} 36.252 +%} 36.253 + 36.254 +// Indirect Memory Times Scale Plus Index Register 36.255 +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) 36.256 +%{ 36.257 + predicate(Universe::narrow_oop_shift() == 0); 36.258 + constraint(ALLOC_IN_RC(ptr_reg)); 36.259 + match(AddP (DecodeN reg) (LShiftL lreg scale)); 36.260 + 36.261 + op_cost(10); 36.262 + format %{"[$reg + $lreg << $scale]" %} 36.263 + interface(MEMORY_INTER) %{ 36.264 + base($reg); 36.265 + index($lreg); 36.266 + scale($scale); 36.267 + disp(0x0); 36.268 + %} 36.269 +%} 36.270 + 36.271 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand 36.272 +operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale) 
36.273 +%{ 36.274 + predicate(Universe::narrow_oop_shift() == 0); 36.275 + constraint(ALLOC_IN_RC(ptr_reg)); 36.276 + match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); 36.277 + 36.278 + op_cost(10); 36.279 + format %{"[$reg + $off + $lreg << $scale]" %} 36.280 + interface(MEMORY_INTER) %{ 36.281 + base($reg); 36.282 + index($lreg); 36.283 + scale($scale); 36.284 + disp($off); 36.285 + %} 36.286 +%} 36.287 + 36.288 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand 36.289 +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale) 36.290 +%{ 36.291 + constraint(ALLOC_IN_RC(ptr_reg)); 36.292 + predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); 36.293 + match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off); 36.294 + 36.295 + op_cost(10); 36.296 + format %{"[$reg + $off + $idx << $scale]" %} 36.297 + interface(MEMORY_INTER) %{ 36.298 + base($reg); 36.299 + index($idx); 36.300 + scale($scale); 36.301 + disp($off); 36.302 + %} 36.303 +%} 36.304 + 36.305 + 36.306 //----------Special Memory Operands-------------------------------------------- 36.307 // Stack Slot Operand - This operand is used for loading and storing temporary 36.308 // values on the stack where a match requires a value to 36.309 @@ -5488,7 +5584,10 @@ 36.310 36.311 opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, 36.312 indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset, 36.313 - indNarrowOopOffset); 36.314 + indCompressedOopOffset, 36.315 + indirectNarrow, indOffset8Narrow, indOffset32Narrow, 36.316 + indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, 36.317 + indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow); 36.318 36.319 //----------PIPELINE----------------------------------------------------------- 36.320 // Rules which define the behavior of the target architectures pipeline. 
36.321 @@ -6234,9 +6333,7 @@ 36.322 ins_cost(125); // XXX 36.323 format %{ "movl $dst, $mem\t# compressed ptr" %} 36.324 ins_encode %{ 36.325 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 36.326 - Register dst = as_Register($dst$$reg); 36.327 - __ movl(dst, addr); 36.328 + __ movl($dst$$Register, $mem$$Address); 36.329 %} 36.330 ins_pipe(ialu_reg_mem); // XXX 36.331 %} 36.332 @@ -6262,9 +6359,7 @@ 36.333 ins_cost(125); // XXX 36.334 format %{ "movl $dst, $mem\t# compressed klass ptr" %} 36.335 ins_encode %{ 36.336 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 36.337 - Register dst = as_Register($dst$$reg); 36.338 - __ movl(dst, addr); 36.339 + __ movl($dst$$Register, $mem$$Address); 36.340 %} 36.341 ins_pipe(ialu_reg_mem); // XXX 36.342 %} 36.343 @@ -6418,6 +6513,102 @@ 36.344 ins_pipe(ialu_reg_reg_fat); 36.345 %} 36.346 36.347 +instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) 36.348 +%{ 36.349 + match(Set dst mem); 36.350 + 36.351 + ins_cost(110); 36.352 + format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %} 36.353 + opcode(0x8D); 36.354 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.355 + ins_pipe(ialu_reg_reg_fat); 36.356 +%} 36.357 + 36.358 +// Load Effective Address which uses Narrow (32-bits) oop 36.359 +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) 36.360 +%{ 36.361 + predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); 36.362 + match(Set dst mem); 36.363 + 36.364 + ins_cost(110); 36.365 + format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %} 36.366 + opcode(0x8D); 36.367 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.368 + ins_pipe(ialu_reg_reg_fat); 36.369 +%} 36.370 + 36.371 +instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem) 36.372 +%{ 36.373 + predicate(Universe::narrow_oop_shift() == 0); 36.374 + match(Set dst mem); 36.375 + 36.376 + ins_cost(110); // XXX 36.377 + format %{ "leaq $dst, $mem\t# ptr off8narrow" %} 36.378 + opcode(0x8D); 36.379 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.380 + ins_pipe(ialu_reg_reg_fat); 36.381 +%} 36.382 + 36.383 +instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) 36.384 +%{ 36.385 + predicate(Universe::narrow_oop_shift() == 0); 36.386 + match(Set dst mem); 36.387 + 36.388 + ins_cost(110); 36.389 + format %{ "leaq $dst, $mem\t# ptr off32narrow" %} 36.390 + opcode(0x8D); 36.391 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.392 + ins_pipe(ialu_reg_reg_fat); 36.393 +%} 36.394 + 36.395 +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) 36.396 +%{ 36.397 + predicate(Universe::narrow_oop_shift() == 0); 36.398 + match(Set dst mem); 36.399 + 36.400 + ins_cost(110); 36.401 + format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %} 36.402 + opcode(0x8D); 36.403 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.404 + ins_pipe(ialu_reg_reg_fat); 36.405 +%} 36.406 + 36.407 +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) 36.408 +%{ 36.409 + predicate(Universe::narrow_oop_shift() == 0); 36.410 + match(Set dst mem); 36.411 + 36.412 + ins_cost(110); 36.413 + format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %} 36.414 + opcode(0x8D); 36.415 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.416 + ins_pipe(ialu_reg_reg_fat); 36.417 +%} 36.418 + 36.419 +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) 36.420 +%{ 36.421 + 
predicate(Universe::narrow_oop_shift() == 0); 36.422 + match(Set dst mem); 36.423 + 36.424 + ins_cost(110); 36.425 + format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %} 36.426 + opcode(0x8D); 36.427 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.428 + ins_pipe(ialu_reg_reg_fat); 36.429 +%} 36.430 + 36.431 +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) 36.432 +%{ 36.433 + predicate(Universe::narrow_oop_shift() == 0); 36.434 + match(Set dst mem); 36.435 + 36.436 + ins_cost(110); 36.437 + format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %} 36.438 + opcode(0x8D); 36.439 + ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); 36.440 + ins_pipe(ialu_reg_reg_fat); 36.441 +%} 36.442 + 36.443 instruct loadConI(rRegI dst, immI src) 36.444 %{ 36.445 match(Set dst src); 36.446 @@ -6528,8 +6719,7 @@ 36.447 effect(KILL cr); 36.448 format %{ "xorq $dst, $src\t# compressed NULL ptr" %} 36.449 ins_encode %{ 36.450 - Register dst = $dst$$Register; 36.451 - __ xorq(dst, dst); 36.452 + __ xorq($dst$$Register, $dst$$Register); 36.453 %} 36.454 ins_pipe(ialu_reg); 36.455 %} 36.456 @@ -6541,11 +6731,10 @@ 36.457 format %{ "movl $dst, $src\t# compressed ptr" %} 36.458 ins_encode %{ 36.459 address con = (address)$src$$constant; 36.460 - Register dst = $dst$$Register; 36.461 if (con == NULL) { 36.462 ShouldNotReachHere(); 36.463 } else { 36.464 - __ set_narrow_oop(dst, (jobject)$src$$constant); 36.465 + __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); 36.466 } 36.467 %} 36.468 ins_pipe(ialu_reg_fat); // XXX 36.469 @@ -6794,12 +6983,25 @@ 36.470 ins_pipe(ialu_mem_reg); 36.471 %} 36.472 36.473 +instruct storeImmP0(memory mem, immP0 zero) 36.474 +%{ 36.475 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.476 + match(Set mem (StoreP mem zero)); 36.477 + 36.478 + ins_cost(125); // XXX 36.479 + format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %} 36.480 + ins_encode %{ 36.481 + __ movq($mem$$Address, r12); 36.482 + %} 36.483 + ins_pipe(ialu_mem_reg); 36.484 +%} 36.485 + 36.486 // Store NULL Pointer, mark word, or other simple pointer constant. 
36.487 instruct storeImmP(memory mem, immP31 src) 36.488 %{ 36.489 match(Set mem (StoreP mem src)); 36.490 36.491 - ins_cost(125); // XXX 36.492 + ins_cost(150); // XXX 36.493 format %{ "movq $mem, $src\t# ptr" %} 36.494 opcode(0xC7); /* C7 /0 */ 36.495 ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); 36.496 @@ -6814,14 +7016,55 @@ 36.497 ins_cost(125); // XXX 36.498 format %{ "movl $mem, $src\t# compressed ptr" %} 36.499 ins_encode %{ 36.500 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 36.501 - Register src = as_Register($src$$reg); 36.502 - __ movl(addr, src); 36.503 + __ movl($mem$$Address, $src$$Register); 36.504 %} 36.505 ins_pipe(ialu_mem_reg); 36.506 %} 36.507 36.508 +instruct storeImmN0(memory mem, immN0 zero) 36.509 +%{ 36.510 + predicate(Universe::narrow_oop_base() == NULL); 36.511 + match(Set mem (StoreN mem zero)); 36.512 + 36.513 + ins_cost(125); // XXX 36.514 + format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %} 36.515 + ins_encode %{ 36.516 + __ movl($mem$$Address, r12); 36.517 + %} 36.518 + ins_pipe(ialu_mem_reg); 36.519 +%} 36.520 + 36.521 +instruct storeImmN(memory mem, immN src) 36.522 +%{ 36.523 + match(Set mem (StoreN mem src)); 36.524 + 36.525 + ins_cost(150); // XXX 36.526 + format %{ "movl $mem, $src\t# compressed ptr" %} 36.527 + ins_encode %{ 36.528 + address con = (address)$src$$constant; 36.529 + if (con == NULL) { 36.530 + __ movl($mem$$Address, (int32_t)0); 36.531 + } else { 36.532 + __ set_narrow_oop($mem$$Address, (jobject)$src$$constant); 36.533 + } 36.534 + %} 36.535 + ins_pipe(ialu_mem_imm); 36.536 +%} 36.537 + 36.538 // Store Integer Immediate 36.539 +instruct storeImmI0(memory mem, immI0 zero) 36.540 +%{ 36.541 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.542 + match(Set mem (StoreI mem zero)); 36.543 + 36.544 + ins_cost(125); // XXX 36.545 + format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %} 36.546 + ins_encode %{ 36.547 + __ movl($mem$$Address, r12); 36.548 + %} 36.549 + ins_pipe(ialu_mem_reg); 36.550 +%} 36.551 + 36.552 instruct storeImmI(memory mem, immI src) 36.553 %{ 36.554 match(Set mem (StoreI mem src)); 36.555 @@ -6834,6 +7077,19 @@ 36.556 %} 36.557 36.558 // Store Long Immediate 36.559 +instruct storeImmL0(memory mem, immL0 zero) 36.560 +%{ 36.561 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.562 + match(Set mem (StoreL mem zero)); 36.563 + 36.564 + ins_cost(125); // XXX 36.565 + format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %} 36.566 + ins_encode %{ 36.567 + __ movq($mem$$Address, r12); 36.568 + %} 36.569 + ins_pipe(ialu_mem_reg); 36.570 +%} 36.571 + 36.572 instruct storeImmL(memory mem, immL32 src) 36.573 %{ 36.574 match(Set mem (StoreL mem src)); 36.575 @@ -6846,6 +7102,19 @@ 36.576 %} 36.577 36.578 // Store Short/Char Immediate 36.579 +instruct storeImmC0(memory mem, immI0 zero) 36.580 +%{ 36.581 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.582 + match(Set mem (StoreC mem zero)); 36.583 + 36.584 + ins_cost(125); // XXX 36.585 + format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %} 36.586 + ins_encode %{ 36.587 + __ movw($mem$$Address, r12); 36.588 + %} 36.589 + ins_pipe(ialu_mem_reg); 36.590 +%} 36.591 + 36.592 instruct storeImmI16(memory mem, immI16 src) 36.593 %{ 36.594 predicate(UseStoreImmI16); 36.595 @@ -6859,6 +7128,19 @@ 36.596 %} 36.597 36.598 // Store Byte Immediate 36.599 +instruct storeImmB0(memory mem, immI0 zero) 36.600 +%{ 36.601 + 
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.602 + match(Set mem (StoreB mem zero)); 36.603 + 36.604 + ins_cost(125); // XXX 36.605 + format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %} 36.606 + ins_encode %{ 36.607 + __ movb($mem$$Address, r12); 36.608 + %} 36.609 + ins_pipe(ialu_mem_reg); 36.610 +%} 36.611 + 36.612 instruct storeImmB(memory mem, immI8 src) 36.613 %{ 36.614 match(Set mem (StoreB mem src)); 36.615 @@ -6898,6 +7180,19 @@ 36.616 %} 36.617 36.618 // Store CMS card-mark Immediate 36.619 +instruct storeImmCM0_reg(memory mem, immI0 zero) 36.620 +%{ 36.621 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.622 + match(Set mem (StoreCM mem zero)); 36.623 + 36.624 + ins_cost(125); // XXX 36.625 + format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %} 36.626 + ins_encode %{ 36.627 + __ movb($mem$$Address, r12); 36.628 + %} 36.629 + ins_pipe(ialu_mem_reg); 36.630 +%} 36.631 + 36.632 instruct storeImmCM0(memory mem, immI0 src) 36.633 %{ 36.634 match(Set mem (StoreCM mem src)); 36.635 @@ -6931,6 +7226,19 @@ 36.636 %} 36.637 36.638 // Store immediate Float value (it is faster than store from XMM register) 36.639 +instruct storeF0(memory mem, immF0 zero) 36.640 +%{ 36.641 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.642 + match(Set mem (StoreF mem zero)); 36.643 + 36.644 + ins_cost(25); // XXX 36.645 + format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %} 36.646 + ins_encode %{ 36.647 + __ movl($mem$$Address, r12); 36.648 + %} 36.649 + ins_pipe(ialu_mem_reg); 36.650 +%} 36.651 + 36.652 instruct storeF_imm(memory mem, immF src) 36.653 %{ 36.654 match(Set mem (StoreF mem src)); 36.655 @@ -6957,6 +7265,7 @@ 36.656 // Store immediate double 0.0 (it is faster than store from XMM register) 36.657 instruct storeD0_imm(memory mem, immD0 src) 36.658 %{ 36.659 + predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); 36.660 match(Set mem (StoreD mem src)); 36.661 36.662 ins_cost(50); 36.663 @@ -6966,6 +7275,19 @@ 36.664 ins_pipe(ialu_mem_imm); 36.665 %} 36.666 36.667 +instruct storeD0(memory mem, immD0 zero) 36.668 +%{ 36.669 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.670 + match(Set mem (StoreD mem zero)); 36.671 + 36.672 + ins_cost(25); // XXX 36.673 + format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %} 36.674 + ins_encode %{ 36.675 + __ movq($mem$$Address, r12); 36.676 + %} 36.677 + ins_pipe(ialu_mem_reg); 36.678 +%} 36.679 + 36.680 instruct storeSSI(stackSlotI dst, rRegI src) 36.681 %{ 36.682 match(Set dst src); 36.683 @@ -7077,6 +7399,56 @@ 36.684 ins_pipe( ialu_mem_reg ); 36.685 %} 36.686 36.687 + 36.688 +//---------- Population Count Instructions ------------------------------------- 36.689 + 36.690 +instruct popCountI(rRegI dst, rRegI src) %{ 36.691 + predicate(UsePopCountInstruction); 36.692 + match(Set dst (PopCountI src)); 36.693 + 36.694 + format %{ "popcnt $dst, $src" %} 36.695 + ins_encode %{ 36.696 + __ popcntl($dst$$Register, $src$$Register); 36.697 + %} 36.698 + ins_pipe(ialu_reg); 36.699 +%} 36.700 + 36.701 +instruct popCountI_mem(rRegI dst, memory mem) %{ 36.702 + predicate(UsePopCountInstruction); 36.703 + match(Set dst (PopCountI (LoadI mem))); 36.704 + 36.705 + format %{ "popcnt $dst, $mem" %} 36.706 + ins_encode %{ 36.707 + __ popcntl($dst$$Register, $mem$$Address); 36.708 + %} 36.709 + ins_pipe(ialu_reg); 36.710 +%} 36.711 + 36.712 +// Note: Long.bitCount(long) returns an int. 
36.713 +instruct popCountL(rRegI dst, rRegL src) %{ 36.714 + predicate(UsePopCountInstruction); 36.715 + match(Set dst (PopCountL src)); 36.716 + 36.717 + format %{ "popcnt $dst, $src" %} 36.718 + ins_encode %{ 36.719 + __ popcntq($dst$$Register, $src$$Register); 36.720 + %} 36.721 + ins_pipe(ialu_reg); 36.722 +%} 36.723 + 36.724 +// Note: Long.bitCount(long) returns an int. 36.725 +instruct popCountL_mem(rRegI dst, memory mem) %{ 36.726 + predicate(UsePopCountInstruction); 36.727 + match(Set dst (PopCountL (LoadL mem))); 36.728 + 36.729 + format %{ "popcnt $dst, $mem" %} 36.730 + ins_encode %{ 36.731 + __ popcntq($dst$$Register, $mem$$Address); 36.732 + %} 36.733 + ins_pipe(ialu_reg); 36.734 +%} 36.735 + 36.736 + 36.737 //----------MemBar Instructions----------------------------------------------- 36.738 // Memory barrier flavors 36.739 36.740 @@ -7192,9 +7564,7 @@ 36.741 effect(KILL cr); 36.742 format %{ "encode_heap_oop_not_null $dst,$src" %} 36.743 ins_encode %{ 36.744 - Register s = $src$$Register; 36.745 - Register d = $dst$$Register; 36.746 - __ encode_heap_oop_not_null(d, s); 36.747 + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); 36.748 %} 36.749 ins_pipe(ialu_reg_long); 36.750 %} 36.751 @@ -7224,7 +7594,11 @@ 36.752 ins_encode %{ 36.753 Register s = $src$$Register; 36.754 Register d = $dst$$Register; 36.755 - __ decode_heap_oop_not_null(d, s); 36.756 + if (s != d) { 36.757 + __ decode_heap_oop_not_null(d, s); 36.758 + } else { 36.759 + __ decode_heap_oop_not_null(d); 36.760 + } 36.761 %} 36.762 ins_pipe(ialu_reg_long); 36.763 %} 36.764 @@ -11389,8 +11763,9 @@ 36.765 36.766 // This will generate a signed flags result. This should be OK since 36.767 // any compare to a zero should be eq/neq. 36.768 -instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero) 36.769 -%{ 36.770 +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) 36.771 +%{ 36.772 + predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); 36.773 match(Set cr (CmpP (LoadP op) zero)); 36.774 36.775 ins_cost(500); // XXX 36.776 @@ -11401,13 +11776,24 @@ 36.777 ins_pipe(ialu_cr_reg_imm); 36.778 %} 36.779 36.780 +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) 36.781 +%{ 36.782 + predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL)); 36.783 + match(Set cr (CmpP (LoadP mem) zero)); 36.784 + 36.785 + format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %} 36.786 + ins_encode %{ 36.787 + __ cmpq(r12, $mem$$Address); 36.788 + %} 36.789 + ins_pipe(ialu_cr_reg_mem); 36.790 +%} 36.791 36.792 instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) 36.793 %{ 36.794 match(Set cr (CmpN op1 op2)); 36.795 36.796 format %{ "cmpl $op1, $op2\t# compressed ptr" %} 36.797 - ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %} 36.798 + ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %} 36.799 ins_pipe(ialu_cr_reg_reg); 36.800 %} 36.801 36.802 @@ -11415,11 +11801,30 @@ 36.803 %{ 36.804 match(Set cr (CmpN src (LoadN mem))); 36.805 36.806 - ins_cost(500); // XXX 36.807 - format %{ "cmpl $src, mem\t# compressed ptr" %} 36.808 + format %{ "cmpl $src, $mem\t# compressed ptr" %} 36.809 ins_encode %{ 36.810 - Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 36.811 - __ cmpl(as_Register($src$$reg), adr); 36.812 + __ cmpl($src$$Register, $mem$$Address); 36.813 + %} 36.814 + ins_pipe(ialu_cr_reg_mem); 36.815 +%} 36.816 + 36.817 +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ 36.818 + match(Set cr (CmpN op1 op2)); 
36.819 + 36.820 + format %{ "cmpl $op1, $op2\t# compressed ptr" %} 36.821 + ins_encode %{ 36.822 + __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant); 36.823 + %} 36.824 + ins_pipe(ialu_cr_reg_imm); 36.825 +%} 36.826 + 36.827 +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) 36.828 +%{ 36.829 + match(Set cr (CmpN src (LoadN mem))); 36.830 + 36.831 + format %{ "cmpl $mem, $src\t# compressed ptr" %} 36.832 + ins_encode %{ 36.833 + __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant); 36.834 %} 36.835 ins_pipe(ialu_cr_reg_mem); 36.836 %} 36.837 @@ -11432,15 +11837,27 @@ 36.838 ins_pipe(ialu_cr_reg_imm); 36.839 %} 36.840 36.841 -instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero) 36.842 -%{ 36.843 +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) 36.844 +%{ 36.845 + predicate(Universe::narrow_oop_base() != NULL); 36.846 match(Set cr (CmpN (LoadN mem) zero)); 36.847 36.848 ins_cost(500); // XXX 36.849 format %{ "testl $mem, 0xffffffff\t# compressed ptr" %} 36.850 ins_encode %{ 36.851 - Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); 36.852 - __ cmpl(addr, (int)0xFFFFFFFF); 36.853 + __ cmpl($mem$$Address, (int)0xFFFFFFFF); 36.854 + %} 36.855 + ins_pipe(ialu_cr_reg_mem); 36.856 +%} 36.857 + 36.858 +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) 36.859 +%{ 36.860 + predicate(Universe::narrow_oop_base() == NULL); 36.861 + match(Set cr (CmpN (LoadN mem) zero)); 36.862 + 36.863 + format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %} 36.864 + ins_encode %{ 36.865 + __ cmpl(r12, $mem$$Address); 36.866 %} 36.867 ins_pipe(ialu_cr_reg_mem); 36.868 %} 36.869 @@ -11472,7 +11889,6 @@ 36.870 %{ 36.871 match(Set cr (CmpL op1 (LoadL op2))); 36.872 36.873 - ins_cost(500); // XXX 36.874 format %{ "cmpq $op1, $op2" %} 36.875 opcode(0x3B); /* Opcode 3B /r */ 36.876 ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2)); 36.877 @@ -11733,15 +12149,12 @@ 36.878 effect(KILL rcx, KILL cr); 36.879 36.880 ins_cost(1100); // slightly larger than the next version 36.881 - format %{ "cmpq rax, rsi\n\t" 36.882 - "jeq,s hit\n\t" 36.883 - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 36.884 + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 36.885 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" 36.886 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 36.887 "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" 36.888 "jne,s miss\t\t# Missed: rdi not-zero\n\t" 36.889 "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" 36.890 - "hit:\n\t" 36.891 "xorq $result, $result\t\t Hit: rdi zero\n\t" 36.892 "miss:\t" %} 36.893 36.894 @@ -11756,13 +12169,10 @@ 36.895 rdi_RegP result) 36.896 %{ 36.897 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 36.898 - predicate(!UseCompressedOops); // decoding oop kills condition codes 36.899 effect(KILL rcx, KILL result); 36.900 36.901 ins_cost(1000); 36.902 - format %{ "cmpq rax, rsi\n\t" 36.903 - "jeq,s miss\t# Actually a hit; we are done.\n\t" 36.904 - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 36.905 + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" 36.906 "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to 
scan\n\t" 36.907 "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" 36.908 "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
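
All of the new *Narrow operands and the storeImm*0 / test*_mem_reg0 rules in this file specialize one decode, the base-plus-shift form this changeset threads through the VM:

    #include <stdint.h>
    // oop = base + ((uintptr_t)narrow << shift) -- sketch of the decode.
    // shift == 0 lets a DecodeN'd register serve directly as an addressing
    // base (the *Narrow operands); base == NULL additionally makes R12,
    // which always holds the heap base, a guaranteed-zero register that the
    // storeImm*0 rules store from and testP_mem_reg0/testN_mem_reg0 compare
    // against.
    static inline void* decode_narrow_oop(uint32_t narrow, char* base, int shift) {
      return (narrow == 0) ? (void*)0
                           : (void*)(base + ((uintptr_t)narrow << shift));
    }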
37.1 --- a/src/os/linux/vm/os_linux.cpp Fri Mar 20 22:08:48 2009 -0400 37.2 +++ b/src/os/linux/vm/os_linux.cpp Mon Mar 23 10:42:20 2009 -0400 37.3 @@ -2582,7 +2582,7 @@ 37.4 #define SHM_HUGETLB 04000 37.5 #endif 37.6 37.7 -char* os::reserve_memory_special(size_t bytes) { 37.8 +char* os::reserve_memory_special(size_t bytes, char* req_addr) { 37.9 assert(UseLargePages, "only for large pages"); 37.10 37.11 key_t key = IPC_PRIVATE;
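
reserve_memory_special() grows a requested-address parameter so the large-page path can honor a placement request, which matters once the compressed-oop heap cares where it lands. Assuming the SysV-shm mechanism this file already uses, honoring the request is just a matter of what gets passed to shmat():

    #include <sys/shm.h>
    // Sketch only: pass the caller's request through to shmat(); a NULL
    // req_addr lets the kernel choose, as the old one-argument form did.
    static char* attach_at(int shmid, char* req_addr) {
      void* p = shmat(shmid, req_addr, 0);
      return (p == (void*)-1) ? (char*)0 : (char*)p;
    }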
38.1 --- a/src/os/solaris/dtrace/generateJvmOffsets.cpp Fri Mar 20 22:08:48 2009 -0400 38.2 +++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp Mon Mar 23 10:42:20 2009 -0400 38.3 @@ -249,6 +249,10 @@ 38.4 38.5 printf("\n"); 38.6 38.7 + GEN_OFFS(NarrowOopStruct, _base); 38.8 + GEN_OFFS(NarrowOopStruct, _shift); 38.9 + printf("\n"); 38.10 + 38.11 GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header)); 38.12 GEN_SIZE(oopDesc); 38.13 GEN_SIZE(constantPoolOopDesc);
39.1 --- a/src/os/solaris/dtrace/jhelper.d Fri Mar 20 22:08:48 2009 -0400 39.2 +++ b/src/os/solaris/dtrace/jhelper.d Mon Mar 23 10:42:20 2009 -0400 39.3 @@ -46,7 +46,10 @@ 39.4 extern pointer __1cJCodeCacheF_heap_; 39.5 extern pointer __1cIUniverseP_methodKlassObj_; 39.6 extern pointer __1cIUniverseO_collectedHeap_; 39.7 -extern pointer __1cIUniverseK_heap_base_; 39.8 +extern pointer __1cIUniverseL_narrow_oop_; 39.9 +#ifdef _LP64 39.10 +extern pointer UseCompressedOops; 39.11 +#endif 39.12 39.13 extern pointer __1cHnmethodG__vtbl_; 39.14 extern pointer __1cKBufferBlobG__vtbl_; 39.15 @@ -56,6 +59,7 @@ 39.16 #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t)) 39.17 #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t)) 39.18 #define copyin_int32(ADDR) *(int32_t*) copyin((pointer) (ADDR), sizeof(int32_t)) 39.19 +#define copyin_uint8(ADDR) *(uint8_t*) copyin((pointer) (ADDR), sizeof(uint8_t)) 39.20 39.21 #define SAME(x) x 39.22 #define copyin_offset(JVM_CONST) JVM_CONST = \ 39.23 @@ -132,6 +136,9 @@ 39.24 copyin_offset(SIZE_oopDesc); 39.25 copyin_offset(SIZE_constantPoolOopDesc); 39.26 39.27 + copyin_offset(OFFSET_NarrowOopStruct_base); 39.28 + copyin_offset(OFFSET_NarrowOopStruct_shift); 39.29 + 39.30 /* 39.31 * The PC to translate is in arg0. 39.32 */ 39.33 @@ -151,9 +158,19 @@ 39.34 39.35 this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_); 39.36 this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_); 39.37 - this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_); 39.38 39.39 /* Reading volatile values */ 39.40 +#ifdef _LP64 39.41 + this->Use_Compressed_Oops = copyin_uint8(&``UseCompressedOops); 39.42 +#else 39.43 + this->Use_Compressed_Oops = 0; 39.44 +#endif 39.45 + 39.46 + this->Universe_narrow_oop_base = copyin_ptr(&``__1cIUniverseL_narrow_oop_ + 39.47 + OFFSET_NarrowOopStruct_base); 39.48 + this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ + 39.49 + OFFSET_NarrowOopStruct_shift); 39.50 + 39.51 this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + 39.52 OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low); 39.53 39.54 @@ -295,7 +312,7 @@ 39.55 39.56 dtrace:helper:ustack: 39.57 /!this->done && this->vtbl == this->BufferBlob_vtbl && 39.58 -this->Universe_heap_base == NULL && 39.59 +this->Use_Compressed_Oops == 0 && 39.60 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/ 39.61 { 39.62 MARK_LINE; 39.63 @@ -306,7 +323,7 @@ 39.64 39.65 dtrace:helper:ustack: 39.66 /!this->done && this->vtbl == this->BufferBlob_vtbl && 39.67 -this->Universe_heap_base != NULL && 39.68 +this->Use_Compressed_Oops != 0 && 39.69 this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/ 39.70 { 39.71 MARK_LINE; 39.72 @@ -314,8 +331,8 @@ 39.73 * Read compressed pointer and decode heap oop, same as oop.inline.hpp 39.74 */ 39.75 this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata); 39.76 - this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base + 39.77 - ((uintptr_t)this->cklass << 3)); 39.78 + this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base + 39.79 + ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift)); 39.80 this->methodOop = this->klass == this->Universe_methodKlassOop; 39.81 this->done = !this->methodOop; 39.82 }
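
The helper used to infer "compressed" from a non-NULL heap base and hard-coded the shift to 3; it now reads UseCompressedOops directly (one byte) plus the base and shift out of Universe::_narrow_oop, and decodes with the general form:

    #include <stdint.h>
    // Same decode the D code above performs once base and shift are copied
    // in from the target VM (sketch).
    static uint64_t decode_klass(uint32_t cklass, uintptr_t base, uint32_t shift) {
      return (uint64_t)(base + ((uintptr_t)cklass << shift));
    }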
40.1 --- a/src/os/solaris/dtrace/libjvm_db.c Fri Mar 20 22:08:48 2009 -0400 40.2 +++ b/src/os/solaris/dtrace/libjvm_db.c Mon Mar 23 10:42:20 2009 -0400 40.3 @@ -146,13 +146,17 @@ 40.4 uint64_t BufferBlob_vtbl; 40.5 uint64_t RuntimeStub_vtbl; 40.6 40.7 + uint64_t Use_Compressed_Oops_address; 40.8 uint64_t Universe_methodKlassObj_address; 40.9 + uint64_t Universe_narrow_oop_base_address; 40.10 + uint64_t Universe_narrow_oop_shift_address; 40.11 uint64_t CodeCache_heap_address; 40.12 - uint64_t Universe_heap_base_address; 40.13 40.14 /* Volatiles */ 40.15 + uint8_t Use_Compressed_Oops; 40.16 uint64_t Universe_methodKlassObj; 40.17 - uint64_t Universe_heap_base; 40.18 + uint64_t Universe_narrow_oop_base; 40.19 + uint32_t Universe_narrow_oop_shift; 40.20 uint64_t CodeCache_low; 40.21 uint64_t CodeCache_high; 40.22 uint64_t CodeCache_segmap_low; 40.23 @@ -279,8 +283,11 @@ 40.24 if (strcmp("_methodKlassObj", vmp->fieldName) == 0) { 40.25 J->Universe_methodKlassObj_address = vmp->address; 40.26 } 40.27 - if (strcmp("_heap_base", vmp->fieldName) == 0) { 40.28 - J->Universe_heap_base_address = vmp->address; 40.29 + if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) { 40.30 + J->Universe_narrow_oop_base_address = vmp->address; 40.31 + } 40.32 + if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) { 40.33 + J->Universe_narrow_oop_shift_address = vmp->address; 40.34 } 40.35 } 40.36 CHECK_FAIL(err); 40.37 @@ -298,14 +305,39 @@ 40.38 return -1; 40.39 } 40.40 40.41 +static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) { 40.42 + psaddr_t sym_addr; 40.43 + int err; 40.44 + 40.45 + err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr); 40.46 + if (err != PS_OK) goto fail; 40.47 + *valuep = sym_addr; 40.48 + return PS_OK; 40.49 + 40.50 + fail: 40.51 + return err; 40.52 +} 40.53 + 40.54 static int read_volatiles(jvm_agent_t* J) { 40.55 uint64_t ptr; 40.56 int err; 40.57 40.58 + err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address); 40.59 + if (err == PS_OK) { 40.60 + err = ps_pread(J->P, J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t)); 40.61 + CHECK_FAIL(err); 40.62 + } else { 40.63 + J->Use_Compressed_Oops = 0; 40.64 + } 40.65 + 40.66 err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj); 40.67 CHECK_FAIL(err); 40.68 - err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base); 40.69 + 40.70 + err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base); 40.71 CHECK_FAIL(err); 40.72 + err = ps_pread(J->P, J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t)); 40.73 + CHECK_FAIL(err); 40.74 + 40.75 err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory + 40.76 OFFSET_VirtualSpace_low, &J->CodeCache_low); 40.77 CHECK_FAIL(err); 40.78 @@ -374,19 +406,6 @@ 40.79 return -1; 40.80 } 40.81 40.82 -static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) { 40.83 - psaddr_t sym_addr; 40.84 - int err; 40.85 - 40.86 - err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr); 40.87 - if (err != PS_OK) goto fail; 40.88 - *valuep = sym_addr; 40.89 - return PS_OK; 40.90 - 40.91 - fail: 40.92 - return err; 40.93 -} 40.94 - 40.95 static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) { 40.96 psaddr_t sym_addr; 40.97 int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr); 40.98 @@ -458,14 +477,14 @@ 40.99 static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) { 
40.100 uint64_t klass; 40.101 int err; 40.102 - // If heap_base is nonnull, this was a compressed oop. 40.103 - if (J->Universe_heap_base != NULL) { 40.104 + // If UseCompressedOops, this was a compressed oop. 40.105 + if (J->Use_Compressed_Oops != 0) { 40.106 uint32_t cklass; 40.107 err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, 40.108 &cklass); 40.109 // decode heap oop, same as oop.inline.hpp 40.110 - klass = (uint64_t)((uintptr_t)J->Universe_heap_base + 40.111 - ((uintptr_t)cklass << 3)); 40.112 + klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base + 40.113 + ((uintptr_t)cklass << J->Universe_narrow_oop_shift)); 40.114 } else { 40.115 err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass); 40.116 }
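Both serviceability agents above now decode a compressed klass field with the single rule the VM itself uses: shift the raw 32-bit value left by the narrow-oop shift and add the narrow-oop base, instead of hard-coding the old heap_base and a constant shift of 3. A minimal standalone sketch of that rule (hypothetical helper, not HotSpot code; base is 0 for zero-based oops and shift is 0 for unscaled oops):

    #include <stdint.h>

    // Decode a compressed (narrow) oop: null stays null, everything else
    // is base + (narrow << shift), as in oop.inline.hpp's decode_heap_oop.
    static inline uint64_t decode_narrow_oop(uint32_t narrow,
                                             uint64_t base, int shift) {
      if (narrow == 0) return 0;
      return base + ((uint64_t)narrow << shift);
    }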
41.1 --- a/src/os/solaris/vm/os_solaris.cpp Fri Mar 20 22:08:48 2009 -0400 41.2 +++ b/src/os/solaris/vm/os_solaris.cpp Mon Mar 23 10:42:20 2009 -0400 41.3 @@ -3220,7 +3220,7 @@ 41.4 return true; 41.5 } 41.6 41.7 -char* os::reserve_memory_special(size_t bytes) { 41.8 +char* os::reserve_memory_special(size_t bytes, char* addr) { 41.9 assert(UseLargePages && UseISM, "only for ISM large pages"); 41.10 41.11 size_t size = bytes; 41.12 @@ -4451,6 +4451,9 @@ 41.13 int_fnP_thread_t os::Solaris::_thr_suspend_mutator; 41.14 int_fnP_thread_t os::Solaris::_thr_continue_mutator; 41.15 41.16 +// (Static) wrapper for getisax(2) call. 41.17 +os::Solaris::getisax_func_t os::Solaris::_getisax = 0; 41.18 + 41.19 // (Static) wrappers for the liblgrp API 41.20 os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home; 41.21 os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init; 41.22 @@ -4465,16 +4468,19 @@ 41.23 // (Static) wrapper for meminfo() call. 41.24 os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0; 41.25 41.26 -static address resolve_symbol(const char *name) { 41.27 - address addr; 41.28 - 41.29 - addr = (address) dlsym(RTLD_DEFAULT, name); 41.30 +static address resolve_symbol_lazy(const char* name) { 41.31 + address addr = (address) dlsym(RTLD_DEFAULT, name); 41.32 if(addr == NULL) { 41.33 // RTLD_DEFAULT was not defined on some early versions of 2.5.1 41.34 addr = (address) dlsym(RTLD_NEXT, name); 41.35 - if(addr == NULL) { 41.36 - fatal(dlerror()); 41.37 - } 41.38 + } 41.39 + return addr; 41.40 +} 41.41 + 41.42 +static address resolve_symbol(const char* name) { 41.43 + address addr = resolve_symbol_lazy(name); 41.44 + if(addr == NULL) { 41.45 + fatal(dlerror()); 41.46 } 41.47 return addr; 41.48 } 41.49 @@ -4673,15 +4679,26 @@ 41.50 } 41.51 41.52 void os::Solaris::misc_sym_init() { 41.53 - address func = (address)dlsym(RTLD_DEFAULT, "meminfo"); 41.54 - if(func == NULL) { 41.55 - func = (address) dlsym(RTLD_NEXT, "meminfo"); 41.56 - } 41.57 + address func; 41.58 + 41.59 + // getisax 41.60 + func = resolve_symbol_lazy("getisax"); 41.61 + if (func != NULL) { 41.62 + os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func); 41.63 + } 41.64 + 41.65 + // meminfo 41.66 + func = resolve_symbol_lazy("meminfo"); 41.67 if (func != NULL) { 41.68 os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func)); 41.69 } 41.70 } 41.71 41.72 +uint_t os::Solaris::getisax(uint32_t* array, uint_t n) { 41.73 + assert(_getisax != NULL, "_getisax not set"); 41.74 + return _getisax(array, n); 41.75 +} 41.76 + 41.77 // Symbol doesn't exist in Solaris 8 pset.h 41.78 #ifndef PS_MYID 41.79 #define PS_MYID -3 41.80 @@ -4716,6 +4733,10 @@ 41.81 41.82 Solaris::initialize_system_info(); 41.83 41.84 + // Initialize misc. symbols as soon as possible, so we can use them 41.85 + // if we need them. 41.86 + Solaris::misc_sym_init(); 41.87 + 41.88 int fd = open("/dev/zero", O_RDWR); 41.89 if (fd < 0) { 41.90 fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno)); 41.91 @@ -4857,7 +4878,6 @@ 41.92 } 41.93 } 41.94 41.95 - Solaris::misc_sym_init(); 41.96 Solaris::signal_sets_init(); 41.97 Solaris::init_signal_mem(); 41.98 Solaris::install_signal_handlers();
42.1 --- a/src/os/solaris/vm/os_solaris.hpp Fri Mar 20 22:08:48 2009 -0400 42.2 +++ b/src/os/solaris/vm/os_solaris.hpp Mon Mar 23 10:42:20 2009 -0400 42.3 @@ -1,5 +1,5 @@ 42.4 /* 42.5 - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. 42.6 + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 42.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 42.8 * 42.9 * This code is free software; you can redistribute it and/or modify it 42.10 @@ -72,6 +72,8 @@ 42.11 LGRP_VIEW_OS /* what's available to operating system */ 42.12 } lgrp_view_t; 42.13 42.14 + typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n); 42.15 + 42.16 typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id); 42.17 typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view); 42.18 typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie); 42.19 @@ -87,6 +89,8 @@ 42.20 const uint_t info_req[], int info_count, 42.21 uint64_t outdata[], uint_t validity[]); 42.22 42.23 + static getisax_func_t _getisax; 42.24 + 42.25 static lgrp_home_func_t _lgrp_home; 42.26 static lgrp_init_func_t _lgrp_init; 42.27 static lgrp_fini_func_t _lgrp_fini; 42.28 @@ -283,6 +287,9 @@ 42.29 } 42.30 static lgrp_cookie_t lgrp_cookie() { return _lgrp_cookie; } 42.31 42.32 + static bool supports_getisax() { return _getisax != NULL; } 42.33 + static uint_t getisax(uint32_t* array, uint_t n); 42.34 + 42.35 static void set_meminfo(meminfo_func_t func) { _meminfo = func; } 42.36 static int meminfo (const uint64_t inaddr[], int addr_count, 42.37 const uint_t info_req[], int info_count,
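The os_solaris changes split symbol lookup into a fatal variant (resolve_symbol) and a non-fatal one (resolve_symbol_lazy), so that optional Solaris 10 interfaces such as getisax(2) can be probed without aborting on older releases; supports_getisax() then reports whether the probe succeeded. The lookup pattern, as a self-contained sketch (standard dlfcn usage; the RTLD_NEXT fallback mirrors the patch, the function name is hypothetical):

    #include <dlfcn.h>
    #include <stddef.h>

    // Non-fatal lookup: returns NULL when the symbol is absent, so the
    // caller can feature-test instead of calling fatal(dlerror()).
    static void* lookup_optional_symbol(const char* name) {
      void* addr = dlsym(RTLD_DEFAULT, name);
      if (addr == NULL) {
        // RTLD_DEFAULT was not defined on some early Solaris versions.
        addr = dlsym(RTLD_NEXT, name);
      }
      return addr;
    }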
43.1 --- a/src/os/windows/vm/os_windows.cpp Fri Mar 20 22:08:48 2009 -0400 43.2 +++ b/src/os/windows/vm/os_windows.cpp Mon Mar 23 10:42:20 2009 -0400 43.3 @@ -2595,7 +2595,7 @@ 43.4 return true; 43.5 } 43.6 43.7 -char* os::reserve_memory_special(size_t bytes) { 43.8 +char* os::reserve_memory_special(size_t bytes, char* addr) { 43.9 43.10 if (UseLargePagesIndividualAllocation) { 43.11 if (TracePageSizes && Verbose) { 43.12 @@ -2615,7 +2615,7 @@ 43.13 "use -XX:-UseLargePagesIndividualAllocation to turn off"); 43.14 return NULL; 43.15 } 43.16 - p_buf = (char *) VirtualAlloc(NULL, 43.17 + p_buf = (char *) VirtualAlloc(addr, 43.18 size_of_reserve, // size of Reserve 43.19 MEM_RESERVE, 43.20 PAGE_EXECUTE_READWRITE);
44.1 --- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp Fri Mar 20 22:08:48 2009 -0400 44.2 +++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp Mon Mar 23 10:42:20 2009 -0400 44.3 @@ -30,5 +30,7 @@ 44.4 define_pd_global(uintx, JVMInvokeMethodSlack, 12288); 44.5 define_pd_global(intx, CompilerThreadStackSize, 0); 44.6 44.7 +// Only used on 64 bit platforms 44.8 +define_pd_global(uintx, HeapBaseMinAddress, 4*G); 44.9 // Only used on 64 bit Windows platforms 44.10 define_pd_global(bool, UseVectoredExceptions, false);
45.1 --- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp Fri Mar 20 22:08:48 2009 -0400 45.2 +++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp Mon Mar 23 10:42:20 2009 -0400 45.3 @@ -43,5 +43,7 @@ 45.4 45.5 define_pd_global(uintx, JVMInvokeMethodSlack, 8192); 45.6 45.7 +// Only used on 64 bit platforms 45.8 +define_pd_global(uintx, HeapBaseMinAddress, 2*G); 45.9 // Only used on 64 bit Windows platforms 45.10 define_pd_global(bool, UseVectoredExceptions, false);
46.1 --- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp Fri Mar 20 22:08:48 2009 -0400 46.2 +++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp Mon Mar 23 10:42:20 2009 -0400 46.3 @@ -30,5 +30,9 @@ 46.4 define_pd_global(uintx, JVMInvokeMethodSlack, 12288); 46.5 define_pd_global(intx, CompilerThreadStackSize, 0); 46.6 46.7 +// Only used on 64 bit platforms 46.8 +define_pd_global(uintx, HeapBaseMinAddress, 4*G); 46.9 // Only used on 64 bit Windows platforms 46.10 define_pd_global(bool, UseVectoredExceptions, false); 46.11 + 46.12 +
47.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Fri Mar 20 22:08:48 2009 -0400 47.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp Mon Mar 23 10:42:20 2009 -0400 47.3 @@ -1,5 +1,5 @@ 47.4 /* 47.5 - * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved. 47.6 + * Copyright 2006-2009 Sun Microsystems, Inc. All Rights Reserved. 47.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 47.8 * 47.9 * This code is free software; you can redistribute it and/or modify it 47.10 @@ -25,58 +25,107 @@ 47.11 # include "incls/_precompiled.incl" 47.12 # include "incls/_vm_version_solaris_sparc.cpp.incl" 47.13 47.14 +# include <sys/auxv.h> 47.15 +# include <sys/auxv_SPARC.h> 47.16 # include <sys/systeminfo.h> 47.17 47.18 +// We need to keep these here as long as we have to build on Solaris 47.19 +// versions before 10. 47.20 +#ifndef SI_ARCHITECTURE_32 47.21 +#define SI_ARCHITECTURE_32 516 /* basic 32-bit SI_ARCHITECTURE */ 47.22 +#endif 47.23 + 47.24 +#ifndef SI_ARCHITECTURE_64 47.25 +#define SI_ARCHITECTURE_64 517 /* basic 64-bit SI_ARCHITECTURE */ 47.26 +#endif 47.27 + 47.28 +static void do_sysinfo(int si, const char* string, int* features, int mask) { 47.29 + char tmp; 47.30 + size_t bufsize = sysinfo(si, &tmp, 1); 47.31 + 47.32 + // All SI defines used below must be supported. 47.33 + guarantee(bufsize != -1, "must be supported"); 47.34 + 47.35 + char* buf = (char*) malloc(bufsize); 47.36 + 47.37 + if (buf == NULL) 47.38 + return; 47.39 + 47.40 + if (sysinfo(si, buf, bufsize) == bufsize) { 47.41 + // Compare the string. 47.42 + if (strcmp(buf, string) == 0) { 47.43 + *features |= mask; 47.44 + } 47.45 + } 47.46 + 47.47 + free(buf); 47.48 +} 47.49 + 47.50 int VM_Version::platform_features(int features) { 47.51 - // We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...). 47.52 - // This isn't the best of all possible ways because there's not enough 47.53 - // detail in the isa list it returns, but it's a bit less arcane than 47.54 - // generating assembly code and an illegal instruction handler. We used 47.55 - // to generate a getpsr trap, but that's even more arcane. 47.56 - // 47.57 - // Another possibility would be to use sysinfo(SI_PLATFORM, ...), but 47.58 - // that would require more knowledge here than is wise. 47.59 + // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are 47.60 + // supported on Solaris 10 and later. 47.61 + if (os::Solaris::supports_getisax()) { 47.62 +#ifndef PRODUCT 47.63 + if (PrintMiscellaneous && Verbose) 47.64 + tty->print_cr("getisax(2) supported."); 47.65 +#endif 47.66 47.67 - // isalist spec via 'man isalist' as of 01-Aug-2001 47.68 + // Check 32-bit architecture. 47.69 + do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m); 47.70 47.71 - char tmp; 47.72 - size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1); 47.73 - char* buf = (char*)malloc(bufsize); 47.74 + // Check 64-bit architecture. 
47.75 + do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m); 47.76 47.77 - if (buf != NULL) { 47.78 - if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) { 47.79 - // Figure out what kind of sparc we have 47.80 - char *sparc_string = strstr(buf, "sparc"); 47.81 - if (sparc_string != NULL) { features |= v8_instructions_m; 47.82 - if (sparc_string[5] == 'v') { 47.83 - if (sparc_string[6] == '8') { 47.84 - if (sparc_string[7] == '-') features |= hardware_int_muldiv_m; 47.85 - else if (sparc_string[7] == 'p') features |= generic_v9_m; 47.86 - else features |= generic_v8_m; 47.87 - } else if (sparc_string[6] == '9') features |= generic_v9_m; 47.88 + // Extract valid instruction set extensions. 47.89 + uint_t av; 47.90 + uint_t avn = os::Solaris::getisax(&av, 1); 47.91 + assert(avn == 1, "should only return one av"); 47.92 + 47.93 + if (av & AV_SPARC_MUL32) features |= hardware_mul32_m; 47.94 + if (av & AV_SPARC_DIV32) features |= hardware_div32_m; 47.95 + if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m; 47.96 + if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m; 47.97 + if (av & AV_SPARC_POPC) features |= hardware_popc_m; 47.98 + if (av & AV_SPARC_VIS) features |= vis1_instructions_m; 47.99 + if (av & AV_SPARC_VIS2) features |= vis2_instructions_m; 47.100 + } else { 47.101 + // getisax(2) failed, use the old legacy code. 47.102 +#ifndef PRODUCT 47.103 + if (PrintMiscellaneous && Verbose) 47.104 + tty->print_cr("getisax(2) not supported."); 47.105 +#endif 47.106 + 47.107 + char tmp; 47.108 + size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1); 47.109 + char* buf = (char*) malloc(bufsize); 47.110 + 47.111 + if (buf != NULL) { 47.112 + if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) { 47.113 + // Figure out what kind of sparc we have 47.114 + char *sparc_string = strstr(buf, "sparc"); 47.115 + if (sparc_string != NULL) { features |= v8_instructions_m; 47.116 + if (sparc_string[5] == 'v') { 47.117 + if (sparc_string[6] == '8') { 47.118 + if (sparc_string[7] == '-') { features |= hardware_mul32_m; 47.119 + features |= hardware_div32_m; 47.120 + } else if (sparc_string[7] == 'p') features |= generic_v9_m; 47.121 + else features |= generic_v8_m; 47.122 + } else if (sparc_string[6] == '9') features |= generic_v9_m; 47.123 + } 47.124 + } 47.125 + 47.126 + // Check for visualization instructions 47.127 + char *vis = strstr(buf, "vis"); 47.128 + if (vis != NULL) { features |= vis1_instructions_m; 47.129 + if (vis[3] == '2') features |= vis2_instructions_m; 47.130 } 47.131 } 47.132 - 47.133 - // Check for visualization instructions 47.134 - char *vis = strstr(buf, "vis"); 47.135 - if (vis != NULL) { features |= vis1_instructions_m; 47.136 - if (vis[3] == '2') features |= vis2_instructions_m; 47.137 - } 47.138 + free(buf); 47.139 } 47.140 - free(buf); 47.141 } 47.142 47.143 - bufsize = sysinfo(SI_MACHINE, &tmp, 1); 47.144 - buf = (char*)malloc(bufsize); 47.145 - 47.146 - if (buf != NULL) { 47.147 - if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) { 47.148 - if (strstr(buf, "sun4v") != NULL) { 47.149 - features |= sun4v_m; 47.150 - } 47.151 - } 47.152 - free(buf); 47.153 - } 47.154 + // Determine the machine type. 47.155 + do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m); 47.156 47.157 return features; 47.158 }
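The rewritten platform_features() prefers getisax(2), which reports hardware capabilities as AV_SPARC_* bits, over string-matching the SI_ISALIST output, and falls back to the old parser only when the call is unavailable (pre-Solaris 10). A sketch of the new query style (Solaris-only headers; prints a few of the bits the patch tests):

    #include <sys/auxv.h>
    #include <sys/auxv_SPARC.h>
    #include <stdint.h>
    #include <stdio.h>

    // One 32-bit vector is enough for the AV_SPARC_* bits used here;
    // getisax() returns the number of array elements it filled in.
    static void print_sparc_features(void) {
      uint32_t av;
      if (getisax(&av, 1) != 0) {
        if (av & AV_SPARC_POPC) printf("popc\n");
        if (av & AV_SPARC_VIS)  printf("vis1\n");
        if (av & AV_SPARC_VIS2) printf("vis2\n");
      }
    }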
48.1 --- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp Fri Mar 20 22:08:48 2009 -0400 48.2 +++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp Mon Mar 23 10:42:20 2009 -0400 48.3 @@ -46,5 +46,7 @@ 48.4 48.5 define_pd_global(intx, CompilerThreadStackSize, 0); 48.6 48.7 +// Only used on 64 bit platforms 48.8 +define_pd_global(uintx, HeapBaseMinAddress, 256*M); 48.9 // Only used on 64 bit Windows platforms 48.10 define_pd_global(bool, UseVectoredExceptions, false);
49.1 --- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp Fri Mar 20 22:08:48 2009 -0400 49.2 +++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp Mon Mar 23 10:42:20 2009 -0400 49.3 @@ -45,5 +45,7 @@ 49.4 49.5 define_pd_global(uintx, JVMInvokeMethodSlack, 8192); 49.6 49.7 +// Only used on 64 bit platforms 49.8 +define_pd_global(uintx, HeapBaseMinAddress, 2*G); 49.9 // Only used on 64 bit Windows platforms 49.10 define_pd_global(bool, UseVectoredExceptions, false);
50.1 --- a/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp Fri Mar 20 22:08:48 2009 -0400 50.2 +++ b/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp Mon Mar 23 10:42:20 2009 -0400 50.3 @@ -68,6 +68,9 @@ 50.4 PVOID HandlerData; 50.5 } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; 50.6 50.7 +#if MSC_VER < 1500 50.8 + 50.9 +/* Not needed for VS2008 compiler, comes from winnt.h. */ 50.10 typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) ( 50.11 IN PEXCEPTION_RECORD ExceptionRecord, 50.12 IN ULONG64 EstablisherFrame, 50.13 @@ -75,4 +78,6 @@ 50.14 IN OUT PDISPATCHER_CONTEXT DispatcherContext 50.15 ); 50.16 50.17 +#endif 50.18 + 50.19 #endif // AMD64
51.1 --- a/src/share/vm/adlc/adlc.hpp Fri Mar 20 22:08:48 2009 -0400 51.2 +++ b/src/share/vm/adlc/adlc.hpp Mon Mar 23 10:42:20 2009 -0400 51.3 @@ -44,7 +44,7 @@ 51.4 #error "Something is wrong with the detection of MSC_VER in the makefiles" 51.5 #endif 51.6 51.7 -#if _MSC_VER >= 1400 && !defined(_WIN64) 51.8 +#if _MSC_VER >= 1400 51.9 #define strdup _strdup 51.10 #endif 51.11
52.1 --- a/src/share/vm/asm/assembler.cpp Fri Mar 20 22:08:48 2009 -0400 52.2 +++ b/src/share/vm/asm/assembler.cpp Mon Mar 23 10:42:20 2009 -0400 52.3 @@ -321,16 +321,19 @@ 52.4 bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { 52.5 // Exception handler checks the nmethod's implicit null checks table 52.6 // only when this method returns false. 52.7 - if (UseCompressedOops) { 52.8 +#ifdef _LP64 52.9 + if (UseCompressedOops && Universe::narrow_oop_base() != NULL) { 52.10 + assert (Universe::heap() != NULL, "java heap should be initialized"); 52.11 // The first page after heap_base is unmapped and 52.12 // the 'offset' is equal to [heap_base + offset] for 52.13 // narrow oop implicit null checks. 52.14 - uintptr_t heap_base = (uintptr_t)Universe::heap_base(); 52.15 - if ((uintptr_t)offset >= heap_base) { 52.16 + uintptr_t base = (uintptr_t)Universe::narrow_oop_base(); 52.17 + if ((uintptr_t)offset >= base) { 52.18 // Normalize offset for the next check. 52.19 - offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1)); 52.20 + offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1)); 52.21 } 52.22 } 52.23 +#endif 52.24 return offset < 0 || os::vm_page_size() <= offset; 52.25 } 52.26
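needs_explicit_null_check() now keys off a non-NULL narrow-oop base rather than UseCompressedOops alone, since zero-based compressed oops fault at small addresses exactly like wide oops. The normalization it performs, as a standalone sketch (hypothetical free function; the parameters stand in for Universe::narrow_oop_base() and os::vm_page_size()):

    #include <stdint.h>

    // A null narrow-oop dereference faults at [base + offset]; fold the
    // base back out so the usual first-page range test still applies.
    static bool needs_explicit_null_check_sketch(intptr_t offset,
                                                 uintptr_t narrow_base,
                                                 intptr_t page_size) {
      if (narrow_base != 0 && (uintptr_t)offset >= narrow_base) {
        offset = (intptr_t)((uintptr_t)offset - narrow_base);
      }
      return offset < 0 || page_size <= offset;
    }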
53.1 --- a/src/share/vm/ci/ciMethodBlocks.cpp Fri Mar 20 22:08:48 2009 -0400 53.2 +++ b/src/share/vm/ci/ciMethodBlocks.cpp Mon Mar 23 10:42:20 2009 -0400 53.3 @@ -284,6 +284,11 @@ 53.4 // 53.5 int ex_start = handler->start(); 53.6 int ex_end = handler->limit(); 53.7 + // ensure a block at the start of exception range and start of following code 53.8 + (void) make_block_at(ex_start); 53.9 + if (ex_end < _code_size) 53.10 + (void) make_block_at(ex_end); 53.11 + 53.12 if (eb->is_handler()) { 53.13 // Extend old handler exception range to cover additional range. 53.14 int old_ex_start = eb->ex_start_bci(); 53.15 @@ -295,10 +300,6 @@ 53.16 eb->clear_exception_handler(); // Reset exception information 53.17 } 53.18 eb->set_exception_range(ex_start, ex_end); 53.19 - // ensure a block at the start of exception range and start of following code 53.20 - (void) make_block_at(ex_start); 53.21 - if (ex_end < _code_size) 53.22 - (void) make_block_at(ex_end); 53.23 } 53.24 } 53.25
54.1 --- a/src/share/vm/classfile/vmSymbols.hpp Fri Mar 20 22:08:48 2009 -0400 54.2 +++ b/src/share/vm/classfile/vmSymbols.hpp Mon Mar 23 10:42:20 2009 -0400 54.3 @@ -284,6 +284,7 @@ 54.4 template(value_name, "value") \ 54.5 template(frontCacheEnabled_name, "frontCacheEnabled") \ 54.6 template(stringCacheEnabled_name, "stringCacheEnabled") \ 54.7 + template(bitCount_name, "bitCount") \ 54.8 \ 54.9 /* non-intrinsic name/signature pairs: */ \ 54.10 template(register_method_name, "register") \ 54.11 @@ -304,6 +305,7 @@ 54.12 template(double_long_signature, "(D)J") \ 54.13 template(double_double_signature, "(D)D") \ 54.14 template(int_float_signature, "(I)F") \ 54.15 + template(long_int_signature, "(J)I") \ 54.16 template(long_long_signature, "(J)J") \ 54.17 template(long_double_signature, "(J)D") \ 54.18 template(byte_signature, "B") \ 54.19 @@ -507,6 +509,10 @@ 54.20 do_name( doubleToLongBits_name, "doubleToLongBits") \ 54.21 do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ 54.22 do_name( longBitsToDouble_name, "longBitsToDouble") \ 54.23 + \ 54.24 + do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \ 54.25 + do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \ 54.26 + \ 54.27 do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \ 54.28 do_name( reverseBytes_name, "reverseBytes") \ 54.29 do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \ 54.30 @@ -696,7 +702,6 @@ 54.31 do_signature(putShort_raw_signature, "(JS)V") \ 54.32 do_signature(getChar_raw_signature, "(J)C") \ 54.33 do_signature(putChar_raw_signature, "(JC)V") \ 54.34 - do_signature(getInt_raw_signature, "(J)I") \ 54.35 do_signature(putInt_raw_signature, "(JI)V") \ 54.36 do_alias(getLong_raw_signature, /*(J)J*/ long_long_signature) \ 54.37 do_alias(putLong_raw_signature, /*(JJ)V*/ long_long_void_signature) \ 54.38 @@ -713,7 +718,7 @@ 54.39 do_intrinsic(_getByte_raw, sun_misc_Unsafe, getByte_name, getByte_raw_signature, F_RN) \ 54.40 do_intrinsic(_getShort_raw, sun_misc_Unsafe, getShort_name, getShort_raw_signature, F_RN) \ 54.41 do_intrinsic(_getChar_raw, sun_misc_Unsafe, getChar_name, getChar_raw_signature, F_RN) \ 54.42 - do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, getInt_raw_signature, F_RN) \ 54.43 + do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, long_int_signature, F_RN) \ 54.44 do_intrinsic(_getLong_raw, sun_misc_Unsafe, getLong_name, getLong_raw_signature, F_RN) \ 54.45 do_intrinsic(_getFloat_raw, sun_misc_Unsafe, getFloat_name, getFloat_raw_signature, F_RN) \ 54.46 do_intrinsic(_getDouble_raw, sun_misc_Unsafe, getDouble_name, getDouble_raw_signature, F_RN) \
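The new _bitCount_i/_bitCount_l intrinsics (with the matching PopCountI/PopCountL ideal nodes added in opto/classes.hpp below) let the JIT turn Integer.bitCount and Long.bitCount into a hardware population-count instruction, such as SPARC's POPC, where one exists. For reference, the classic branch-free reduction that such an intrinsic replaces (generic C++, not HotSpot code):

    #include <stdint.h>

    // SWAR population count: sum bits in pairs, then nibbles, then let
    // one multiply accumulate the per-byte counts into the top byte.
    static inline int bit_count64(uint64_t x) {
      x = x - ((x >> 1) & 0x5555555555555555ULL);
      x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
      x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
      return (int)((x * 0x0101010101010101ULL) >> 56);
    }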
55.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Fri Mar 20 22:08:48 2009 -0400 55.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Mon Mar 23 10:42:20 2009 -0400 55.3 @@ -145,14 +145,9 @@ 55.4 if (G1RSBarrierUseQueue) { 55.5 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 55.6 dcqs.abandon_logs(); 55.7 - if (_cg1rThread->do_traversal()) { 55.8 - _pya = PYA_restart; 55.9 - } else { 55.10 - _cg1rThread->set_do_traversal(true); 55.11 - // Reset the post-yield actions. 55.12 - _pya = PYA_continue; 55.13 - _last_pya = PYA_continue; 55.14 - } 55.15 + // Reset the post-yield actions. 55.16 + _pya = PYA_continue; 55.17 + _last_pya = PYA_continue; 55.18 } else { 55.19 _pya = PYA_restart; 55.20 }
56.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Fri Mar 20 22:08:48 2009 -0400 56.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Mar 23 10:42:20 2009 -0400 56.3 @@ -107,7 +107,7 @@ 56.4 #ifndef PRODUCT 56.5 bool CMBitMapRO::covers(ReservedSpace rs) const { 56.6 // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 56.7 - assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, 56.8 + assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize, 56.9 "size inconsistency"); 56.10 return _bmStartWord == (HeapWord*)(rs.base()) && 56.11 _bmWordSize == rs.size()>>LogHeapWordSize; 56.12 @@ -1232,7 +1232,16 @@ 56.13 if (!_final && _regions_done == 0) 56.14 _start_vtime_sec = os::elapsedVTime(); 56.15 56.16 - if (hr->continuesHumongous()) return false; 56.17 + if (hr->continuesHumongous()) { 56.18 + HeapRegion* hum_start = hr->humongous_start_region(); 56.19 + // If the head region of the humongous region has been determined 56.20 + // to be alive, then all the tail regions should be marked 56.21 + // as well. 56.22 + if (_region_bm->at(hum_start->hrs_index())) { 56.23 + _region_bm->par_at_put(hr->hrs_index(), 1); 56.24 + } 56.25 + return false; 56.26 + } 56.27 56.28 HeapWord* nextTop = hr->next_top_at_mark_start(); 56.29 HeapWord* start = hr->top_at_conc_mark_count();
57.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Mar 20 22:08:48 2009 -0400 57.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Mar 23 10:42:20 2009 -0400 57.3 @@ -786,6 +786,12 @@ 57.4 } 57.5 } 57.6 57.7 +void G1CollectedHeap::abandon_gc_alloc_regions() { 57.8 + // first, make sure that the GC alloc region list is empty (it should!) 57.9 + assert(_gc_alloc_region_list == NULL, "invariant"); 57.10 + release_gc_alloc_regions(true /* totally */); 57.11 +} 57.12 + 57.13 class PostMCRemSetClearClosure: public HeapRegionClosure { 57.14 ModRefBarrierSet* _mr_bs; 57.15 public: 57.16 @@ -914,6 +920,7 @@ 57.17 57.18 // Make sure we'll choose a new allocation region afterwards. 57.19 abandon_cur_alloc_region(); 57.20 + abandon_gc_alloc_regions(); 57.21 assert(_cur_alloc_region == NULL, "Invariant."); 57.22 g1_rem_set()->as_HRInto_G1RemSet()->cleanupHRRS(); 57.23 tear_down_region_lists(); 57.24 @@ -954,6 +961,7 @@ 57.25 if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { 57.26 HandleMark hm; // Discard invalid handles created during verification 57.27 gclog_or_tty->print(" VerifyAfterGC:"); 57.28 + prepare_for_verify(); 57.29 Universe::verify(false); 57.30 } 57.31 NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); 57.32 @@ -1306,7 +1314,7 @@ 57.33 } 57.34 57.35 void G1CollectedHeap::shrink(size_t shrink_bytes) { 57.36 - release_gc_alloc_regions(); 57.37 + release_gc_alloc_regions(true /* totally */); 57.38 tear_down_region_lists(); // We will rebuild them in a moment. 57.39 shrink_helper(shrink_bytes); 57.40 rebuild_region_lists(); 57.41 @@ -1345,8 +1353,7 @@ 57.42 _gc_time_stamp(0), 57.43 _surviving_young_words(NULL), 57.44 _in_cset_fast_test(NULL), 57.45 - _in_cset_fast_test_base(NULL) 57.46 -{ 57.47 + _in_cset_fast_test_base(NULL) { 57.48 _g1h = this; // To catch bugs. 57.49 if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { 57.50 vm_exit_during_initialization("Failed necessary allocation."); 57.51 @@ -1371,9 +1378,19 @@ 57.52 } 57.53 57.54 for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { 57.55 - _gc_alloc_regions[ap] = NULL; 57.56 - _gc_alloc_region_counts[ap] = 0; 57.57 - } 57.58 + _gc_alloc_regions[ap] = NULL; 57.59 + _gc_alloc_region_counts[ap] = 0; 57.60 + _retained_gc_alloc_regions[ap] = NULL; 57.61 + // by default, we do not retain a GC alloc region for each ap; 57.62 + // we'll override this, when appropriate, below 57.63 + _retain_gc_alloc_region[ap] = false; 57.64 + } 57.65 + 57.66 + // We will try to remember the last half-full tenured region we 57.67 + // allocated to at the end of a collection so that we can re-use it 57.68 + // during the next collection. 57.69 + _retain_gc_alloc_region[GCAllocForTenured] = true; 57.70 + 57.71 guarantee(_task_queues != NULL, "task_queues allocation failure."); 57.72 } 57.73 57.74 @@ -1405,9 +1422,34 @@ 57.75 // Reserve the maximum. 57.76 PermanentGenerationSpec* pgs = collector_policy()->permanent_generation(); 57.77 // Includes the perm-gen. 
57.78 + 57.79 + const size_t total_reserved = max_byte_size + pgs->max_size(); 57.80 + char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop); 57.81 + 57.82 ReservedSpace heap_rs(max_byte_size + pgs->max_size(), 57.83 HeapRegion::GrainBytes, 57.84 - false /*ism*/); 57.85 + false /*ism*/, addr); 57.86 + 57.87 + if (UseCompressedOops) { 57.88 + if (addr != NULL && !heap_rs.is_reserved()) { 57.89 + // Failed to reserve at specified address - the requested memory 57.90 + // region is taken already, for example, by 'java' launcher. 57.91 + // Try again to reserver heap higher. 57.92 + addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop); 57.93 + ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes, 57.94 + false /*ism*/, addr); 57.95 + if (addr != NULL && !heap_rs0.is_reserved()) { 57.96 + // Failed to reserve at specified address again - give up. 57.97 + addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop); 57.98 + assert(addr == NULL, ""); 57.99 + ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes, 57.100 + false /*ism*/, addr); 57.101 + heap_rs = heap_rs1; 57.102 + } else { 57.103 + heap_rs = heap_rs0; 57.104 + } 57.105 + } 57.106 + } 57.107 57.108 if (!heap_rs.is_reserved()) { 57.109 vm_exit_during_initialization("Could not reserve enough space for object heap"); 57.110 @@ -2119,15 +2161,7 @@ 57.111 bool doHeapRegion(HeapRegion* r) { 57.112 guarantee(_par || r->claim_value() == HeapRegion::InitialClaimValue, 57.113 "Should be unclaimed at verify points."); 57.114 - if (r->isHumongous()) { 57.115 - if (r->startsHumongous()) { 57.116 - // Verify the single H object. 57.117 - oop(r->bottom())->verify(); 57.118 - size_t word_sz = oop(r->bottom())->size(); 57.119 - guarantee(r->top() == r->bottom() + word_sz, 57.120 - "Only one object in a humongous region"); 57.121 - } 57.122 - } else { 57.123 + if (!r->continuesHumongous()) { 57.124 VerifyObjsInRegionClosure not_dead_yet_cl(r); 57.125 r->verify(_allow_dirty); 57.126 r->object_iterate(¬_dead_yet_cl); 57.127 @@ -2179,6 +2213,7 @@ 57.128 _g1h(g1h), _allow_dirty(allow_dirty) { } 57.129 57.130 void work(int worker_i) { 57.131 + HandleMark hm; 57.132 VerifyRegionClosure blk(_allow_dirty, true); 57.133 _g1h->heap_region_par_iterate_chunked(&blk, worker_i, 57.134 HeapRegion::ParVerifyClaimValue); 57.135 @@ -2644,7 +2679,7 @@ 57.136 popular_region->set_popular_pending(false); 57.137 } 57.138 57.139 - release_gc_alloc_regions(); 57.140 + release_gc_alloc_regions(false /* totally */); 57.141 57.142 cleanup_surviving_young_words(); 57.143 57.144 @@ -2697,6 +2732,7 @@ 57.145 if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { 57.146 HandleMark hm; // Discard invalid handles created during verification 57.147 gclog_or_tty->print(" VerifyAfterGC:"); 57.148 + prepare_for_verify(); 57.149 Universe::verify(false); 57.150 } 57.151 57.152 @@ -2735,6 +2771,10 @@ 57.153 57.154 void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) { 57.155 assert(purpose >= 0 && purpose < GCAllocPurposeCount, "invalid purpose"); 57.156 + // make sure we don't call set_gc_alloc_region() multiple times on 57.157 + // the same region 57.158 + assert(r == NULL || !r->is_gc_alloc_region(), 57.159 + "shouldn't already be a GC alloc region"); 57.160 HeapWord* original_top = NULL; 57.161 if (r != NULL) 57.162 original_top = r->top(); 57.163 @@ -2824,6 +2864,12 @@ 57.164 while (_gc_alloc_region_list != NULL) { 57.165 HeapRegion* r = 
_gc_alloc_region_list; 57.166 assert(r->is_gc_alloc_region(), "Invariant."); 57.167 + // We need HeapRegion::oops_on_card_seq_iterate_careful() to work on 57.168 + // newly allocated data in order to be able to apply deferred updates 57.169 + // before the GC is done for verification purposes (i.e to allow 57.170 + // G1HRRSFlushLogBuffersOnVerify). It's safe thing to do after the 57.171 + // collection. 57.172 + r->ContiguousSpace::set_saved_mark(); 57.173 _gc_alloc_region_list = r->next_gc_alloc_region(); 57.174 r->set_next_gc_alloc_region(NULL); 57.175 r->set_is_gc_alloc_region(false); 57.176 @@ -2851,23 +2897,55 @@ 57.177 } 57.178 57.179 void G1CollectedHeap::get_gc_alloc_regions() { 57.180 + // First, let's check that the GC alloc region list is empty (it should) 57.181 + assert(_gc_alloc_region_list == NULL, "invariant"); 57.182 + 57.183 for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { 57.184 + assert(_gc_alloc_regions[ap] == NULL, "invariant"); 57.185 + 57.186 // Create new GC alloc regions. 57.187 - HeapRegion* alloc_region = _gc_alloc_regions[ap]; 57.188 - // Clear this alloc region, so that in case it turns out to be 57.189 - // unacceptable, we end up with no allocation region, rather than a bad 57.190 - // one. 57.191 - _gc_alloc_regions[ap] = NULL; 57.192 - if (alloc_region == NULL || alloc_region->in_collection_set()) { 57.193 - // Can't re-use old one. Allocate a new one. 57.194 + HeapRegion* alloc_region = _retained_gc_alloc_regions[ap]; 57.195 + _retained_gc_alloc_regions[ap] = NULL; 57.196 + 57.197 + if (alloc_region != NULL) { 57.198 + assert(_retain_gc_alloc_region[ap], "only way to retain a GC region"); 57.199 + 57.200 + // let's make sure that the GC alloc region is not tagged as such 57.201 + // outside a GC operation 57.202 + assert(!alloc_region->is_gc_alloc_region(), "sanity"); 57.203 + 57.204 + if (alloc_region->in_collection_set() || 57.205 + alloc_region->top() == alloc_region->end() || 57.206 + alloc_region->top() == alloc_region->bottom()) { 57.207 + // we will discard the current GC alloc region if it's in the 57.208 + // collection set (it can happen!), if it's already full (no 57.209 + // point in using it), or if it's empty (this means that it 57.210 + // was emptied during a cleanup and it should be on the free 57.211 + // list now). 57.212 + 57.213 + alloc_region = NULL; 57.214 + } 57.215 + } 57.216 + 57.217 + if (alloc_region == NULL) { 57.218 + // we will get a new GC alloc region 57.219 alloc_region = newAllocRegionWithExpansion(ap, 0); 57.220 } 57.221 + 57.222 if (alloc_region != NULL) { 57.223 + assert(_gc_alloc_regions[ap] == NULL, "pre-condition"); 57.224 set_gc_alloc_region(ap, alloc_region); 57.225 } 57.226 + 57.227 + assert(_gc_alloc_regions[ap] == NULL || 57.228 + _gc_alloc_regions[ap]->is_gc_alloc_region(), 57.229 + "the GC alloc region should be tagged as such"); 57.230 + assert(_gc_alloc_regions[ap] == NULL || 57.231 + _gc_alloc_regions[ap] == _gc_alloc_region_list, 57.232 + "the GC alloc region should be the same as the GC alloc list head"); 57.233 } 57.234 // Set alternative regions for allocation purposes that have reached 57.235 - // thier limit. 57.236 + // their limit. 
57.237 for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { 57.238 GCAllocPurpose alt_purpose = g1_policy()->alternative_purpose(ap); 57.239 if (_gc_alloc_regions[ap] == NULL && alt_purpose != ap) { 57.240 @@ -2877,27 +2955,55 @@ 57.241 assert(check_gc_alloc_regions(), "alloc regions messed up"); 57.242 } 57.243 57.244 -void G1CollectedHeap::release_gc_alloc_regions() { 57.245 +void G1CollectedHeap::release_gc_alloc_regions(bool totally) { 57.246 // We keep a separate list of all regions that have been alloc regions in 57.247 - // the current collection pause. Forget that now. 57.248 + // the current collection pause. Forget that now. This method will 57.249 + // untag the GC alloc regions and tear down the GC alloc region 57.250 + // list. It's desirable that no regions are tagged as GC alloc 57.251 + // outside GCs. 57.252 forget_alloc_region_list(); 57.253 57.254 // The current alloc regions contain objs that have survived 57.255 // collection. Make them no longer GC alloc regions. 57.256 for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { 57.257 HeapRegion* r = _gc_alloc_regions[ap]; 57.258 - if (r != NULL && r->is_empty()) { 57.259 - { 57.260 + _retained_gc_alloc_regions[ap] = NULL; 57.261 + 57.262 + if (r != NULL) { 57.263 + // we retain nothing on _gc_alloc_regions between GCs 57.264 + set_gc_alloc_region(ap, NULL); 57.265 + _gc_alloc_region_counts[ap] = 0; 57.266 + 57.267 + if (r->is_empty()) { 57.268 + // we didn't actually allocate anything in it; let's just put 57.269 + // it on the free list 57.270 MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag); 57.271 r->set_zero_fill_complete(); 57.272 put_free_region_on_list_locked(r); 57.273 + } else if (_retain_gc_alloc_region[ap] && !totally) { 57.274 + // retain it so that we can use it at the beginning of the next GC 57.275 + _retained_gc_alloc_regions[ap] = r; 57.276 } 57.277 } 57.278 - // set_gc_alloc_region will also NULLify all aliases to the region 57.279 - set_gc_alloc_region(ap, NULL); 57.280 - _gc_alloc_region_counts[ap] = 0; 57.281 - } 57.282 -} 57.283 + } 57.284 +} 57.285 + 57.286 +#ifndef PRODUCT 57.287 +// Useful for debugging 57.288 + 57.289 +void G1CollectedHeap::print_gc_alloc_regions() { 57.290 + gclog_or_tty->print_cr("GC alloc regions"); 57.291 + for (int ap = 0; ap < GCAllocPurposeCount; ++ap) { 57.292 + HeapRegion* r = _gc_alloc_regions[ap]; 57.293 + if (r == NULL) { 57.294 + gclog_or_tty->print_cr(" %2d : "PTR_FORMAT, ap, NULL); 57.295 + } else { 57.296 + gclog_or_tty->print_cr(" %2d : "PTR_FORMAT" "SIZE_FORMAT, 57.297 + ap, r->bottom(), r->used()); 57.298 + } 57.299 + } 57.300 +} 57.301 +#endif // PRODUCT 57.302 57.303 void G1CollectedHeap::init_for_evac_failure(OopsInHeapRegionClosure* cl) { 57.304 _drain_in_progress = false; 57.305 @@ -3658,7 +3764,9 @@ 57.306 CardTableModRefBS* ctbs() { return _ct_bs; } 57.307 57.308 void immediate_rs_update(HeapRegion* from, oop* p, int tid) { 57.309 - _g1_rem->par_write_ref(from, p, tid); 57.310 + if (!from->is_survivor()) { 57.311 + _g1_rem->par_write_ref(from, p, tid); 57.312 + } 57.313 } 57.314 57.315 void deferred_rs_update(HeapRegion* from, oop* p, int tid) {
58.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Mar 20 22:08:48 2009 -0400 58.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Mar 23 10:42:20 2009 -0400 58.3 @@ -172,7 +172,6 @@ 58.4 NumAPIs = HeapRegion::MaxAge 58.5 }; 58.6 58.7 - 58.8 // The one and only G1CollectedHeap, so static functions can find it. 58.9 static G1CollectedHeap* _g1h; 58.10 58.11 @@ -217,11 +216,20 @@ 58.12 58.13 // Postcondition: cur_alloc_region == NULL. 58.14 void abandon_cur_alloc_region(); 58.15 + void abandon_gc_alloc_regions(); 58.16 58.17 // The to-space memory regions into which objects are being copied during 58.18 // a GC. 58.19 HeapRegion* _gc_alloc_regions[GCAllocPurposeCount]; 58.20 size_t _gc_alloc_region_counts[GCAllocPurposeCount]; 58.21 + // These are the regions, one per GCAllocPurpose, that are half-full 58.22 + // at the end of a collection and that we want to reuse during the 58.23 + // next collection. 58.24 + HeapRegion* _retained_gc_alloc_regions[GCAllocPurposeCount]; 58.25 + // This specifies whether we will keep the last half-full region at 58.26 + // the end of a collection so that it can be reused during the next 58.27 + // collection (this is specified per GCAllocPurpose) 58.28 + bool _retain_gc_alloc_region[GCAllocPurposeCount]; 58.29 58.30 // A list of the regions that have been set to be alloc regions in the 58.31 // current collection. 58.32 @@ -589,8 +597,21 @@ 58.33 58.34 // Ensure that the relevant gc_alloc regions are set. 58.35 void get_gc_alloc_regions(); 58.36 - // We're done with GC alloc regions; release them, as appropriate. 58.37 - void release_gc_alloc_regions(); 58.38 + // We're done with GC alloc regions. We are going to tear down the 58.39 + // gc alloc list and remove the gc alloc tag from all the regions on 58.40 + // that list. However, we will also retain the last (i.e., the one 58.41 + // that is half-full) GC alloc region, per GCAllocPurpose, for 58.42 + // possible reuse during the next collection, provided 58.43 + // _retain_gc_alloc_region[] indicates that it should be the 58.44 + // case. Said regions are kept in the _retained_gc_alloc_regions[] 58.45 + // array. If the parameter totally is set, we will not retain any 58.46 + // regions, irrespective of what _retain_gc_alloc_region[] 58.47 + // indicates. 58.48 + void release_gc_alloc_regions(bool totally); 58.49 +#ifndef PRODUCT 58.50 + // Useful for debugging. 58.51 + void print_gc_alloc_regions(); 58.52 +#endif // !PRODUCT 58.53 58.54 // ("Weak") Reference processing support 58.55 ReferenceProcessor* _ref_processor;
59.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Mar 20 22:08:48 2009 -0400 59.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Mar 23 10:42:20 2009 -0400 59.3 @@ -1087,6 +1087,7 @@ 59.4 59.5 assert(_g1->used_regions() == _g1->recalculate_used_regions(), 59.6 "sanity"); 59.7 + assert(_g1->used() == _g1->recalculate_used(), "sanity"); 59.8 59.9 double s_w_t_ms = (start_time_sec - _stop_world_start) * 1000.0; 59.10 _all_stop_world_times_ms->add(s_w_t_ms);
60.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Mar 20 22:08:48 2009 -0400 60.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Mar 23 10:42:20 2009 -0400 60.3 @@ -502,14 +502,17 @@ 60.4 } 60.5 60.6 if (ParallelGCThreads > 0) { 60.7 - // This is a temporary change to serialize the update and scanning 60.8 - // of remembered sets. There are some race conditions when this is 60.9 - // done in parallel and they are causing failures. When we resolve 60.10 - // said race conditions, we'll revert back to parallel remembered 60.11 - // set updating and scanning. See CRs 6677707 and 6677708. 60.12 - if (worker_i == 0) { 60.13 + // The two flags below were introduced temporarily to serialize 60.14 + // the updating and scanning of remembered sets. There are some 60.15 + // race conditions when these two operations are done in parallel 60.16 + // and they are causing failures. When we resolve said race 60.17 + // conditions, we'll revert back to parallel remembered set 60.18 + // updating and scanning. See CRs 6677707 and 6677708. 60.19 + if (G1EnableParallelRSetUpdating || (worker_i == 0)) { 60.20 updateRS(worker_i); 60.21 scanNewRefsRS(oc, worker_i); 60.22 + } 60.23 + if (G1EnableParallelRSetScanning || (worker_i == 0)) { 60.24 scanRS(oc, worker_i); 60.25 } 60.26 } else { 60.27 @@ -716,8 +719,7 @@ 60.28 bool doHeapRegion(HeapRegion* r) { 60.29 if (!r->in_collection_set() && 60.30 !r->continuesHumongous() && 60.31 - !r->is_young() && 60.32 - !r->is_survivor()) { 60.33 + !r->is_young()) { 60.34 _update_rs_oop_cl.set_from(r); 60.35 UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); 60.36 60.37 @@ -854,7 +856,7 @@ 60.38 // before all the cards on the region are dirtied. This is unlikely, 60.39 // and it doesn't happen often, but it can happen. So, the extra 60.40 // check below filters out those cards. 60.41 - if (r->is_young() || r->is_survivor()) { 60.42 + if (r->is_young()) { 60.43 return; 60.44 } 60.45 // While we are processing RSet buffers during the collection, we 60.46 @@ -1025,7 +1027,9 @@ 60.47 } 60.48 } 60.49 void HRInto_G1RemSet::prepare_for_verify() { 60.50 - if (G1HRRSFlushLogBuffersOnVerify && VerifyBeforeGC && !_g1->full_collection()) { 60.51 + if (G1HRRSFlushLogBuffersOnVerify && 60.52 + (VerifyBeforeGC || VerifyAfterGC) 60.53 + && !_g1->full_collection()) { 60.54 cleanupHRRS(); 60.55 _g1->set_refine_cte_cl_concurrency(false); 60.56 if (SafepointSynchronize::is_at_safepoint()) { 60.57 @@ -1036,5 +1040,7 @@ 60.58 _cg1r->set_use_cache(false); 60.59 updateRS(0); 60.60 _cg1r->set_use_cache(cg1r_use_cache); 60.61 + 60.62 + assert(JavaThread::dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); 60.63 } 60.64 }
61.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Mar 20 22:08:48 2009 -0400 61.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Mon Mar 23 10:42:20 2009 -0400 61.3 @@ -295,6 +295,14 @@ 61.4 \ 61.5 product(uintx, G1FixedSurvivorSpaceSize, 0, \ 61.6 "If non-0 is the size of the G1 survivor space, " \ 61.7 - "otherwise SurvivorRatio is used to determine the size") 61.8 + "otherwise SurvivorRatio is used to determine the size") \ 61.9 + \ 61.10 + experimental(bool, G1EnableParallelRSetUpdating, false, \ 61.11 + "Enables the parallelization of remembered set updating " \ 61.12 + "during evacuation pauses") \ 61.13 + \ 61.14 + experimental(bool, G1EnableParallelRSetScanning, false, \ 61.15 + "Enables the parallelization of remembered set scanning " \ 61.16 + "during evacuation pauses") 61.17 61.18 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
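Both flags are registered as experimental, so they stay off unless experimental options are unlocked; once the cited races (CRs 6677707 and 6677708) are resolved, they can be exercised with an invocation along these lines (illustrative command line, assuming a G1-enabled build):

    java -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC \
         -XX:+G1EnableParallelRSetUpdating -XX:+G1EnableParallelRSetScanning ...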
62.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Mar 20 22:08:48 2009 -0400 62.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Mon Mar 23 10:42:20 2009 -0400 62.3 @@ -508,7 +508,7 @@ 62.4 typedef PosParPRT* PosParPRTPtr; 62.5 if (_max_fine_entries == 0) { 62.6 assert(_mod_max_fine_entries_mask == 0, "Both or none."); 62.7 - _max_fine_entries = (1 << G1LogRSRegionEntries); 62.8 + _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries); 62.9 _mod_max_fine_entries_mask = _max_fine_entries - 1; 62.10 #if SAMPLE_FOR_EVICTION 62.11 assert(_fine_eviction_sample_size == 0
63.1 --- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp Fri Mar 20 22:08:48 2009 -0400 63.2 +++ b/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp Mon Mar 23 10:42:20 2009 -0400 63.3 @@ -63,9 +63,8 @@ 63.4 // return NULL. 63.5 HeapWord* allocate(size_t word_sz) { 63.6 HeapWord* res = _top; 63.7 - HeapWord* new_top = _top + word_sz; 63.8 - if (new_top <= _end) { 63.9 - _top = new_top; 63.10 + if (pointer_delta(_end, _top) >= word_sz) { 63.11 + _top = _top + word_sz; 63.12 return res; 63.13 } else { 63.14 return NULL; 63.15 @@ -75,10 +74,9 @@ 63.16 // Undo the last allocation in the buffer, which is required to be of the 63.17 // "obj" of the given "word_sz". 63.18 void undo_allocation(HeapWord* obj, size_t word_sz) { 63.19 - assert(_top - word_sz >= _bottom 63.20 - && _top - word_sz == obj, 63.21 - "Bad undo_allocation"); 63.22 - _top = _top - word_sz; 63.23 + assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo"); 63.24 + assert(pointer_delta(_top, obj) == word_sz, "Bad undo"); 63.25 + _top = obj; 63.26 } 63.27 63.28 // The total (word) size of the buffer, including both allocated and
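The allocate()/undo_allocation() rewrite replaces the "_top + word_sz <= _end" test with pointer_delta() comparisons: computing _top + word_sz first can wrap past the end of the address space for a corrupt or huge word_sz, while comparing the remaining room never forms an out-of-range pointer. The overflow-safe bump pointer, as a plain C++ sketch (hypothetical names):

    #include <stddef.h>

    // Never compute top + size before checking: compare the remaining
    // room instead, like pointer_delta(_end, _top) >= word_sz above.
    static char* bump_alloc(char** top, char* end, size_t size) {
      if ((size_t)(end - *top) >= size) {   // assumes the top <= end invariant
        char* res = *top;
        *top += size;
        return res;
      }
      return NULL;  // does not fit
    }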
64.1 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri Mar 20 22:08:48 2009 -0400 64.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Mon Mar 23 10:42:20 2009 -0400 64.3 @@ -104,12 +104,38 @@ 64.4 og_min_size, og_max_size, 64.5 yg_min_size, yg_max_size); 64.6 64.7 + const size_t total_reserved = pg_max_size + og_max_size + yg_max_size; 64.8 + char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop); 64.9 + 64.10 // The main part of the heap (old gen + young gen) can often use a larger page 64.11 // size than is needed or wanted for the perm gen. Use the "compound 64.12 // alignment" ReservedSpace ctor to avoid having to use the same page size for 64.13 // all gens. 64.14 + 64.15 ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size, 64.16 - og_align); 64.17 + og_align, addr); 64.18 + 64.19 + if (UseCompressedOops) { 64.20 + if (addr != NULL && !heap_rs.is_reserved()) { 64.21 + // Failed to reserve at specified address - the requested memory 64.22 + // region is taken already, for example, by 'java' launcher. 64.23 + // Try again to reserve heap higher. 64.24 + addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop); 64.25 + ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size, 64.26 + og_align, addr); 64.27 + if (addr != NULL && !heap_rs0.is_reserved()) { 64.28 + // Failed to reserve at specified address again - give up. 64.29 + addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop); 64.30 + assert(addr == NULL, ""); 64.31 + ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size, 64.32 + og_align, addr); 64.33 + heap_rs = heap_rs1; 64.34 + } else { 64.35 + heap_rs = heap_rs0; 64.36 + } 64.37 + } 64.38 + } 64.39 + 64.40 os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz, 64.41 heap_rs.base(), pg_max_size); 64.42 os::trace_page_sizes("ps main", og_min_size + yg_min_size,
65.1 --- a/src/share/vm/includeDB_core Fri Mar 20 22:08:48 2009 -0400 65.2 +++ b/src/share/vm/includeDB_core Mon Mar 23 10:42:20 2009 -0400 65.3 @@ -4598,6 +4598,7 @@ 65.4 vm_version_<arch>.hpp globals_extension.hpp 65.5 vm_version_<arch>.hpp vm_version.hpp 65.6 65.7 +vm_version_<os_arch>.cpp os.hpp 65.8 vm_version_<os_arch>.cpp vm_version_<arch>.hpp 65.9 65.10 vmreg.cpp assembler.hpp
66.1 --- a/src/share/vm/memory/blockOffsetTable.hpp Fri Mar 20 22:08:48 2009 -0400 66.2 +++ b/src/share/vm/memory/blockOffsetTable.hpp Mon Mar 23 10:42:20 2009 -0400 66.3 @@ -235,7 +235,7 @@ 66.4 }; 66.5 66.6 static size_t power_to_cards_back(uint i) { 66.7 - return 1 << (LogBase * i); 66.8 + return (size_t)(1 << (LogBase * i)); 66.9 } 66.10 static size_t power_to_words_back(uint i) { 66.11 return power_to_cards_back(i) * N_words;
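This cast, like the matching one in heapRegionRemSet.cpp above, silences a signed-to-size_t conversion warning in 64-bit builds. Note that the shift itself is still evaluated in 32-bit int, so the result is well defined only while the shift count stays below 31; when a count can reach 32 or more, the operand itself has to be widened before shifting. Illustration (generic C++, not from the patch):

    #include <stddef.h>

    size_t a = (size_t)(1 << 20);    // fine: result fits in int, then widens
    size_t b = (size_t)1 << 40;      // required form once the count can exceed 31
    // size_t c = (size_t)(1 << 40); // undefined: the shift overflows int first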
67.1 --- a/src/share/vm/memory/genCollectedHeap.cpp Fri Mar 20 22:08:48 2009 -0400 67.2 +++ b/src/share/vm/memory/genCollectedHeap.cpp Mon Mar 23 10:42:20 2009 -0400 67.3 @@ -218,6 +218,31 @@ 67.4 heap_address -= total_reserved; 67.5 } else { 67.6 heap_address = NULL; // any address will do. 67.7 + if (UseCompressedOops) { 67.8 + heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop); 67.9 + *_total_reserved = total_reserved; 67.10 + *_n_covered_regions = n_covered_regions; 67.11 + *heap_rs = ReservedHeapSpace(total_reserved, alignment, 67.12 + UseLargePages, heap_address); 67.13 + 67.14 + if (heap_address != NULL && !heap_rs->is_reserved()) { 67.15 + // Failed to reserve at specified address - the requested memory 67.16 + // region is taken already, for example, by 'java' launcher. 67.17 + // Try again to reserve heap higher. 67.18 + heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop); 67.19 + *heap_rs = ReservedHeapSpace(total_reserved, alignment, 67.20 + UseLargePages, heap_address); 67.21 + 67.22 + if (heap_address != NULL && !heap_rs->is_reserved()) { 67.23 + // Failed to reserve at specified address again - give up. 67.24 + heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop); 67.25 + assert(heap_address == NULL, ""); 67.26 + *heap_rs = ReservedHeapSpace(total_reserved, alignment, 67.27 + UseLargePages, heap_address); 67.28 + } 67.29 + } 67.30 + return heap_address; 67.31 + } 67.32 } 67.33 67.34 *_total_reserved = total_reserved;
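All three collectors (G1 and ParallelScavenge above, the generational heap here) now share one reservation ladder: ask preferred_heap_base() for the UnscaledNarrowOop address, and if that spot is taken retry with ZeroBasedNarrowOop, then HeapBasedNarrowOop (which returns NULL, i.e. reserve anywhere). The control flow, sketched with stand-in types (try_reserve and preferred are hypothetical stubs for the ReservedHeapSpace constructor and Universe::preferred_heap_base; declarations only, so this is a shape sketch rather than a linkable program):

    #include <stddef.h>

    struct Space { char* base; bool reserved; };
    Space try_reserve(size_t size, char* at);  // stub: attempt a mapping at 'at'
    char* preferred(size_t size, int mode);    // stub: 0=Unscaled 1=ZeroBased 2=HeapBased

    Space reserve_heap(size_t total) {
      Space s = { NULL, false };
      for (int mode = 0; mode <= 2 && !s.reserved; ++mode) {
        char* at = preferred(total, mode);     // NULL hint means "any address"
        s = try_reserve(total, at);
        if (at == NULL) break;                 // nothing left to retry against
      }
      return s;
    }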
68.1 --- a/src/share/vm/memory/universe.cpp Fri Mar 20 22:08:48 2009 -0400 68.2 +++ b/src/share/vm/memory/universe.cpp Mon Mar 23 10:42:20 2009 -0400 68.3 @@ -99,7 +99,8 @@ 68.4 size_t Universe::_heap_used_at_last_gc = 0; 68.5 68.6 CollectedHeap* Universe::_collectedHeap = NULL; 68.7 -address Universe::_heap_base = NULL; 68.8 + 68.9 +NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true }; 68.10 68.11 68.12 void Universe::basic_type_classes_do(void f(klassOop)) { 68.13 @@ -729,6 +730,53 @@ 68.14 return JNI_OK; 68.15 } 68.16 68.17 +// Choose the heap base address and oop encoding mode 68.18 +// when compressed oops are used: 68.19 +// Unscaled - Use 32-bits oops without encoding when 68.20 +// NarrowOopHeapBaseMin + heap_size < 4Gb 68.21 +// ZeroBased - Use zero based compressed oops with encoding when 68.22 +// NarrowOopHeapBaseMin + heap_size < 32Gb 68.23 +// HeapBased - Use compressed oops with heap base + encoding. 68.24 + 68.25 +// 4Gb 68.26 +static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1); 68.27 +// 32Gb 68.28 +static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes; 68.29 + 68.30 +char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) { 68.31 +#ifdef _LP64 68.32 + if (UseCompressedOops) { 68.33 + assert(mode == UnscaledNarrowOop || 68.34 + mode == ZeroBasedNarrowOop || 68.35 + mode == HeapBasedNarrowOop, "mode is invalid"); 68.36 + 68.37 + const size_t total_size = heap_size + HeapBaseMinAddress; 68.38 + if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) { 68.39 + if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) && 68.40 + (Universe::narrow_oop_shift() == 0)) { 68.41 + // Use 32-bits oops without encoding and 68.42 + // place heap's top on the 4Gb boundary 68.43 + return (char*)(NarrowOopHeapMax - heap_size); 68.44 + } else { 68.45 + // Can't reserve with NarrowOopShift == 0 68.46 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 68.47 + if (mode == UnscaledNarrowOop || 68.48 + mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax) { 68.49 + // Use zero based compressed oops with encoding and 68.50 + // place heap's top on the 32Gb boundary in case 68.51 + // total_size > 4Gb or failed to reserve below 4Gb. 68.52 + return (char*)(OopEncodingHeapMax - heap_size); 68.53 + } 68.54 + } 68.55 + } else { 68.56 + // Can't reserve below 32Gb. 68.57 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 68.58 + } 68.59 + } 68.60 +#endif 68.61 + return NULL; // also return NULL (don't care) for 32-bit VM 68.62 +} 68.63 + 68.64 jint Universe::initialize_heap() { 68.65 68.66 if (UseParallelGC) { 68.67 @@ -773,6 +821,8 @@ 68.68 if (status != JNI_OK) { 68.69 return status; 68.70 } 68.71 + 68.72 +#ifdef _LP64 68.73 if (UseCompressedOops) { 68.74 // Subtract a page because something can get allocated at heap base. 68.75 // This also makes implicit null checking work, because the 68.76 @@ -780,8 +830,49 @@ 68.77 // See needs_explicit_null_check. 68.78 // Only set the heap base for compressed oops because it indicates 68.79 // compressed oops for pstack code. 68.80 - Universe::_heap_base = Universe::heap()->base() - os::vm_page_size(); 68.81 + if (PrintCompressedOopsMode) { 68.82 + tty->cr(); 68.83 + tty->print("heap address: "PTR_FORMAT, Universe::heap()->base()); 68.84 + } 68.85 + if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) { 68.86 + // Can't reserve heap below 32Gb. 
68.87 + Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size()); 68.88 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 68.89 + if (PrintCompressedOopsMode) { 68.90 + tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base()); 68.91 + } 68.92 + } else { 68.93 + Universe::set_narrow_oop_base(0); 68.94 + if (PrintCompressedOopsMode) { 68.95 + tty->print(", zero based Compressed Oops"); 68.96 + } 68.97 +#ifdef _WIN64 68.98 + if (!Universe::narrow_oop_use_implicit_null_checks()) { 68.99 + // Don't need guard page for implicit checks in indexed addressing 68.100 + // mode with zero based Compressed Oops. 68.101 + Universe::set_narrow_oop_use_implicit_null_checks(true); 68.102 + } 68.103 +#endif // _WIN64 68.104 + if((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) { 68.105 + // Can't reserve heap below 4Gb. 68.106 + Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); 68.107 + } else { 68.108 + assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop"); 68.109 + if (PrintCompressedOopsMode) { 68.110 + tty->print(", 32-bits Oops"); 68.111 + } 68.112 + } 68.113 + } 68.114 + if (PrintCompressedOopsMode) { 68.115 + tty->cr(); 68.116 + tty->cr(); 68.117 + } 68.118 } 68.119 + assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) || 68.120 + Universe::narrow_oop_base() == NULL, "invalid value"); 68.121 + assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes || 68.122 + Universe::narrow_oop_shift() == 0, "invalid value"); 68.123 +#endif 68.124 68.125 // We will never reach the CATCH below since Exceptions::_throw will cause 68.126 // the VM to exit if an exception is thrown during initialization
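preferred_heap_base() and initialize_heap() together implement the three-tier scheme spelled out in the comment above: if HeapBaseMinAddress plus the heap size fits under 4Gb, place the heap so its top touches the 4Gb boundary and use raw 32-bit oops (base 0, shift 0); under 32Gb, put the top at the 32Gb boundary and use zero-based scaled oops (shift only); otherwise decode with full base + shift. (HeapBaseMinAddress gets its per-platform defaults in the globals_*.hpp hunks above: 4*G on SPARC, 2*G on Linux and Windows x86, 256*M on Solaris x86.) The boundary arithmetic, as a simplified 64-bit sketch assuming 8-byte object alignment, i.e. shift 3 (hypothetical function, not the HotSpot code):

    #include <stdint.h>
    #include <stddef.h>

    static const uint64_t kUnscaledMax = UINT64_C(1) << 32;  // 4Gb
    static const uint64_t kScaledMax   = kUnscaledMax << 3;  // 32Gb at shift 3

    // Pick an attach address so the heap *top* lands on the boundary that
    // keeps the cheapest encoding; NULL means base+shift decoding is needed.
    static char* preferred_base_sketch(size_t heap_size, size_t min_addr) {
      uint64_t total = (uint64_t)heap_size + min_addr;
      if (total <= kUnscaledMax) return (char*)(uintptr_t)(kUnscaledMax - heap_size);
      if (total <= kScaledMax)   return (char*)(uintptr_t)(kScaledMax - heap_size);
      return NULL;  // HeapBased mode
    }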
69.1 --- a/src/share/vm/memory/universe.hpp Fri Mar 20 22:08:48 2009 -0400 69.2 +++ b/src/share/vm/memory/universe.hpp Mon Mar 23 10:42:20 2009 -0400 69.3 @@ -90,6 +90,19 @@ 69.4 methodOop get_methodOop(); 69.5 }; 69.6 69.7 +// For UseCompressedOops. 69.8 +struct NarrowOopStruct { 69.9 + // Base address for oop-within-java-object materialization. 69.10 + // NULL if using wide oops or zero based narrow oops. 69.11 + address _base; 69.12 + // Number of shift bits for encoding/decoding narrow oops. 69.13 + // 0 if using wide oops or zero based unscaled narrow oops, 69.14 + // LogMinObjAlignmentInBytes otherwise. 69.15 + int _shift; 69.16 + // Generate code with implicit null checks for narrow oops. 69.17 + bool _use_implicit_null_checks; 69.18 +}; 69.19 + 69.20 69.21 class Universe: AllStatic { 69.22 // Ugh. Universe is much too friendly. 69.23 @@ -181,9 +194,9 @@ 69.24 69.25 // The particular choice of collected heap. 69.26 static CollectedHeap* _collectedHeap; 69.27 - // Base address for oop-within-java-object materialization. 69.28 - // NULL if using wide oops. Doubles as heap oop null value. 69.29 - static address _heap_base; 69.30 + 69.31 + // For UseCompressedOops. 69.32 + static struct NarrowOopStruct _narrow_oop; 69.33 69.34 // array of dummy objects used with +FullGCAlot 69.35 debug_only(static objArrayOop _fullgc_alot_dummy_array;) 69.36 @@ -328,8 +341,25 @@ 69.37 static CollectedHeap* heap() { return _collectedHeap; } 69.38 69.39 // For UseCompressedOops 69.40 - static address heap_base() { return _heap_base; } 69.41 - static address* heap_base_addr() { return &_heap_base; } 69.42 + static address* narrow_oop_base_addr() { return &_narrow_oop._base; } 69.43 + static address narrow_oop_base() { return _narrow_oop._base; } 69.44 + static int narrow_oop_shift() { return _narrow_oop._shift; } 69.45 + static void set_narrow_oop_base(address base) { _narrow_oop._base = base; } 69.46 + static void set_narrow_oop_shift(int shift) { _narrow_oop._shift = shift; } 69.47 + static bool narrow_oop_use_implicit_null_checks() { return _narrow_oop._use_implicit_null_checks; } 69.48 + static void set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; } 69.49 + // Narrow Oop encoding mode: 69.50 + // 0 - Use 32-bits oops without encoding when 69.51 + // NarrowOopHeapBaseMin + heap_size < 4Gb 69.52 + // 1 - Use zero based compressed oops with encoding when 69.53 + // NarrowOopHeapBaseMin + heap_size < 32Gb 69.54 + // 2 - Use compressed oops with heap base + encoding. 69.55 + enum NARROW_OOP_MODE { 69.56 + UnscaledNarrowOop = 0, 69.57 + ZeroBasedNarrowOop = 1, 69.58 + HeapBasedNarrowOop = 2 69.59 + }; 69.60 + static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode); 69.61 69.62 // Historic gc information 69.63 static size_t get_heap_capacity_at_last_gc() { return _heap_capacity_at_last_gc; }
70.1 --- a/src/share/vm/oops/oop.inline.hpp Fri Mar 20 22:08:48 2009 -0400
70.2 +++ b/src/share/vm/oops/oop.inline.hpp Mon Mar 23 10:42:20 2009 -0400
70.3 @@ -148,10 +148,11 @@
70.4 
70.5  inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
70.6    assert(!is_null(v), "oop value can never be zero");
70.7 -  address heap_base = Universe::heap_base();
70.8 -  uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1));
70.9 +  address base = Universe::narrow_oop_base();
70.10 +  int    shift = Universe::narrow_oop_shift();
70.11 +  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
70.12    assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
70.13 -  uint64_t result = pd >> LogMinObjAlignmentInBytes;
70.14 +  uint64_t result = pd >> shift;
70.15    assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
70.16    return (narrowOop)result;
70.17  }
70.18 @@ -162,8 +163,9 @@
70.19 
70.20  inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
70.21    assert(!is_null(v), "narrow oop value can never be zero");
70.22 -  address heap_base = Universe::heap_base();
70.23 -  return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes));
70.24 +  address base = Universe::narrow_oop_base();
70.25 +  int    shift = Universe::narrow_oop_shift();
70.26 +  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
70.27  }
70.28 
70.29  inline oop oopDesc::decode_heap_oop(narrowOop v) {
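As a round-trip check of the encode/decode pair above, with made-up constants (the base value and object address are illustrative, not anything the VM guarantees):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t base  = 0x7f0000000000ULL;  // hypothetical heap base
      const int       shift = 3;                  // 8-byte object alignment
      const uintptr_t oop   = base + 0x1238;      // an aligned object address

      uint32_t  narrow = (uint32_t)((oop - base) >> shift);   // encode: 0x247
      uintptr_t back   = base + ((uintptr_t)narrow << shift); // decode
      assert(back == oop);                                    // round trip holds
      return 0;
    }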
71.1 --- a/src/share/vm/opto/addnode.cpp Fri Mar 20 22:08:48 2009 -0400
71.2 +++ b/src/share/vm/opto/addnode.cpp Mon Mar 23 10:42:20 2009 -0400
71.3 @@ -756,7 +756,13 @@
71.4      if ( eti == NULL ) {
71.5        // there must be one pointer among the operands
71.6        guarantee(tptr == NULL, "must be only one pointer operand");
71.7 -      tptr = et->isa_oopptr();
71.8 +      if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
71.9 +        // 32-bits narrow oop can be the base of address expressions
71.10 +        tptr = et->make_ptr()->isa_oopptr();
71.11 +      } else {
71.12 +        // only regular oops are expected here
71.13 +        tptr = et->isa_oopptr();
71.14 +      }
71.15        guarantee(tptr != NULL, "non-int operand must be pointer");
71.16        if (tptr->higher_equal(tp->add_offset(tptr->offset())))
71.17          tp = tptr; // Set more precise type for bailout
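This addnode.cpp change, and the matching lcm.cpp change further down, handle the same new shape: with unscaled compressed oops (shift == 0) a narrowOop value is numerically identical to the object address, so a DecodeN output can legally appear as the base of an address expression. The helper below is hypothetical but shows why make_ptr() is needed: isa_oopptr() on a narrow-oop type yields NULL, so the type must first be widened to its pointer equivalent.

    // Hypothetical helper, same logic as the two patched code paths.
    const TypePtr* address_base_type(const Type* et) {
      if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
        // narrow oop used directly as an address base
        return et->make_ptr()->isa_oopptr();
      }
      return et->isa_oopptr();  // only regular oops otherwise
    }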
72.1 --- a/src/share/vm/opto/classes.hpp Fri Mar 20 22:08:48 2009 -0400
72.2 +++ b/src/share/vm/opto/classes.hpp Mon Mar 23 10:42:20 2009 -0400
72.3 @@ -184,6 +184,8 @@
72.4  macro(Parm)
72.5  macro(PartialSubtypeCheck)
72.6  macro(Phi)
72.7 +macro(PopCountI)
72.8 +macro(PopCountL)
72.9  macro(PowD)
72.10 macro(PrefetchRead)
72.11 macro(PrefetchWrite)
73.1 --- a/src/share/vm/opto/compile.cpp Fri Mar 20 22:08:48 2009 -0400
73.2 +++ b/src/share/vm/opto/compile.cpp Mon Mar 23 10:42:20 2009 -0400
73.3 @@ -2081,7 +2081,7 @@
73.4 
73.5  #ifdef _LP64
73.6    case Op_CastPP:
73.7 -    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
73.8 +    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
73.9        Compile* C = Compile::current();
73.10       Node* in1 = n->in(1);
73.11       const Type* t = n->bottom_type();
73.12 @@ -2136,7 +2136,7 @@
73.13         new_in2 = in2->in(1);
73.14       } else if (in2->Opcode() == Op_ConP) {
73.15         const Type* t = in2->bottom_type();
73.16 -        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
73.17 +        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
73.18           new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
73.19           //
73.20           // This transformation together with CastPP transformation above
74.1 --- a/src/share/vm/opto/connode.cpp Fri Mar 20 22:08:48 2009 -0400
74.2 +++ b/src/share/vm/opto/connode.cpp Mon Mar 23 10:42:20 2009 -0400
74.3 @@ -433,7 +433,7 @@
74.4  // If not converting int->oop, throw away cast after constant propagation
74.5  Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
74.6    const Type *t = ccp->type(in(1));
74.7 -  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
74.8 +  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
74.9      return NULL; // do not transform raw pointers or narrow oops
74.10   }
74.11   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
75.1 --- a/src/share/vm/opto/connode.hpp Fri Mar 20 22:08:48 2009 -0400
75.2 +++ b/src/share/vm/opto/connode.hpp Mon Mar 23 10:42:20 2009 -0400
75.3 @@ -1,5 +1,5 @@
75.4  /*
75.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
75.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
75.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
75.8   *
75.9   * This code is free software; you can redistribute it and/or modify it
75.10 @@ -635,3 +635,23 @@
75.11    virtual uint ideal_reg() const { return Op_RegL; }
75.12    virtual const Type* Value( PhaseTransform *phase ) const;
75.13  };
75.14 +
75.15 +//---------- PopCountINode -----------------------------------------------------
75.16 +// Population count (bit count) of an integer.
75.17 +class PopCountINode : public Node {
75.18 +public:
75.19 +  PopCountINode(Node* in1) : Node(0, in1) {}
75.20 +  virtual int Opcode() const;
75.21 +  const Type* bottom_type() const { return TypeInt::INT; }
75.22 +  virtual uint ideal_reg() const { return Op_RegI; }
75.23 +};
75.24 +
75.25 +//---------- PopCountLNode -----------------------------------------------------
75.26 +// Population count (bit count) of a long.
75.27 +class PopCountLNode : public Node {
75.28 +public:
75.29 +  PopCountLNode(Node* in1) : Node(0, in1) {}
75.30 +  virtual int Opcode() const;
75.31 +  const Type* bottom_type() const { return TypeInt::INT; }
75.32 +  virtual uint ideal_reg() const { return Op_RegI; }
75.33 +};
76.1 --- a/src/share/vm/opto/graphKit.cpp Fri Mar 20 22:08:48 2009 -0400
76.2 +++ b/src/share/vm/opto/graphKit.cpp Mon Mar 23 10:42:20 2009 -0400
76.3 @@ -2277,7 +2277,7 @@
76.4    r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
76.5    set_control(               _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
76.6 
76.7 -  // Check for self.  Very rare to get here, but its taken 1/3 the time.
76.8 +  // Check for self.  Very rare to get here, but it is taken 1/3 the time.
76.9    // No performance impact (too rare) but allows sharing of secondary arrays
76.10   // which has some footprint reduction.
76.11   Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
76.12 @@ -2286,11 +2286,27 @@
76.13   r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
76.14   set_control(              _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
76.15 
76.16 +  // -- Roads not taken here: --
76.17 +  // We could also have chosen to perform the self-check at the beginning
76.18 +  // of this code sequence, as the assembler does.  This would not pay off
76.19 +  // the same way, since the optimizer, unlike the assembler, can perform
76.20 +  // static type analysis to fold away many successful self-checks.
76.21 +  // Non-foldable self checks work better here in second position, because
76.22 +  // the initial primary superclass check subsumes a self-check for most
76.23 +  // types.  An exception would be a secondary type like array-of-interface,
76.24 +  // which does not appear in its own primary supertype display.
76.25 +  // Finally, we could have chosen to move the self-check into the
76.26 +  // PartialSubtypeCheckNode, and from there out-of-line in a platform
76.27 +  // dependent manner.  But it is worthwhile to have the check here,
76.28 +  // where it can perhaps be optimized.  The cost in code space is
76.29 +  // small (register compare, branch).
76.30 +
76.31   // Now do a linear scan of the secondary super-klass array.  Again, no real
76.32   // performance impact (too rare) but it's gotta be done.
76.33 -  // (The stub also contains the self-check of subklass == superklass.
76.34   // Since the code is rarely used, there is no penalty for moving it
76.35 -  // out of line, and it can only improve I-cache density.)
76.36 +  // out of line, and it can only improve I-cache density.
76.37 +  // The decision to inline or out-of-line this final check is platform
76.38 +  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
76.39   Node* psc = _gvn.transform(
76.40     new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
76.41 
77.1 --- a/src/share/vm/opto/lcm.cpp Fri Mar 20 22:08:48 2009 -0400
77.2 +++ b/src/share/vm/opto/lcm.cpp Mon Mar 23 10:42:20 2009 -0400
77.3 @@ -158,7 +158,14 @@
77.4          continue;             // Give up if offset is beyond page size
77.5        // cannot reason about it; is probably not implicit null exception
77.6      } else {
77.7 -      const TypePtr* tptr = base->bottom_type()->is_ptr();
77.8 +      const TypePtr* tptr;
77.9 +      if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
77.10 +        // 32-bits narrow oop can be the base of address expressions
77.11 +        tptr = base->bottom_type()->make_ptr();
77.12 +      } else {
77.13 +        // only regular oops are expected here
77.14 +        tptr = base->bottom_type()->is_ptr();
77.15 +      }
77.16       // Give up if offset is not a compile-time constant
77.17       if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
77.18         continue;
78.1 --- a/src/share/vm/opto/library_call.cpp Fri Mar 20 22:08:48 2009 -0400
78.2 +++ b/src/share/vm/opto/library_call.cpp Mon Mar 23 10:42:20 2009 -0400
78.3 @@ -1,5 +1,5 @@
78.4  /*
78.5 - * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
78.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
78.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
78.8   *
78.9   * This code is free software; you can redistribute it and/or modify it
78.10 @@ -221,6 +221,7 @@
78.11    bool inline_unsafe_CAS(BasicType type);
78.12    bool inline_unsafe_ordered_store(BasicType type);
78.13    bool inline_fp_conversions(vmIntrinsics::ID id);
78.14 +  bool inline_bitCount(vmIntrinsics::ID id);
78.15    bool inline_reverseBytes(vmIntrinsics::ID id);
78.16  };
78.17 
78.18 @@ -314,6 +315,11 @@
78.19      if (!JDK_Version::is_gte_jdk14x_version()) return NULL;
78.20      break;
78.21 
78.22 +  case vmIntrinsics::_bitCount_i:
78.23 +  case vmIntrinsics::_bitCount_l:
78.24 +    if (!UsePopCountInstruction) return NULL;
78.25 +    break;
78.26 +
78.27    default:
78.28      break;
78.29    }
78.30 @@ -617,6 +623,10 @@
78.31    case vmIntrinsics::_longBitsToDouble:
78.32      return inline_fp_conversions(intrinsic_id());
78.33 
78.34 +  case vmIntrinsics::_bitCount_i:
78.35 +  case vmIntrinsics::_bitCount_l:
78.36 +    return inline_bitCount(intrinsic_id());
78.37 +
78.38    case vmIntrinsics::_reverseBytes_i:
78.39    case vmIntrinsics::_reverseBytes_l:
78.40      return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
78.41 @@ -1714,6 +1724,27 @@
78.42    }
78.43  }
78.44 
78.45 +//----------------------------inline_bitCount_int/long-----------------------
78.46 +// inline int Integer.bitCount(int)
78.47 +// inline int Long.bitCount(long)
78.48 +bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
78.49 +  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
78.50 +  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
78.51 +  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
78.52 +  _sp += arg_size();  // restore stack pointer
78.53 +  switch (id) {
78.54 +  case vmIntrinsics::_bitCount_i:
78.55 +    push(_gvn.transform(new (C, 2) PopCountINode(pop())));
78.56 +    break;
78.57 +  case vmIntrinsics::_bitCount_l:
78.58 +    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
78.59 +    break;
78.60 +  default:
78.61 +    ShouldNotReachHere();
78.62 +  }
78.63 +  return true;
78.64 +}
78.65 +
78.66 //----------------------------inline_reverseBytes_int/long-------------------
78.67 // inline Integer.reverseBytes(int)
78.68 // inline Long.reverseBytes(long)
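The intrinsic above only rewires the ideal graph; the PopCountI/L node must keep the exact semantics of Integer.bitCount/Long.bitCount. For reference, the 32-bit case reduces to the classic SWAR population count, shown here in standalone C++ as the software baseline a hardware popcount instruction replaces:

    #include <cstdint>

    // Parallel bit count: sum bits in 2-, 4-, then 8/16/32-bit groups.
    inline int pop_count_32(uint32_t i) {
      i = i - ((i >> 1) & 0x55555555u);
      i = (i & 0x33333333u) + ((i >> 2) & 0x33333333u);
      i = (i + (i >> 4)) & 0x0f0f0f0fu;
      i = i + (i >> 8);
      i = i + (i >> 16);
      return (int)(i & 0x3f);  // at most 32 set bits
    }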
79.1 --- a/src/share/vm/opto/matcher.cpp Fri Mar 20 22:08:48 2009 -0400
79.2 +++ b/src/share/vm/opto/matcher.cpp Mon Mar 23 10:42:20 2009 -0400
79.3 @@ -1481,8 +1481,13 @@
79.4      const Type* mach_at = mach->adr_type();
79.5      // DecodeN node consumed by an address may have different type
79.6      // than its input. Don't compare types for such a case.
79.7 -    if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
79.8 -        m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
79.9 +    if (m->adr_type() != mach_at &&
79.10 +        (m->in(MemNode::Address)->is_DecodeN() ||
79.11 +         m->in(MemNode::Address)->is_AddP() &&
79.12 +         m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
79.13 +         m->in(MemNode::Address)->is_AddP() &&
79.14 +         m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
79.15 +         m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
79.16       mach_at = m->adr_type();
79.17     }
79.18     if (m->adr_type() != mach_at) {
80.1 --- a/src/share/vm/runtime/arguments.cpp Fri Mar 20 22:08:48 2009 -0400
80.2 +++ b/src/share/vm/runtime/arguments.cpp Mon Mar 23 10:42:20 2009 -0400
80.3 @@ -1211,7 +1211,9 @@
80.4    if (UseLargePages && UseCompressedOops) {
80.5      // Cannot allocate guard pages for implicit checks in indexed addressing
80.6      // mode, when large pages are specified on windows.
80.7 -    FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false);
80.8 +    // This flag could be switched ON if narrow oop base address is set to 0,
80.9 +    // see code in Universe::initialize_heap().
80.10 +    Universe::set_narrow_oop_use_implicit_null_checks(false);
80.11   }
80.12 #endif //  _WIN64
80.13  } else {
81.1 --- a/src/share/vm/runtime/globals.hpp Fri Mar 20 22:08:48 2009 -0400
81.2 +++ b/src/share/vm/runtime/globals.hpp Mon Mar 23 10:42:20 2009 -0400
81.3 @@ -303,11 +303,14 @@
81.4            "Use 32-bit object references in 64-bit VM. "                   \
81.5            "lp64_product means flag is always constant in 32 bit VM")      \
81.6                                                                            \
81.7 -  lp64_product(bool, CheckCompressedOops, trueInDebug,                    \
81.8 -          "generate checks in encoding/decoding code")                    \
81.9 -                                                                          \
81.10 -  product(bool, UseImplicitNullCheckForNarrowOop, true,                   \
81.11 -          "generate implicit null check in indexed addressing mode.")     \
81.12 +  notproduct(bool, CheckCompressedOops, true,                             \
81.13 +          "generate checks in encoding/decoding code in debug VM")        \
81.14 +                                                                          \
81.15 +  product_pd(uintx, HeapBaseMinAddress,                                   \
81.16 +          "OS specific low limit for heap base address")                  \
81.17 +                                                                          \
81.18 +  diagnostic(bool, PrintCompressedOopsMode, false,                        \
81.19 +          "Print compressed oops base address and encoding mode")         \
81.20                                                                            \
81.21   /* UseMembar is theoretically a temp flag used for memory barrier       \
81.22    * removal testing.  It was supposed to be removed before FCS but has   \
81.23 @@ -2169,6 +2172,9 @@
81.24   diagnostic(bool, PrintIntrinsics, false,                                 \
81.25           "prints attempted and successful inlining of intrinsics")        \
81.26                                                                            \
81.27 +  product(bool, UsePopCountInstruction, false,                            \
81.28 +          "Use population count instruction")                             \
81.29 +                                                                          \
81.30   diagnostic(ccstrlist, DisableIntrinsic, "",                              \
81.31           "do not expand intrinsics whose (internal) names appear here")   \
81.32                                                                            \
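Usage note for the new flags: PrintCompressedOopsMode is declared diagnostic, so on a 64-bit VM the encoding-mode trace would be enabled roughly as

    java -XX:+UseCompressedOops -XX:+UnlockDiagnosticVMOptions \
         -XX:+PrintCompressedOopsMode -version

UsePopCountInstruction defaults to false here, and per the library_call.cpp hunk above it only takes effect where the matcher actually has a PopCountI/PopCountL match rule.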
82.1 --- a/src/share/vm/runtime/os.hpp Fri Mar 20 22:08:48 2009 -0400
82.2 +++ b/src/share/vm/runtime/os.hpp Mon Mar 23 10:42:20 2009 -0400
82.3 @@ -243,7 +243,7 @@
82.4 
82.5    static char* non_memory_address_word();
82.6    // reserve, commit and pin the entire memory region
82.7 -  static char* reserve_memory_special(size_t size);
82.8 +  static char* reserve_memory_special(size_t size, char* addr = NULL);
82.9    static bool release_memory_special(char* addr, size_t bytes);
82.10   static bool large_page_init();
82.11   static size_t large_page_size();
83.1 --- a/src/share/vm/runtime/virtualspace.cpp Fri Mar 20 22:08:48 2009 -0400
83.2 +++ b/src/share/vm/runtime/virtualspace.cpp Mon Mar 23 10:42:20 2009 -0400
83.3 @@ -109,6 +109,7 @@
83.4                               const size_t prefix_align,
83.5                               const size_t suffix_size,
83.6                               const size_t suffix_align,
83.7 +                             char* requested_address,
83.8                               const size_t noaccess_prefix)
83.9  {
83.10   assert(prefix_size != 0, "sanity");
83.11 @@ -131,7 +132,7 @@
83.12   const bool try_reserve_special = UseLargePages &&
83.13     prefix_align == os::large_page_size();
83.14   if (!os::can_commit_large_page_memory() && try_reserve_special) {
83.15 -    initialize(size, prefix_align, true, NULL, noaccess_prefix);
83.16 +    initialize(size, prefix_align, true, requested_address, noaccess_prefix);
83.17     return;
83.18   }
83.19 
83.20 @@ -146,7 +147,13 @@
83.21          noaccess_prefix == prefix_align, "noaccess prefix wrong");
83.22 
83.23   // Optimistically try to reserve the exact size needed.
83.24 -  char* addr = os::reserve_memory(size, NULL, prefix_align);
83.25 +  char* addr;
83.26 +  if (requested_address != 0) {
83.27 +    addr = os::attempt_reserve_memory_at(size,
83.28 +                                         requested_address-noaccess_prefix);
83.29 +  } else {
83.30 +    addr = os::reserve_memory(size, NULL, prefix_align);
83.31 +  }
83.32   if (addr == NULL) return;
83.33 
83.34   // Check whether the result has the needed alignment (unlikely unless
83.35 @@ -206,12 +213,8 @@
83.36   char* base = NULL;
83.37 
83.38   if (special) {
83.39 -    // It's not hard to implement reserve_memory_special() such that it can
83.40 -    // allocate at fixed address, but there seems no use of this feature
83.41 -    // for now, so it's not implemented.
83.42 -    assert(requested_address == NULL, "not implemented");
83.43 
83.44 -    base = os::reserve_memory_special(size);
83.45 +    base = os::reserve_memory_special(size, requested_address);
83.46 
83.47     if (base != NULL) {
83.48       // Check alignment constraints
83.49 @@ -372,7 +375,8 @@
83.50                                  bool large, char* requested_address) :
83.51   ReservedSpace(size, alignment, large,
83.52                 requested_address,
83.53 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
83.54 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
83.55 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
83.56                 lcm(os::vm_page_size(), alignment) : 0) {
83.57   // Only reserved space for the java heap should have a noaccess_prefix
83.58   // if using compressed oops.
83.59 @@ -382,9 +386,12 @@
83.60  ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
83.61                                       const size_t prefix_align,
83.62                                       const size_t suffix_size,
83.63 -                                     const size_t suffix_align) :
83.64 +                                     const size_t suffix_align,
83.65 +                                     char* requested_address) :
83.66   ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
83.67 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
83.68 +                requested_address,
83.69 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
83.70 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
83.71                 lcm(os::vm_page_size(), prefix_align) : 0) {
83.72   protect_noaccess_prefix(prefix_size+suffix_size);
83.73  }
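The reservation strategy above amounts to "try the preferred base, else reserve anywhere". A simplified sketch (error handling, alignment retries, and the large-page path omitted; requested_address would come from Universe::preferred_heap_base() and may be NULL):

    // Sketch only - not the actual ReservedSpace logic.
    char* reserve_heap(size_t size, size_t align,
                       char* requested_address, size_t noaccess_prefix) {
      if (requested_address != NULL) {
        // Pin the heap at the preferred base so the chosen narrow-oop
        // mode (unscaled or zero based) remains valid; may fail and
        // return NULL, letting the caller retry without a fixed address.
        return os::attempt_reserve_memory_at(size,
                                             requested_address - noaccess_prefix);
      }
      return os::reserve_memory(size, NULL, align);
    }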
84.1 --- a/src/share/vm/runtime/virtualspace.hpp Fri Mar 20 22:08:48 2009 -0400
84.2 +++ b/src/share/vm/runtime/virtualspace.hpp Mon Mar 23 10:42:20 2009 -0400
84.3 @@ -73,7 +73,8 @@
84.4                  const size_t noaccess_prefix = 0);
84.5    ReservedSpace(const size_t prefix_size, const size_t prefix_align,
84.6                  const size_t suffix_size, const size_t suffix_align,
84.7 -                const size_t noaccess_prefix);
84.8 +                char* requested_address,
84.9 +                const size_t noaccess_prefix = 0);
84.10 
84.11   // Accessors
84.12   char* base() const { return _base; }
84.13 @@ -121,7 +122,8 @@
84.14   ReservedHeapSpace(size_t size, size_t forced_base_alignment,
84.15                     bool large, char* requested_address);
84.16   ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
84.17 -                    const size_t suffix_size, const size_t suffix_align);
84.18 +                    const size_t suffix_size, const size_t suffix_align,
84.19 +                    char* requested_address);
84.20 };
84.21 
84.22 // VirtualSpace is data structure for committing a previously reserved address range in smaller chunks.
85.1 --- a/src/share/vm/runtime/vmStructs.cpp Fri Mar 20 22:08:48 2009 -0400
85.2 +++ b/src/share/vm/runtime/vmStructs.cpp Mon Mar 23 10:42:20 2009 -0400
85.3 @@ -263,7 +263,9 @@
85.4    static_field(Universe, _bootstrapping, bool)                            \
85.5    static_field(Universe, _fully_initialized, bool)                        \
85.6    static_field(Universe, _verify_count, int)                              \
85.7 -  static_field(Universe, _heap_base, address)                             \
85.8 +  static_field(Universe, _narrow_oop._base, address)                      \
85.9 +  static_field(Universe, _narrow_oop._shift, int)                         \
85.10 + static_field(Universe, _narrow_oop._use_implicit_null_checks, bool)     \
85.11                                                                           \
85.12  /**********************************************************************************/ \
85.13  /* Generation and Space hierarchies */                                   \
86.1 --- a/src/share/vm/runtime/vm_version.cpp Fri Mar 20 22:08:48 2009 -0400
86.2 +++ b/src/share/vm/runtime/vm_version.cpp Mon Mar 23 10:42:20 2009 -0400
86.3 @@ -163,9 +163,11 @@
86.4    #elif _MSC_VER == 1200
86.5      #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
86.6    #elif _MSC_VER == 1310
86.7 -    #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1"
86.8 +    #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
86.9    #elif _MSC_VER == 1400
86.10 -    #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0"
86.11 +    #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
86.12 +  #elif _MSC_VER == 1500
86.13 +    #define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
86.14   #else
86.15     #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
86.16   #endif
87.1 --- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp Fri Mar 20 22:08:48 2009 -0400
87.2 +++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp Mon Mar 23 10:42:20 2009 -0400
87.3 @@ -162,7 +162,7 @@
87.4  }
87.5 
87.6  // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
87.7 -#if _MSC_VER >= 1400 && !defined(_WIN64)
87.8 +#if _MSC_VER >= 1400
87.9  #define open _open
87.10 #define close _close
87.11 #define read _read
88.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
88.2 +++ b/test/compiler/6378821/Test6378821.java Mon Mar 23 10:42:20 2009 -0400
88.3 @@ -0,0 +1,75 @@
88.4 +/*
88.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
88.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
88.7 + *
88.8 + * This code is free software; you can redistribute it and/or modify it
88.9 + * under the terms of the GNU General Public License version 2 only, as
88.10 + * published by the Free Software Foundation.
88.11 + *
88.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
88.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
88.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
88.15 + * version 2 for more details (a copy is included in the LICENSE file that
88.16 + * accompanied this code).
88.17 + *
88.18 + * You should have received a copy of the GNU General Public License version
88.19 + * 2 along with this work; if not, write to the Free Software Foundation,
88.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
88.21 + *
88.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
88.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
88.24 + * have any questions.
88.25 + */
88.26 +
88.27 +/**
88.28 + * @test
88.29 + * @bug 6378821
88.30 + * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
88.31 + *
88.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
88.33 + */
88.34 +
88.35 +public class Test6378821 {
88.36 +    static final int[]  ia = new int[]  { 0x12345678 };
88.37 +    static final long[] la = new long[] { 0x12345678abcdefL };
88.38 +
88.39 +    public static void main(String [] args) {
88.40 +        // Resolve the class and the method.
88.41 +        Integer.bitCount(1);
88.42 +        Long.bitCount(1);
88.43 +
88.44 +        sub(ia[0]);
88.45 +        sub(la[0]);
88.46 +        sub(ia);
88.47 +        sub(la);
88.48 +    }
88.49 +
88.50 +    static void check(int i, int expected, int result) {
88.51 +        if (result != expected) {
88.52 +            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
88.53 +        }
88.54 +    }
88.55 +
88.56 +    static void check(long l, int expected, int result) {
88.57 +        if (result != expected) {
88.58 +            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
88.59 +        }
88.60 +    }
88.61 +
88.62 +    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }
88.63 +    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
88.64 +    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
88.65 +    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
88.66 +
88.67 +    static int fint (int i)     { return Integer.bitCount(i); }
88.68 +    static int fcomp(int i)     { return Integer.bitCount(i); }
88.69 +
88.70 +    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
88.71 +    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
88.72 +
88.73 +    static int fint (long l)    { return Long.bitCount(l); }
88.74 +    static int fcomp(long l)    { return Long.bitCount(l); }
88.75 +
88.76 +    static int fint (long[] la) { return Long.bitCount(la[0]); }
88.77 +    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
88.78 +}
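A note on the test's design: -XX:CompileOnly=Test6378821.fcomp restricts compilation (under -Xcomp) to the fcomp variants, so the intrinsic above is expanded only there, while the structurally identical fint variants run interpreted. Each sub() call then compares the two results, so a miscompiled PopCountI/PopCountL node surfaces as an InternalError; the scalar and array overloads exercise both constant and memory-loaded inputs.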