Merge

Thu, 19 Mar 2009 09:13:24 -0700

author
kvn
date
Thu, 19 Mar 2009 09:13:24 -0700
changeset 1082
bd441136a5ce
parent 1075
ba50942c8138
parent 1081
039a914095f4
child 1084
59f139e8a8d1
child 1090
60bfce711da4
child 1100
c89f86385056

Merge

src/cpu/sparc/vm/sparc.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/assembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/c1_LIRAssembler_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/c1_Runtime1_x86.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/interp_masm_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/interp_masm_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/stubGenerator_x86_32.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/stubGenerator_x86_64.cpp file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_32.ad file | annotate | diff | comparison | revisions
src/cpu/x86/vm/x86_64.ad file | annotate | diff | comparison | revisions
src/os/linux/vm/os_linux.cpp file | annotate | diff | comparison | revisions
src/os/solaris/vm/os_solaris.cpp file | annotate | diff | comparison | revisions
src/os/windows/vm/os_windows.cpp file | annotate | diff | comparison | revisions
src/share/vm/classfile/vmSymbols.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/concurrentMark.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp file | annotate | diff | comparison | revisions
src/share/vm/includeDB_core file | annotate | diff | comparison | revisions
src/share/vm/memory/genCollectedHeap.cpp file | annotate | diff | comparison | revisions
src/share/vm/memory/universe.cpp file | annotate | diff | comparison | revisions
src/share/vm/memory/universe.hpp file | annotate | diff | comparison | revisions
src/share/vm/oops/oop.inline.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/classes.hpp file | annotate | diff | comparison | revisions
src/share/vm/opto/compile.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/graphKit.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/lcm.cpp file | annotate | diff | comparison | revisions
src/share/vm/opto/matcher.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/arguments.cpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/globals.hpp file | annotate | diff | comparison | revisions
src/share/vm/runtime/os.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Wed Mar 18 11:37:48 2009 -0400
     1.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/Debugger.java	Thu Mar 19 09:13:24 2009 -0700
     1.3 @@ -118,9 +118,9 @@
     1.4    public long getJIntSize();
     1.5    public long getJLongSize();
     1.6    public long getJShortSize();
     1.7 -  public long getHeapBase();
     1.8    public long getHeapOopSize();
     1.9 -  public long getLogMinObjAlignmentInBytes();
    1.10 +  public long getNarrowOopBase();
    1.11 +  public int  getNarrowOopShift();
    1.12  
    1.13    public ReadResult readBytesFromProcess(long address, long numBytes)
    1.14      throws DebuggerException;
     2.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Wed Mar 18 11:37:48 2009 -0400
     2.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java	Thu Mar 19 09:13:24 2009 -0700
     2.3 @@ -56,8 +56,8 @@
     2.4    // heap data.
     2.5    protected long oopSize;
     2.6    protected long heapOopSize;
     2.7 -  protected long heapBase;                 // heap base for compressed oops.
     2.8 -  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
     2.9 +  protected long narrowOopBase;  // heap base for compressed oops.
    2.10 +  protected int  narrowOopShift; // shift to decode compressed oops.
    2.11    // Should be initialized if desired by calling initCache()
    2.12    private PageCache cache;
    2.13  
    2.14 @@ -159,10 +159,10 @@
    2.15      javaPrimitiveTypesConfigured = true;
    2.16    }
    2.17  
    2.18 -  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
    2.19 -    this.heapBase = heapBase;
    2.20 +  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
    2.21      this.heapOopSize = heapOopSize;
    2.22 -    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
    2.23 +    this.narrowOopBase = narrowOopBase;
    2.24 +    this.narrowOopShift = narrowOopShift;
    2.25    }
    2.26  
    2.27    /** May be called by subclasses if desired to initialize the page
    2.28 @@ -459,7 +459,7 @@
    2.29      long value = readCInteger(address, getHeapOopSize(), true);
    2.30      if (value != 0) {
    2.31        // See oop.inline.hpp decode_heap_oop
    2.32 -      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
    2.33 +      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
    2.34      }
    2.35      return value;
    2.36    }
    2.37 @@ -545,10 +545,10 @@
    2.38      return heapOopSize;
    2.39    }
    2.40  
    2.41 -  public long getHeapBase() {
    2.42 -    return heapBase;
    2.43 +  public long getNarrowOopBase() {
    2.44 +    return narrowOopBase;
    2.45    }
    2.46 -  public long getLogMinObjAlignmentInBytes() {
    2.47 -    return logMinObjAlignmentInBytes;
    2.48 +  public int getNarrowOopShift() {
    2.49 +    return narrowOopShift;
    2.50    }
    2.51  }
     3.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Wed Mar 18 11:37:48 2009 -0400
     3.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/JVMDebugger.java	Thu Mar 19 09:13:24 2009 -0700
     3.3 @@ -42,5 +42,5 @@
     3.4                                                long jintSize,
     3.5                                                long jlongSize,
     3.6                                                long jshortSize);
     3.7 -  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
     3.8 +  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
     3.9  }
     4.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Wed Mar 18 11:37:48 2009 -0400
     4.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebugger.java	Thu Mar 19 09:13:24 2009 -0700
     4.3 @@ -65,9 +65,10 @@
     4.4    public long      getJIntSize() throws RemoteException;
     4.5    public long      getJLongSize() throws RemoteException;
     4.6    public long      getJShortSize() throws RemoteException;
     4.7 -  public long      getHeapBase() throws RemoteException;
     4.8    public long      getHeapOopSize() throws RemoteException;
     4.9 -  public long      getLogMinObjAlignmentInBytes() throws RemoteException;
    4.10 +  public long      getNarrowOopBase() throws RemoteException;
    4.11 +  public int       getNarrowOopShift() throws RemoteException;
    4.12 +
    4.13    public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
    4.14                                     long addrOrId2, boolean isAddress2) throws RemoteException;
    4.15    public int       getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;
     5.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Wed Mar 18 11:37:48 2009 -0400
     5.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Thu Mar 19 09:13:24 2009 -0700
     5.3 @@ -85,9 +85,9 @@
     5.4        jlongSize    = remoteDebugger.getJLongSize();
     5.5        jshortSize   = remoteDebugger.getJShortSize();
     5.6        javaPrimitiveTypesConfigured = true;
     5.7 -      heapBase     = remoteDebugger.getHeapBase();
     5.8 +      narrowOopBase  = remoteDebugger.getNarrowOopBase();
     5.9 +      narrowOopShift = remoteDebugger.getNarrowOopShift();
    5.10        heapOopSize  = remoteDebugger.getHeapOopSize();
    5.11 -      logMinObjAlignmentInBytes  = remoteDebugger.getLogMinObjAlignmentInBytes();
    5.12      }
    5.13      catch (RemoteException e) {
    5.14        throw new DebuggerException(e);
     6.1 --- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Wed Mar 18 11:37:48 2009 -0400
     6.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerServer.java	Thu Mar 19 09:13:24 2009 -0700
     6.3 @@ -114,17 +114,18 @@
     6.4      return debugger.getJShortSize();
     6.5    }
     6.6  
     6.7 -  public long getHeapBase() throws RemoteException {
     6.8 -    return debugger.getHeapBase();
     6.9 -  }
    6.10 -
    6.11    public long getHeapOopSize() throws RemoteException {
    6.12      return debugger.getHeapOopSize();
    6.13    }
    6.14  
    6.15 -  public long getLogMinObjAlignmentInBytes() throws RemoteException {
    6.16 -    return debugger.getLogMinObjAlignmentInBytes();
    6.17 +  public long getNarrowOopBase() throws RemoteException {
    6.18 +    return debugger.getNarrowOopBase();
    6.19    }
    6.20 +
    6.21 +  public int  getNarrowOopShift() throws RemoteException {
    6.22 +    return debugger.getNarrowOopShift();
    6.23 +  }
    6.24 +
    6.25    public boolean   areThreadsEqual(long addrOrId1, boolean isAddress1,
    6.26                                     long addrOrId2, boolean isAddress2) throws RemoteException {
    6.27      ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);
     7.1 --- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Wed Mar 18 11:37:48 2009 -0400
     7.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Thu Mar 19 09:13:24 2009 -0700
     7.3 @@ -53,7 +53,8 @@
     7.4    // system obj array klass object
     7.5    private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;
     7.6  
     7.7 -  private static AddressField heapBaseField;
     7.8 +  private static AddressField narrowOopBaseField;
     7.9 +  private static CIntegerField narrowOopShiftField;
    7.10  
    7.11    static {
    7.12      VM.registerVMInitializedObserver(new Observer() {
    7.13 @@ -86,7 +87,8 @@
    7.14  
    7.15      systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");
    7.16  
    7.17 -    heapBaseField = type.getAddressField("_heap_base");
    7.18 +    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
    7.19 +    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
    7.20    }
    7.21  
    7.22    public Universe() {
    7.23 @@ -100,14 +102,18 @@
    7.24      }
    7.25    }
    7.26  
    7.27 -  public static long getHeapBase() {
    7.28 -    if (heapBaseField.getValue() == null) {
    7.29 +  public static long getNarrowOopBase() {
    7.30 +    if (narrowOopBaseField.getValue() == null) {
    7.31        return 0;
    7.32      } else {
    7.33 -      return heapBaseField.getValue().minus(null);
    7.34 +      return narrowOopBaseField.getValue().minus(null);
    7.35      }
    7.36    }
    7.37  
    7.38 +  public static int getNarrowOopShift() {
    7.39 +    return (int)narrowOopShiftField.getValue();
    7.40 +  }
    7.41 +
    7.42    /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
    7.43    public boolean isIn(Address p) {
    7.44      return heap().isIn(p);
     8.1 --- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Wed Mar 18 11:37:48 2009 -0400
     8.2 +++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu Mar 19 09:13:24 2009 -0700
     8.3 @@ -342,8 +342,8 @@
     8.4        throw new RuntimeException("Attempt to initialize VM twice");
     8.5      }
     8.6      soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
     8.7 -    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
     8.8 -                          soleInstance.logMinObjAlignmentInBytes);
     8.9 +    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
    8.10 +                          Universe.getNarrowOopShift());
    8.11      for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
    8.12        ((Observer) iter.next()).update(null, null);
    8.13      }
     9.1 --- a/make/windows/get_msc_ver.sh	Wed Mar 18 11:37:48 2009 -0400
     9.2 +++ b/make/windows/get_msc_ver.sh	Thu Mar 19 09:13:24 2009 -0700
     9.3 @@ -29,6 +29,7 @@
     9.4  # cl version 13.10.3077 returns "MSC_VER=1310"
     9.5  # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
     9.6  # cl version 14.00.40310.41 returns "MSC_VER=1400"
     9.7 +# cl version 15.00.21022.8 returns "MSC_VER=1500"
     9.8  
     9.9  # Note that we currently do not have a way to set HotSpotMksHome in
    9.10  # the batch build, but so far this has not seemed to be a problem. The
    10.1 --- a/make/windows/makefiles/compile.make	Wed Mar 18 11:37:48 2009 -0400
    10.2 +++ b/make/windows/makefiles/compile.make	Thu Mar 19 09:13:24 2009 -0700
    10.3 @@ -170,11 +170,9 @@
    10.4  # Manifest Tool - used in VS2005 and later to adjust manifests stored
    10.5  # as resources inside build artifacts.
    10.6  MT=mt.exe
    10.7 -!if "$(BUILDARCH)" == "i486"
    10.8 -# VS2005 on x86 restricts the use of certain libc functions without this
    10.9 +# VS2005 and later restricts the use of certain libc functions without this
   10.10  CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
   10.11  !endif
   10.12 -!endif
   10.13  
   10.14  !if "$(COMPILER_NAME)" == "VS2008"
   10.15  PRODUCT_OPT_OPTION   = /O2 /Oy-
   10.16 @@ -185,11 +183,9 @@
   10.17  # Manifest Tool - used in VS2005 and later to adjust manifests stored
   10.18  # as resources inside build artifacts.
   10.19  MT=mt.exe
   10.20 -!if "$(BUILDARCH)" == "i486"
   10.21 -# VS2005 on x86 restricts the use of certain libc functions without this
   10.22 +# VS2005 and later restricts the use of certain libc functions without this
   10.23  CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
   10.24  !endif
   10.25 -!endif
   10.26  
   10.27  # Compile for space above time.
   10.28  !if "$(Variant)" == "kernel"
    11.1 --- a/make/windows/makefiles/sa.make	Wed Mar 18 11:37:48 2009 -0400
    11.2 +++ b/make/windows/makefiles/sa.make	Thu Mar 19 09:13:24 2009 -0700
    11.3 @@ -89,9 +89,11 @@
    11.4  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
    11.5  !elseif "$(BUILDARCH)" == "amd64"
    11.6  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
    11.7 +!if "$(COMPILER_NAME)" == "VS2005"
    11.8  # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
    11.9  # otherwise we get missing __security_check_cookie externals at link time. 
   11.10  SA_LINK_FLAGS = bufferoverflowU.lib
   11.11 +!endif
   11.12  !else
   11.13  SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
   11.14  !endif
    12.1 --- a/make/windows/makefiles/sanity.make	Wed Mar 18 11:37:48 2009 -0400
    12.2 +++ b/make/windows/makefiles/sanity.make	Thu Mar 19 09:13:24 2009 -0700
    12.3 @@ -27,9 +27,9 @@
    12.4  all: checkCL checkLink
    12.5  
    12.6  checkCL:
    12.7 -	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
    12.8 +	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
    12.9  	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)).  Use FORCE_MSC_VER to override automatic detection.
   12.10  
   12.11  checkLink:
   12.12 -	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
   12.13 +	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
   12.14  	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)).  Use FORCE_LINK_VER to override automatic detection.
    13.1 --- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    13.2 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    13.3 @@ -2767,6 +2767,268 @@
    13.4  }
    13.5  
    13.6  
    13.7 +void MacroAssembler::check_klass_subtype(Register sub_klass,
    13.8 +                                         Register super_klass,
    13.9 +                                         Register temp_reg,
   13.10 +                                         Register temp2_reg,
   13.11 +                                         Label& L_success) {
   13.12 +  Label L_failure, L_pop_to_failure;
   13.13 +  check_klass_subtype_fast_path(sub_klass, super_klass,
   13.14 +                                temp_reg, temp2_reg,
   13.15 +                                &L_success, &L_failure, NULL);
   13.16 +  Register sub_2 = sub_klass;
   13.17 +  Register sup_2 = super_klass;
   13.18 +  if (!sub_2->is_global())  sub_2 = L0;
   13.19 +  if (!sup_2->is_global())  sup_2 = L1;
   13.20 +
   13.21 +  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
   13.22 +  check_klass_subtype_slow_path(sub_2, sup_2,
   13.23 +                                L2, L3, L4, L5,
   13.24 +                                NULL, &L_pop_to_failure);
   13.25 +
   13.26 +  // on success:
   13.27 +  restore();
   13.28 +  ba(false, L_success);
   13.29 +  delayed()->nop();
   13.30 +
   13.31 +  // on failure:
   13.32 +  bind(L_pop_to_failure);
   13.33 +  restore();
   13.34 +  bind(L_failure);
   13.35 +}
   13.36 +
   13.37 +
   13.38 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
   13.39 +                                                   Register super_klass,
   13.40 +                                                   Register temp_reg,
   13.41 +                                                   Register temp2_reg,
   13.42 +                                                   Label* L_success,
   13.43 +                                                   Label* L_failure,
   13.44 +                                                   Label* L_slow_path,
   13.45 +                                        RegisterConstant super_check_offset,
   13.46 +                                        Register instanceof_hack) {
   13.47 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   13.48 +                   Klass::secondary_super_cache_offset_in_bytes());
   13.49 +  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
   13.50 +                    Klass::super_check_offset_offset_in_bytes());
   13.51 +
   13.52 +  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
   13.53 +  bool need_slow_path = (must_load_sco ||
   13.54 +                         super_check_offset.constant_or_zero() == sco_offset);
   13.55 +
   13.56 +  assert_different_registers(sub_klass, super_klass, temp_reg);
   13.57 +  if (super_check_offset.is_register()) {
   13.58 +    assert_different_registers(sub_klass, super_klass,
   13.59 +                               super_check_offset.as_register());
   13.60 +  } else if (must_load_sco) {
   13.61 +    assert(temp2_reg != noreg, "supply either a temp or a register offset");
   13.62 +  }
   13.63 +
   13.64 +  Label L_fallthrough;
   13.65 +  int label_nulls = 0;
   13.66 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
   13.67 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
   13.68 +  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
   13.69 +  assert(label_nulls <= 1 || instanceof_hack != noreg ||
   13.70 +         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
   13.71 +         "at most one NULL in the batch, usually");
   13.72 +
   13.73 +  // Support for the instanceof hack, which uses delay slots to
   13.74 +  // set a destination register to zero or one.
   13.75 +  bool do_bool_sets = (instanceof_hack != noreg);
   13.76 +#define BOOL_SET(bool_value)                            \
   13.77 +  if (do_bool_sets && bool_value >= 0)                  \
   13.78 +    set(bool_value, instanceof_hack)
   13.79 +#define DELAYED_BOOL_SET(bool_value)                    \
   13.80 +  if (do_bool_sets && bool_value >= 0)                  \
   13.81 +    delayed()->set(bool_value, instanceof_hack);        \
   13.82 +  else delayed()->nop()
   13.83 +  // Hacked ba(), which may only be used just before L_fallthrough.
   13.84 +#define FINAL_JUMP(label, bool_value)                   \
   13.85 +  if (&(label) == &L_fallthrough) {                     \
   13.86 +    BOOL_SET(bool_value);                               \
   13.87 +  } else {                                              \
   13.88 +    ba((do_bool_sets && bool_value >= 0), label);       \
   13.89 +    DELAYED_BOOL_SET(bool_value);                       \
   13.90 +  }
   13.91 +
   13.92 +  // If the pointers are equal, we are done (e.g., String[] elements).
   13.93 +  // This self-check enables sharing of secondary supertype arrays among
   13.94 +  // non-primary types such as array-of-interface.  Otherwise, each such
   13.95 +  // type would need its own customized SSA.
   13.96 +  // We move this check to the front of the fast path because many
   13.97 +  // type checks are in fact trivially successful in this manner,
   13.98 +  // so we get a nicely predicted branch right at the start of the check.
   13.99 +  cmp(super_klass, sub_klass);
  13.100 +  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
  13.101 +  DELAYED_BOOL_SET(1);
  13.102 +
  13.103 +  // Check the supertype display:
  13.104 +  if (must_load_sco) {
  13.105 +    // The super check offset is always positive...
  13.106 +    lduw(super_klass, sco_offset, temp2_reg);
  13.107 +    super_check_offset = RegisterConstant(temp2_reg);
  13.108 +  }
  13.109 +  ld_ptr(sub_klass, super_check_offset, temp_reg);
  13.110 +  cmp(super_klass, temp_reg);
  13.111 +
  13.112 +  // This check has worked decisively for primary supers.
  13.113 +  // Secondary supers are sought in the super_cache ('super_cache_addr').
  13.114 +  // (Secondary supers are interfaces and very deeply nested subtypes.)
  13.115 +  // This works in the same check above because of a tricky aliasing
  13.116 +  // between the super_cache and the primary super display elements.
  13.117 +  // (The 'super_check_addr' can address either, as the case requires.)
  13.118 +  // Note that the cache is updated below if it does not help us find
  13.119 +  // what we need immediately.
  13.120 +  // So if it was a primary super, we can just fail immediately.
  13.121 +  // Otherwise, it's the slow path for us (no success at this point).
  13.122 +
  13.123 +  if (super_check_offset.is_register()) {
  13.124 +    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
  13.125 +    delayed(); if (do_bool_sets)  BOOL_SET(1);
  13.126 +    // if !do_bool_sets, sneak the next cmp into the delay slot:
  13.127 +    cmp(super_check_offset.as_register(), sc_offset);
  13.128 +
  13.129 +    if (L_failure == &L_fallthrough) {
  13.130 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
  13.131 +      delayed()->nop();
  13.132 +      BOOL_SET(0);  // fallthrough on failure
  13.133 +    } else {
  13.134 +      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
  13.135 +      DELAYED_BOOL_SET(0);
  13.136 +      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
  13.137 +    }
  13.138 +  } else if (super_check_offset.as_constant() == sc_offset) {
  13.139 +    // Need a slow path; fast failure is impossible.
  13.140 +    if (L_slow_path == &L_fallthrough) {
  13.141 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
  13.142 +      DELAYED_BOOL_SET(1);
  13.143 +    } else {
  13.144 +      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
  13.145 +      delayed()->nop();
  13.146 +      FINAL_JUMP(*L_success, 1);
  13.147 +    }
  13.148 +  } else {
  13.149 +    // No slow path; it's a fast decision.
  13.150 +    if (L_failure == &L_fallthrough) {
  13.151 +      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
  13.152 +      DELAYED_BOOL_SET(1);
  13.153 +      BOOL_SET(0);
  13.154 +    } else {
  13.155 +      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
  13.156 +      DELAYED_BOOL_SET(0);
  13.157 +      FINAL_JUMP(*L_success, 1);
  13.158 +    }
  13.159 +  }
  13.160 +
  13.161 +  bind(L_fallthrough);
  13.162 +
  13.163 +#undef final_jump
  13.164 +#undef bool_set
  13.165 +#undef DELAYED_BOOL_SET
  13.166 +#undef final_jump
  13.167 +}
  13.168 +
  13.169 +
  13.170 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  13.171 +                                                   Register super_klass,
  13.172 +                                                   Register count_temp,
  13.173 +                                                   Register scan_temp,
  13.174 +                                                   Register scratch_reg,
  13.175 +                                                   Register coop_reg,
  13.176 +                                                   Label* L_success,
  13.177 +                                                   Label* L_failure) {
  13.178 +  assert_different_registers(sub_klass, super_klass,
  13.179 +                             count_temp, scan_temp, scratch_reg, coop_reg);
  13.180 +
  13.181 +  Label L_fallthrough, L_loop;
  13.182 +  int label_nulls = 0;
  13.183 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  13.184 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  13.185 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  13.186 +
  13.187 +  // a couple of useful fields in sub_klass:
  13.188 +  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  13.189 +                   Klass::secondary_supers_offset_in_bytes());
  13.190 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  13.191 +                   Klass::secondary_super_cache_offset_in_bytes());
  13.192 +
  13.193 +  // Do a linear scan of the secondary super-klass chain.
  13.194 +  // This code is rarely used, so simplicity is a virtue here.
  13.195 +
  13.196 +#ifndef PRODUCT
  13.197 +  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  13.198 +  inc_counter((address) pst_counter, count_temp, scan_temp);
  13.199 +#endif
  13.200 +
  13.201 +  // We will consult the secondary-super array.
  13.202 +  ld_ptr(sub_klass, ss_offset, scan_temp);
  13.203 +
  13.204 +  // Compress superclass if necessary.
  13.205 +  Register search_key = super_klass;
  13.206 +  bool decode_super_klass = false;
  13.207 +  if (UseCompressedOops) {
  13.208 +    if (coop_reg != noreg) {
  13.209 +      encode_heap_oop_not_null(super_klass, coop_reg);
  13.210 +      search_key = coop_reg;
  13.211 +    } else {
  13.212 +      encode_heap_oop_not_null(super_klass);
  13.213 +      decode_super_klass = true; // scarce temps!
  13.214 +    }
  13.215 +    // The superclass is never null; it would be a basic system error if a null
  13.216 +    // pointer were to sneak in here.  Note that we have already loaded the
  13.217 +    // Klass::super_check_offset from the super_klass in the fast path,
  13.218 +    // so if there is a null in that register, we are already in the afterlife.
  13.219 +  }
  13.220 +
  13.221 +  // Load the array length.  (Positive movl does right thing on LP64.)
  13.222 +  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
  13.223 +
  13.224 +  // Check for empty secondary super list
  13.225 +  tst(count_temp);
  13.226 +
  13.227 +  // Top of search loop
  13.228 +  bind(L_loop);
  13.229 +  br(Assembler::equal, false, Assembler::pn, *L_failure);
  13.230 +  delayed()->add(scan_temp, heapOopSize, scan_temp);
  13.231 +  assert(heapOopSize != 0, "heapOopSize should be initialized");
  13.232 +
  13.233 +  // Skip the array header in all array accesses.
  13.234 +  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
  13.235 +  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
  13.236 +
  13.237 +  // Load next super to check
  13.238 +  if (UseCompressedOops) {
  13.239 +    // Don't use load_heap_oop; we don't want to decode the element.
  13.240 +    lduw(   scan_temp, elem_offset, scratch_reg );
  13.241 +  } else {
  13.242 +    ld_ptr( scan_temp, elem_offset, scratch_reg );
  13.243 +  }
  13.244 +
  13.245 +  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  13.246 +  cmp(scratch_reg, search_key);
  13.247 +
  13.248 +  // A miss means we are NOT a subtype and need to keep looping
  13.249 +  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
  13.250 +  delayed()->deccc(count_temp); // decrement trip counter in delay slot
  13.251 +
  13.252 +  // Falling out the bottom means we found a hit; we ARE a subtype
  13.253 +  if (decode_super_klass) decode_heap_oop(super_klass);
  13.254 +
  13.255 +  // Success.  Cache the super we found and proceed in triumph.
  13.256 +  st_ptr(super_klass, sub_klass, sc_offset);
  13.257 +
  13.258 +  if (L_success != &L_fallthrough) {
  13.259 +    ba(false, *L_success);
  13.260 +    delayed()->nop();
  13.261 +  }
  13.262 +
  13.263 +  bind(L_fallthrough);
  13.264 +}
  13.265 +
  13.266 +
  13.267 +
  13.268 +
  13.269  void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
  13.270                                            Register temp_reg,
  13.271                                            Label& done, Label* slow_case,
  13.272 @@ -4316,7 +4578,13 @@
  13.273  
  13.274  void MacroAssembler::encode_heap_oop(Register src, Register dst) {
  13.275    assert (UseCompressedOops, "must be compressed");
  13.276 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.277 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.278    verify_oop(src);
  13.279 +  if (Universe::narrow_oop_base() == NULL) {
  13.280 +    srlx(src, LogMinObjAlignmentInBytes, dst);
  13.281 +    return;
  13.282 +  }
  13.283    Label done;
  13.284    if (src == dst) {
  13.285      // optimize for frequent case src == dst
  13.286 @@ -4338,26 +4606,39 @@
  13.287  
  13.288  void MacroAssembler::encode_heap_oop_not_null(Register r) {
  13.289    assert (UseCompressedOops, "must be compressed");
  13.290 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.291 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.292    verify_oop(r);
  13.293 -  sub(r, G6_heapbase, r);
  13.294 +  if (Universe::narrow_oop_base() != NULL)
  13.295 +    sub(r, G6_heapbase, r);
  13.296    srlx(r, LogMinObjAlignmentInBytes, r);
  13.297  }
  13.298  
  13.299  void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
  13.300    assert (UseCompressedOops, "must be compressed");
  13.301 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.302 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.303    verify_oop(src);
  13.304 -  sub(src, G6_heapbase, dst);
  13.305 -  srlx(dst, LogMinObjAlignmentInBytes, dst);
  13.306 +  if (Universe::narrow_oop_base() == NULL) {
  13.307 +    srlx(src, LogMinObjAlignmentInBytes, dst);
  13.308 +  } else {
  13.309 +    sub(src, G6_heapbase, dst);
  13.310 +    srlx(dst, LogMinObjAlignmentInBytes, dst);
  13.311 +  }
  13.312  }
  13.313  
  13.314  // Same algorithm as oops.inline.hpp decode_heap_oop.
  13.315  void  MacroAssembler::decode_heap_oop(Register src, Register dst) {
  13.316    assert (UseCompressedOops, "must be compressed");
  13.317 -  Label done;
  13.318 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.319 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.320    sllx(src, LogMinObjAlignmentInBytes, dst);
  13.321 -  bpr(rc_nz, true, Assembler::pt, dst, done);
  13.322 -  delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
  13.323 -  bind(done);
  13.324 +  if (Universe::narrow_oop_base() != NULL) {
  13.325 +    Label done;
  13.326 +    bpr(rc_nz, true, Assembler::pt, dst, done);
  13.327 +    delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
  13.328 +    bind(done);
  13.329 +  }
  13.330    verify_oop(dst);
  13.331  }
  13.332  
  13.333 @@ -4366,8 +4647,11 @@
  13.334    // pd_code_size_limit.
  13.335    // Also do not verify_oop as this is called by verify_oop.
  13.336    assert (UseCompressedOops, "must be compressed");
  13.337 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.338 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.339    sllx(r, LogMinObjAlignmentInBytes, r);
  13.340 -  add(r, G6_heapbase, r);
  13.341 +  if (Universe::narrow_oop_base() != NULL)
  13.342 +    add(r, G6_heapbase, r);
  13.343  }
  13.344  
  13.345  void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
  13.346 @@ -4375,14 +4659,17 @@
  13.347    // pd_code_size_limit.
  13.348    // Also do not verify_oop as this is called by verify_oop.
  13.349    assert (UseCompressedOops, "must be compressed");
  13.350 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  13.351 +  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  13.352    sllx(src, LogMinObjAlignmentInBytes, dst);
  13.353 -  add(dst, G6_heapbase, dst);
  13.354 +  if (Universe::narrow_oop_base() != NULL)
  13.355 +    add(dst, G6_heapbase, dst);
  13.356  }
  13.357  
  13.358  void MacroAssembler::reinit_heapbase() {
  13.359    if (UseCompressedOops) {
  13.360      // call indirectly to solve generation ordering problem
  13.361 -    Address base(G6_heapbase, (address)Universe::heap_base_addr());
  13.362 +    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
  13.363      load_ptr_contents(base, G6_heapbase);
  13.364    }
  13.365  }
    14.1 --- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
    14.2 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
    14.3 @@ -2327,6 +2327,46 @@
    14.4                                 Register temp_reg, Register temp2_reg,
    14.5                                 Label& no_such_interface);
    14.6  
    14.7 +  // Test sub_klass against super_klass, with fast and slow paths.
    14.8 +
    14.9 +  // The fast path produces a tri-state answer: yes / no / maybe-slow.
   14.10 +  // One of the three labels can be NULL, meaning take the fall-through.
   14.11 +  // If super_check_offset is -1, the value is loaded up from super_klass.
   14.12 +  // No registers are killed, except temp_reg and temp2_reg.
   14.13 +  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
   14.14 +  void check_klass_subtype_fast_path(Register sub_klass,
   14.15 +                                     Register super_klass,
   14.16 +                                     Register temp_reg,
   14.17 +                                     Register temp2_reg,
   14.18 +                                     Label* L_success,
   14.19 +                                     Label* L_failure,
   14.20 +                                     Label* L_slow_path,
   14.21 +                RegisterConstant super_check_offset = RegisterConstant(-1),
   14.22 +                Register instanceof_hack = noreg);
   14.23 +
   14.24 +  // The rest of the type check; must be wired to a corresponding fast path.
   14.25 +  // It does not repeat the fast path logic, so don't use it standalone.
   14.26 +  // The temp_reg can be noreg, if no temps are available.
   14.27 +  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
   14.28 +  // Updates the sub's secondary super cache as necessary.
   14.29 +  void check_klass_subtype_slow_path(Register sub_klass,
   14.30 +                                     Register super_klass,
   14.31 +                                     Register temp_reg,
   14.32 +                                     Register temp2_reg,
   14.33 +                                     Register temp3_reg,
   14.34 +                                     Register temp4_reg,
   14.35 +                                     Label* L_success,
   14.36 +                                     Label* L_failure);
   14.37 +
   14.38 +  // Simplified, combined version, good for typical uses.
   14.39 +  // Falls through on failure.
   14.40 +  void check_klass_subtype(Register sub_klass,
   14.41 +                           Register super_klass,
   14.42 +                           Register temp_reg,
   14.43 +                           Register temp2_reg,
   14.44 +                           Label& L_success);
   14.45 +
   14.46 +
   14.47    // Stack overflow checking
   14.48  
   14.49    // Note: this clobbers G3_scratch
    15.1 --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    15.2 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    15.3 @@ -2393,23 +2393,11 @@
    15.4  
    15.5      // get instance klass
    15.6      load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
    15.7 -    // get super_check_offset
    15.8 -    load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
    15.9 -    // See if we get an immediate positive hit
   15.10 -    __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
   15.11 -    __ cmp(k_RInfo, O7);
   15.12 -    __ br(Assembler::equal, false, Assembler::pn, done);
   15.13 -    __ delayed()->nop();
   15.14 -    // check for immediate negative hit
   15.15 -    __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   15.16 -    __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   15.17 -    __ delayed()->nop();
   15.18 -    // check for self
   15.19 -    __ cmp(klass_RInfo, k_RInfo);
   15.20 -    __ br(Assembler::equal, false, Assembler::pn, done);
   15.21 -    __ delayed()->nop();
   15.22 -
   15.23 -    // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   15.24 +    // perform the fast part of the checking logic
   15.25 +    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
   15.26 +
   15.27 +    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   15.28 +    assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
   15.29      __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   15.30      __ delayed()->nop();
   15.31      __ cmp(G3, 0);
   15.32 @@ -2493,58 +2481,30 @@
   15.33        __ delayed()->nop();
   15.34        __ bind(done);
   15.35      } else {
   15.36 +      bool need_slow_path = true;
   15.37        if (k->is_loaded()) {
   15.38 -        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
   15.39 -
   15.40 -        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
   15.41 -          // See if we get an immediate positive hit
   15.42 -          __ cmp(Rtmp1, k_RInfo );
   15.43 -          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   15.44 -          __ delayed()->nop();
   15.45 -        } else {
   15.46 -          // See if we get an immediate positive hit
   15.47 -          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
   15.48 -          __ cmp(Rtmp1, k_RInfo );
   15.49 -          __ br(Assembler::equal, false, Assembler::pn, done);
   15.50 -          // check for self
   15.51 -          __ delayed()->cmp(klass_RInfo, k_RInfo);
   15.52 -          __ br(Assembler::equal, false, Assembler::pn, done);
   15.53 -          __ delayed()->nop();
   15.54 -
   15.55 -          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   15.56 -          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   15.57 -          __ delayed()->nop();
   15.58 -          __ cmp(G3, 0);
   15.59 -          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
   15.60 -          __ delayed()->nop();
   15.61 -        }
   15.62 -        __ bind(done);
   15.63 +        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
   15.64 +          need_slow_path = false;
   15.65 +        // perform the fast part of the checking logic
   15.66 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
   15.67 +                                         (need_slow_path ? &done : NULL),
   15.68 +                                         stub->entry(), NULL,
   15.69 +                                         RegisterConstant(k->super_check_offset()));
   15.70        } else {
   15.71 -        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
   15.72 -
   15.73 -        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
   15.74 -        // See if we get an immediate positive hit
   15.75 -        load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
   15.76 -        __ cmp(k_RInfo, O7);
   15.77 -        __ br(Assembler::equal, false, Assembler::pn, done);
   15.78 -        __ delayed()->nop();
   15.79 -        // check for immediate negative hit
   15.80 -        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   15.81 -        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
   15.82 -        // check for self
   15.83 -        __ delayed()->cmp(klass_RInfo, k_RInfo);
   15.84 -        __ br(Assembler::equal, false, Assembler::pn, done);
   15.85 -        __ delayed()->nop();
   15.86 -
   15.87 -        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
   15.88 +        // perform the fast part of the checking logic
   15.89 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
   15.90 +                                         &done, stub->entry(), NULL);
   15.91 +      }
   15.92 +      if (need_slow_path) {
   15.93 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   15.94 +        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
   15.95          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
   15.96          __ delayed()->nop();
   15.97          __ cmp(G3, 0);
   15.98          __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
   15.99          __ delayed()->nop();
  15.100 -        __ bind(done);
  15.101        }
  15.102 -
  15.103 +      __ bind(done);
  15.104      }
  15.105      __ mov(obj, dst);
  15.106    } else if (code == lir_instanceof) {
  15.107 @@ -2582,58 +2542,32 @@
  15.108        __ set(0, dst);
  15.109        __ bind(done);
  15.110      } else {
  15.111 +      bool need_slow_path = true;
  15.112        if (k->is_loaded()) {
  15.113 -        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
  15.114 -        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
  15.115 -
  15.116 -        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
  15.117 -          // See if we get an immediate positive hit
  15.118 -          __ cmp(Rtmp1, k_RInfo );
  15.119 -          __ br(Assembler::equal, true, Assembler::pt, done);
  15.120 -          __ delayed()->set(1, dst);
  15.121 -          __ set(0, dst);
  15.122 -          __ bind(done);
  15.123 -        } else {
  15.124 -          // See if we get an immediate positive hit
  15.125 -          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
  15.126 -          __ cmp(Rtmp1, k_RInfo );
  15.127 -          __ br(Assembler::equal, true, Assembler::pt, done);
  15.128 -          __ delayed()->set(1, dst);
  15.129 -          // check for self
  15.130 -          __ cmp(klass_RInfo, k_RInfo);
  15.131 -          __ br(Assembler::equal, true, Assembler::pt, done);
  15.132 -          __ delayed()->set(1, dst);
  15.133 -
  15.134 -          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
  15.135 -          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
  15.136 -          __ delayed()->nop();
  15.137 -          __ mov(G3, dst);
  15.138 -          __ bind(done);
  15.139 -        }
  15.140 +        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
  15.141 +          need_slow_path = false;
  15.142 +        // perform the fast part of the checking logic
  15.143 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
  15.144 +                                         (need_slow_path ? &done : NULL),
  15.145 +                                         (need_slow_path ? &done : NULL), NULL,
  15.146 +                                         RegisterConstant(k->super_check_offset()),
  15.147 +                                         dst);
  15.148        } else {
  15.149          assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
  15.150 -
  15.151 -        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
  15.152 -        // See if we get an immediate positive hit
  15.153 -        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
  15.154 -        __ cmp(k_RInfo, O7);
  15.155 -        __ br(Assembler::equal, true, Assembler::pt, done);
  15.156 -        __ delayed()->set(1, dst);
  15.157 -        // check for immediate negative hit
  15.158 -        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
  15.159 -        __ br(Assembler::notEqual, true, Assembler::pt, done);
  15.160 -        __ delayed()->set(0, dst);
  15.161 -        // check for self
  15.162 -        __ cmp(klass_RInfo, k_RInfo);
  15.163 -        __ br(Assembler::equal, true, Assembler::pt, done);
  15.164 -        __ delayed()->set(1, dst);
  15.165 -
  15.166 -        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
  15.167 +        // perform the fast part of the checking logic
  15.168 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
  15.169 +                                         &done, &done, NULL,
  15.170 +                                         RegisterConstant(-1),
  15.171 +                                         dst);
  15.172 +      }
  15.173 +      if (need_slow_path) {
  15.174 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
  15.175 +        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
  15.176          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
  15.177          __ delayed()->nop();
  15.178          __ mov(G3, dst);
  15.179 -        __ bind(done);
  15.180        }
  15.181 +      __ bind(done);
  15.182      }
  15.183    } else {
  15.184      ShouldNotReachHere();
    16.1 --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    16.2 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    16.3 @@ -714,38 +714,19 @@
    16.4          //      sub  : G3, argument, destroyed
    16.5          //      super: G1, argument, not changed
    16.6          //      raddr: O7, blown by call
    16.7 -        Label loop, miss;
    16.8 +        Label miss;
    16.9  
   16.10          __ save_frame(0);               // Blow no registers!
   16.11  
   16.12 -        __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
   16.13 -        __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
   16.14 -        __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
   16.15 -        __ clr(L4);                     // Index
   16.16 -        // Load a little early; will load 1 off the end of the array.
   16.17 -        // Ok for now; revisit if we have other uses of this routine.
   16.18 -        __ ld_ptr(L1,0,L2);             // Will load a little early
   16.19 -
   16.20 -        // The scan loop
   16.21 -        __ bind(loop);
   16.22 -        __ add(L1,wordSize,L1); // Bump by OOP size
   16.23 -        __ cmp(L4,L0);
   16.24 -        __ br(Assembler::equal,false,Assembler::pn,miss);
   16.25 -        __ delayed()->inc(L4);  // Bump index
   16.26 -        __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
   16.27 -        __ brx( Assembler::notEqual, false, Assembler::pt, loop );
   16.28 -        __ delayed()->ld_ptr(L1,0,L2); // Will load a little early
   16.29 -
   16.30 -        // Got a hit; report success; set cache
   16.31 -        __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   16.32 +        __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);
   16.33  
   16.34          __ mov(1, G3);
   16.35 -        __ ret();                       // Result in G5 is ok; flags set
   16.36 +        __ ret();                       // Result in G5 is 'true'
   16.37          __ delayed()->restore();        // free copy or add can go here
   16.38  
   16.39          __ bind(miss);
   16.40          __ mov(0, G3);
   16.41 -        __ ret();                       // Result in G5 is ok; flags set
   16.42 +        __ ret();                       // Result in G5 is 'false'
   16.43          __ delayed()->restore();        // free copy or add can go here
   16.44        }
   16.45  
    17.1 --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    17.2 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    17.3 @@ -866,65 +866,18 @@
    17.4                                                    Register Rtmp2,
    17.5                                                    Register Rtmp3,
    17.6                                                    Label &ok_is_subtype ) {
    17.7 -  Label not_subtype, loop;
    17.8 +  Label not_subtype;
    17.9  
   17.10    // Profile the not-null value's klass.
   17.11    profile_typecheck(Rsub_klass, Rtmp1);
   17.12  
   17.13 -  // Load the super-klass's check offset into Rtmp1
   17.14 -  ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
   17.15 -  // Load from the sub-klass's super-class display list, or a 1-word cache of
   17.16 -  // the secondary superclass list, or a failing value with a sentinel offset
   17.17 -  // if the super-klass is an interface or exceptionally deep in the Java
   17.18 -  // hierarchy and we have to scan the secondary superclass list the hard way.
   17.19 -  ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
   17.20 -  // See if we get an immediate positive hit
   17.21 -  cmp( Rtmp2, Rsuper_klass );
   17.22 -  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
   17.23 -  // In the delay slot, check for immediate negative hit
   17.24 -  delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   17.25 -  br( Assembler::notEqual, false, Assembler::pt, not_subtype );
   17.26 -  // In the delay slot, check for self
   17.27 -  delayed()->cmp( Rsub_klass, Rsuper_klass );
   17.28 -  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
   17.29 -
   17.30 -  // Now do a linear scan of the secondary super-klass chain.
   17.31 -  delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
   17.32 -
   17.33 -  // compress superclass
   17.34 -  if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
   17.35 -
   17.36 -  // Rtmp2 holds the objArrayOop of secondary supers.
   17.37 -  ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
   17.38 -  // Check for empty secondary super list
   17.39 -  tst(Rtmp1);
   17.40 -
   17.41 -  // Top of search loop
   17.42 -  bind( loop );
   17.43 -  br( Assembler::equal, false, Assembler::pn, not_subtype );
   17.44 -  delayed()->nop();
   17.45 -
   17.46 -  // load next super to check
   17.47 -  if (UseCompressedOops) {
   17.48 -    lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
   17.49 -    // Bump array pointer forward one oop
   17.50 -    add( Rtmp2, 4, Rtmp2 );
   17.51 -  } else {
   17.52 -    ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
   17.53 -    // Bump array pointer forward one oop
   17.54 -    add( Rtmp2, wordSize, Rtmp2);
   17.55 -  }
   17.56 -  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
   17.57 -  cmp( Rtmp3, Rsuper_klass );
   17.58 -  // A miss means we are NOT a subtype and need to keep looping
   17.59 -  brx( Assembler::notEqual, false, Assembler::pt, loop );
   17.60 -  delayed()->deccc( Rtmp1 );    // dec trip counter in delay slot
   17.61 -  // Falling out the bottom means we found a hit; we ARE a subtype
   17.62 -  if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
   17.63 -  br( Assembler::always, false, Assembler::pt, ok_is_subtype );
   17.64 -  // Update the cache
   17.65 -  delayed()->st_ptr( Rsuper_klass, Rsub_klass,
   17.66 -                     sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   17.67 +  check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
   17.68 +                                Rtmp1, Rtmp2,
   17.69 +                                &ok_is_subtype, &not_subtype, NULL);
   17.70 +
   17.71 +  check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
   17.72 +                                Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
   17.73 +                                &ok_is_subtype, NULL);
   17.74  
   17.75    bind(not_subtype);
   17.76    profile_typecheck_failed(Rtmp1);
    18.1 --- a/src/cpu/sparc/vm/sparc.ad	Wed Mar 18 11:37:48 2009 -0400
    18.2 +++ b/src/cpu/sparc/vm/sparc.ad	Thu Mar 19 09:13:24 2009 -0700
    18.3 @@ -547,7 +547,11 @@
    18.4      int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
    18.5      int klass_load_size;
    18.6      if (UseCompressedOops) {
    18.7 -      klass_load_size = 3*BytesPerInstWord; // see MacroAssembler::load_klass()
    18.8 +      assert(Universe::heap() != NULL, "java heap should be initialized");
    18.9 +      if (Universe::narrow_oop_base() == NULL)
   18.10 +        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
   18.11 +      else
   18.12 +        klass_load_size = 3*BytesPerInstWord;
   18.13      } else {
   18.14        klass_load_size = 1*BytesPerInstWord;
   18.15      }
   18.16 @@ -1601,9 +1605,11 @@
   18.17    st->print_cr("\nUEP:");
   18.18  #ifdef    _LP64
   18.19    if (UseCompressedOops) {
   18.20 +    assert(Universe::heap() != NULL, "java heap should be initialized");
   18.21      st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
   18.22      st->print_cr("\tSLL    R_G5,3,R_G5");
   18.23 -    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   18.24 +    if (Universe::narrow_oop_base() != NULL)
   18.25 +      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   18.26    } else {
   18.27      st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
   18.28    }
   18.29 @@ -2502,7 +2508,11 @@
   18.30        __ load_klass(O0, G3_scratch);
   18.31        int klass_load_size;
   18.32        if (UseCompressedOops) {
   18.33 -        klass_load_size = 3*BytesPerInstWord;
   18.34 +        assert(Universe::heap() != NULL, "java heap should be initialized");
   18.35 +        if (Universe::narrow_oop_base() == NULL)
   18.36 +          klass_load_size = 2*BytesPerInstWord;
   18.37 +        else
   18.38 +          klass_load_size = 3*BytesPerInstWord;
   18.39        } else {
   18.40          klass_load_size = 1*BytesPerInstWord;
   18.41        }
   18.42 @@ -9005,6 +9015,33 @@
   18.43    ins_pipe(long_memory_op);
   18.44  %}
   18.45  
   18.46 +
   18.47 +//---------- Population Count Instructions -------------------------------------
   18.48 +
   18.49 +instruct popCountI(iRegI dst, iRegI src) %{
   18.50 +  predicate(UsePopCountInstruction);
   18.51 +  match(Set dst (PopCountI src));
   18.52 +
   18.53 +  format %{ "POPC   $src, $dst" %}
   18.54 +  ins_encode %{
   18.55 +    __ popc($src$$Register, $dst$$Register);
   18.56 +  %}
   18.57 +  ins_pipe(ialu_reg);
   18.58 +%}
   18.59 +
   18.60 +// Note: Long.bitCount(long) returns an int.
   18.61 +instruct popCountL(iRegI dst, iRegL src) %{
   18.62 +  predicate(UsePopCountInstruction);
   18.63 +  match(Set dst (PopCountL src));
   18.64 +
   18.65 +  format %{ "POPC   $src, $dst" %}
   18.66 +  ins_encode %{
   18.67 +    __ popc($src$$Register, $dst$$Register);
   18.68 +  %}
   18.69 +  ins_pipe(ialu_reg);
   18.70 +%}
   18.71 +
   18.72 +
   18.73  // ============================================================================
   18.74  //------------Bytes reverse--------------------------------------------------
   18.75  
    19.1 --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    19.2 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    19.3 @@ -900,19 +900,7 @@
    19.4      __ align(CodeEntryAlignment);
    19.5      StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
    19.6      address start = __ pc();
    19.7 -    Label loop, miss;
    19.8 -
    19.9 -    // Compare super with sub directly, since super is not in its own SSA.
   19.10 -    // The compiler used to emit this test, but we fold it in here,
   19.11 -    // to increase overall code density, with no real loss of speed.
   19.12 -    { Label L;
   19.13 -      __ cmp(O1, O2);
   19.14 -      __ brx(Assembler::notEqual, false, Assembler::pt, L);
   19.15 -      __ delayed()->nop();
   19.16 -      __ retl();
   19.17 -      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
   19.18 -      __ bind(L);
   19.19 -    }
   19.20 +    Label miss;
   19.21  
   19.22  #if defined(COMPILER2) && !defined(_LP64)
   19.23      // Do not use a 'save' because it blows the 64-bit O registers.
   19.24 @@ -936,56 +924,12 @@
   19.25      Register L2_super   = L2;
   19.26      Register L3_index   = L3;
   19.27  
   19.28 -#ifdef _LP64
   19.29 -    Register L4_ooptmp  = L4;
   19.30 -
   19.31 -    if (UseCompressedOops) {
   19.32 -      // this must be under UseCompressedOops check, as we rely upon fact
   19.33 -      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
   19.34 -      // on stack, see several lines above
   19.35 -      __ encode_heap_oop(Rsuper, L4_ooptmp);
   19.36 -    }
   19.37 -#endif
   19.38 -
   19.39 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
   19.40 -
   19.41 -    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
   19.42 -    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
   19.43 -    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
   19.44 -    __ clr(L3_index);           // zero index
   19.45 -    // Load a little early; will load 1 off the end of the array.
   19.46 -    // Ok for now; revisit if we have other uses of this routine.
   19.47 -    if (UseCompressedOops) {
   19.48 -      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
   19.49 -    } else {
   19.50 -      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
   19.51 -    }
   19.52 -
   19.53 -    assert(heapOopSize != 0, "heapOopSize should be initialized");
   19.54 -    // The scan loop
   19.55 -    __ BIND(loop);
   19.56 -    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
   19.57 -    __ cmp(L3_index,L0_ary_len);
   19.58 -    __ br(Assembler::equal,false,Assembler::pn,miss);
   19.59 -    __ delayed()->inc(L3_index); // Bump index
   19.60 -
   19.61 -    if (UseCompressedOops) {
   19.62 -#ifdef  _LP64
   19.63 -      __ subcc(L2_super,L4_ooptmp,Rret);   // Check for match; zero in Rret for a hit
   19.64 -      __ br( Assembler::notEqual, false, Assembler::pt, loop );
   19.65 -      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
   19.66 -#else
   19.67 -      ShouldNotReachHere();
   19.68 -#endif
   19.69 -    } else {
   19.70 -      __ subcc(L2_super,Rsuper,Rret);   // Check for match; zero in Rret for a hit
   19.71 -      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
   19.72 -      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
   19.73 -    }
   19.74 -
   19.75 -    // Got a hit; report success; set cache.  Cache load doesn't
   19.76 -    // happen here; for speed it is directly emitted by the compiler.
   19.77 -    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   19.78 +    __ check_klass_subtype_slow_path(Rsub, Rsuper,
   19.79 +                                     L0, L1, L2, L3,
   19.80 +                                     NULL, &miss);
   19.81 +
   19.82 +    // Match falls through here.
   19.83 +    __ addcc(G0,0,Rret);        // set Z flags, Z result
   19.84  
   19.85  #if defined(COMPILER2) && !defined(_LP64)
   19.86      __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
   19.87 @@ -999,7 +943,6 @@
   19.88      __ delayed()->restore();
   19.89  #endif
   19.90  
   19.91 -    // Hit or miss falls through here
   19.92      __ BIND(miss);
   19.93      __ addcc(G0,1,Rret);        // set NZ flags, NZ result
   19.94  
   19.95 @@ -2330,51 +2273,31 @@
   19.96                             Register super_check_offset,
   19.97                             Register super_klass,
   19.98                             Register temp,
   19.99 -                           Label& L_success,
  19.100 -                           Register deccc_hack = noreg) {
  19.101 +                           Label& L_success) {
  19.102      assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
  19.103  
  19.104      BLOCK_COMMENT("type_check:");
  19.105  
  19.106 -    Label L_miss;
  19.107 +    Label L_miss, L_pop_to_miss;
  19.108  
  19.109      assert_clean_int(super_check_offset, temp);
  19.110  
  19.111 -    // maybe decrement caller's trip count:
  19.112 -#define DELAY_SLOT delayed();   \
  19.113 -    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
  19.114 -
  19.115 -    // if the pointers are equal, we are done (e.g., String[] elements)
  19.116 -    __ cmp(sub_klass, super_klass);
  19.117 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  19.118 -    __ DELAY_SLOT;
  19.119 -
  19.120 -    // check the supertype display:
  19.121 -    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
  19.122 -    __ cmp(super_klass,                      temp); // test the super type
  19.123 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  19.124 -    __ DELAY_SLOT;
  19.125 -
  19.126 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  19.127 -                     Klass::secondary_super_cache_offset_in_bytes());
  19.128 -    __ cmp(super_klass, sc_offset);
  19.129 -    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
  19.130 -    __ delayed()->nop();
  19.131 -
  19.132 +    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
  19.133 +                                     &L_success, &L_miss, NULL,
  19.134 +                                     super_check_offset);
  19.135 +
  19.136 +    BLOCK_COMMENT("type_check_slow_path:");
  19.137      __ save_frame(0);
  19.138 -    __ mov(sub_klass->after_save(), O1);
  19.139 -    // mov(super_klass->after_save(), O2); //fill delay slot
  19.140 -    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
  19.141 -    __ call(StubRoutines::Sparc::_partial_subtype_check);
  19.142 -    __ delayed()->mov(super_klass->after_save(), O2);
  19.143 +    __ check_klass_subtype_slow_path(sub_klass->after_save(),
  19.144 +                                     super_klass->after_save(),
  19.145 +                                     L0, L1, L2, L4,
  19.146 +                                     NULL, &L_pop_to_miss);
  19.147 +    __ ba(false, L_success);
  19.148 +    __ delayed()->restore();
  19.149 +
  19.150 +    __ bind(L_pop_to_miss);
  19.151      __ restore();
  19.152  
  19.153 -    // Upon return, the condition codes are already set.
  19.154 -    __ brx(Assembler::equal, true, Assembler::pt, L_success);
  19.155 -    __ DELAY_SLOT;
  19.156 -
  19.157 -#undef DELAY_SLOT
  19.158 -
  19.159      // Fall through on failure!
  19.160      __ BIND(L_miss);
  19.161    }
  19.162 @@ -2411,7 +2334,7 @@
  19.163      gen_write_ref_array_pre_barrier(O1, O2);
  19.164  
  19.165  #ifdef ASSERT
  19.166 -    // We sometimes save a frame (see partial_subtype_check below).
  19.167 +    // We sometimes save a frame (see generate_type_check below).
  19.168      // If this will cause trouble, let's fail now instead of later.
  19.169      __ save_frame(0);
  19.170      __ restore();
  19.171 @@ -2455,41 +2378,39 @@
  19.172      //   G3, G4, G5 --- current oop, oop.klass, oop.klass.super
  19.173      __ align(16);
  19.174  
  19.175 -    __ bind(store_element);
  19.176 -    // deccc(G1_remain);                // decrement the count (hoisted)
  19.177 +    __ BIND(store_element);
  19.178 +    __ deccc(G1_remain);                // decrement the count
  19.179      __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
  19.180      __ inc(O5_offset, heapOopSize);     // step to next offset
  19.181      __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
  19.182      __ delayed()->set(0, O0);           // return -1 on success
  19.183  
  19.184      // ======== loop entry is here ========
  19.185 -    __ bind(load_element);
  19.186 +    __ BIND(load_element);
  19.187      __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
  19.188      __ br_null(G3_oop, true, Assembler::pt, store_element);
  19.189 -    __ delayed()->deccc(G1_remain);     // decrement the count
  19.190 +    __ delayed()->nop();
  19.191  
  19.192      __ load_klass(G3_oop, G4_klass); // query the object klass
  19.193  
  19.194      generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
  19.195                          // branch to this on success:
  19.196 -                        store_element,
  19.197 -                        // decrement this on success:
  19.198 -                        G1_remain);
  19.199 +                        store_element);
  19.200      // ======== end loop ========
  19.201  
  19.202      // It was a real error; we must depend on the caller to finish the job.
  19.203      // Register G1 has number of *remaining* oops, O2 number of *total* oops.
  19.204      // Emit GC store barriers for the oops we have copied (O2 minus G1),
  19.205      // and report their number to the caller.
  19.206 -    __ bind(fail);
  19.207 +    __ BIND(fail);
  19.208      __ subcc(O2_count, G1_remain, O2_count);
  19.209      __ brx(Assembler::zero, false, Assembler::pt, done);
  19.210      __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller
  19.211  
  19.212 -    __ bind(do_card_marks);
  19.213 +    __ BIND(do_card_marks);
  19.214      gen_write_ref_array_post_barrier(O1_to, O2_count, O3);   // store check on O1[0..O2]
  19.215  
  19.216 -    __ bind(done);
  19.217 +    __ BIND(done);
  19.218      inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
  19.219      __ retl();
  19.220      __ delayed()->nop();             // return value in 00
  19.221 @@ -2942,14 +2863,15 @@
  19.222      StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
  19.223      StubRoutines::_fence_entry               = generate_fence();
  19.224  #endif  // COMPILER2 !=> _LP64
  19.225 -
  19.226 -    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
  19.227    }
  19.228  
  19.229  
  19.230    void generate_all() {
  19.231      // Generates all stubs and initializes the entry points
  19.232  
  19.233 +    // Generate partial_subtype_check first here since its code depends on
  19.234 +    // UseZeroBaseCompressedOops which is defined after heap initialization.
  19.235 +    StubRoutines::Sparc::_partial_subtype_check                = generate_partial_subtype_check();
  19.236      // These entry points require SharedInfo::stack0 to be set up in non-core builds
  19.237      StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
  19.238      StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
    20.1 --- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    20.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    20.3 @@ -1,5 +1,5 @@
    20.4  /*
    20.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    20.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    20.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    20.8   *
    20.9   * This code is free software; you can redistribute it and/or modify it
   20.10 @@ -72,6 +72,9 @@
   20.11          FLAG_SET_ERGO(bool, UseCompressedOops, false);
   20.12        }
   20.13      }
   20.14 +    // 32-bit oops don't make sense for the 64-bit VM on sparc
   20.15 +    // since the 32-bit VM has the same registers and smaller objects.
   20.16 +    Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   20.17  #endif // _LP64
   20.18  #ifdef COMPILER2
   20.19      // Indirect branch is the same cost as direct
   20.20 @@ -89,16 +92,26 @@
   20.21  #endif
   20.22    }
   20.23  
   20.24 +  // Use hardware population count instruction if available.
   20.25 +  if (has_hardware_popc()) {
   20.26 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
   20.27 +      UsePopCountInstruction = true;
   20.28 +    }
   20.29 +  }
   20.30 +
   20.31    char buf[512];
   20.32 -  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s",
   20.33 +  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
   20.34                 (has_v8() ? ", has_v8" : ""),
   20.35                 (has_v9() ? ", has_v9" : ""),
   20.36 +               (has_hardware_popc() ? ", popc" : ""),
   20.37                 (has_vis1() ? ", has_vis1" : ""),
   20.38                 (has_vis2() ? ", has_vis2" : ""),
   20.39                 (is_ultra3() ? ", is_ultra3" : ""),
   20.40                 (is_sun4v() ? ", is_sun4v" : ""),
   20.41                 (is_niagara1() ? ", is_niagara1" : ""),
   20.42 -               (!has_hardware_int_muldiv() ? ", no-muldiv" : ""),
   20.43 +               (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
   20.44 +               (!has_hardware_mul32() ? ", no-mul32" : ""),
   20.45 +               (!has_hardware_div32() ? ", no-div32" : ""),
   20.46                 (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
   20.47  
   20.48    // buf is started with ", " or is empty
    21.1 --- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
    21.2 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
    21.3 @@ -1,5 +1,5 @@
    21.4  /*
    21.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    21.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    21.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    21.8   *
    21.9   * This code is free software; you can redistribute it and/or modify it
   21.10 @@ -25,34 +25,38 @@
   21.11  class VM_Version: public Abstract_VM_Version {
   21.12  protected:
   21.13    enum Feature_Flag {
   21.14 -    v8_instructions     = 0,
   21.15 -    hardware_int_muldiv = 1,
   21.16 -    hardware_fsmuld     = 2,
   21.17 -    v9_instructions     = 3,
   21.18 -    vis1_instructions   = 4,
   21.19 -    vis2_instructions   = 5,
   21.20 -    sun4v_instructions  = 6
   21.21 +    v8_instructions    = 0,
   21.22 +    hardware_mul32     = 1,
   21.23 +    hardware_div32     = 2,
   21.24 +    hardware_fsmuld    = 3,
   21.25 +    hardware_popc      = 4,
   21.26 +    v9_instructions    = 5,
   21.27 +    vis1_instructions  = 6,
   21.28 +    vis2_instructions  = 7,
   21.29 +    sun4v_instructions = 8
   21.30    };
   21.31  
   21.32    enum Feature_Flag_Set {
   21.33 -    unknown_m             = 0,
   21.34 -    all_features_m        = -1,
   21.35 +    unknown_m           = 0,
   21.36 +    all_features_m      = -1,
   21.37  
   21.38 -    v8_instructions_m     = 1 << v8_instructions,
   21.39 -    hardware_int_muldiv_m = 1 << hardware_int_muldiv,
   21.40 -    hardware_fsmuld_m     = 1 << hardware_fsmuld,
   21.41 -    v9_instructions_m     = 1 << v9_instructions,
   21.42 -    vis1_instructions_m   = 1 << vis1_instructions,
   21.43 -    vis2_instructions_m   = 1 << vis2_instructions,
   21.44 -    sun4v_m               = 1 << sun4v_instructions,
   21.45 +    v8_instructions_m   = 1 << v8_instructions,
   21.46 +    hardware_mul32_m    = 1 << hardware_mul32,
   21.47 +    hardware_div32_m    = 1 << hardware_div32,
   21.48 +    hardware_fsmuld_m   = 1 << hardware_fsmuld,
   21.49 +    hardware_popc_m     = 1 << hardware_popc,
   21.50 +    v9_instructions_m   = 1 << v9_instructions,
   21.51 +    vis1_instructions_m = 1 << vis1_instructions,
   21.52 +    vis2_instructions_m = 1 << vis2_instructions,
   21.53 +    sun4v_m             = 1 << sun4v_instructions,
   21.54  
   21.55 -    generic_v8_m          = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m,
   21.56 -    generic_v9_m          = generic_v8_m | v9_instructions_m | vis1_instructions_m,
   21.57 -    ultra3_m              = generic_v9_m | vis2_instructions_m,
   21.58 +    generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
   21.59 +    generic_v9_m        = generic_v8_m | v9_instructions_m,
   21.60 +    ultra3_m            = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
   21.61  
   21.62      // Temporary until we have something more accurate
   21.63 -    niagara1_unique_m     = sun4v_m,
   21.64 -    niagara1_m            = generic_v9_m | niagara1_unique_m
   21.65 +    niagara1_unique_m   = sun4v_m,
   21.66 +    niagara1_m          = generic_v9_m | niagara1_unique_m
   21.67    };
   21.68  
   21.69    static int  _features;
   21.70 @@ -62,7 +66,7 @@
   21.71    static int  determine_features();
   21.72    static int  platform_features(int features);
   21.73  
   21.74 -  static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; }
   21.75 +  static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
   21.76  
   21.77    static int maximum_niagara1_processor_count() { return 32; }
   21.78    // Returns true if the platform is in the niagara line and
   21.79 @@ -76,8 +80,10 @@
   21.80    // Instruction support
   21.81    static bool has_v8()                  { return (_features & v8_instructions_m) != 0; }
   21.82    static bool has_v9()                  { return (_features & v9_instructions_m) != 0; }
   21.83 -  static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; }
   21.84 +  static bool has_hardware_mul32()      { return (_features & hardware_mul32_m) != 0; }
   21.85 +  static bool has_hardware_div32()      { return (_features & hardware_div32_m) != 0; }
   21.86    static bool has_hardware_fsmuld()     { return (_features & hardware_fsmuld_m) != 0; }
   21.87 +  static bool has_hardware_popc()       { return (_features & hardware_popc_m) != 0; }
   21.88    static bool has_vis1()                { return (_features & vis1_instructions_m) != 0; }
   21.89    static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
   21.90  
    22.1 --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    22.2 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    22.3 @@ -221,13 +221,15 @@
    22.4      if (is_vtable_stub) {
    22.5        // ld;ld;ld,jmp,nop
    22.6        const int basic = 5*BytesPerInstWord +
    22.7 -                        // shift;add for load_klass
    22.8 -                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
    22.9 +                        // shift;add for load_klass (only shift with zero heap based)
   22.10 +                        (UseCompressedOops ?
   22.11 +                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
   22.12        return basic + slop;
   22.13      } else {
   22.14        const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
   22.15 -                        // shift;add for load_klass
   22.16 -                        (UseCompressedOops ? 2*BytesPerInstWord : 0);
   22.17 +                        // shift;add for load_klass (only shift with zero heap based)
   22.18 +                        (UseCompressedOops ?
   22.19 +                         ((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
   22.20        return (basic + slop);
   22.21      }
   22.22    }
    23.1 --- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
    23.2 +++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
    23.3 @@ -727,7 +727,7 @@
    23.4    }
    23.5  
    23.6  #ifdef _LP64
    23.7 -  assert(false, "fix locate_operand");
    23.8 +  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
    23.9  #else
   23.10    assert(which == imm_operand, "instruction has only an imm field");
   23.11  #endif // LP64
   23.12 @@ -2193,6 +2193,25 @@
   23.13    emit_byte(0x58 | encode);
   23.14  }
   23.15  
   23.16 +void Assembler::popcntl(Register dst, Address src) {
   23.17 +  assert(VM_Version::supports_popcnt(), "must support");
   23.18 +  InstructionMark im(this);
   23.19 +  emit_byte(0xF3);
   23.20 +  prefix(src, dst);
   23.21 +  emit_byte(0x0F);
   23.22 +  emit_byte(0xB8);
   23.23 +  emit_operand(dst, src);
   23.24 +}
   23.25 +
   23.26 +void Assembler::popcntl(Register dst, Register src) {
   23.27 +  assert(VM_Version::supports_popcnt(), "must support");
   23.28 +  emit_byte(0xF3);
   23.29 +  int encode = prefix_and_encode(dst->encoding(), src->encoding());
   23.30 +  emit_byte(0x0F);
   23.31 +  emit_byte(0xB8);
   23.32 +  emit_byte(0xC0 | encode);
   23.33 +}
   23.34 +
   23.35  void Assembler::popf() {
   23.36    emit_byte(0x9D);
   23.37  }
   23.38 @@ -3224,12 +3243,6 @@
   23.39    emit_byte(0xF1);
   23.40  }
   23.41  
   23.42 -void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
   23.43 -  InstructionMark im(this);
   23.44 -  int encode = prefix_and_encode(dst->encoding());
   23.45 -  emit_byte(0xB8 | encode);
   23.46 -  emit_data((int)imm32, rspec, format);
   23.47 -}
   23.48  
   23.49  #ifndef _LP64
   23.50  
   23.51 @@ -3249,6 +3262,12 @@
   23.52    emit_data((int)imm32, rspec, 0);
   23.53  }
   23.54  
   23.55 +void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   23.56 +  InstructionMark im(this);
   23.57 +  int encode = prefix_and_encode(dst->encoding());
   23.58 +  emit_byte(0xB8 | encode);
   23.59 +  emit_data((int)imm32, rspec, 0);
   23.60 +}
   23.61  
   23.62  void Assembler::popa() { // 32bit
   23.63    emit_byte(0x61);
   23.64 @@ -3857,6 +3876,37 @@
   23.65    emit_data64(imm64, rspec);
   23.66  }
   23.67  
   23.68 +void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
   23.69 +  InstructionMark im(this);
   23.70 +  int encode = prefix_and_encode(dst->encoding());
   23.71 +  emit_byte(0xB8 | encode);
   23.72 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   23.73 +}
   23.74 +
   23.75 +void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
   23.76 +  InstructionMark im(this);
   23.77 +  prefix(dst);
   23.78 +  emit_byte(0xC7);
   23.79 +  emit_operand(rax, dst, 4);
   23.80 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   23.81 +}
   23.82 +
   23.83 +void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
   23.84 +  InstructionMark im(this);
   23.85 +  int encode = prefix_and_encode(src1->encoding());
   23.86 +  emit_byte(0x81);
   23.87 +  emit_byte(0xF8 | encode);
   23.88 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   23.89 +}
   23.90 +
   23.91 +void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
   23.92 +  InstructionMark im(this);
   23.93 +  prefix(src1);
   23.94 +  emit_byte(0x81);
   23.95 +  emit_operand(rax, src1, 4);
   23.96 +  emit_data((int)imm32, rspec, narrow_oop_operand);
   23.97 +}
   23.98 +
   23.99  void Assembler::movdq(XMMRegister dst, Register src) {
  23.100    // table D-1 says MMX/SSE2
  23.101    NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
  23.102 @@ -4049,6 +4099,25 @@
  23.103    addq(rsp, 16 * wordSize);
  23.104  }
  23.105  
  23.106 +void Assembler::popcntq(Register dst, Address src) {
  23.107 +  assert(VM_Version::supports_popcnt(), "must support");
  23.108 +  InstructionMark im(this);
  23.109 +  emit_byte(0xF3);
  23.110 +  prefixq(src, dst);
  23.111 +  emit_byte(0x0F);
  23.112 +  emit_byte(0xB8);
  23.113 +  emit_operand(dst, src);
  23.114 +}
  23.115 +
  23.116 +void Assembler::popcntq(Register dst, Register src) {
  23.117 +  assert(VM_Version::supports_popcnt(), "must support");
  23.118 +  emit_byte(0xF3);
  23.119 +  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  23.120 +  emit_byte(0x0F);
  23.121 +  emit_byte(0xB8);
  23.122 +  emit_byte(0xC0 | encode);
  23.123 +}
  23.124 +
  23.125  void Assembler::popq(Address dst) {
  23.126    InstructionMark im(this);
  23.127    prefixq(dst);
  23.128 @@ -7217,6 +7286,225 @@
  23.129  }
  23.130  
  23.131  
  23.132 +void MacroAssembler::check_klass_subtype(Register sub_klass,
  23.133 +                           Register super_klass,
  23.134 +                           Register temp_reg,
  23.135 +                           Label& L_success) {
  23.136 +  Label L_failure;
  23.137 +  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  23.138 +  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  23.139 +  bind(L_failure);
  23.140 +}
  23.141 +
  23.142 +
  23.143 +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
  23.144 +                                                   Register super_klass,
  23.145 +                                                   Register temp_reg,
  23.146 +                                                   Label* L_success,
  23.147 +                                                   Label* L_failure,
  23.148 +                                                   Label* L_slow_path,
  23.149 +                                        RegisterConstant super_check_offset) {
  23.150 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  23.151 +  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  23.152 +  if (super_check_offset.is_register()) {
  23.153 +    assert_different_registers(sub_klass, super_klass,
  23.154 +                               super_check_offset.as_register());
  23.155 +  } else if (must_load_sco) {
  23.156 +    assert(temp_reg != noreg, "supply either a temp or a register offset");
  23.157 +  }
  23.158 +
  23.159 +  Label L_fallthrough;
  23.160 +  int label_nulls = 0;
  23.161 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  23.162 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  23.163 +  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  23.164 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  23.165 +
  23.166 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  23.167 +                   Klass::secondary_super_cache_offset_in_bytes());
  23.168 +  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
  23.169 +                    Klass::super_check_offset_offset_in_bytes());
  23.170 +  Address super_check_offset_addr(super_klass, sco_offset);
  23.171 +
  23.172 +  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  23.173 +  // range of a jccb.  If this routine grows larger, reconsider at
  23.174 +  // least some of these.
  23.175 +#define local_jcc(assembler_cond, label)                                \
  23.176 +  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  23.177 +  else                             jcc( assembler_cond, label) /*omit semi*/
  23.178 +
  23.179 +  // Hacked jmp, which may only be used just before L_fallthrough.
  23.180 +#define final_jmp(label)                                                \
  23.181 +  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  23.182 +  else                            jmp(label)                /*omit semi*/
  23.183 +
  23.184 +  // If the pointers are equal, we are done (e.g., String[] elements).
  23.185 +  // This self-check enables sharing of secondary supertype arrays among
  23.186 +  // non-primary types such as array-of-interface.  Otherwise, each such
  23.187 +  // type would need its own customized SSA.
  23.188 +  // We move this check to the front of the fast path because many
  23.189 +  // type checks are in fact trivially successful in this manner,
  23.190 +  // so we get a nicely predicted branch right at the start of the check.
  23.191 +  cmpptr(sub_klass, super_klass);
  23.192 +  local_jcc(Assembler::equal, *L_success);
  23.193 +
  23.194 +  // Check the supertype display:
  23.195 +  if (must_load_sco) {
  23.196 +    // Positive movl does right thing on LP64.
  23.197 +    movl(temp_reg, super_check_offset_addr);
  23.198 +    super_check_offset = RegisterConstant(temp_reg);
  23.199 +  }
  23.200 +  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  23.201 +  cmpptr(super_klass, super_check_addr); // load displayed supertype
  23.202 +
  23.203 +  // This check has worked decisively for primary supers.
  23.204 +  // Secondary supers are sought in the super_cache ('super_cache_addr').
  23.205 +  // (Secondary supers are interfaces and very deeply nested subtypes.)
  23.206 +  // This works in the same check above because of a tricky aliasing
  23.207 +  // between the super_cache and the primary super display elements.
  23.208 +  // (The 'super_check_addr' can address either, as the case requires.)
  23.209 +  // Note that the cache is updated below if it does not help us find
  23.210 +  // what we need immediately.
  23.211 +  // So if it was a primary super, we can just fail immediately.
  23.212 +  // Otherwise, it's the slow path for us (no success at this point).
  23.213 +
  23.214 +  if (super_check_offset.is_register()) {
  23.215 +    local_jcc(Assembler::equal, *L_success);
  23.216 +    cmpl(super_check_offset.as_register(), sc_offset);
  23.217 +    if (L_failure == &L_fallthrough) {
  23.218 +      local_jcc(Assembler::equal, *L_slow_path);
  23.219 +    } else {
  23.220 +      local_jcc(Assembler::notEqual, *L_failure);
  23.221 +      final_jmp(*L_slow_path);
  23.222 +    }
  23.223 +  } else if (super_check_offset.as_constant() == sc_offset) {
  23.224 +    // Need a slow path; fast failure is impossible.
  23.225 +    if (L_slow_path == &L_fallthrough) {
  23.226 +      local_jcc(Assembler::equal, *L_success);
  23.227 +    } else {
  23.228 +      local_jcc(Assembler::notEqual, *L_slow_path);
  23.229 +      final_jmp(*L_success);
  23.230 +    }
  23.231 +  } else {
  23.232 +    // No slow path; it's a fast decision.
  23.233 +    if (L_failure == &L_fallthrough) {
  23.234 +      local_jcc(Assembler::equal, *L_success);
  23.235 +    } else {
  23.236 +      local_jcc(Assembler::notEqual, *L_failure);
  23.237 +      final_jmp(*L_success);
  23.238 +    }
  23.239 +  }
  23.240 +
  23.241 +  bind(L_fallthrough);
  23.242 +
  23.243 +#undef local_jcc
  23.244 +#undef final_jmp
  23.245 +}
  23.246 +
  23.247 +
  23.248 +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
  23.249 +                                                   Register super_klass,
  23.250 +                                                   Register temp_reg,
  23.251 +                                                   Register temp2_reg,
  23.252 +                                                   Label* L_success,
  23.253 +                                                   Label* L_failure,
  23.254 +                                                   bool set_cond_codes) {
  23.255 +  assert_different_registers(sub_klass, super_klass, temp_reg);
  23.256 +  if (temp2_reg != noreg)
  23.257 +    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
  23.258 +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
  23.259 +
  23.260 +  Label L_fallthrough;
  23.261 +  int label_nulls = 0;
  23.262 +  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  23.263 +  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  23.264 +  assert(label_nulls <= 1, "at most one NULL in the batch");
  23.265 +
  23.266 +  // a couple of useful fields in sub_klass:
  23.267 +  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
  23.268 +                   Klass::secondary_supers_offset_in_bytes());
  23.269 +  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
  23.270 +                   Klass::secondary_super_cache_offset_in_bytes());
  23.271 +  Address secondary_supers_addr(sub_klass, ss_offset);
  23.272 +  Address super_cache_addr(     sub_klass, sc_offset);
  23.273 +
  23.274 +  // Do a linear scan of the secondary super-klass chain.
  23.275 +  // This code is rarely used, so simplicity is a virtue here.
  23.276 +  // The repne_scan instruction uses fixed registers, which we must spill.
  23.277 +  // Don't worry too much about pre-existing connections with the input regs.
  23.278 +
  23.279 +  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  23.280 +  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
  23.281 +
  23.282 +  // Get super_klass value into rax (even if it was in rdi or rcx).
  23.283 +  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  23.284 +  if (super_klass != rax || UseCompressedOops) {
  23.285 +    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
  23.286 +    mov(rax, super_klass);
  23.287 +  }
  23.288 +  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  23.289 +  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
  23.290 +
  23.291 +#ifndef PRODUCT
  23.292 +  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  23.293 +  ExternalAddress pst_counter_addr((address) pst_counter);
  23.294 +  NOT_LP64(  incrementl(pst_counter_addr) );
  23.295 +  LP64_ONLY( lea(rcx, pst_counter_addr) );
  23.296 +  LP64_ONLY( incrementl(Address(rcx, 0)) );
  23.297 +#endif //PRODUCT
  23.298 +
  23.299 +  // We will consult the secondary-super array.
  23.300 +  movptr(rdi, secondary_supers_addr);
  23.301 +  // Load the array length.  (Positive movl does right thing on LP64.)
  23.302 +  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  23.303 +  // Skip to start of data.
  23.304 +  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
  23.305 +
  23.306 +  // Scan RCX words at [RDI] for an occurrence of RAX.
  23.307 +  // Set NZ/Z based on last compare.
  23.308 +#ifdef _LP64
  23.309 +  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  23.310 +  // and we store values in objArrays always encoded, thus we need to encode
  23.311 +  // the value of rax before repne.  Note that rax is dead after the repne.
  23.312 +  if (UseCompressedOops) {
  23.313 +    encode_heap_oop_not_null(rax);
  23.314 +    // The superclass is never null; it would be a basic system error if a null
  23.315 +    // pointer were to sneak in here.  Note that we have already loaded the
  23.316 +    // Klass::super_check_offset from the super_klass in the fast path,
  23.317 +    // so if there is a null in that register, we are already in the afterlife.
  23.318 +    repne_scanl();
  23.319 +  } else
  23.320 +#endif // _LP64
  23.321 +    repne_scan();
  23.322 +
  23.323 +  // Unspill the temp. registers:
  23.324 +  if (pushed_rdi)  pop(rdi);
  23.325 +  if (pushed_rcx)  pop(rcx);
  23.326 +  if (pushed_rax)  pop(rax);
  23.327 +
  23.328 +  if (set_cond_codes) {
  23.329 +    // Special hack for the AD files:  rdi is guaranteed non-zero.
  23.330 +    assert(!pushed_rdi, "rdi must be left non-NULL");
  23.331 +    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  23.332 +  }
  23.333 +
  23.334 +  if (L_failure == &L_fallthrough)
  23.335 +        jccb(Assembler::notEqual, *L_failure);
  23.336 +  else  jcc(Assembler::notEqual, *L_failure);
  23.337 +
  23.338 +  // Success.  Cache the super we found and proceed in triumph.
  23.339 +  movptr(super_cache_addr, super_klass);
  23.340 +
  23.341 +  if (L_success != &L_fallthrough) {
  23.342 +    jmp(*L_success);
  23.343 +  }
  23.344 +
  23.345 +#undef IS_A_TEMP
  23.346 +
  23.347 +  bind(L_fallthrough);
  23.348 +}
  23.349 +
  23.350 +
  23.351  void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  23.352    ucomisd(dst, as_Address(src));
  23.353  }
  23.354 @@ -7710,14 +7998,21 @@
  23.355  void MacroAssembler::load_prototype_header(Register dst, Register src) {
  23.356  #ifdef _LP64
  23.357    if (UseCompressedOops) {
  23.358 +    assert (Universe::heap() != NULL, "java heap should be initialized");
  23.359      movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  23.360 -    movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  23.361 +    if (Universe::narrow_oop_shift() != 0) {
  23.362 +      assert(Address::times_8 == LogMinObjAlignmentInBytes &&
  23.363 +             Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  23.364 +      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  23.365 +    } else {
  23.366 +      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  23.367 +    }
  23.368    } else
  23.369  #endif
  23.370 -    {
  23.371 -      movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  23.372 -      movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  23.373 -    }
  23.374 +  {
  23.375 +    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
  23.376 +    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  23.377 +  }
  23.378  }
  23.379  
  23.380  void MacroAssembler::store_klass(Register dst, Register src) {
  23.381 @@ -7760,11 +8055,20 @@
  23.382  // Algorithm must match oop.inline.hpp encode_heap_oop.
  23.383  void MacroAssembler::encode_heap_oop(Register r) {
  23.384    assert (UseCompressedOops, "should be compressed");
  23.385 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.386 +  if (Universe::narrow_oop_base() == NULL) {
  23.387 +    verify_oop(r, "broken oop in encode_heap_oop");
  23.388 +    if (Universe::narrow_oop_shift() != 0) {
  23.389 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  23.390 +      shrq(r, LogMinObjAlignmentInBytes);
  23.391 +    }
  23.392 +    return;
  23.393 +  }
  23.394  #ifdef ASSERT
  23.395    if (CheckCompressedOops) {
  23.396      Label ok;
  23.397      push(rscratch1); // cmpptr trashes rscratch1
  23.398 -    cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  23.399 +    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  23.400      jcc(Assembler::equal, ok);
  23.401      stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
  23.402      bind(ok);
  23.403 @@ -7780,6 +8084,7 @@
  23.404  
  23.405  void MacroAssembler::encode_heap_oop_not_null(Register r) {
  23.406    assert (UseCompressedOops, "should be compressed");
  23.407 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.408  #ifdef ASSERT
  23.409    if (CheckCompressedOops) {
  23.410      Label ok;
  23.411 @@ -7790,12 +8095,18 @@
  23.412    }
  23.413  #endif
  23.414    verify_oop(r, "broken oop in encode_heap_oop_not_null");
  23.415 -  subq(r, r12_heapbase);
  23.416 -  shrq(r, LogMinObjAlignmentInBytes);
  23.417 +  if (Universe::narrow_oop_base() != NULL) {
  23.418 +    subq(r, r12_heapbase);
  23.419 +  }
  23.420 +  if (Universe::narrow_oop_shift() != 0) {
  23.421 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  23.422 +    shrq(r, LogMinObjAlignmentInBytes);
  23.423 +  }
  23.424  }
  23.425  
  23.426  void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  23.427    assert (UseCompressedOops, "should be compressed");
  23.428 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.429  #ifdef ASSERT
  23.430    if (CheckCompressedOops) {
  23.431      Label ok;
  23.432 @@ -7809,18 +8120,32 @@
  23.433    if (dst != src) {
  23.434      movq(dst, src);
  23.435    }
  23.436 -  subq(dst, r12_heapbase);
  23.437 -  shrq(dst, LogMinObjAlignmentInBytes);
  23.438 +  if (Universe::narrow_oop_base() != NULL) {
  23.439 +    subq(dst, r12_heapbase);
  23.440 +  }
  23.441 +  if (Universe::narrow_oop_shift() != 0) {
  23.442 +    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  23.443 +    shrq(dst, LogMinObjAlignmentInBytes);
  23.444 +  }
  23.445  }
  23.446  
  23.447  void  MacroAssembler::decode_heap_oop(Register r) {
  23.448    assert (UseCompressedOops, "should be compressed");
  23.449 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.450 +  if (Universe::narrow_oop_base() == NULL) {
  23.451 +    if (Universe::narrow_oop_shift() != 0) {
  23.452 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  23.453 +      shlq(r, LogMinObjAlignmentInBytes);
  23.454 +    }
  23.455 +    verify_oop(r, "broken oop in decode_heap_oop");
  23.456 +    return;
  23.457 +  }
  23.458  #ifdef ASSERT
  23.459    if (CheckCompressedOops) {
  23.460      Label ok;
  23.461      push(rscratch1);
  23.462      cmpptr(r12_heapbase,
  23.463 -           ExternalAddress((address)Universe::heap_base_addr()));
  23.464 +           ExternalAddress((address)Universe::narrow_oop_base_addr()));
  23.465      jcc(Assembler::equal, ok);
  23.466      stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
  23.467      bind(ok);
  23.468 @@ -7844,32 +8169,76 @@
  23.469  
  23.470  void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  23.471    assert (UseCompressedOops, "should only be used for compressed headers");
  23.472 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.473    // Cannot assert, unverified entry point counts instructions (see .ad file)
  23.474    // vtableStubs also counts instructions in pd_code_size_limit.
  23.475    // Also do not verify_oop as this is called by verify_oop.
  23.476 -  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  23.477 -  leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  23.478 +  if (Universe::narrow_oop_base() == NULL) {
  23.479 +    if (Universe::narrow_oop_shift() != 0) {
  23.480 +      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
  23.481 +      shlq(r, LogMinObjAlignmentInBytes);
  23.482 +    }
  23.483 +  } else {
  23.484 +      assert (Address::times_8 == LogMinObjAlignmentInBytes &&
  23.485 +              Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  23.486 +    leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
  23.487 +  }
  23.488  }
  23.489  
  23.490  void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  23.491    assert (UseCompressedOops, "should only be used for compressed headers");
  23.492 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.493    // Cannot assert, unverified entry point counts instructions (see .ad file)
  23.494    // vtableStubs also counts instructions in pd_code_size_limit.
  23.495    // Also do not verify_oop as this is called by verify_oop.
  23.496 -  assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong");
  23.497 -  leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  23.498 +  if (Universe::narrow_oop_shift() != 0) {
  23.499 +    assert (Address::times_8 == LogMinObjAlignmentInBytes &&
  23.500 +            Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
  23.501 +    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
  23.502 +  } else if (dst != src) {
  23.503 +    movq(dst, src);
  23.504 +  }
  23.505  }
  23.506  
  23.507  void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  23.508 -  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  23.509 +  assert (UseCompressedOops, "should only be used for compressed headers");
  23.510 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.511 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  23.512    int oop_index = oop_recorder()->find_index(obj);
  23.513    RelocationHolder rspec = oop_Relocation::spec(oop_index);
  23.514 -  mov_literal32(dst, oop_index, rspec, narrow_oop_operand);
  23.515 +  mov_narrow_oop(dst, oop_index, rspec);
  23.516 +}
  23.517 +
  23.518 +void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  23.519 +  assert (UseCompressedOops, "should only be used for compressed headers");
  23.520 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.521 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  23.522 +  int oop_index = oop_recorder()->find_index(obj);
  23.523 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  23.524 +  mov_narrow_oop(dst, oop_index, rspec);
  23.525 +}
  23.526 +
  23.527 +void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  23.528 +  assert (UseCompressedOops, "should only be used for compressed headers");
  23.529 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.530 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  23.531 +  int oop_index = oop_recorder()->find_index(obj);
  23.532 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  23.533 +  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  23.534 +}
  23.535 +
  23.536 +void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  23.537 +  assert (UseCompressedOops, "should only be used for compressed headers");
  23.538 +  assert (Universe::heap() != NULL, "java heap should be initialized");
  23.539 +  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  23.540 +  int oop_index = oop_recorder()->find_index(obj);
  23.541 +  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  23.542 +  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
  23.543  }
  23.544  
  23.545  void MacroAssembler::reinit_heapbase() {
  23.546    if (UseCompressedOops) {
  23.547 -    movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr()));
  23.548 +    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  23.549    }
  23.550  }
  23.551  #endif // _LP64
    24.1 --- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    24.2 +++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    24.3 @@ -578,20 +578,25 @@
    24.4  
    24.5    // These are all easily abused and hence protected
    24.6  
    24.7 -  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
    24.8 -
    24.9    // 32BIT ONLY SECTION
   24.10  #ifndef _LP64
   24.11    // Make these disappear in 64bit mode since they would never be correct
   24.12    void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec);   // 32BIT ONLY
   24.13    void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   24.14  
   24.15 +  void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec);    // 32BIT ONLY
   24.16    void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec);     // 32BIT ONLY
   24.17  
   24.18    void push_literal32(int32_t imm32, RelocationHolder const& rspec);                 // 32BIT ONLY
   24.19  #else
   24.20    // 64BIT ONLY SECTION
   24.21    void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec);   // 64BIT ONLY
   24.22 +
   24.23 +  void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
   24.24 +  void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
   24.25 +
   24.26 +  void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
   24.27 +  void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
   24.28  #endif // _LP64
   24.29  
   24.30    // These are unique in that we are ensured by the caller that the 32bit
   24.31 @@ -1219,6 +1224,14 @@
   24.32    void popq(Address dst);
   24.33  #endif
   24.34  
   24.35 +  void popcntl(Register dst, Address src);
   24.36 +  void popcntl(Register dst, Register src);
   24.37 +
   24.38 +#ifdef _LP64
   24.39 +  void popcntq(Register dst, Address src);
   24.40 +  void popcntq(Register dst, Register src);
   24.41 +#endif
   24.42 +
   24.43    // Prefetches (SSE, SSE2, 3DNOW only)
   24.44  
   24.45    void prefetchnta(Address src);
   24.46 @@ -1647,6 +1660,9 @@
   24.47    void decode_heap_oop_not_null(Register dst, Register src);
   24.48  
   24.49    void set_narrow_oop(Register dst, jobject obj);
   24.50 +  void set_narrow_oop(Address dst, jobject obj);
   24.51 +  void cmp_narrow_oop(Register dst, jobject obj);
   24.52 +  void cmp_narrow_oop(Address dst, jobject obj);
   24.53  
   24.54    // if heap base register is used - reinit it with the correct value
   24.55    void reinit_heapbase();
   24.56 @@ -1791,6 +1807,40 @@
   24.57                                 Register scan_temp,
   24.58                                 Label& no_such_interface);
   24.59  
   24.60 +  // Test sub_klass against super_klass, with fast and slow paths.
   24.61 +
   24.62 +  // The fast path produces a tri-state answer: yes / no / maybe-slow.
   24.63 +  // One of the three labels can be NULL, meaning take the fall-through.
   24.64 +  // If super_check_offset is -1, the value is loaded up from super_klass.
   24.65 +  // No registers are killed, except temp_reg.
   24.66 +  void check_klass_subtype_fast_path(Register sub_klass,
   24.67 +                                     Register super_klass,
   24.68 +                                     Register temp_reg,
   24.69 +                                     Label* L_success,
   24.70 +                                     Label* L_failure,
   24.71 +                                     Label* L_slow_path,
   24.72 +                RegisterConstant super_check_offset = RegisterConstant(-1));
   24.73 +
   24.74 +  // The rest of the type check; must be wired to a corresponding fast path.
   24.75 +  // It does not repeat the fast path logic, so don't use it standalone.
   24.76 +  // The temp_reg and temp2_reg can be noreg, if no temps are available.
   24.77 +  // Updates the sub's secondary super cache as necessary.
   24.78 +  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
   24.79 +  void check_klass_subtype_slow_path(Register sub_klass,
   24.80 +                                     Register super_klass,
   24.81 +                                     Register temp_reg,
   24.82 +                                     Register temp2_reg,
   24.83 +                                     Label* L_success,
   24.84 +                                     Label* L_failure,
   24.85 +                                     bool set_cond_codes = false);
   24.86 +
   24.87 +  // Simplified, combined version, good for typical uses.
   24.88 +  // Falls through on failure.
   24.89 +  void check_klass_subtype(Register sub_klass,
   24.90 +                           Register super_klass,
   24.91 +                           Register temp_reg,
   24.92 +                           Label& L_success);
   24.93 +
   24.94    //----
   24.95    void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
   24.96  
    25.1 --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
    25.2 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
    25.3 @@ -1598,18 +1598,9 @@
    25.4  
    25.5      // get instance klass
    25.6      __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
    25.7 -    // get super_check_offset
    25.8 -    __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
    25.9 -    // See if we get an immediate positive hit
   25.10 -    __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
   25.11 -    __ jcc(Assembler::equal, done);
   25.12 -    // check for immediate negative hit
   25.13 -    __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   25.14 -    __ jcc(Assembler::notEqual, *stub->entry());
   25.15 -    // check for self
   25.16 -    __ cmpptr(klass_RInfo, k_RInfo);
   25.17 -    __ jcc(Assembler::equal, done);
   25.18 -
   25.19 +    // perform the fast part of the checking logic
   25.20 +    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
   25.21 +    // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   25.22      __ push(klass_RInfo);
   25.23      __ push(k_RInfo);
   25.24      __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
   25.25 @@ -1735,17 +1726,9 @@
   25.26          }
   25.27          __ bind(done);
   25.28        } else {
   25.29 -        __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
   25.30 -        // See if we get an immediate positive hit
   25.31 -        __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
   25.32 -        __ jcc(Assembler::equal, done);
   25.33 -        // check for immediate negative hit
   25.34 -        __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   25.35 -        __ jcc(Assembler::notEqual, *stub->entry());
   25.36 -        // check for self
   25.37 -        __ cmpptr(klass_RInfo, k_RInfo);
   25.38 -        __ jcc(Assembler::equal, done);
   25.39 -
   25.40 +        // perform the fast part of the checking logic
   25.41 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
   25.42 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   25.43          __ push(klass_RInfo);
   25.44          __ push(k_RInfo);
   25.45          __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
   25.46 @@ -1821,23 +1804,15 @@
   25.47            __ pop(dst);
   25.48            __ jmp(done);
   25.49          }
   25.50 -      } else {
   25.51 -#else
   25.52 -      { // YUCK
   25.53 +      }
   25.54 +        else // next block is unconditional if LP64:
   25.55  #endif // LP64
   25.56 +      {
   25.57          assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
   25.58  
   25.59 -        __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
   25.60 -        // See if we get an immediate positive hit
   25.61 -        __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1));
   25.62 -        __ jcc(Assembler::equal, one);
   25.63 -        // check for immediate negative hit
   25.64 -        __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   25.65 -        __ jcc(Assembler::notEqual, zero);
   25.66 -        // check for self
   25.67 -        __ cmpptr(klass_RInfo, k_RInfo);
   25.68 -        __ jcc(Assembler::equal, one);
   25.69 -
   25.70 +        // perform the fast part of the checking logic
   25.71 +        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
   25.72 +        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
   25.73          __ push(klass_RInfo);
   25.74          __ push(k_RInfo);
   25.75          __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
    26.1 --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
    26.2 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
    26.3 @@ -1354,6 +1354,13 @@
    26.4  
    26.5      case slow_subtype_check_id:
    26.6        {
    26.7 +        // Typical calling sequence:
    26.8 +        // __ push(klass_RInfo);  // object klass or other subclass
    26.9 +        // __ push(sup_k_RInfo);  // array element klass or other superclass
   26.10 +        // __ call(slow_subtype_check);
   26.11 +        // Note that the subclass is pushed first, and is therefore deepest.
   26.12 +        // Previous versions of this code reversed the names 'sub' and 'super'.
   26.13 +        // This was operationally harmless but made the code unreadable.
   26.14          enum layout {
   26.15            rax_off, SLOT2(raxH_off)
   26.16            rcx_off, SLOT2(rcxH_off)
   26.17 @@ -1361,9 +1368,10 @@
   26.18            rdi_off, SLOT2(rdiH_off)
   26.19            // saved_rbp_off, SLOT2(saved_rbpH_off)
   26.20            return_off, SLOT2(returnH_off)
   26.21 -          sub_off, SLOT2(subH_off)
   26.22 -          super_off, SLOT2(superH_off)
   26.23 -          framesize
   26.24 +          sup_k_off, SLOT2(sup_kH_off)
   26.25 +          klass_off, SLOT2(superH_off)
   26.26 +          framesize,
   26.27 +          result_off = klass_off  // deepest argument is also the return value
   26.28          };
   26.29  
   26.30          __ set_info("slow_subtype_check", dont_gc_arguments);
   26.31 @@ -1373,19 +1381,14 @@
   26.32          __ push(rax);
   26.33  
   26.34          // This is called by pushing args and not with C abi
   26.35 -        __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super
   26.36 -        __ movptr(rax, Address(rsp, (sub_off  ) * VMRegImpl::stack_slot_size)); // sub
   26.37 -
   26.38 -        __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
   26.39 -        // since size is postive movl does right thing on 64bit
   26.40 -        __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   26.41 -        __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   26.42 +        __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
   26.43 +        __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
   26.44  
   26.45          Label miss;
   26.46 -        __ repne_scan();
   26.47 -        __ jcc(Assembler::notEqual, miss);
   26.48 -        __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax);
   26.49 -        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result
   26.50 +        __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
   26.51 +
   26.52 +        // fallthrough on success:
   26.53 +        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
   26.54          __ pop(rax);
   26.55          __ pop(rcx);
   26.56          __ pop(rsi);
   26.57 @@ -1393,7 +1396,7 @@
   26.58          __ ret(0);
   26.59  
   26.60          __ bind(miss);
   26.61 -        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
   26.62 +        __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
   26.63          __ pop(rax);
   26.64          __ pop(rcx);
   26.65          __ pop(rsi);
    27.1 --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Mar 18 11:37:48 2009 -0400
    27.2 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Thu Mar 19 09:13:24 2009 -0700
    27.3 @@ -219,47 +219,16 @@
    27.4    // Resets EDI to locals.  Register sub_klass cannot be any of the above.
    27.5  void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
    27.6    assert( Rsub_klass != rax, "rax, holds superklass" );
    27.7 -  assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" );
    27.8 -  assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" );
    27.9 -  Label not_subtype, loop;
   27.10 +  assert( Rsub_klass != rcx, "used as a temp" );
   27.11 +  assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
   27.12  
   27.13    // Profile the not-null value's klass.
   27.14 -  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
   27.15 +  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
   27.16  
   27.17 -  // Load the super-klass's check offset into ECX
   27.18 -  movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) );
   27.19 -  // Load from the sub-klass's super-class display list, or a 1-word cache of
   27.20 -  // the secondary superclass list, or a failing value with a sentinel offset
   27.21 -  // if the super-klass is an interface or exceptionally deep in the Java
   27.22 -  // hierarchy and we have to scan the secondary superclass list the hard way.
   27.23 -  // See if we get an immediate positive hit
   27.24 -  cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
   27.25 -  jcc( Assembler::equal,ok_is_subtype );
   27.26 +  // Do the check.
   27.27 +  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
   27.28  
   27.29 -  // Check for immediate negative hit
   27.30 -  cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
   27.31 -  jcc( Assembler::notEqual, not_subtype );
   27.32 -  // Check for self
   27.33 -  cmpptr( Rsub_klass, rax );
   27.34 -  jcc( Assembler::equal, ok_is_subtype );
   27.35 -
   27.36 -  // Now do a linear scan of the secondary super-klass chain.
   27.37 -  movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
   27.38 -  // EDI holds the objArrayOop of secondary supers.
   27.39 -  movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
   27.40 -  // Skip to start of data; also clear Z flag incase ECX is zero
   27.41 -  addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
   27.42 -  // Scan ECX words at [EDI] for occurance of EAX
   27.43 -  // Set NZ/Z based on last compare
   27.44 -  repne_scan();
   27.45 -  restore_locals();           // Restore EDI; Must not blow flags
   27.46 -  // Not equal?
   27.47 -  jcc( Assembler::notEqual, not_subtype );
   27.48 -  // Must be equal but missed in cache.  Update cache.
   27.49 -  movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
   27.50 -  jmp( ok_is_subtype );
   27.51 -
   27.52 -  bind(not_subtype);
   27.53 +  // Profile the failure of the check.
   27.54    profile_typecheck_failed(rcx); // blows rcx
   27.55  }
   27.56  
    28.1 --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
    28.2 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
    28.3 @@ -232,65 +232,13 @@
    28.4    assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
    28.5    assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
    28.6  
    28.7 -  Label not_subtype, not_subtype_pop, loop;
    28.8 +  // Profile the not-null value's klass.
    28.9 +  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
   28.10  
   28.11 -  // Profile the not-null value's klass.
   28.12 -  profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
   28.13 +  // Do the check.
   28.14 +  check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
   28.15  
   28.16 -  // Load the super-klass's check offset into rcx
   28.17 -  movl(rcx, Address(rax, sizeof(oopDesc) +
   28.18 -                    Klass::super_check_offset_offset_in_bytes()));
   28.19 -  // Load from the sub-klass's super-class display list, or a 1-word
   28.20 -  // cache of the secondary superclass list, or a failing value with a
   28.21 -  // sentinel offset if the super-klass is an interface or
   28.22 -  // exceptionally deep in the Java hierarchy and we have to scan the
   28.23 -  // secondary superclass list the hard way.  See if we get an
   28.24 -  // immediate positive hit
   28.25 -  cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
   28.26 -  jcc(Assembler::equal,ok_is_subtype);
   28.27 -
   28.28 -  // Check for immediate negative hit
   28.29 -  cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
   28.30 -  jcc( Assembler::notEqual, not_subtype );
   28.31 -  // Check for self
   28.32 -  cmpptr(Rsub_klass, rax);
   28.33 -  jcc(Assembler::equal, ok_is_subtype);
   28.34 -
   28.35 -  // Now do a linear scan of the secondary super-klass chain.
   28.36 -  movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
   28.37 -                      Klass::secondary_supers_offset_in_bytes()));
   28.38 -  // rdi holds the objArrayOop of secondary supers.
   28.39 -  // Load the array length
   28.40 -  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   28.41 -  // Skip to start of data; also clear Z flag incase rcx is zero
   28.42 -  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   28.43 -  // Scan rcx words at [rdi] for occurance of rax
   28.44 -  // Set NZ/Z based on last compare
   28.45 -
   28.46 -  // this part is kind tricky, as values in supers array could be 32 or 64 bit wide
   28.47 -  // and we store values in objArrays always encoded, thus we need to encode value
   28.48 -  // before repne
   28.49 -  if (UseCompressedOops) {
   28.50 -    push(rax);
   28.51 -    encode_heap_oop(rax);
   28.52 -    repne_scanl();
   28.53 -    // Not equal?
   28.54 -    jcc(Assembler::notEqual, not_subtype_pop);
   28.55 -    // restore heap oop here for movq
   28.56 -    pop(rax);
   28.57 -  } else {
   28.58 -    repne_scan();
   28.59 -    jcc(Assembler::notEqual, not_subtype);
   28.60 -  }
   28.61 -  // Must be equal but missed in cache.  Update cache.
   28.62 -  movptr(Address(Rsub_klass, sizeof(oopDesc) +
   28.63 -               Klass::secondary_super_cache_offset_in_bytes()), rax);
   28.64 -  jmp(ok_is_subtype);
   28.65 -
   28.66 -  bind(not_subtype_pop);
   28.67 -  // restore heap oop here for miss
   28.68 -  if (UseCompressedOops) pop(rax);
   28.69 -  bind(not_subtype);
   28.70 +  // Profile the failure of the check.
   28.71    profile_typecheck_failed(rcx); // blows rcx
   28.72  }
   28.73  
    29.1 --- a/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
    29.2 +++ b/src/cpu/x86/vm/interpreterRT_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
    29.3 @@ -349,7 +349,7 @@
    29.4  
    29.5      if (_num_args < Argument::n_float_register_parameters_c-1) {
    29.6        *_reg_args++ = from_obj;
    29.7 -      *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float
    29.8 +      *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float
    29.9        _num_args++;
   29.10      } else {
   29.11        *_to++ = from_obj;
   29.12 @@ -364,7 +364,7 @@
   29.13  
   29.14      if (_num_args < Argument::n_float_register_parameters_c-1) {
   29.15        *_reg_args++ = from_obj;
   29.16 -      *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double
   29.17 +      *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double
   29.18        _num_args++;
   29.19      } else {
   29.20        *_to++ = from_obj;
    30.1 --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Mar 18 11:37:48 2009 -0400
    30.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Mar 19 09:13:24 2009 -0700
    30.3 @@ -1310,81 +1310,51 @@
    30.4                             Address& super_check_offset_addr,
    30.5                             Address& super_klass_addr,
    30.6                             Register temp,
    30.7 -                           Label* L_success_ptr, Label* L_failure_ptr) {
    30.8 +                           Label* L_success, Label* L_failure) {
    30.9      BLOCK_COMMENT("type_check:");
   30.10  
   30.11      Label L_fallthrough;
   30.12 -    bool fall_through_on_success = (L_success_ptr == NULL);
   30.13 -    if (fall_through_on_success) {
   30.14 -      L_success_ptr = &L_fallthrough;
   30.15 -    } else {
   30.16 -      L_failure_ptr = &L_fallthrough;
   30.17 -    }
   30.18 -    Label& L_success = *L_success_ptr;
   30.19 -    Label& L_failure = *L_failure_ptr;
   30.20 +#define LOCAL_JCC(assembler_con, label_ptr)                             \
   30.21 +    if (label_ptr != NULL)  __ jcc(assembler_con, *(label_ptr));        \
   30.22 +    else                    __ jcc(assembler_con, L_fallthrough) /*omit semi*/
   30.23  
   30.24 +    // The following is a strange variation of the fast path which requires
   30.25 +    // one less register, because needed values are on the argument stack.
   30.26 +    // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
   30.27 +    //                                  L_success, L_failure, NULL);
   30.28      assert_different_registers(sub_klass, temp);
   30.29  
   30.30 -    // a couple of useful fields in sub_klass:
   30.31 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
   30.32 -                     Klass::secondary_supers_offset_in_bytes());
   30.33      int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   30.34                       Klass::secondary_super_cache_offset_in_bytes());
   30.35 -    Address secondary_supers_addr(sub_klass, ss_offset);
   30.36 -    Address super_cache_addr(     sub_klass, sc_offset);
   30.37  
   30.38      // if the pointers are equal, we are done (e.g., String[] elements)
   30.39      __ cmpptr(sub_klass, super_klass_addr);
   30.40 -    __ jcc(Assembler::equal, L_success);
   30.41 +    LOCAL_JCC(Assembler::equal, L_success);
   30.42  
   30.43      // check the supertype display:
   30.44      __ movl2ptr(temp, super_check_offset_addr);
   30.45      Address super_check_addr(sub_klass, temp, Address::times_1, 0);
   30.46      __ movptr(temp, super_check_addr); // load displayed supertype
   30.47      __ cmpptr(temp, super_klass_addr); // test the super type
   30.48 -    __ jcc(Assembler::equal, L_success);
   30.49 +    LOCAL_JCC(Assembler::equal, L_success);
   30.50  
   30.51      // if it was a primary super, we can just fail immediately
   30.52      __ cmpl(super_check_offset_addr, sc_offset);
   30.53 -    __ jcc(Assembler::notEqual, L_failure);
   30.54 +    LOCAL_JCC(Assembler::notEqual, L_failure);
   30.55  
   30.56 -    // Now do a linear scan of the secondary super-klass chain.
   30.57 -    // This code is rarely used, so simplicity is a virtue here.
   30.58 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   30.59 -    {
   30.60 -      // The repne_scan instruction uses fixed registers, which we must spill.
   30.61 -      // (We need a couple more temps in any case.)
   30.62 -      __ push(rax);
   30.63 -      __ push(rcx);
   30.64 -      __ push(rdi);
   30.65 -      assert_different_registers(sub_klass, rax, rcx, rdi);
   30.66 +    // The repne_scan instruction uses fixed registers, which will get spilled.
   30.67 +    // We happen to know this works best when super_klass is in rax.
   30.68 +    Register super_klass = temp;
   30.69 +    __ movptr(super_klass, super_klass_addr);
   30.70 +    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
   30.71 +                                     L_success, L_failure);
   30.72  
   30.73 -      __ movptr(rdi, secondary_supers_addr);
   30.74 -      // Load the array length.
   30.75 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   30.76 -      // Skip to start of data.
   30.77 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   30.78 -      // Scan rcx words at [edi] for occurance of rax,
   30.79 -      // Set NZ/Z based on last compare
   30.80 -      __ movptr(rax, super_klass_addr);
   30.81 -      __ repne_scan();
   30.82 +    __ bind(L_fallthrough);
   30.83  
   30.84 -      // Unspill the temp. registers:
   30.85 -      __ pop(rdi);
   30.86 -      __ pop(rcx);
   30.87 -      __ pop(rax);
   30.88 -    }
   30.89 -    __ jcc(Assembler::notEqual, L_failure);
   30.90 +    if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
   30.91 +    if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
   30.92  
   30.93 -    // Success.  Cache the super we found and proceed in triumph.
   30.94 -    __ movptr(temp, super_klass_addr); // note: rax, is dead
   30.95 -    __ movptr(super_cache_addr, temp);
   30.96 -
   30.97 -    if (!fall_through_on_success)
   30.98 -      __ jmp(L_success);
   30.99 -
  30.100 -    // Fall through on failure!
  30.101 -    __ bind(L_fallthrough);
  30.102 +#undef LOCAL_JCC
  30.103    }
  30.104  
  30.105    //
    31.1 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Mar 18 11:37:48 2009 -0400
    31.2 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Mar 19 09:13:24 2009 -0700
    31.3 @@ -2091,66 +2091,9 @@
    31.4  
    31.5      Label L_miss;
    31.6  
    31.7 -    // a couple of useful fields in sub_klass:
    31.8 -    int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
    31.9 -                     Klass::secondary_supers_offset_in_bytes());
   31.10 -    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
   31.11 -                     Klass::secondary_super_cache_offset_in_bytes());
   31.12 -    Address secondary_supers_addr(sub_klass, ss_offset);
   31.13 -    Address super_cache_addr(     sub_klass, sc_offset);
   31.14 -
   31.15 -    // if the pointers are equal, we are done (e.g., String[] elements)
   31.16 -    __ cmpptr(super_klass, sub_klass);
   31.17 -    __ jcc(Assembler::equal, L_success);
   31.18 -
   31.19 -    // check the supertype display:
   31.20 -    Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
   31.21 -    __ cmpptr(super_klass, super_check_addr); // test the super type
   31.22 -    __ jcc(Assembler::equal, L_success);
   31.23 -
   31.24 -    // if it was a primary super, we can just fail immediately
   31.25 -    __ cmpl(super_check_offset, sc_offset);
   31.26 -    __ jcc(Assembler::notEqual, L_miss);
   31.27 -
   31.28 -    // Now do a linear scan of the secondary super-klass chain.
   31.29 -    // The repne_scan instruction uses fixed registers, which we must spill.
   31.30 -    // (We need a couple more temps in any case.)
   31.31 -    // This code is rarely used, so simplicity is a virtue here.
   31.32 -    inc_counter_np(SharedRuntime::_partial_subtype_ctr);
   31.33 -    {
   31.34 -      __ push(rax);
   31.35 -      __ push(rcx);
   31.36 -      __ push(rdi);
   31.37 -      assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
   31.38 -
   31.39 -      __ movptr(rdi, secondary_supers_addr);
   31.40 -      // Load the array length.
   31.41 -      __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
   31.42 -      // Skip to start of data.
   31.43 -      __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   31.44 -      // Scan rcx words at [rdi] for occurance of rax
   31.45 -      // Set NZ/Z based on last compare
   31.46 -      __ movptr(rax, super_klass);
   31.47 -      if (UseCompressedOops) {
   31.48 -        // Compare against compressed form.  Don't need to uncompress because
   31.49 -        // looks like orig rax is restored in popq below.
   31.50 -        __ encode_heap_oop(rax);
   31.51 -        __ repne_scanl();
   31.52 -      } else {
   31.53 -        __ repne_scan();
   31.54 -      }
   31.55 -
   31.56 -      // Unspill the temp. registers:
   31.57 -      __ pop(rdi);
   31.58 -      __ pop(rcx);
   31.59 -      __ pop(rax);
   31.60 -
   31.61 -      __ jcc(Assembler::notEqual, L_miss);
   31.62 -    }
   31.63 -
   31.64 -    // Success.  Cache the super we found and proceed in triumph.
   31.65 -    __ movptr(super_cache_addr, super_klass); // note: rax is dead
   31.66 -    __ jmp(L_success);
   31.67 +    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
   31.68 +                                     super_check_offset);
   31.69 +    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
   31.70  
   31.71      // Fall through on failure!
   31.72      __ BIND(L_miss);
    32.1 --- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Mar 18 11:37:48 2009 -0400
    32.2 +++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Mar 19 09:13:24 2009 -0700
    32.3 @@ -284,7 +284,7 @@
    32.4    }
    32.5  
    32.6    char buf[256];
    32.7 -  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    32.8 +  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
    32.9                 cores_per_cpu(), threads_per_core(),
   32.10                 cpu_family(), _model, _stepping,
   32.11                 (supports_cmov() ? ", cmov" : ""),
   32.12 @@ -297,6 +297,7 @@
   32.13                 (supports_ssse3()? ", ssse3": ""),
   32.14                 (supports_sse4_1() ? ", sse4.1" : ""),
   32.15                 (supports_sse4_2() ? ", sse4.2" : ""),
   32.16 +               (supports_popcnt() ? ", popcnt" : ""),
   32.17                 (supports_mmx_ext() ? ", mmxext" : ""),
   32.18                 (supports_3dnow()   ? ", 3dnow"  : ""),
   32.19                 (supports_3dnow2()  ? ", 3dnowext" : ""),
   32.20 @@ -410,6 +411,13 @@
   32.21      }
   32.22    }
   32.23  
   32.24 +  // Use population count instruction if available.
   32.25 +  if (supports_popcnt()) {
   32.26 +    if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
   32.27 +      UsePopCountInstruction = true;
   32.28 +    }
   32.29 +  }
   32.30 +
   32.31    assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   32.32    assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
   32.33  
    33.1 --- a/src/cpu/x86/vm/vm_version_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    33.2 +++ b/src/cpu/x86/vm/vm_version_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    33.3 @@ -70,7 +70,9 @@
    33.4                 dca      : 1,
    33.5                 sse4_1   : 1,
    33.6                 sse4_2   : 1,
    33.7 -                        : 11;
    33.8 +                        : 2,
    33.9 +               popcnt   : 1,
   33.10 +                        : 8;
   33.11      } bits;
   33.12    };
   33.13  
   33.14 @@ -179,7 +181,8 @@
   33.15       CPU_SSSE3  = (1 << 9),
   33.16       CPU_SSE4A  = (1 << 10),
   33.17       CPU_SSE4_1 = (1 << 11),
   33.18 -     CPU_SSE4_2 = (1 << 12)
   33.19 +     CPU_SSE4_2 = (1 << 12),
   33.20 +     CPU_POPCNT = (1 << 13)
   33.21     } cpuFeatureFlags;
   33.22  
   33.23    // cpuid information block.  All info derived from executing cpuid with
   33.24 @@ -290,6 +293,8 @@
   33.25        result |= CPU_SSE4_1;
   33.26      if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
   33.27        result |= CPU_SSE4_2;
   33.28 +    if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
   33.29 +      result |= CPU_POPCNT;
   33.30      return result;
   33.31    }
   33.32  
   33.33 @@ -379,6 +384,7 @@
   33.34    static bool supports_ssse3()    { return (_cpuFeatures & CPU_SSSE3)!= 0; }
   33.35    static bool supports_sse4_1()   { return (_cpuFeatures & CPU_SSE4_1) != 0; }
   33.36    static bool supports_sse4_2()   { return (_cpuFeatures & CPU_SSE4_2) != 0; }
   33.37 +  static bool supports_popcnt()   { return (_cpuFeatures & CPU_POPCNT) != 0; }
   33.38    //
   33.39    // AMD features
   33.40    //
    34.1 --- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 18 11:37:48 2009 -0400
    34.2 +++ b/src/cpu/x86/vm/x86_32.ad	Thu Mar 19 09:13:24 2009 -0700
    34.3 @@ -1483,16 +1483,20 @@
    34.4    // main source block for now.  In future, we can generalize this by
    34.5    // adding a syntax that specifies the sizes of fields in an order,
    34.6    // so that the adlc can build the emit functions automagically
    34.7 -  enc_class OpcP %{             // Emit opcode
    34.8 -    emit_opcode(cbuf,$primary);
    34.9 -  %}
   34.10 -
   34.11 -  enc_class OpcS %{             // Emit opcode
   34.12 -    emit_opcode(cbuf,$secondary);
   34.13 -  %}
   34.14 -
   34.15 -  enc_class Opcode(immI d8 ) %{ // Emit opcode
   34.16 -    emit_opcode(cbuf,$d8$$constant);
   34.17 +
   34.18 +  // Emit primary opcode
   34.19 +  enc_class OpcP %{
   34.20 +    emit_opcode(cbuf, $primary);
   34.21 +  %}
   34.22 +
   34.23 +  // Emit secondary opcode
   34.24 +  enc_class OpcS %{
   34.25 +    emit_opcode(cbuf, $secondary);
   34.26 +  %}
   34.27 +
   34.28 +  // Emit opcode directly
   34.29 +  enc_class Opcode(immI d8) %{
   34.30 +    emit_opcode(cbuf, $d8$$constant);
   34.31    %}
   34.32  
   34.33    enc_class SizePrefix %{
   34.34 @@ -1688,26 +1692,15 @@
   34.35      Register Reax = as_Register(EAX_enc); // super class
   34.36      Register Recx = as_Register(ECX_enc); // killed
   34.37      Register Resi = as_Register(ESI_enc); // sub class
   34.38 -    Label hit, miss;
   34.39 +    Label miss;
   34.40  
   34.41      MacroAssembler _masm(&cbuf);
   34.42 -    // Compare super with sub directly, since super is not in its own SSA.
   34.43 -    // The compiler used to emit this test, but we fold it in here,
   34.44 -    // to allow platform-specific tweaking on sparc.
   34.45 -    __ cmpptr(Reax, Resi);
   34.46 -    __ jcc(Assembler::equal, hit);
   34.47 -#ifndef PRODUCT
   34.48 -    __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
   34.49 -#endif //PRODUCT
   34.50 -    __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
   34.51 -    __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
   34.52 -    __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   34.53 -    __ repne_scan();
   34.54 -    __ jcc(Assembler::notEqual, miss);
   34.55 -    __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
   34.56 -    __ bind(hit);
   34.57 -    if( $primary )
   34.58 -      __ xorptr(Redi,Redi);
   34.59 +    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
   34.60 +                                     NULL, &miss,
   34.61 +                                     /*set_cond_codes:*/ true);
   34.62 +    if ($primary) {
   34.63 +      __ xorptr(Redi, Redi);
   34.64 +    }
   34.65      __ bind(miss);
   34.66    %}
   34.67  
   34.68 @@ -6387,6 +6380,67 @@
   34.69  %}
   34.70  
   34.71  
   34.72 +//---------- Population Count Instructions -------------------------------------
   34.73 +
   34.74 +instruct popCountI(eRegI dst, eRegI src) %{
   34.75 +  predicate(UsePopCountInstruction);
   34.76 +  match(Set dst (PopCountI src));
   34.77 +
   34.78 +  format %{ "POPCNT $dst, $src" %}
   34.79 +  ins_encode %{
   34.80 +    __ popcntl($dst$$Register, $src$$Register);
   34.81 +  %}
   34.82 +  ins_pipe(ialu_reg);
   34.83 +%}
   34.84 +
   34.85 +instruct popCountI_mem(eRegI dst, memory mem) %{
   34.86 +  predicate(UsePopCountInstruction);
   34.87 +  match(Set dst (PopCountI (LoadI mem)));
   34.88 +
   34.89 +  format %{ "POPCNT $dst, $mem" %}
   34.90 +  ins_encode %{
   34.91 +    __ popcntl($dst$$Register, $mem$$Address);
   34.92 +  %}
   34.93 +  ins_pipe(ialu_reg);
   34.94 +%}
   34.95 +
   34.96 +// Note: Long.bitCount(long) returns an int.
   34.97 +instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
   34.98 +  predicate(UsePopCountInstruction);
   34.99 +  match(Set dst (PopCountL src));
  34.100 +  effect(KILL cr, TEMP tmp, TEMP dst);
  34.101 +
  34.102 +  format %{ "POPCNT $dst, $src.lo\n\t"
  34.103 +            "POPCNT $tmp, $src.hi\n\t"
  34.104 +            "ADD    $dst, $tmp" %}
  34.105 +  ins_encode %{
  34.106 +    __ popcntl($dst$$Register, $src$$Register);
  34.107 +    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
  34.108 +    __ addl($dst$$Register, $tmp$$Register);
  34.109 +  %}
  34.110 +  ins_pipe(ialu_reg);
  34.111 +%}
  34.112 +
  34.113 +// Note: Long.bitCount(long) returns an int.
  34.114 +instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  34.115 +  predicate(UsePopCountInstruction);
  34.116 +  match(Set dst (PopCountL (LoadL mem)));
  34.117 +  effect(KILL cr, TEMP tmp, TEMP dst);
  34.118 +
  34.119 +  format %{ "POPCNT $dst, $mem\n\t"
  34.120 +            "POPCNT $tmp, $mem+4\n\t"
  34.121 +            "ADD    $dst, $tmp" %}
  34.122 +  ins_encode %{
  34.123 +    //__ popcntl($dst$$Register, $mem$$Address$$first);
  34.124 +    //__ popcntl($tmp$$Register, $mem$$Address$$second);
  34.125 +    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
  34.126 +    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
  34.127 +    __ addl($dst$$Register, $tmp$$Register);
  34.128 +  %}
  34.129 +  ins_pipe(ialu_reg);
  34.130 +%}
  34.131 +
  34.132 +
  34.133  //----------Load/Store/Move Instructions---------------------------------------
  34.134  //----------Load Instructions--------------------------------------------------
  34.135  // Load Byte (8bit signed)
  34.136 @@ -12501,15 +12555,12 @@
  34.137    effect( KILL rcx, KILL cr );
  34.138  
  34.139    ins_cost(1100);  // slightly larger than the next version
  34.140 -  format %{ "CMPL   EAX,ESI\n\t"
  34.141 -            "JEQ,s  hit\n\t"
  34.142 -            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  34.143 +  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  34.144              "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
  34.145              "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
  34.146              "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
  34.147              "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
  34.148              "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
  34.149 -     "hit:\n\t"
  34.150              "XOR    $result,$result\t\t Hit: EDI zero\n\t"
  34.151       "miss:\t" %}
  34.152  
  34.153 @@ -12523,9 +12574,7 @@
  34.154    effect( KILL rcx, KILL result );
  34.155  
  34.156    ins_cost(1000);
  34.157 -  format %{ "CMPL   EAX,ESI\n\t"
  34.158 -            "JEQ,s  miss\t# Actually a hit; we are done.\n\t"
  34.159 -            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  34.160 +  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
  34.161              "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
  34.162              "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
  34.163              "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
    35.1 --- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 18 11:37:48 2009 -0400
    35.2 +++ b/src/cpu/x86/vm/x86_64.ad	Thu Mar 19 09:13:24 2009 -0700
    35.3 @@ -326,7 +326,6 @@
    35.4                           R9,  R9_H,
    35.5                           R10, R10_H,
    35.6                           R11, R11_H,
    35.7 -                         R12, R12_H,
    35.8                           R13, R13_H,
    35.9                           R14, R14_H);
   35.10  
   35.11 @@ -340,7 +339,6 @@
   35.12                           R9,  R9_H,
   35.13                           R10, R10_H,
   35.14                           R11, R11_H,
   35.15 -                         R12, R12_H,
   35.16                           R13, R13_H,
   35.17                           R14, R14_H);
   35.18  
   35.19 @@ -354,7 +352,6 @@
   35.20                               R9,  R9_H,
   35.21                               R10, R10_H,
   35.22                               R11, R11_H,
   35.23 -                             R12, R12_H,
   35.24                               R13, R13_H,
   35.25                               R14, R14_H);
   35.26  
   35.27 @@ -444,9 +441,6 @@
   35.28  // Singleton class for RDX long register
   35.29  reg_class long_rdx_reg(RDX, RDX_H);
   35.30  
   35.31 -// Singleton class for R12 long register
   35.32 -reg_class long_r12_reg(R12, R12_H);
   35.33 -
   35.34  // Class for all int registers (except RSP)
   35.35  reg_class int_reg(RAX,
   35.36                    RDX,
   35.37 @@ -1842,7 +1836,9 @@
   35.38  {
   35.39    if (UseCompressedOops) {
   35.40      st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
   35.41 -    st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
   35.42 +    if (Universe::narrow_oop_shift() != 0) {
   35.43 +      st->print_cr("leaq    rscratch1, [r12_heapbase, r, Address::times_8, 0]");
   35.44 +    }
   35.45      st->print_cr("cmpq    rax, rscratch1\t # Inline cache check");
   35.46    } else {
   35.47      st->print_cr("cmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
   35.48 @@ -1891,7 +1887,11 @@
   35.49  uint MachUEPNode::size(PhaseRegAlloc* ra_) const
   35.50  {
   35.51    if (UseCompressedOops) {
   35.52 -    return OptoBreakpoint ? 19 : 20;
   35.53 +    if (Universe::narrow_oop_shift() == 0) {
   35.54 +      return OptoBreakpoint ? 15 : 16;
   35.55 +    } else {
   35.56 +      return OptoBreakpoint ? 19 : 20;
   35.57 +    }
   35.58    } else {
   35.59      return OptoBreakpoint ? 11 : 12;
   35.60    }
   35.61 @@ -2575,45 +2575,13 @@
   35.62      Register Rrax = as_Register(RAX_enc); // super class
   35.63      Register Rrcx = as_Register(RCX_enc); // killed
   35.64      Register Rrsi = as_Register(RSI_enc); // sub class
   35.65 -    Label hit, miss, cmiss;
   35.66 +    Label miss;
   35.67 +    const bool set_cond_codes = true;
   35.68  
   35.69      MacroAssembler _masm(&cbuf);
   35.70 -    // Compare super with sub directly, since super is not in its own SSA.
   35.71 -    // The compiler used to emit this test, but we fold it in here,
   35.72 -    // to allow platform-specific tweaking on sparc.
   35.73 -    __ cmpptr(Rrax, Rrsi);
   35.74 -    __ jcc(Assembler::equal, hit);
   35.75 -#ifndef PRODUCT
   35.76 -    __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
   35.77 -    __ incrementl(Address(Rrcx, 0));
   35.78 -#endif //PRODUCT
   35.79 -    __ movptr(Rrdi, Address(Rrsi, 
   35.80 -                          sizeof(oopDesc) + 
   35.81 -                          Klass::secondary_supers_offset_in_bytes()));
   35.82 -    __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
   35.83 -    __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
   35.84 -    if (UseCompressedOops) {
   35.85 -      __ encode_heap_oop(Rrax);
   35.86 -      __ repne_scanl();
   35.87 -      __ jcc(Assembler::notEqual, cmiss);
   35.88 -      __ decode_heap_oop(Rrax);
   35.89 -      __ movptr(Address(Rrsi,
   35.90 -                      sizeof(oopDesc) +
   35.91 -                      Klass::secondary_super_cache_offset_in_bytes()),
   35.92 -              Rrax);
   35.93 -      __ jmp(hit);
   35.94 -      __ bind(cmiss);
   35.95 -      __ decode_heap_oop(Rrax);
   35.96 -      __ jmp(miss);
   35.97 -    } else {
   35.98 -      __ repne_scan();
   35.99 -      __ jcc(Assembler::notEqual, miss);
  35.100 -      __ movptr(Address(Rrsi,
  35.101 -                      sizeof(oopDesc) +
  35.102 -                      Klass::secondary_super_cache_offset_in_bytes()),
  35.103 -              Rrax);
  35.104 -    }
  35.105 -    __ bind(hit);
  35.106 +    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
  35.107 +                                     NULL, &miss,
  35.108 +                                     /*set_cond_codes:*/ true);
  35.109      if ($primary) {
  35.110        __ xorptr(Rrdi, Rrdi);
  35.111      }
  35.112 @@ -4906,15 +4874,6 @@
  35.113    interface(REG_INTER);
  35.114  %}
  35.115  
  35.116 -
  35.117 -operand r12RegL() %{
  35.118 -  constraint(ALLOC_IN_RC(long_r12_reg));
  35.119 -  match(RegL);
  35.120 -
  35.121 -  format %{ %}
  35.122 -  interface(REG_INTER);
  35.123 -%}
  35.124 -
  35.125  operand rRegN() %{
  35.126    constraint(ALLOC_IN_RC(int_reg));
  35.127    match(RegN);
  35.128 @@ -5289,21 +5248,6 @@
  35.129    %}
  35.130  %}
  35.131  
  35.132 -// Indirect Narrow Oop Plus Offset Operand
  35.133 -operand indNarrowOopOffset(rRegN src, immL32 off) %{
  35.134 -  constraint(ALLOC_IN_RC(ptr_reg));
  35.135 -  match(AddP (DecodeN src) off);
  35.136 -
  35.137 -  op_cost(10);
  35.138 -  format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
  35.139 -  interface(MEMORY_INTER) %{
  35.140 -    base(0xc); // R12
  35.141 -    index($src);
  35.142 -    scale(0x3);
  35.143 -    disp($off);
  35.144 -  %}
  35.145 -%}
  35.146 -
  35.147  // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
  35.148  operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
  35.149  %{
  35.150 @@ -5321,6 +5265,158 @@
  35.151    %}
  35.152  %}
  35.153  
  35.154 +// Indirect Narrow Oop Plus Offset Operand
  35.155 +// Note: x86 architecture doesn't support "scale * index + offset" without a base
  35.156 +// we can't free r12 even with Universe::narrow_oop_base() == NULL.
  35.157 +operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  35.158 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  35.159 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.160 +  match(AddP (DecodeN reg) off);
  35.161 +
  35.162 +  op_cost(10);
  35.163 +  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  35.164 +  interface(MEMORY_INTER) %{
  35.165 +    base(0xc); // R12
  35.166 +    index($reg);
  35.167 +    scale(0x3);
  35.168 +    disp($off);
  35.169 +  %}
  35.170 +%}
  35.171 +
  35.172 +// Indirect Memory Operand
  35.173 +operand indirectNarrow(rRegN reg)
  35.174 +%{
  35.175 +  predicate(Universe::narrow_oop_shift() == 0);
  35.176 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.177 +  match(DecodeN reg);
  35.178 +
  35.179 +  format %{ "[$reg]" %}
  35.180 +  interface(MEMORY_INTER) %{
  35.181 +    base($reg);
  35.182 +    index(0x4);
  35.183 +    scale(0x0);
  35.184 +    disp(0x0);
  35.185 +  %}
  35.186 +%}
  35.187 +
  35.188 +// Indirect Memory Plus Short Offset Operand
  35.189 +operand indOffset8Narrow(rRegN reg, immL8 off)
  35.190 +%{
  35.191 +  predicate(Universe::narrow_oop_shift() == 0);
  35.192 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.193 +  match(AddP (DecodeN reg) off);
  35.194 +
  35.195 +  format %{ "[$reg + $off (8-bit)]" %}
  35.196 +  interface(MEMORY_INTER) %{
  35.197 +    base($reg);
  35.198 +    index(0x4);
  35.199 +    scale(0x0);
  35.200 +    disp($off);
  35.201 +  %}
  35.202 +%}
  35.203 +
  35.204 +// Indirect Memory Plus Long Offset Operand
  35.205 +operand indOffset32Narrow(rRegN reg, immL32 off)
  35.206 +%{
  35.207 +  predicate(Universe::narrow_oop_shift() == 0);
  35.208 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.209 +  match(AddP (DecodeN reg) off);
  35.210 +
  35.211 +  format %{ "[$reg + $off (32-bit)]" %}
  35.212 +  interface(MEMORY_INTER) %{
  35.213 +    base($reg);
  35.214 +    index(0x4);
  35.215 +    scale(0x0);
  35.216 +    disp($off);
  35.217 +  %}
  35.218 +%}
  35.219 +
  35.220 +// Indirect Memory Plus Index Register Plus Offset Operand
  35.221 +operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
  35.222 +%{
  35.223 +  predicate(Universe::narrow_oop_shift() == 0);
  35.224 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.225 +  match(AddP (AddP (DecodeN reg) lreg) off);
  35.226 +
  35.227 +  op_cost(10);
  35.228 +  format %{"[$reg + $off + $lreg]" %}
  35.229 +  interface(MEMORY_INTER) %{
  35.230 +    base($reg);
  35.231 +    index($lreg);
  35.232 +    scale(0x0);
  35.233 +    disp($off);
  35.234 +  %}
  35.235 +%}
  35.236 +
  35.237 +// Indirect Memory Plus Index Register Plus Offset Operand
  35.238 +operand indIndexNarrow(rRegN reg, rRegL lreg)
  35.239 +%{
  35.240 +  predicate(Universe::narrow_oop_shift() == 0);
  35.241 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.242 +  match(AddP (DecodeN reg) lreg);
  35.243 +
  35.244 +  op_cost(10);
  35.245 +  format %{"[$reg + $lreg]" %}
  35.246 +  interface(MEMORY_INTER) %{
  35.247 +    base($reg);
  35.248 +    index($lreg);
  35.249 +    scale(0x0);
  35.250 +    disp(0x0);
  35.251 +  %}
  35.252 +%}
  35.253 +
  35.254 +// Indirect Memory Times Scale Plus Index Register
  35.255 +operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
  35.256 +%{
  35.257 +  predicate(Universe::narrow_oop_shift() == 0);
  35.258 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.259 +  match(AddP (DecodeN reg) (LShiftL lreg scale));
  35.260 +
  35.261 +  op_cost(10);
  35.262 +  format %{"[$reg + $lreg << $scale]" %}
  35.263 +  interface(MEMORY_INTER) %{
  35.264 +    base($reg);
  35.265 +    index($lreg);
  35.266 +    scale($scale);
  35.267 +    disp(0x0);
  35.268 +  %}
  35.269 +%}
  35.270 +
  35.271 +// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
  35.272 +operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
  35.273 +%{
  35.274 +  predicate(Universe::narrow_oop_shift() == 0);
  35.275 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.276 +  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
  35.277 +
  35.278 +  op_cost(10);
  35.279 +  format %{"[$reg + $off + $lreg << $scale]" %}
  35.280 +  interface(MEMORY_INTER) %{
  35.281 +    base($reg);
  35.282 +    index($lreg);
  35.283 +    scale($scale);
  35.284 +    disp($off);
  35.285 +  %}
  35.286 +%}
  35.287 +
  35.288 +// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
  35.289 +operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
  35.290 +%{
  35.291 +  constraint(ALLOC_IN_RC(ptr_reg));
  35.292 +  predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  35.293 +  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
  35.294 +
  35.295 +  op_cost(10);
  35.296 +  format %{"[$reg + $off + $idx << $scale]" %}
  35.297 +  interface(MEMORY_INTER) %{
  35.298 +    base($reg);
  35.299 +    index($idx);
  35.300 +    scale($scale);
  35.301 +    disp($off);
  35.302 +  %}
  35.303 +%}
  35.304 +
  35.305 +
  35.306  //----------Special Memory Operands--------------------------------------------
  35.307  // Stack Slot Operand - This operand is used for loading and storing temporary
  35.308  //                      values on the stack where a match requires a value to
  35.309 @@ -5488,7 +5584,10 @@
  35.310  
  35.311  opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
  35.312                 indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
  35.313 -               indNarrowOopOffset);
  35.314 +               indCompressedOopOffset,
  35.315 +               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
  35.316 +               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
  35.317 +               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
  35.318  
  35.319  //----------PIPELINE-----------------------------------------------------------
  35.320  // Rules which define the behavior of the target architectures pipeline.
  35.321 @@ -6234,9 +6333,7 @@
  35.322     ins_cost(125); // XXX
  35.323     format %{ "movl    $dst, $mem\t# compressed ptr" %}
  35.324     ins_encode %{
  35.325 -     Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  35.326 -     Register dst = as_Register($dst$$reg);
  35.327 -     __ movl(dst, addr);
  35.328 +     __ movl($dst$$Register, $mem$$Address);
  35.329     %}
  35.330     ins_pipe(ialu_reg_mem); // XXX
  35.331  %}
  35.332 @@ -6262,9 +6359,7 @@
  35.333    ins_cost(125); // XXX
  35.334    format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  35.335    ins_encode %{
  35.336 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  35.337 -    Register dst = as_Register($dst$$reg);
  35.338 -    __ movl(dst, addr);
  35.339 +    __ movl($dst$$Register, $mem$$Address);
  35.340    %}
  35.341    ins_pipe(ialu_reg_mem); // XXX
  35.342  %}
  35.343 @@ -6418,6 +6513,102 @@
  35.344    ins_pipe(ialu_reg_reg_fat);
  35.345  %}
  35.346  
  35.347 +instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
  35.348 +%{
  35.349 +  match(Set dst mem);
  35.350 +
  35.351 +  ins_cost(110);
  35.352 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  35.353 +  opcode(0x8D);
  35.354 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.355 +  ins_pipe(ialu_reg_reg_fat);
  35.356 +%}
  35.357 +
  35.358 +// Load Effective Address which uses Narrow (32-bits) oop
  35.359 +instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
  35.360 +%{
  35.361 +  predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
  35.362 +  match(Set dst mem);
  35.363 +
  35.364 +  ins_cost(110);
  35.365 +  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  35.366 +  opcode(0x8D);
  35.367 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.368 +  ins_pipe(ialu_reg_reg_fat);
  35.369 +%}
  35.370 +
  35.371 +instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
  35.372 +%{
  35.373 +  predicate(Universe::narrow_oop_shift() == 0);
  35.374 +  match(Set dst mem);
  35.375 +
  35.376 +  ins_cost(110); // XXX
  35.377 +  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  35.378 +  opcode(0x8D);
  35.379 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.380 +  ins_pipe(ialu_reg_reg_fat);
  35.381 +%}
  35.382 +
  35.383 +instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
  35.384 +%{
  35.385 +  predicate(Universe::narrow_oop_shift() == 0);
  35.386 +  match(Set dst mem);
  35.387 +
  35.388 +  ins_cost(110);
  35.389 +  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  35.390 +  opcode(0x8D);
  35.391 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.392 +  ins_pipe(ialu_reg_reg_fat);
  35.393 +%}
  35.394 +
  35.395 +instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
  35.396 +%{
  35.397 +  predicate(Universe::narrow_oop_shift() == 0);
  35.398 +  match(Set dst mem);
  35.399 +
  35.400 +  ins_cost(110);
  35.401 +  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  35.402 +  opcode(0x8D);
  35.403 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.404 +  ins_pipe(ialu_reg_reg_fat);
  35.405 +%}
  35.406 +
  35.407 +instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
  35.408 +%{
  35.409 +  predicate(Universe::narrow_oop_shift() == 0);
  35.410 +  match(Set dst mem);
  35.411 +
  35.412 +  ins_cost(110);
  35.413 +  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  35.414 +  opcode(0x8D);
  35.415 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.416 +  ins_pipe(ialu_reg_reg_fat);
  35.417 +%}
  35.418 +
  35.419 +instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
  35.420 +%{
  35.421 +  predicate(Universe::narrow_oop_shift() == 0);
  35.422 +  match(Set dst mem);
  35.423 +
  35.424 +  ins_cost(110);
  35.425 +  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  35.426 +  opcode(0x8D);
  35.427 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.428 +  ins_pipe(ialu_reg_reg_fat);
  35.429 +%}
  35.430 +
  35.431 +instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
  35.432 +%{
  35.433 +  predicate(Universe::narrow_oop_shift() == 0);
  35.434 +  match(Set dst mem);
  35.435 +
  35.436 +  ins_cost(110);
  35.437 +  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  35.438 +  opcode(0x8D);
  35.439 +  ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
  35.440 +  ins_pipe(ialu_reg_reg_fat);
  35.441 +%}
  35.442 +
  35.443  instruct loadConI(rRegI dst, immI src)
  35.444  %{
  35.445    match(Set dst src);
  35.446 @@ -6528,8 +6719,7 @@
  35.447    effect(KILL cr);
  35.448    format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
  35.449    ins_encode %{
  35.450 -    Register dst = $dst$$Register;
  35.451 -    __ xorq(dst, dst);
  35.452 +    __ xorq($dst$$Register, $dst$$Register);
  35.453    %}
  35.454    ins_pipe(ialu_reg);
  35.455  %}
  35.456 @@ -6541,11 +6731,10 @@
  35.457    format %{ "movl    $dst, $src\t# compressed ptr" %}
  35.458    ins_encode %{
  35.459      address con = (address)$src$$constant;
  35.460 -    Register dst = $dst$$Register;
  35.461      if (con == NULL) {
  35.462        ShouldNotReachHere();
  35.463      } else {
  35.464 -      __ set_narrow_oop(dst, (jobject)$src$$constant);
  35.465 +      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
  35.466      }
  35.467    %}
  35.468    ins_pipe(ialu_reg_fat); // XXX
  35.469 @@ -6794,12 +6983,25 @@
  35.470    ins_pipe(ialu_mem_reg);
  35.471  %}
  35.472  
  35.473 +instruct storeImmP0(memory mem, immP0 zero)
  35.474 +%{
  35.475 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.476 +  match(Set mem (StoreP mem zero));
  35.477 +
  35.478 +  ins_cost(125); // XXX
  35.479 +  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  35.480 +  ins_encode %{
  35.481 +    __ movq($mem$$Address, r12);
  35.482 +  %}
  35.483 +  ins_pipe(ialu_mem_reg);
  35.484 +%}
  35.485 +
  35.486  // Store NULL Pointer, mark word, or other simple pointer constant.
  35.487  instruct storeImmP(memory mem, immP31 src)
  35.488  %{
  35.489    match(Set mem (StoreP mem src));
  35.490  
  35.491 -  ins_cost(125); // XXX
  35.492 +  ins_cost(150); // XXX
  35.493    format %{ "movq    $mem, $src\t# ptr" %}
  35.494    opcode(0xC7); /* C7 /0 */
  35.495    ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
  35.496 @@ -6814,14 +7016,55 @@
  35.497    ins_cost(125); // XXX
  35.498    format %{ "movl    $mem, $src\t# compressed ptr" %}
  35.499    ins_encode %{
  35.500 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  35.501 -    Register src = as_Register($src$$reg);
  35.502 -    __ movl(addr, src);
  35.503 +    __ movl($mem$$Address, $src$$Register);
  35.504    %}
  35.505    ins_pipe(ialu_mem_reg);
  35.506  %}
  35.507  
  35.508 +instruct storeImmN0(memory mem, immN0 zero)
  35.509 +%{
  35.510 +  predicate(Universe::narrow_oop_base() == NULL);
  35.511 +  match(Set mem (StoreN mem zero));
  35.512 +
  35.513 +  ins_cost(125); // XXX
  35.514 +  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  35.515 +  ins_encode %{
  35.516 +    __ movl($mem$$Address, r12);
  35.517 +  %}
  35.518 +  ins_pipe(ialu_mem_reg);
  35.519 +%}
  35.520 +
  35.521 +instruct storeImmN(memory mem, immN src)
  35.522 +%{
  35.523 +  match(Set mem (StoreN mem src));
  35.524 +
  35.525 +  ins_cost(150); // XXX
  35.526 +  format %{ "movl    $mem, $src\t# compressed ptr" %}
  35.527 +  ins_encode %{
  35.528 +    address con = (address)$src$$constant;
  35.529 +    if (con == NULL) {
  35.530 +      __ movl($mem$$Address, (int32_t)0);
  35.531 +    } else {
  35.532 +      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
  35.533 +    }
  35.534 +  %}
  35.535 +  ins_pipe(ialu_mem_imm);
  35.536 +%}
  35.537 +
  35.538  // Store Integer Immediate
  35.539 +instruct storeImmI0(memory mem, immI0 zero)
  35.540 +%{
  35.541 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.542 +  match(Set mem (StoreI mem zero));
  35.543 +
  35.544 +  ins_cost(125); // XXX
  35.545 +  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  35.546 +  ins_encode %{
  35.547 +    __ movl($mem$$Address, r12);
  35.548 +  %}
  35.549 +  ins_pipe(ialu_mem_reg);
  35.550 +%}
  35.551 +
  35.552  instruct storeImmI(memory mem, immI src)
  35.553  %{
  35.554    match(Set mem (StoreI mem src));
  35.555 @@ -6834,6 +7077,19 @@
  35.556  %}
  35.557  
  35.558  // Store Long Immediate
  35.559 +instruct storeImmL0(memory mem, immL0 zero)
  35.560 +%{
  35.561 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.562 +  match(Set mem (StoreL mem zero));
  35.563 +
  35.564 +  ins_cost(125); // XXX
  35.565 +  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  35.566 +  ins_encode %{
  35.567 +    __ movq($mem$$Address, r12);
  35.568 +  %}
  35.569 +  ins_pipe(ialu_mem_reg);
  35.570 +%}
  35.571 +
  35.572  instruct storeImmL(memory mem, immL32 src)
  35.573  %{
  35.574    match(Set mem (StoreL mem src));
  35.575 @@ -6846,6 +7102,19 @@
  35.576  %}
  35.577  
  35.578  // Store Short/Char Immediate
  35.579 +instruct storeImmC0(memory mem, immI0 zero)
  35.580 +%{
  35.581 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.582 +  match(Set mem (StoreC mem zero));
  35.583 +
  35.584 +  ins_cost(125); // XXX
  35.585 +  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  35.586 +  ins_encode %{
  35.587 +    __ movw($mem$$Address, r12);
  35.588 +  %}
  35.589 +  ins_pipe(ialu_mem_reg);
  35.590 +%}
  35.591 +
  35.592  instruct storeImmI16(memory mem, immI16 src)
  35.593  %{
  35.594    predicate(UseStoreImmI16);
  35.595 @@ -6859,6 +7128,19 @@
  35.596  %}
  35.597  
  35.598  // Store Byte Immediate
  35.599 +instruct storeImmB0(memory mem, immI0 zero)
  35.600 +%{
  35.601 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.602 +  match(Set mem (StoreB mem zero));
  35.603 +
  35.604 +  ins_cost(125); // XXX
  35.605 +  format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
  35.606 +  ins_encode %{
  35.607 +    __ movb($mem$$Address, r12);
  35.608 +  %}
  35.609 +  ins_pipe(ialu_mem_reg);
  35.610 +%}
  35.611 +
  35.612  instruct storeImmB(memory mem, immI8 src)
  35.613  %{
  35.614    match(Set mem (StoreB mem src));
  35.615 @@ -6898,6 +7180,19 @@
  35.616  %}
  35.617  
  35.618  // Store CMS card-mark Immediate
  35.619 +instruct storeImmCM0_reg(memory mem, immI0 zero)
  35.620 +%{
  35.621 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.622 +  match(Set mem (StoreCM mem zero));
  35.623 +
  35.624 +  ins_cost(125); // XXX
  35.625 +  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  35.626 +  ins_encode %{
  35.627 +    __ movb($mem$$Address, r12);
  35.628 +  %}
  35.629 +  ins_pipe(ialu_mem_reg);
  35.630 +%}
  35.631 +
  35.632  instruct storeImmCM0(memory mem, immI0 src)
  35.633  %{
  35.634    match(Set mem (StoreCM mem src));
  35.635 @@ -6931,6 +7226,19 @@
  35.636  %}
  35.637  
  35.638  // Store immediate Float value (it is faster than store from XMM register)
  35.639 +instruct storeF0(memory mem, immF0 zero)
  35.640 +%{
  35.641 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.642 +  match(Set mem (StoreF mem zero));
  35.643 +
  35.644 +  ins_cost(25); // XXX
  35.645 +  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  35.646 +  ins_encode %{
  35.647 +    __ movl($mem$$Address, r12);
  35.648 +  %}
  35.649 +  ins_pipe(ialu_mem_reg);
  35.650 +%}
  35.651 +
  35.652  instruct storeF_imm(memory mem, immF src)
  35.653  %{
  35.654    match(Set mem (StoreF mem src));
  35.655 @@ -6957,6 +7265,7 @@
  35.656  // Store immediate double 0.0 (it is faster than store from XMM register)
  35.657  instruct storeD0_imm(memory mem, immD0 src)
  35.658  %{
  35.659 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  35.660    match(Set mem (StoreD mem src));
  35.661  
  35.662    ins_cost(50);
  35.663 @@ -6966,6 +7275,19 @@
  35.664    ins_pipe(ialu_mem_imm);
  35.665  %}
  35.666  
  35.667 +instruct storeD0(memory mem, immD0 zero)
  35.668 +%{
  35.669 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.670 +  match(Set mem (StoreD mem zero));
  35.671 +
  35.672 +  ins_cost(25); // XXX
  35.673 +  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  35.674 +  ins_encode %{
  35.675 +    __ movq($mem$$Address, r12);
  35.676 +  %}
  35.677 +  ins_pipe(ialu_mem_reg);
  35.678 +%}
  35.679 +
  35.680  instruct storeSSI(stackSlotI dst, rRegI src)
  35.681  %{
  35.682    match(Set dst src);
  35.683 @@ -7077,6 +7399,56 @@
  35.684    ins_pipe( ialu_mem_reg );
  35.685  %}
  35.686  
  35.687 +
  35.688 +//---------- Population Count Instructions -------------------------------------
  35.689 +
  35.690 +instruct popCountI(rRegI dst, rRegI src) %{
  35.691 +  predicate(UsePopCountInstruction);
  35.692 +  match(Set dst (PopCountI src));
  35.693 +
  35.694 +  format %{ "popcnt  $dst, $src" %}
  35.695 +  ins_encode %{
  35.696 +    __ popcntl($dst$$Register, $src$$Register);
  35.697 +  %}
  35.698 +  ins_pipe(ialu_reg);
  35.699 +%}
  35.700 +
  35.701 +instruct popCountI_mem(rRegI dst, memory mem) %{
  35.702 +  predicate(UsePopCountInstruction);
  35.703 +  match(Set dst (PopCountI (LoadI mem)));
  35.704 +
  35.705 +  format %{ "popcnt  $dst, $mem" %}
  35.706 +  ins_encode %{
  35.707 +    __ popcntl($dst$$Register, $mem$$Address);
  35.708 +  %}
  35.709 +  ins_pipe(ialu_reg);
  35.710 +%}
  35.711 +
  35.712 +// Note: Long.bitCount(long) returns an int.
  35.713 +instruct popCountL(rRegI dst, rRegL src) %{
  35.714 +  predicate(UsePopCountInstruction);
  35.715 +  match(Set dst (PopCountL src));
  35.716 +
  35.717 +  format %{ "popcnt  $dst, $src" %}
  35.718 +  ins_encode %{
  35.719 +    __ popcntq($dst$$Register, $src$$Register);
  35.720 +  %}
  35.721 +  ins_pipe(ialu_reg);
  35.722 +%}
  35.723 +
  35.724 +// Note: Long.bitCount(long) returns an int.
  35.725 +instruct popCountL_mem(rRegI dst, memory mem) %{
  35.726 +  predicate(UsePopCountInstruction);
  35.727 +  match(Set dst (PopCountL (LoadL mem)));
  35.728 +
  35.729 +  format %{ "popcnt  $dst, $mem" %}
  35.730 +  ins_encode %{
  35.731 +    __ popcntq($dst$$Register, $mem$$Address);
  35.732 +  %}
  35.733 +  ins_pipe(ialu_reg);
  35.734 +%}
  35.735 +
  35.736 +
  35.737  //----------MemBar Instructions-----------------------------------------------
  35.738  // Memory barrier flavors
  35.739  
  35.740 @@ -7192,9 +7564,7 @@
  35.741    effect(KILL cr);
  35.742    format %{ "encode_heap_oop_not_null $dst,$src" %}
  35.743    ins_encode %{
  35.744 -    Register s = $src$$Register;
  35.745 -    Register d = $dst$$Register;
  35.746 -    __ encode_heap_oop_not_null(d, s);
  35.747 +    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  35.748    %}
  35.749    ins_pipe(ialu_reg_long);
  35.750  %}
  35.751 @@ -7224,7 +7594,11 @@
  35.752    ins_encode %{
  35.753      Register s = $src$$Register;
  35.754      Register d = $dst$$Register;
  35.755 -    __ decode_heap_oop_not_null(d, s);
  35.756 +    if (s != d) {
  35.757 +      __ decode_heap_oop_not_null(d, s);
  35.758 +    } else {
  35.759 +      __ decode_heap_oop_not_null(d);
  35.760 +    }
  35.761    %}
  35.762    ins_pipe(ialu_reg_long);
  35.763  %}
  35.764 @@ -11389,8 +11763,9 @@
  35.765  
  35.766  // This will generate a signed flags result. This should be OK since
  35.767  // any compare to a zero should be eq/neq.
  35.768 -instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
  35.769 -%{
  35.770 +instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
  35.771 +%{
  35.772 +  predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
  35.773    match(Set cr (CmpP (LoadP op) zero));
  35.774  
  35.775    ins_cost(500); // XXX
  35.776 @@ -11401,13 +11776,24 @@
  35.777    ins_pipe(ialu_cr_reg_imm);
  35.778  %}
  35.779  
  35.780 +instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
  35.781 +%{
  35.782 +  predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
  35.783 +  match(Set cr (CmpP (LoadP mem) zero));
  35.784 +
  35.785 +  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  35.786 +  ins_encode %{
  35.787 +    __ cmpq(r12, $mem$$Address);
  35.788 +  %}
  35.789 +  ins_pipe(ialu_cr_reg_mem);
  35.790 +%}
  35.791  
  35.792  instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
  35.793  %{
  35.794    match(Set cr (CmpN op1 op2));
  35.795  
  35.796    format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  35.797 -  ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %}
  35.798 +  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  35.799    ins_pipe(ialu_cr_reg_reg);
  35.800  %}
  35.801  
  35.802 @@ -11415,11 +11801,30 @@
  35.803  %{
  35.804    match(Set cr (CmpN src (LoadN mem)));
  35.805  
  35.806 -  ins_cost(500); // XXX
  35.807 -  format %{ "cmpl    $src, mem\t# compressed ptr" %}
  35.808 +  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  35.809    ins_encode %{
  35.810 -    Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  35.811 -    __ cmpl(as_Register($src$$reg), adr);
  35.812 +    __ cmpl($src$$Register, $mem$$Address);
  35.813 +  %}
  35.814 +  ins_pipe(ialu_cr_reg_mem);
  35.815 +%}
  35.816 +
  35.817 +instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  35.818 +  match(Set cr (CmpN op1 op2));
  35.819 +
  35.820 +  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  35.821 +  ins_encode %{
  35.822 +    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  35.823 +  %}
  35.824 +  ins_pipe(ialu_cr_reg_imm);
  35.825 +%}
  35.826 +
  35.827 +instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
  35.828 +%{
  35.829 +  match(Set cr (CmpN src (LoadN mem)));
  35.830 +
  35.831 +  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  35.832 +  ins_encode %{
  35.833 +    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  35.834    %}
  35.835    ins_pipe(ialu_cr_reg_mem);
  35.836  %}
  35.837 @@ -11432,15 +11837,27 @@
  35.838    ins_pipe(ialu_cr_reg_imm);
  35.839  %}
  35.840  
  35.841 -instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero)
  35.842 -%{
  35.843 +instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
  35.844 +%{
  35.845 +  predicate(Universe::narrow_oop_base() != NULL);
  35.846    match(Set cr (CmpN (LoadN mem) zero));
  35.847  
  35.848    ins_cost(500); // XXX
  35.849    format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  35.850    ins_encode %{
  35.851 -    Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
  35.852 -    __ cmpl(addr, (int)0xFFFFFFFF);
  35.853 +    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  35.854 +  %}
  35.855 +  ins_pipe(ialu_cr_reg_mem);
  35.856 +%}
  35.857 +
  35.858 +instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
  35.859 +%{
  35.860 +  predicate(Universe::narrow_oop_base() == NULL);
  35.861 +  match(Set cr (CmpN (LoadN mem) zero));
  35.862 +
  35.863 +  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  35.864 +  ins_encode %{
  35.865 +    __ cmpl(r12, $mem$$Address);
  35.866    %}
  35.867    ins_pipe(ialu_cr_reg_mem);
  35.868  %}
  35.869 @@ -11472,7 +11889,6 @@
  35.870  %{
  35.871    match(Set cr (CmpL op1 (LoadL op2)));
  35.872  
  35.873 -  ins_cost(500); // XXX
  35.874    format %{ "cmpq    $op1, $op2" %}
  35.875    opcode(0x3B); /* Opcode 3B /r */
  35.876    ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
  35.877 @@ -11733,15 +12149,12 @@
  35.878    effect(KILL rcx, KILL cr);
  35.879  
  35.880    ins_cost(1100);  // slightly larger than the next version
  35.881 -  format %{ "cmpq    rax, rsi\n\t"
  35.882 -            "jeq,s   hit\n\t"
  35.883 -            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  35.884 +  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  35.885              "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
  35.886              "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
  35.887              "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
  35.888              "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
  35.889              "movq    [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
  35.890 -    "hit:\n\t"
  35.891              "xorq    $result, $result\t\t Hit: rdi zero\n\t"
  35.892      "miss:\t" %}
  35.893  
  35.894 @@ -11756,13 +12169,10 @@
  35.895                                       rdi_RegP result)
  35.896  %{
  35.897    match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  35.898 -  predicate(!UseCompressedOops); // decoding oop kills condition codes
  35.899    effect(KILL rcx, KILL result);
  35.900  
  35.901    ins_cost(1000);
  35.902 -  format %{ "cmpq    rax, rsi\n\t"
  35.903 -            "jeq,s   miss\t# Actually a hit; we are done.\n\t"
  35.904 -            "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  35.905 +  format %{ "movq    rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
  35.906              "movl    rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
  35.907              "addq    rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
  35.908              "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
    36.1 --- a/src/os/linux/vm/os_linux.cpp	Wed Mar 18 11:37:48 2009 -0400
    36.2 +++ b/src/os/linux/vm/os_linux.cpp	Thu Mar 19 09:13:24 2009 -0700
    36.3 @@ -2582,7 +2582,7 @@
    36.4  #define SHM_HUGETLB 04000
    36.5  #endif
    36.6  
    36.7 -char* os::reserve_memory_special(size_t bytes) {
    36.8 +char* os::reserve_memory_special(size_t bytes, char* req_addr) {
    36.9    assert(UseLargePages, "only for large pages");
   36.10  
   36.11    key_t key = IPC_PRIVATE;
    37.1 --- a/src/os/solaris/dtrace/generateJvmOffsets.cpp	Wed Mar 18 11:37:48 2009 -0400
    37.2 +++ b/src/os/solaris/dtrace/generateJvmOffsets.cpp	Thu Mar 19 09:13:24 2009 -0700
    37.3 @@ -249,6 +249,10 @@
    37.4  
    37.5    printf("\n");
    37.6  
    37.7 +  GEN_OFFS(NarrowOopStruct, _base);
    37.8 +  GEN_OFFS(NarrowOopStruct, _shift);
    37.9 +  printf("\n");
   37.10 +
   37.11    GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header));
   37.12    GEN_SIZE(oopDesc);
   37.13    GEN_SIZE(constantPoolOopDesc);
    38.1 --- a/src/os/solaris/dtrace/jhelper.d	Wed Mar 18 11:37:48 2009 -0400
    38.2 +++ b/src/os/solaris/dtrace/jhelper.d	Thu Mar 19 09:13:24 2009 -0700
    38.3 @@ -46,7 +46,10 @@
    38.4  extern pointer __1cJCodeCacheF_heap_;
    38.5  extern pointer __1cIUniverseP_methodKlassObj_;
    38.6  extern pointer __1cIUniverseO_collectedHeap_;
    38.7 -extern pointer __1cIUniverseK_heap_base_;
    38.8 +extern pointer __1cIUniverseL_narrow_oop_;
    38.9 +#ifdef _LP64
   38.10 +extern pointer UseCompressedOops;
   38.11 +#endif
   38.12  
   38.13  extern pointer __1cHnmethodG__vtbl_;
   38.14  extern pointer __1cKBufferBlobG__vtbl_;
   38.15 @@ -56,6 +59,7 @@
   38.16  #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t))
   38.17  #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t))
   38.18  #define copyin_int32(ADDR)  *(int32_t*)  copyin((pointer) (ADDR), sizeof(int32_t))
   38.19 +#define copyin_uint8(ADDR)  *(uint8_t*)  copyin((pointer) (ADDR), sizeof(uint8_t))
   38.20  
   38.21  #define SAME(x) x
   38.22  #define copyin_offset(JVM_CONST)  JVM_CONST = \
   38.23 @@ -132,6 +136,9 @@
   38.24    copyin_offset(SIZE_oopDesc);
   38.25    copyin_offset(SIZE_constantPoolOopDesc);
   38.26  
   38.27 +  copyin_offset(OFFSET_NarrowOopStruct_base);
   38.28 +  copyin_offset(OFFSET_NarrowOopStruct_shift);
   38.29 +
   38.30    /*
   38.31     * The PC to translate is in arg0.
   38.32     */
   38.33 @@ -151,9 +158,19 @@
   38.34  
   38.35    this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_);
   38.36    this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_);
   38.37 -  this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_);
   38.38  
   38.39    /* Reading volatile values */
   38.40 +#ifdef _LP64
   38.41 +  this->Use_Compressed_Oops  = copyin_uint8(&``UseCompressedOops);
   38.42 +#else
   38.43 +  this->Use_Compressed_Oops  = 0;
   38.44 +#endif
   38.45 +
   38.46 +  this->Universe_narrow_oop_base  = copyin_ptr(&``__1cIUniverseL_narrow_oop_ +
   38.47 +                                               OFFSET_NarrowOopStruct_base);
   38.48 +  this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ +
   38.49 +                                                 OFFSET_NarrowOopStruct_shift);
   38.50 +
   38.51    this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + 
   38.52        OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low);
   38.53  
   38.54 @@ -295,7 +312,7 @@
   38.55  
   38.56  dtrace:helper:ustack:
   38.57  /!this->done && this->vtbl == this->BufferBlob_vtbl &&
   38.58 -this->Universe_heap_base == NULL &&
   38.59 +this->Use_Compressed_Oops == 0 &&
   38.60  this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
   38.61  {
   38.62    MARK_LINE;
   38.63 @@ -306,7 +323,7 @@
   38.64  
   38.65  dtrace:helper:ustack:
   38.66  /!this->done && this->vtbl == this->BufferBlob_vtbl &&
   38.67 -this->Universe_heap_base != NULL &&
   38.68 +this->Use_Compressed_Oops != 0 &&
   38.69  this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
   38.70  {
   38.71    MARK_LINE;
   38.72 @@ -314,8 +331,8 @@
   38.73     * Read compressed pointer and  decode heap oop, same as oop.inline.hpp
   38.74     */
   38.75    this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata);
   38.76 -  this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base +
   38.77 -                ((uintptr_t)this->cklass << 3));
   38.78 +  this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base +
   38.79 +                ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift));
   38.80    this->methodOop = this->klass == this->Universe_methodKlassOop;
   38.81    this->done = !this->methodOop;
   38.82  }
    39.1 --- a/src/os/solaris/dtrace/libjvm_db.c	Wed Mar 18 11:37:48 2009 -0400
    39.2 +++ b/src/os/solaris/dtrace/libjvm_db.c	Thu Mar 19 09:13:24 2009 -0700
    39.3 @@ -146,13 +146,17 @@
    39.4    uint64_t BufferBlob_vtbl;
    39.5    uint64_t RuntimeStub_vtbl;
    39.6  
    39.7 +  uint64_t Use_Compressed_Oops_address;
    39.8    uint64_t Universe_methodKlassObj_address;
    39.9 +  uint64_t Universe_narrow_oop_base_address;
   39.10 +  uint64_t Universe_narrow_oop_shift_address;
   39.11    uint64_t CodeCache_heap_address;
   39.12 -  uint64_t Universe_heap_base_address;
   39.13  
   39.14    /* Volatiles */
   39.15 +  uint8_t  Use_Compressed_Oops;
   39.16    uint64_t Universe_methodKlassObj;
   39.17 -  uint64_t Universe_heap_base;
   39.18 +  uint64_t Universe_narrow_oop_base;
   39.19 +  uint32_t Universe_narrow_oop_shift;
   39.20    uint64_t CodeCache_low;
   39.21    uint64_t CodeCache_high;
   39.22    uint64_t CodeCache_segmap_low;
   39.23 @@ -279,8 +283,11 @@
   39.24        if (strcmp("_methodKlassObj", vmp->fieldName) == 0) {
   39.25          J->Universe_methodKlassObj_address = vmp->address;
   39.26        }
   39.27 -      if (strcmp("_heap_base", vmp->fieldName) == 0) {
   39.28 -        J->Universe_heap_base_address = vmp->address;
   39.29 +      if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) {
   39.30 +        J->Universe_narrow_oop_base_address = vmp->address;
   39.31 +      }
   39.32 +      if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) {
   39.33 +        J->Universe_narrow_oop_shift_address = vmp->address;
   39.34        }
   39.35      }
   39.36      CHECK_FAIL(err);
   39.37 @@ -298,14 +305,39 @@
   39.38    return -1;
   39.39  }
   39.40  
   39.41 +static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   39.42 +  psaddr_t sym_addr;
   39.43 +  int err;
   39.44 +
   39.45 +  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   39.46 +  if (err != PS_OK) goto fail;
   39.47 +  *valuep = sym_addr;
   39.48 +  return PS_OK;
   39.49 +
   39.50 + fail:
   39.51 +  return err;
   39.52 +}
   39.53 +
   39.54  static int read_volatiles(jvm_agent_t* J) {
   39.55    uint64_t ptr;
   39.56    int err;
   39.57  
   39.58 +  err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address);
   39.59 +  if (err == PS_OK) {
   39.60 +    err = ps_pread(J->P,  J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t));
   39.61 +    CHECK_FAIL(err);
   39.62 +  } else {
   39.63 +    J->Use_Compressed_Oops = 0;
   39.64 +  }
   39.65 +
   39.66    err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj);
   39.67    CHECK_FAIL(err);
   39.68 -  err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base);
   39.69 +
   39.70 +  err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base);
   39.71    CHECK_FAIL(err);
   39.72 +  err = ps_pread(J->P,  J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t));
   39.73 +  CHECK_FAIL(err);
   39.74 +
   39.75    err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory +
   39.76                       OFFSET_VirtualSpace_low, &J->CodeCache_low);
   39.77    CHECK_FAIL(err);
   39.78 @@ -374,19 +406,6 @@
   39.79    return -1;
   39.80  }
   39.81  
   39.82 -static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   39.83 -  psaddr_t sym_addr;
   39.84 -  int err;
   39.85 -
   39.86 -  err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   39.87 -  if (err != PS_OK) goto fail;
   39.88 -  *valuep = sym_addr;
   39.89 -  return PS_OK;
   39.90 -
   39.91 - fail:
   39.92 -  return err;
   39.93 -}
   39.94 -
   39.95  static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) {
   39.96    psaddr_t sym_addr;
   39.97    int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
   39.98 @@ -458,14 +477,14 @@
   39.99  static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) {
  39.100    uint64_t klass;
  39.101    int err;
  39.102 -  // If heap_base is nonnull, this was a compressed oop.
  39.103 -  if (J->Universe_heap_base != NULL) {
  39.104 +  // If UseCompressedOops, this was a compressed oop.
  39.105 +  if (J->Use_Compressed_Oops != 0) {
  39.106      uint32_t cklass;
  39.107      err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata,
  39.108            &cklass);
  39.109      // decode heap oop, same as oop.inline.hpp
  39.110 -    klass = (uint64_t)((uintptr_t)J->Universe_heap_base +
  39.111 -            ((uintptr_t)cklass << 3));
  39.112 +    klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base +
  39.113 +            ((uintptr_t)cklass << J->Universe_narrow_oop_shift));
  39.114    } else {
  39.115      err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass);
  39.116    }
    40.1 --- a/src/os/solaris/vm/os_solaris.cpp	Wed Mar 18 11:37:48 2009 -0400
    40.2 +++ b/src/os/solaris/vm/os_solaris.cpp	Thu Mar 19 09:13:24 2009 -0700
    40.3 @@ -3220,7 +3220,7 @@
    40.4    return true;
    40.5  }
    40.6  
    40.7 -char* os::reserve_memory_special(size_t bytes) {
    40.8 +char* os::reserve_memory_special(size_t bytes, char* addr) {
    40.9    assert(UseLargePages && UseISM, "only for ISM large pages");
   40.10  
   40.11    size_t size = bytes;
   40.12 @@ -4451,6 +4451,9 @@
   40.13  int_fnP_thread_t os::Solaris::_thr_suspend_mutator;
   40.14  int_fnP_thread_t os::Solaris::_thr_continue_mutator;
   40.15  
   40.16 +// (Static) wrapper for getisax(2) call.
   40.17 +os::Solaris::getisax_func_t os::Solaris::_getisax = 0;
   40.18 +
   40.19  // (Static) wrappers for the liblgrp API
   40.20  os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home;
   40.21  os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init;
   40.22 @@ -4465,16 +4468,19 @@
   40.23  // (Static) wrapper for meminfo() call.
   40.24  os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0;
   40.25  
   40.26 -static address resolve_symbol(const char *name) {
   40.27 -  address addr;
   40.28 -
   40.29 -  addr = (address) dlsym(RTLD_DEFAULT, name);
   40.30 +static address resolve_symbol_lazy(const char* name) {
   40.31 +  address addr = (address) dlsym(RTLD_DEFAULT, name);
   40.32    if(addr == NULL) {
   40.33      // RTLD_DEFAULT was not defined on some early versions of 2.5.1
   40.34      addr = (address) dlsym(RTLD_NEXT, name);
   40.35 -    if(addr == NULL) {
   40.36 -      fatal(dlerror());
   40.37 -    }
   40.38 +  }
   40.39 +  return addr;
   40.40 +}
   40.41 +
   40.42 +static address resolve_symbol(const char* name) {
   40.43 +  address addr = resolve_symbol_lazy(name);
   40.44 +  if(addr == NULL) {
   40.45 +    fatal(dlerror());
   40.46    }
   40.47    return addr;
   40.48  }
   40.49 @@ -4673,15 +4679,26 @@
   40.50  }
   40.51  
   40.52  void os::Solaris::misc_sym_init() {
   40.53 -  address func = (address)dlsym(RTLD_DEFAULT, "meminfo");
   40.54 -  if(func == NULL) {
   40.55 -    func = (address) dlsym(RTLD_NEXT, "meminfo");
   40.56 -  }
   40.57 +  address func;
   40.58 +
   40.59 +  // getisax
   40.60 +  func = resolve_symbol_lazy("getisax");
   40.61 +  if (func != NULL) {
   40.62 +    os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func);
   40.63 +  }
   40.64 +
   40.65 +  // meminfo
   40.66 +  func = resolve_symbol_lazy("meminfo");
   40.67    if (func != NULL) {
   40.68      os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func));
   40.69    }
   40.70  }
   40.71  
   40.72 +uint_t os::Solaris::getisax(uint32_t* array, uint_t n) {
   40.73 +  assert(_getisax != NULL, "_getisax not set");
   40.74 +  return _getisax(array, n);
   40.75 +}
   40.76 +
   40.77  // Symbol doesn't exist in Solaris 8 pset.h
   40.78  #ifndef PS_MYID
   40.79  #define PS_MYID -3
   40.80 @@ -4716,6 +4733,10 @@
   40.81  
   40.82    Solaris::initialize_system_info();
   40.83  
   40.84 +  // Initialize misc. symbols as soon as possible, so we can use them
   40.85 +  // if we need them.
   40.86 +  Solaris::misc_sym_init();
   40.87 +
   40.88    int fd = open("/dev/zero", O_RDWR);
   40.89    if (fd < 0) {
   40.90      fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno));
   40.91 @@ -4857,7 +4878,6 @@
   40.92      }
   40.93    }
   40.94  
   40.95 -  Solaris::misc_sym_init();
   40.96    Solaris::signal_sets_init();
   40.97    Solaris::init_signal_mem();
   40.98    Solaris::install_signal_handlers();
    41.1 --- a/src/os/solaris/vm/os_solaris.hpp	Wed Mar 18 11:37:48 2009 -0400
    41.2 +++ b/src/os/solaris/vm/os_solaris.hpp	Thu Mar 19 09:13:24 2009 -0700
    41.3 @@ -1,5 +1,5 @@
    41.4  /*
    41.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    41.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    41.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    41.8   *
    41.9   * This code is free software; you can redistribute it and/or modify it
   41.10 @@ -72,6 +72,8 @@
   41.11          LGRP_VIEW_OS            /* what's available to operating system */
   41.12    } lgrp_view_t;
   41.13  
   41.14 +  typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n);
   41.15 +
   41.16    typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id);
   41.17    typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view);
   41.18    typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie);
   41.19 @@ -87,6 +89,8 @@
   41.20                                  const uint_t  info_req[],  int info_count,
   41.21                                  uint64_t  outdata[], uint_t validity[]);
   41.22  
   41.23 +  static getisax_func_t _getisax;
   41.24 +
   41.25    static lgrp_home_func_t _lgrp_home;
   41.26    static lgrp_init_func_t _lgrp_init;
   41.27    static lgrp_fini_func_t _lgrp_fini;
   41.28 @@ -283,6 +287,9 @@
   41.29    }
   41.30    static lgrp_cookie_t lgrp_cookie()                 { return _lgrp_cookie; }
   41.31  
   41.32 +  static bool supports_getisax()                     { return _getisax != NULL; }
   41.33 +  static uint_t getisax(uint32_t* array, uint_t n);
   41.34 +
   41.35    static void set_meminfo(meminfo_func_t func)       { _meminfo = func; }
   41.36    static int meminfo (const uint64_t inaddr[],   int addr_count,
   41.37                       const uint_t  info_req[],  int info_count,
    42.1 --- a/src/os/windows/vm/os_windows.cpp	Wed Mar 18 11:37:48 2009 -0400
    42.2 +++ b/src/os/windows/vm/os_windows.cpp	Thu Mar 19 09:13:24 2009 -0700
    42.3 @@ -2595,7 +2595,7 @@
    42.4    return true;
    42.5  }
    42.6  
    42.7 -char* os::reserve_memory_special(size_t bytes) {
    42.8 +char* os::reserve_memory_special(size_t bytes, char* addr) {
    42.9  
   42.10    if (UseLargePagesIndividualAllocation) {
   42.11      if (TracePageSizes && Verbose) {
   42.12 @@ -2615,7 +2615,7 @@
   42.13          "use -XX:-UseLargePagesIndividualAllocation to turn off");
   42.14        return NULL;
   42.15      }
   42.16 -    p_buf = (char *) VirtualAlloc(NULL,
   42.17 +    p_buf = (char *) VirtualAlloc(addr,
   42.18                                   size_of_reserve,  // size of Reserve
   42.19                                   MEM_RESERVE,
   42.20                                   PAGE_EXECUTE_READWRITE);
    43.1 --- a/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
    43.2 +++ b/src/os_cpu/linux_sparc/vm/globals_linux_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
    43.3 @@ -30,5 +30,7 @@
    43.4  define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
    43.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    43.6  
    43.7 +// Only used on 64 bit platforms
    43.8 +define_pd_global(uintx, HeapBaseMinAddress,      4*G);
    43.9  // Only used on 64 bit Windows platforms
   43.10  define_pd_global(bool, UseVectoredExceptions, false);
    44.1 --- a/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    44.2 +++ b/src/os_cpu/linux_x86/vm/globals_linux_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    44.3 @@ -43,5 +43,7 @@
    44.4  
    44.5  define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
    44.6  
    44.7 +// Only used on 64 bit platforms
    44.8 +define_pd_global(uintx, HeapBaseMinAddress,      2*G);
    44.9  // Only used on 64 bit Windows platforms
   44.10  define_pd_global(bool, UseVectoredExceptions,    false);
    45.1 --- a/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Wed Mar 18 11:37:48 2009 -0400
    45.2 +++ b/src/os_cpu/solaris_sparc/vm/globals_solaris_sparc.hpp	Thu Mar 19 09:13:24 2009 -0700
    45.3 @@ -30,5 +30,9 @@
    45.4  define_pd_global(uintx, JVMInvokeMethodSlack,    12288);
    45.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    45.6  
    45.7 +// Only used on 64 bit platforms
    45.8 +define_pd_global(uintx, HeapBaseMinAddress,      4*G);
    45.9  // Only used on 64 bit Windows platforms
   45.10  define_pd_global(bool, UseVectoredExceptions,    false);
   45.11 +
   45.12 +
    46.1 --- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Mar 18 11:37:48 2009 -0400
    46.2 +++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Mar 19 09:13:24 2009 -0700
    46.3 @@ -1,5 +1,5 @@
    46.4  /*
    46.5 - * Copyright 2006 Sun Microsystems, Inc.  All Rights Reserved.
    46.6 + * Copyright 2006-2009 Sun Microsystems, Inc.  All Rights Reserved.
    46.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    46.8   *
    46.9   * This code is free software; you can redistribute it and/or modify it
   46.10 @@ -25,58 +25,107 @@
   46.11  # include "incls/_precompiled.incl"
   46.12  # include "incls/_vm_version_solaris_sparc.cpp.incl"
   46.13  
   46.14 +# include <sys/auxv.h>
   46.15 +# include <sys/auxv_SPARC.h>
   46.16  # include <sys/systeminfo.h>
   46.17  
   46.18 +// We need to keep these here as long as we have to build on Solaris
   46.19 +// versions before 10.
   46.20 +#ifndef SI_ARCHITECTURE_32
   46.21 +#define SI_ARCHITECTURE_32      516     /* basic 32-bit SI_ARCHITECTURE */
   46.22 +#endif
   46.23 +
   46.24 +#ifndef SI_ARCHITECTURE_64
   46.25 +#define SI_ARCHITECTURE_64      517     /* basic 64-bit SI_ARCHITECTURE */
   46.26 +#endif
   46.27 +
   46.28 +static void do_sysinfo(int si, const char* string, int* features, int mask) {
   46.29 +  char   tmp;
   46.30 +  size_t bufsize = sysinfo(si, &tmp, 1);
   46.31 +
   46.32 +  // All SI defines used below must be supported.
   46.33 +  guarantee(bufsize != -1, "must be supported");
   46.34 +
   46.35 +  char* buf = (char*) malloc(bufsize);
   46.36 +
   46.37 +  if (buf == NULL)
   46.38 +    return;
   46.39 +
   46.40 +  if (sysinfo(si, buf, bufsize) == bufsize) {
   46.41 +    // Compare the string.
   46.42 +    if (strcmp(buf, string) == 0) {
   46.43 +      *features |= mask;
   46.44 +    }
   46.45 +  }
   46.46 +
   46.47 +  free(buf);
   46.48 +}
   46.49 +
   46.50  int VM_Version::platform_features(int features) {
   46.51 -  // We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...).
   46.52 -  // This isn't the best of all possible ways because there's not enough
   46.53 -  // detail in the isa list it returns, but it's a bit less arcane than
   46.54 -  // generating assembly code and an illegal instruction handler.  We used
   46.55 -  // to generate a getpsr trap, but that's even more arcane.
   46.56 -  //
   46.57 -  // Another possibility would be to use sysinfo(SI_PLATFORM, ...), but
   46.58 -  // that would require more knowledge here than is wise.
   46.59 +  // getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
   46.60 +  // supported on Solaris 10 and later.
   46.61 +  if (os::Solaris::supports_getisax()) {
   46.62 +#ifndef PRODUCT
   46.63 +    if (PrintMiscellaneous && Verbose)
   46.64 +      tty->print_cr("getisax(2) supported.");
   46.65 +#endif
   46.66  
   46.67 -  // isalist spec via 'man isalist' as of 01-Aug-2001
   46.68 +    // Check 32-bit architecture.
   46.69 +    do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
   46.70  
   46.71 -  char   tmp;
   46.72 -  size_t bufsize  = sysinfo(SI_ISALIST, &tmp, 1);
   46.73 -  char*  buf      = (char*)malloc(bufsize);
   46.74 +    // Check 64-bit architecture.
   46.75 +    do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
   46.76  
   46.77 -  if (buf != NULL) {
   46.78 -    if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
   46.79 -      // Figure out what kind of sparc we have
   46.80 -      char *sparc_string = strstr(buf, "sparc");
   46.81 -      if (sparc_string != NULL) {            features |= v8_instructions_m;
   46.82 -        if (sparc_string[5] == 'v') {
   46.83 -          if (sparc_string[6] == '8') {
   46.84 -            if (sparc_string[7] == '-')      features |= hardware_int_muldiv_m;
   46.85 -            else if (sparc_string[7] == 'p') features |= generic_v9_m;
   46.86 -            else                      features |= generic_v8_m;
   46.87 -          } else if (sparc_string[6] == '9') features |= generic_v9_m;
   46.88 +    // Extract valid instruction set extensions.
   46.89 +    uint_t av;
   46.90 +    uint_t avn = os::Solaris::getisax(&av, 1);
   46.91 +    assert(avn == 1, "should only return one av");
   46.92 +
   46.93 +    if (av & AV_SPARC_MUL32)  features |= hardware_mul32_m;
   46.94 +    if (av & AV_SPARC_DIV32)  features |= hardware_div32_m;
   46.95 +    if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
   46.96 +    if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
   46.97 +    if (av & AV_SPARC_POPC)   features |= hardware_popc_m;
   46.98 +    if (av & AV_SPARC_VIS)    features |= vis1_instructions_m;
   46.99 +    if (av & AV_SPARC_VIS2)   features |= vis2_instructions_m;
  46.100 +  } else {
  46.101 +    // getisax(2) failed, use the old legacy code.
  46.102 +#ifndef PRODUCT
  46.103 +    if (PrintMiscellaneous && Verbose)
  46.104 +      tty->print_cr("getisax(2) not supported.");
  46.105 +#endif
  46.106 +
  46.107 +    char   tmp;
  46.108 +    size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1);
  46.109 +    char*  buf     = (char*) malloc(bufsize);
  46.110 +
  46.111 +    if (buf != NULL) {
  46.112 +      if (sysinfo(SI_ISALIST, buf, bufsize) == bufsize) {
  46.113 +        // Figure out what kind of sparc we have
  46.114 +        char *sparc_string = strstr(buf, "sparc");
  46.115 +        if (sparc_string != NULL) {              features |= v8_instructions_m;
  46.116 +          if (sparc_string[5] == 'v') {
  46.117 +            if (sparc_string[6] == '8') {
  46.118 +              if (sparc_string[7] == '-') {      features |= hardware_mul32_m;
  46.119 +                                                 features |= hardware_div32_m;
  46.120 +              } else if (sparc_string[7] == 'p') features |= generic_v9_m;
  46.121 +              else                               features |= generic_v8_m;
  46.122 +            } else if (sparc_string[6] == '9')   features |= generic_v9_m;
  46.123 +          }
  46.124 +        }
  46.125 +
  46.126 +        // Check for visualization instructions
  46.127 +        char *vis = strstr(buf, "vis");
  46.128 +        if (vis != NULL) {                       features |= vis1_instructions_m;
  46.129 +          if (vis[3] == '2')                     features |= vis2_instructions_m;
  46.130          }
  46.131        }
  46.132 -
  46.133 -      // Check for visualization instructions
  46.134 -      char *vis = strstr(buf, "vis");
  46.135 -      if (vis != NULL) {              features |= vis1_instructions_m;
  46.136 -        if (vis[3] == '2')            features |= vis2_instructions_m;
  46.137 -      }
  46.138 +      free(buf);
  46.139      }
  46.140 -    free(buf);
  46.141    }
  46.142  
  46.143 -  bufsize = sysinfo(SI_MACHINE, &tmp, 1);
  46.144 -  buf     = (char*)malloc(bufsize);
  46.145 -
  46.146 -  if (buf != NULL) {
  46.147 -    if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) {
  46.148 -      if (strstr(buf, "sun4v") != NULL) {
  46.149 -        features |= sun4v_m;
  46.150 -      }
  46.151 -    }
  46.152 -    free(buf);
  46.153 -  }
  46.154 +  // Determine the machine type.
  46.155 +  do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
  46.156  
  46.157    return features;
  46.158  }
    47.1 --- a/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    47.2 +++ b/src/os_cpu/solaris_x86/vm/globals_solaris_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    47.3 @@ -46,5 +46,7 @@
    47.4  
    47.5  define_pd_global(intx, CompilerThreadStackSize,  0);
    47.6  
    47.7 +// Only used on 64 bit platforms
    47.8 +define_pd_global(uintx, HeapBaseMinAddress,      256*M);
    47.9  // Only used on 64 bit Windows platforms
   47.10  define_pd_global(bool, UseVectoredExceptions,    false);
    48.1 --- a/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    48.2 +++ b/src/os_cpu/windows_x86/vm/globals_windows_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    48.3 @@ -45,5 +45,7 @@
    48.4  
    48.5  define_pd_global(uintx, JVMInvokeMethodSlack,    8192);
    48.6  
    48.7 +// Only used on 64 bit platforms
    48.8 +define_pd_global(uintx, HeapBaseMinAddress,      2*G);
    48.9  // Only used on 64 bit Windows platforms
   48.10  define_pd_global(bool, UseVectoredExceptions,    false);
    49.1 --- a/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Wed Mar 18 11:37:48 2009 -0400
    49.2 +++ b/src/os_cpu/windows_x86/vm/unwind_windows_x86.hpp	Thu Mar 19 09:13:24 2009 -0700
    49.3 @@ -68,6 +68,9 @@
    49.4      PVOID HandlerData;
    49.5  } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
    49.6  
    49.7 +#if MSC_VER < 1500
    49.8 +
    49.9 +/* Not needed for VS2008 compiler, comes from winnt.h. */
   49.10  typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
   49.11      IN PEXCEPTION_RECORD ExceptionRecord,
   49.12      IN ULONG64 EstablisherFrame,
   49.13 @@ -75,4 +78,6 @@
   49.14      IN OUT PDISPATCHER_CONTEXT DispatcherContext
   49.15  );
   49.16  
   49.17 +#endif
   49.18 +
   49.19  #endif // AMD64
    50.1 --- a/src/share/vm/adlc/adlc.hpp	Wed Mar 18 11:37:48 2009 -0400
    50.2 +++ b/src/share/vm/adlc/adlc.hpp	Thu Mar 19 09:13:24 2009 -0700
    50.3 @@ -44,7 +44,7 @@
    50.4  #error "Something is wrong with the detection of MSC_VER in the makefiles"
    50.5  #endif
    50.6  
    50.7 -#if _MSC_VER >= 1400 && !defined(_WIN64)
    50.8 +#if _MSC_VER >= 1400
    50.9  #define strdup _strdup
   50.10  #endif
   50.11  
    51.1 --- a/src/share/vm/asm/assembler.cpp	Wed Mar 18 11:37:48 2009 -0400
    51.2 +++ b/src/share/vm/asm/assembler.cpp	Thu Mar 19 09:13:24 2009 -0700
    51.3 @@ -321,16 +321,19 @@
    51.4  bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
    51.5    // Exception handler checks the nmethod's implicit null checks table
    51.6    // only when this method returns false.
    51.7 -  if (UseCompressedOops) {
    51.8 +#ifdef _LP64
    51.9 +  if (UseCompressedOops && Universe::narrow_oop_base() != NULL) {
   51.10 +    assert (Universe::heap() != NULL, "java heap should be initialized");
   51.11      // The first page after heap_base is unmapped and
   51.12      // the 'offset' is equal to [heap_base + offset] for
   51.13      // narrow oop implicit null checks.
   51.14 -    uintptr_t heap_base = (uintptr_t)Universe::heap_base();
   51.15 -    if ((uintptr_t)offset >= heap_base) {
   51.16 +    uintptr_t base = (uintptr_t)Universe::narrow_oop_base();
   51.17 +    if ((uintptr_t)offset >= base) {
   51.18        // Normalize offset for the next check.
   51.19 -      offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1));
   51.20 +      offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1));
   51.21      }
   51.22    }
   51.23 +#endif
   51.24    return offset < 0 || os::vm_page_size() <= offset;
   51.25  }
   51.26  
    52.1 --- a/src/share/vm/ci/ciMethodBlocks.cpp	Wed Mar 18 11:37:48 2009 -0400
    52.2 +++ b/src/share/vm/ci/ciMethodBlocks.cpp	Thu Mar 19 09:13:24 2009 -0700
    52.3 @@ -284,6 +284,11 @@
    52.4        //
    52.5        int ex_start = handler->start();
    52.6        int ex_end = handler->limit();
    52.7 +      // ensure a block at the start of exception range and start of following code
    52.8 +      (void) make_block_at(ex_start);
    52.9 +      if (ex_end < _code_size)
   52.10 +        (void) make_block_at(ex_end);
   52.11 +
   52.12        if (eb->is_handler()) {
   52.13          // Extend old handler exception range to cover additional range.
   52.14          int old_ex_start = eb->ex_start_bci();
   52.15 @@ -295,10 +300,6 @@
   52.16          eb->clear_exception_handler(); // Reset exception information
   52.17        }
   52.18        eb->set_exception_range(ex_start, ex_end);
   52.19 -      // ensure a block at the start of exception range and start of following code
   52.20 -      (void) make_block_at(ex_start);
   52.21 -      if (ex_end < _code_size)
   52.22 -        (void) make_block_at(ex_end);
   52.23      }
   52.24    }
   52.25  
    53.1 --- a/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 18 11:37:48 2009 -0400
    53.2 +++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Mar 19 09:13:24 2009 -0700
    53.3 @@ -284,6 +284,7 @@
    53.4    template(value_name,                                "value")                                    \
    53.5    template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
    53.6    template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
    53.7 +  template(bitCount_name,                             "bitCount")                                 \
    53.8                                                                                                    \
    53.9    /* non-intrinsic name/signature pairs: */                                                       \
   53.10    template(register_method_name,                      "register")                                 \
   53.11 @@ -304,6 +305,7 @@
   53.12    template(double_long_signature,                     "(D)J")                                     \
   53.13    template(double_double_signature,                   "(D)D")                                     \
   53.14    template(int_float_signature,                       "(I)F")                                     \
   53.15 +  template(long_int_signature,                        "(J)I")                                     \
   53.16    template(long_long_signature,                       "(J)J")                                     \
   53.17    template(long_double_signature,                     "(J)D")                                     \
   53.18    template(byte_signature,                            "B")                                        \
   53.19 @@ -507,6 +509,10 @@
   53.20     do_name(     doubleToLongBits_name,                           "doubleToLongBits")                                    \
   53.21    do_intrinsic(_longBitsToDouble,         java_lang_Double,       longBitsToDouble_name,    long_double_signature, F_S) \
   53.22     do_name(     longBitsToDouble_name,                           "longBitsToDouble")                                    \
   53.23 +                                                                                                                        \
   53.24 +  do_intrinsic(_bitCount_i,               java_lang_Integer,      bitCount_name,            int_int_signature,   F_S)   \
   53.25 +  do_intrinsic(_bitCount_l,               java_lang_Long,         bitCount_name,            long_int_signature,  F_S)   \
   53.26 +                                                                                                                        \
   53.27    do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
   53.28     do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   53.29    do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
   53.30 @@ -696,7 +702,6 @@
   53.31    do_signature(putShort_raw_signature,    "(JS)V")                                                                      \
   53.32    do_signature(getChar_raw_signature,     "(J)C")                                                                       \
   53.33    do_signature(putChar_raw_signature,     "(JC)V")                                                                      \
   53.34 -  do_signature(getInt_raw_signature,      "(J)I")                                                                       \
   53.35    do_signature(putInt_raw_signature,      "(JI)V")                                                                      \
   53.36        do_alias(getLong_raw_signature,    /*(J)J*/ long_long_signature)                                                  \
   53.37        do_alias(putLong_raw_signature,    /*(JJ)V*/ long_long_void_signature)                                            \
   53.38 @@ -713,7 +718,7 @@
   53.39    do_intrinsic(_getByte_raw,              sun_misc_Unsafe,        getByte_name, getByte_raw_signature,           F_RN)  \
   53.40    do_intrinsic(_getShort_raw,             sun_misc_Unsafe,        getShort_name, getShort_raw_signature,         F_RN)  \
   53.41    do_intrinsic(_getChar_raw,              sun_misc_Unsafe,        getChar_name, getChar_raw_signature,           F_RN)  \
   53.42 -  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, getInt_raw_signature,             F_RN)  \
   53.43 +  do_intrinsic(_getInt_raw,               sun_misc_Unsafe,        getInt_name, long_int_signature,               F_RN)  \
   53.44    do_intrinsic(_getLong_raw,              sun_misc_Unsafe,        getLong_name, getLong_raw_signature,           F_RN)  \
   53.45    do_intrinsic(_getFloat_raw,             sun_misc_Unsafe,        getFloat_name, getFloat_raw_signature,         F_RN)  \
   53.46    do_intrinsic(_getDouble_raw,            sun_misc_Unsafe,        getDouble_name, getDouble_raw_signature,       F_RN)  \
    54.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Mar 18 11:37:48 2009 -0400
    54.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Mar 19 09:13:24 2009 -0700
    54.3 @@ -107,7 +107,7 @@
    54.4  #ifndef PRODUCT
    54.5  bool CMBitMapRO::covers(ReservedSpace rs) const {
    54.6    // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
    54.7 -  assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize,
    54.8 +  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
    54.9           "size inconsistency");
   54.10    return _bmStartWord == (HeapWord*)(rs.base()) &&
   54.11           _bmWordSize  == rs.size()>>LogHeapWordSize;
    55.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
    55.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
    55.3 @@ -1422,9 +1422,34 @@
    55.4    // Reserve the maximum.
    55.5    PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
    55.6    // Includes the perm-gen.
    55.7 +
    55.8 +  const size_t total_reserved = max_byte_size + pgs->max_size();
    55.9 +  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
   55.10 +
   55.11    ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
   55.12                          HeapRegion::GrainBytes,
   55.13 -                        false /*ism*/);
   55.14 +                        false /*ism*/, addr);
   55.15 +
   55.16 +  if (UseCompressedOops) {
   55.17 +    if (addr != NULL && !heap_rs.is_reserved()) {
   55.18 +      // Failed to reserve at specified address - the requested memory
   55.19 +      // region is taken already, for example, by 'java' launcher.
   55.20 +      // Try again to reserver heap higher.
   55.21 +      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   55.22 +      ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes,
   55.23 +                             false /*ism*/, addr);
   55.24 +      if (addr != NULL && !heap_rs0.is_reserved()) {
   55.25 +        // Failed to reserve at specified address again - give up.
   55.26 +        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   55.27 +        assert(addr == NULL, "");
   55.28 +        ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes,
   55.29 +                               false /*ism*/, addr);
   55.30 +        heap_rs = heap_rs1;
   55.31 +      } else {
   55.32 +        heap_rs = heap_rs0;
   55.33 +      }
   55.34 +    }
   55.35 +  }
   55.36  
   55.37    if (!heap_rs.is_reserved()) {
   55.38      vm_exit_during_initialization("Could not reserve enough space for object heap");
    56.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Mar 18 11:37:48 2009 -0400
    56.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Mar 19 09:13:24 2009 -0700
    56.3 @@ -508,7 +508,7 @@
    56.4    typedef PosParPRT* PosParPRTPtr;
    56.5    if (_max_fine_entries == 0) {
    56.6      assert(_mod_max_fine_entries_mask == 0, "Both or none.");
    56.7 -    _max_fine_entries = (1 << G1LogRSRegionEntries);
    56.8 +    _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
    56.9      _mod_max_fine_entries_mask = _max_fine_entries - 1;
   56.10  #if SAMPLE_FOR_EVICTION
   56.11      assert(_fine_eviction_sample_size == 0
    57.1 --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
    57.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
    57.3 @@ -104,12 +104,38 @@
    57.4                    og_min_size, og_max_size,
    57.5                    yg_min_size, yg_max_size);
    57.6  
    57.7 +  const size_t total_reserved = pg_max_size + og_max_size + yg_max_size;
    57.8 +  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
    57.9 +
   57.10    // The main part of the heap (old gen + young gen) can often use a larger page
   57.11    // size than is needed or wanted for the perm gen.  Use the "compound
   57.12    // alignment" ReservedSpace ctor to avoid having to use the same page size for
   57.13    // all gens.
   57.14 +
   57.15    ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size,
   57.16 -                            og_align);
   57.17 +                            og_align, addr);
   57.18 +
   57.19 +  if (UseCompressedOops) {
   57.20 +    if (addr != NULL && !heap_rs.is_reserved()) {
   57.21 +      // Failed to reserve at specified address - the requested memory
   57.22 +      // region is taken already, for example, by 'java' launcher.
   57.23 +      // Try again to reserver heap higher.
   57.24 +      addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   57.25 +      ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size,
   57.26 +                                 og_align, addr);
   57.27 +      if (addr != NULL && !heap_rs0.is_reserved()) {
   57.28 +        // Failed to reserve at specified address again - give up.
   57.29 +        addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   57.30 +        assert(addr == NULL, "");
   57.31 +        ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size,
   57.32 +                                   og_align, addr);
   57.33 +        heap_rs = heap_rs1;
   57.34 +      } else {
   57.35 +        heap_rs = heap_rs0;
   57.36 +      }
   57.37 +    }
   57.38 +  }
   57.39 +
   57.40    os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
   57.41                         heap_rs.base(), pg_max_size);
   57.42    os::trace_page_sizes("ps main", og_min_size + yg_min_size,
    58.1 --- a/src/share/vm/includeDB_core	Wed Mar 18 11:37:48 2009 -0400
    58.2 +++ b/src/share/vm/includeDB_core	Thu Mar 19 09:13:24 2009 -0700
    58.3 @@ -4598,6 +4598,7 @@
    58.4  vm_version_<arch>.hpp                   globals_extension.hpp
    58.5  vm_version_<arch>.hpp                   vm_version.hpp
    58.6  
    58.7 +vm_version_<os_arch>.cpp                os.hpp
    58.8  vm_version_<os_arch>.cpp                vm_version_<arch>.hpp
    58.9  
   58.10  vmreg.cpp                               assembler.hpp
    59.1 --- a/src/share/vm/memory/blockOffsetTable.hpp	Wed Mar 18 11:37:48 2009 -0400
    59.2 +++ b/src/share/vm/memory/blockOffsetTable.hpp	Thu Mar 19 09:13:24 2009 -0700
    59.3 @@ -235,7 +235,7 @@
    59.4    };
    59.5  
    59.6    static size_t power_to_cards_back(uint i) {
    59.7 -    return 1 << (LogBase * i);
    59.8 +    return (size_t)(1 << (LogBase * i));
    59.9    }
   59.10    static size_t power_to_words_back(uint i) {
   59.11      return power_to_cards_back(i) * N_words;
    60.1 --- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Mar 18 11:37:48 2009 -0400
    60.2 +++ b/src/share/vm/memory/genCollectedHeap.cpp	Thu Mar 19 09:13:24 2009 -0700
    60.3 @@ -218,6 +218,31 @@
    60.4      heap_address -= total_reserved;
    60.5    } else {
    60.6      heap_address = NULL;  // any address will do.
    60.7 +    if (UseCompressedOops) {
    60.8 +      heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
    60.9 +      *_total_reserved = total_reserved;
   60.10 +      *_n_covered_regions = n_covered_regions;
   60.11 +      *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   60.12 +                                   UseLargePages, heap_address);
   60.13 +
   60.14 +      if (heap_address != NULL && !heap_rs->is_reserved()) {
   60.15 +        // Failed to reserve at specified address - the requested memory
   60.16 +        // region is taken already, for example, by 'java' launcher.
   60.17 +        // Try again to reserve the heap higher.
   60.18 +        heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
   60.19 +        *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   60.20 +                                     UseLargePages, heap_address);
   60.21 +
   60.22 +        if (heap_address != NULL && !heap_rs->is_reserved()) {
   60.23 +          // Failed to reserve at specified address again - give up.
   60.24 +          heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
   60.25 +          assert(heap_address == NULL, "");
   60.26 +          *heap_rs = ReservedHeapSpace(total_reserved, alignment,
   60.27 +                                       UseLargePages, heap_address);
   60.28 +        }
   60.29 +      }
   60.30 +      return heap_address;
   60.31 +    }
   60.32    }
   60.33  
   60.34    *_total_reserved = total_reserved;
    61.1 --- a/src/share/vm/memory/universe.cpp	Wed Mar 18 11:37:48 2009 -0400
    61.2 +++ b/src/share/vm/memory/universe.cpp	Thu Mar 19 09:13:24 2009 -0700
    61.3 @@ -99,7 +99,8 @@
    61.4  size_t          Universe::_heap_used_at_last_gc = 0;
    61.5  
    61.6  CollectedHeap*  Universe::_collectedHeap = NULL;
    61.7 -address         Universe::_heap_base = NULL;
    61.8 +
    61.9 +NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true };
   61.10  
   61.11  
   61.12  void Universe::basic_type_classes_do(void f(klassOop)) {
   61.13 @@ -729,6 +730,53 @@
   61.14    return JNI_OK;
   61.15  }
   61.16  
   61.17 +// Choose the heap base address and oop encoding mode
   61.18 +// when compressed oops are used:
   61.19 +// Unscaled  - Use 32-bits oops without encoding when
   61.20 +//     NarrowOopHeapBaseMin + heap_size < 4Gb
   61.21 +// ZeroBased - Use zero based compressed oops with encoding when
   61.22 +//     NarrowOopHeapBaseMin + heap_size < 32Gb
   61.23 +// HeapBased - Use compressed oops with heap base + encoding.
   61.24 +
   61.25 +// 4Gb
   61.26 +static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
   61.27 +// 32Gb
   61.28 +static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
   61.29 +
   61.30 +char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
   61.31 +#ifdef _LP64
   61.32 +  if (UseCompressedOops) {
   61.33 +    assert(mode == UnscaledNarrowOop  ||
   61.34 +           mode == ZeroBasedNarrowOop ||
   61.35 +           mode == HeapBasedNarrowOop, "mode is invalid");
   61.36 +
   61.37 +    const size_t total_size = heap_size + HeapBaseMinAddress;
   61.38 +    if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) {
   61.39 +      if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) &&
   61.40 +          (Universe::narrow_oop_shift() == 0)) {
   61.41 +        // Use 32-bits oops without encoding and
   61.42 +        // place heap's top on the 4Gb boundary
   61.43 +        return (char*)(NarrowOopHeapMax - heap_size);
   61.44 +      } else {
   61.45 +        // Can't reserve with NarrowOopShift == 0
   61.46 +        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   61.47 +        if (mode == UnscaledNarrowOop ||
   61.48 +            mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax) {
   61.49 +          // Use zero based compressed oops with encoding and
   61.50 +          // place heap's top on the 32Gb boundary in case
   61.51 +          // total_size > 4Gb or failed to reserve below 4Gb.
   61.52 +          return (char*)(OopEncodingHeapMax - heap_size);
   61.53 +        }
   61.54 +      }
   61.55 +    } else {
   61.56 +      // Can't reserve below 32Gb.
   61.57 +      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   61.58 +    }
   61.59 +  }
   61.60 +#endif
   61.61 +  return NULL; // also return NULL (don't care) for 32-bit VM
   61.62 +}
   61.63 +
   61.64  jint Universe::initialize_heap() {
   61.65  
   61.66    if (UseParallelGC) {
   61.67 @@ -773,6 +821,8 @@
   61.68    if (status != JNI_OK) {
   61.69      return status;
   61.70    }
   61.71 +
   61.72 +#ifdef _LP64
   61.73    if (UseCompressedOops) {
   61.74      // Subtract a page because something can get allocated at heap base.
   61.75      // This also makes implicit null checking work, because the
   61.76 @@ -780,8 +830,49 @@
   61.77      // See needs_explicit_null_check.
   61.78      // Only set the heap base for compressed oops because it indicates
   61.79      // compressed oops for pstack code.
   61.80 -    Universe::_heap_base = Universe::heap()->base() - os::vm_page_size();
   61.81 +    if (PrintCompressedOopsMode) {
   61.82 +      tty->cr();
   61.83 +      tty->print("heap address: "PTR_FORMAT, Universe::heap()->base());
   61.84 +    }
   61.85 +    if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) {
   61.86 +      // Can't reserve heap below 32Gb.
   61.87 +      Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size());
   61.88 +      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
   61.89 +      if (PrintCompressedOopsMode) {
   61.90 +        tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base());
   61.91 +      }
   61.92 +    } else {
   61.93 +      Universe::set_narrow_oop_base(0);
   61.94 +      if (PrintCompressedOopsMode) {
   61.95 +        tty->print(", zero based Compressed Oops");
   61.96 +      }
   61.97 +#ifdef _WIN64
   61.98 +      if (!Universe::narrow_oop_use_implicit_null_checks()) {
   61.99 +        // Don't need guard page for implicit checks in indexed addressing
  61.100 +        // mode with zero based Compressed Oops.
  61.101 +        Universe::set_narrow_oop_use_implicit_null_checks(true);
  61.102 +      }
  61.103 +#endif //  _WIN64
  61.104 +      if((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) {
  61.105 +        // Can't reserve heap below 4Gb.
  61.106 +        Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
  61.107 +      } else {
  61.108 +        assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop");
  61.109 +        if (PrintCompressedOopsMode) {
  61.110 +          tty->print(", 32-bits Oops");
  61.111 +        }
  61.112 +      }
  61.113 +    }
  61.114 +    if (PrintCompressedOopsMode) {
  61.115 +      tty->cr();
  61.116 +      tty->cr();
  61.117 +    }
  61.118    }
  61.119 +  assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) ||
  61.120 +         Universe::narrow_oop_base() == NULL, "invalid value");
  61.121 +  assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
  61.122 +         Universe::narrow_oop_shift() == 0, "invalid value");
  61.123 +#endif
  61.124  
  61.125    // We will never reach the CATCH below since Exceptions::_throw will cause
  61.126    // the VM to exit if an exception is thrown during initialization
    62.1 --- a/src/share/vm/memory/universe.hpp	Wed Mar 18 11:37:48 2009 -0400
    62.2 +++ b/src/share/vm/memory/universe.hpp	Thu Mar 19 09:13:24 2009 -0700
    62.3 @@ -90,6 +90,19 @@
    62.4    methodOop get_methodOop();
    62.5  };
    62.6  
    62.7 +// For UseCompressedOops.
    62.8 +struct NarrowOopStruct {
    62.9 +  // Base address for oop-within-java-object materialization.
   62.10 +  // NULL if using wide oops or zero based narrow oops.
   62.11 +  address _base;
   62.12 +  // Number of shift bits for encoding/decoding narrow oops.
   62.13 +  // 0 if using wide oops or zero based unscaled narrow oops,
   62.14 +  // LogMinObjAlignmentInBytes otherwise.
   62.15 +  int     _shift;
   62.16 +  // Generate code with implicit null checks for narrow oops.
   62.17 +  bool    _use_implicit_null_checks;
   62.18 +};
   62.19 +
   62.20  
   62.21  class Universe: AllStatic {
   62.22    // Ugh.  Universe is much too friendly.
   62.23 @@ -181,9 +194,9 @@
   62.24  
   62.25    // The particular choice of collected heap.
   62.26    static CollectedHeap* _collectedHeap;
   62.27 -  // Base address for oop-within-java-object materialization.
   62.28 -  // NULL if using wide oops.  Doubles as heap oop null value.
   62.29 -  static address        _heap_base;
   62.30 +
   62.31 +  // For UseCompressedOops.
   62.32 +  static struct NarrowOopStruct _narrow_oop;
   62.33  
   62.34    // array of dummy objects used with +FullGCAlot
   62.35    debug_only(static objArrayOop _fullgc_alot_dummy_array;)
   62.36 @@ -328,8 +341,25 @@
   62.37    static CollectedHeap* heap() { return _collectedHeap; }
   62.38  
   62.39    // For UseCompressedOops
   62.40 -  static address heap_base()       { return _heap_base; }
   62.41 -  static address* heap_base_addr() { return &_heap_base; }
   62.42 +  static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
   62.43 +  static address  narrow_oop_base()                   { return  _narrow_oop._base; }
   62.44 +  static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
   62.45 +  static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
   62.46 +  static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
   62.47 +  static bool     narrow_oop_use_implicit_null_checks()             { return  _narrow_oop._use_implicit_null_checks; }
   62.48 +  static void     set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; }
   62.49 +  // Narrow Oop encoding mode:
   62.50 +  // 0 - Use 32-bits oops without encoding when
   62.51 +  //     NarrowOopHeapBaseMin + heap_size < 4Gb
   62.52 +  // 1 - Use zero based compressed oops with encoding when
   62.53 +  //     NarrowOopHeapBaseMin + heap_size < 32Gb
   62.54 +  // 2 - Use compressed oops with heap base + encoding.
   62.55 +  enum NARROW_OOP_MODE {
   62.56 +    UnscaledNarrowOop  = 0,
   62.57 +    ZeroBasedNarrowOop = 1,
   62.58 +    HeapBasedNarrowOop = 2
   62.59 +  };
   62.60 +  static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode);
   62.61  
   62.62    // Historic gc information
   62.63    static size_t get_heap_capacity_at_last_gc()         { return _heap_capacity_at_last_gc; }
    63.1 --- a/src/share/vm/oops/oop.inline.hpp	Wed Mar 18 11:37:48 2009 -0400
    63.2 +++ b/src/share/vm/oops/oop.inline.hpp	Thu Mar 19 09:13:24 2009 -0700
    63.3 @@ -148,10 +148,11 @@
    63.4  
    63.5  inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
    63.6    assert(!is_null(v), "oop value can never be zero");
    63.7 -  address heap_base = Universe::heap_base();
    63.8 -  uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1));
    63.9 +  address base = Universe::narrow_oop_base();
   63.10 +  int    shift = Universe::narrow_oop_shift();
   63.11 +  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
   63.12    assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
   63.13 -  uint64_t result = pd >> LogMinObjAlignmentInBytes;
   63.14 +  uint64_t result = pd >> shift;
   63.15    assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
   63.16    return (narrowOop)result;
   63.17  }
   63.18 @@ -162,8 +163,9 @@
   63.19  
   63.20  inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
   63.21    assert(!is_null(v), "narrow oop value can never be zero");
   63.22 -  address heap_base = Universe::heap_base();
   63.23 -  return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes));
   63.24 +  address base = Universe::narrow_oop_base();
   63.25 +  int    shift = Universe::narrow_oop_shift();
   63.26 +  return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
   63.27  }
   63.28  
   63.29  inline oop oopDesc::decode_heap_oop(narrowOop v) {
    64.1 --- a/src/share/vm/opto/addnode.cpp	Wed Mar 18 11:37:48 2009 -0400
    64.2 +++ b/src/share/vm/opto/addnode.cpp	Thu Mar 19 09:13:24 2009 -0700
    64.3 @@ -756,7 +756,13 @@
    64.4        if ( eti == NULL ) {
    64.5          // there must be one pointer among the operands
    64.6          guarantee(tptr == NULL, "must be only one pointer operand");
    64.7 -        tptr = et->isa_oopptr();
    64.8 +        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
    64.9 +          // 32-bits narrow oop can be the base of address expressions
   64.10 +          tptr = et->make_ptr()->isa_oopptr();
   64.11 +        } else {
   64.12 +          // only regular oops are expected here
   64.13 +          tptr = et->isa_oopptr();
   64.14 +        }
   64.15          guarantee(tptr != NULL, "non-int operand must be pointer");
   64.16          if (tptr->higher_equal(tp->add_offset(tptr->offset())))
   64.17            tp = tptr; // Set more precise type for bailout
    65.1 --- a/src/share/vm/opto/classes.hpp	Wed Mar 18 11:37:48 2009 -0400
    65.2 +++ b/src/share/vm/opto/classes.hpp	Thu Mar 19 09:13:24 2009 -0700
    65.3 @@ -184,6 +184,8 @@
    65.4  macro(Parm)
    65.5  macro(PartialSubtypeCheck)
    65.6  macro(Phi)
    65.7 +macro(PopCountI)
    65.8 +macro(PopCountL)
    65.9  macro(PowD)
   65.10  macro(PrefetchRead)
   65.11  macro(PrefetchWrite)
    66.1 --- a/src/share/vm/opto/compile.cpp	Wed Mar 18 11:37:48 2009 -0400
    66.2 +++ b/src/share/vm/opto/compile.cpp	Thu Mar 19 09:13:24 2009 -0700
    66.3 @@ -2081,7 +2081,7 @@
    66.4  
    66.5  #ifdef _LP64
    66.6    case Op_CastPP:
    66.7 -    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
    66.8 +    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
    66.9        Compile* C = Compile::current();
   66.10        Node* in1 = n->in(1);
   66.11        const Type* t = n->bottom_type();
   66.12 @@ -2136,7 +2136,7 @@
   66.13          new_in2 = in2->in(1);
   66.14        } else if (in2->Opcode() == Op_ConP) {
   66.15          const Type* t = in2->bottom_type();
   66.16 -        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
   66.17 +        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
   66.18            new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
   66.19            //
   66.20            // This transformation together with CastPP transformation above
    67.1 --- a/src/share/vm/opto/connode.cpp	Wed Mar 18 11:37:48 2009 -0400
    67.2 +++ b/src/share/vm/opto/connode.cpp	Thu Mar 19 09:13:24 2009 -0700
    67.3 @@ -433,7 +433,7 @@
    67.4  // If not converting int->oop, throw away cast after constant propagation
    67.5  Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
    67.6    const Type *t = ccp->type(in(1));
    67.7 -  if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) {
    67.8 +  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
    67.9      return NULL; // do not transform raw pointers or narrow oops
   67.10    }
   67.11    return ConstraintCastNode::Ideal_DU_postCCP(ccp);
    68.1 --- a/src/share/vm/opto/connode.hpp	Wed Mar 18 11:37:48 2009 -0400
    68.2 +++ b/src/share/vm/opto/connode.hpp	Thu Mar 19 09:13:24 2009 -0700
    68.3 @@ -1,5 +1,5 @@
    68.4  /*
    68.5 - * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
    68.6 + * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
    68.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    68.8   *
    68.9   * This code is free software; you can redistribute it and/or modify it
   68.10 @@ -635,3 +635,23 @@
   68.11    virtual uint ideal_reg() const { return Op_RegL; }
   68.12    virtual const Type* Value( PhaseTransform *phase ) const;
   68.13  };
   68.14 +
   68.15 +//---------- PopCountINode -----------------------------------------------------
   68.16 +// Population count (bit count) of an integer.
   68.17 +class PopCountINode : public Node {
   68.18 +public:
   68.19 +  PopCountINode(Node* in1) : Node(0, in1) {}
   68.20 +  virtual int Opcode() const;
   68.21 +  const Type* bottom_type() const { return TypeInt::INT; }
   68.22 +  virtual uint ideal_reg() const { return Op_RegI; }
   68.23 +};
   68.24 +
   68.25 +//---------- PopCountLNode -----------------------------------------------------
   68.26 +// Population count (bit count) of a long.
   68.27 +class PopCountLNode : public Node {
   68.28 +public:
   68.29 +  PopCountLNode(Node* in1) : Node(0, in1) {}
   68.30 +  virtual int Opcode() const;
   68.31 +  const Type* bottom_type() const { return TypeInt::INT; }
   68.32 +  virtual uint ideal_reg() const { return Op_RegI; }
   68.33 +};
    69.1 --- a/src/share/vm/opto/graphKit.cpp	Wed Mar 18 11:37:48 2009 -0400
    69.2 +++ b/src/share/vm/opto/graphKit.cpp	Thu Mar 19 09:13:24 2009 -0700
    69.3 @@ -2277,7 +2277,7 @@
    69.4    r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
    69.5    set_control(                _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
    69.6  
    69.7 -  // Check for self.  Very rare to get here, but its taken 1/3 the time.
    69.8 +  // Check for self.  Very rare to get here, but it is taken 1/3 the time.
    69.9    // No performance impact (too rare) but allows sharing of secondary arrays
   69.10    // which has some footprint reduction.
   69.11    Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
   69.12 @@ -2286,11 +2286,27 @@
   69.13    r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
   69.14    set_control(               _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
   69.15  
   69.16 +  // -- Roads not taken here: --
   69.17 +  // We could also have chosen to perform the self-check at the beginning
   69.18 +  // of this code sequence, as the assembler does.  This would not pay off
   69.19 +  // the same way, since the optimizer, unlike the assembler, can perform
   69.20 +  // static type analysis to fold away many successful self-checks.
   69.21 +  // Non-foldable self checks work better here in second position, because
   69.22 +  // the initial primary superclass check subsumes a self-check for most
   69.23 +  // types.  An exception would be a secondary type like array-of-interface,
   69.24 +  // which does not appear in its own primary supertype display.
   69.25 +  // Finally, we could have chosen to move the self-check into the
   69.26 +  // PartialSubtypeCheckNode, and from there out-of-line in a platform
   69.27 +  // dependent manner.  But it is worthwhile to have the check here,
   69.28 +  // where it can be perhaps be optimized.  The cost in code space is
   69.29 +  // where it can perhaps be optimized.  The cost in code space is
   69.30 +
   69.31    // Now do a linear scan of the secondary super-klass array.  Again, no real
   69.32    // performance impact (too rare) but it's gotta be done.
   69.33 -  // (The stub also contains the self-check of subklass == superklass.
   69.34    // Since the code is rarely used, there is no penalty for moving it
   69.35 -  // out of line, and it can only improve I-cache density.)
   69.36 +  // out of line, and it can only improve I-cache density.
   69.37 +  // The decision to inline or out-of-line this final check is platform
   69.38 +  // dependent, and is found in the AD file definition of PartialSubtypeCheck.
   69.39    Node* psc = _gvn.transform(
   69.40      new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
   69.41  
    70.1 --- a/src/share/vm/opto/lcm.cpp	Wed Mar 18 11:37:48 2009 -0400
    70.2 +++ b/src/share/vm/opto/lcm.cpp	Thu Mar 19 09:13:24 2009 -0700
    70.3 @@ -158,7 +158,14 @@
    70.4            continue;             // Give up if offset is beyond page size
    70.5          // cannot reason about it; is probably not implicit null exception
    70.6        } else {
    70.7 -        const TypePtr* tptr = base->bottom_type()->is_ptr();
    70.8 +        const TypePtr* tptr;
    70.9 +        if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
   70.10 +          // 32-bits narrow oop can be the base of address expressions
   70.11 +          tptr = base->bottom_type()->make_ptr();
   70.12 +        } else {
   70.13 +          // only regular oops are expected here
   70.14 +          tptr = base->bottom_type()->is_ptr();
   70.15 +        }
   70.16          // Give up if offset is not a compile-time constant
   70.17          if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
   70.18            continue;
    71.1 --- a/src/share/vm/opto/library_call.cpp	Wed Mar 18 11:37:48 2009 -0400
    71.2 +++ b/src/share/vm/opto/library_call.cpp	Thu Mar 19 09:13:24 2009 -0700
    71.3 @@ -1,5 +1,5 @@
    71.4  /*
    71.5 - * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
    71.6 + * Copyright 1999-2009 Sun Microsystems, Inc.  All Rights Reserved.
    71.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    71.8   *
    71.9   * This code is free software; you can redistribute it and/or modify it
   71.10 @@ -221,6 +221,7 @@
   71.11    bool inline_unsafe_CAS(BasicType type);
   71.12    bool inline_unsafe_ordered_store(BasicType type);
   71.13    bool inline_fp_conversions(vmIntrinsics::ID id);
   71.14 +  bool inline_bitCount(vmIntrinsics::ID id);
   71.15    bool inline_reverseBytes(vmIntrinsics::ID id);
   71.16  };
   71.17  
   71.18 @@ -314,6 +315,11 @@
   71.19      if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
   71.20      break;
   71.21  
   71.22 +  case vmIntrinsics::_bitCount_i:
   71.23 +  case vmIntrinsics::_bitCount_l:
   71.24 +    if (!UsePopCountInstruction)  return NULL;
   71.25 +    break;
   71.26 +
   71.27   default:
   71.28      break;
   71.29    }
   71.30 @@ -617,6 +623,10 @@
   71.31    case vmIntrinsics::_longBitsToDouble:
   71.32      return inline_fp_conversions(intrinsic_id());
   71.33  
   71.34 +  case vmIntrinsics::_bitCount_i:
   71.35 +  case vmIntrinsics::_bitCount_l:
   71.36 +    return inline_bitCount(intrinsic_id());
   71.37 +
   71.38    case vmIntrinsics::_reverseBytes_i:
   71.39    case vmIntrinsics::_reverseBytes_l:
   71.40      return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
   71.41 @@ -1714,6 +1724,27 @@
   71.42    }
   71.43  }
   71.44  
   71.45 +//----------------------------inline_bitCount_int/long-----------------------
   71.46 +// inline int Integer.bitCount(int)
   71.47 +// inline int Long.bitCount(long)
   71.48 +bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
   71.49 +  assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
   71.50 +  if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
   71.51 +  if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
   71.52 +  _sp += arg_size();  // restore stack pointer
   71.53 +  switch (id) {
   71.54 +  case vmIntrinsics::_bitCount_i:
   71.55 +    push(_gvn.transform(new (C, 2) PopCountINode(pop())));
   71.56 +    break;
   71.57 +  case vmIntrinsics::_bitCount_l:
   71.58 +    push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
   71.59 +    break;
   71.60 +  default:
   71.61 +    ShouldNotReachHere();
   71.62 +  }
   71.63 +  return true;
   71.64 +}
   71.65 +
   71.66  //----------------------------inline_reverseBytes_int/long-------------------
   71.67  // inline Integer.reverseBytes(int)
   71.68  // inline Long.reverseBytes(long)
    72.1 --- a/src/share/vm/opto/matcher.cpp	Wed Mar 18 11:37:48 2009 -0400
    72.2 +++ b/src/share/vm/opto/matcher.cpp	Thu Mar 19 09:13:24 2009 -0700
    72.3 @@ -1481,8 +1481,13 @@
    72.4        const Type* mach_at = mach->adr_type();
    72.5        // DecodeN node consumed by an address may have different type
    72.6        // then its input. Don't compare types for such case.
    72.7 -      if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() &&
    72.8 -          m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) {
    72.9 +      if (m->adr_type() != mach_at &&
   72.10 +          (m->in(MemNode::Address)->is_DecodeN() ||
   72.11 +           m->in(MemNode::Address)->is_AddP() &&
   72.12 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
   72.13 +           m->in(MemNode::Address)->is_AddP() &&
   72.14 +           m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
   72.15 +           m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
   72.16          mach_at = m->adr_type();
   72.17        }
   72.18        if (m->adr_type() != mach_at) {
    73.1 --- a/src/share/vm/runtime/arguments.cpp	Wed Mar 18 11:37:48 2009 -0400
    73.2 +++ b/src/share/vm/runtime/arguments.cpp	Thu Mar 19 09:13:24 2009 -0700
    73.3 @@ -1211,7 +1211,9 @@
    73.4      if (UseLargePages && UseCompressedOops) {
    73.5        // Cannot allocate guard pages for implicit checks in indexed addressing
    73.6        // mode, when large pages are specified on windows.
    73.7 -      FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false);
    73.8 +      // This flag could be switched ON if narrow oop base address is set to 0,
    73.9 +      // see code in Universe::initialize_heap().
   73.10 +      Universe::set_narrow_oop_use_implicit_null_checks(false);
   73.11      }
   73.12  #endif //  _WIN64
   73.13    } else {
    74.1 --- a/src/share/vm/runtime/globals.hpp	Wed Mar 18 11:37:48 2009 -0400
    74.2 +++ b/src/share/vm/runtime/globals.hpp	Thu Mar 19 09:13:24 2009 -0700
    74.3 @@ -303,11 +303,14 @@
    74.4              "Use 32-bit object references in 64-bit VM. "                   \
    74.5              "lp64_product means flag is always constant in 32 bit VM")      \
    74.6                                                                              \
    74.7 -  lp64_product(bool, CheckCompressedOops, trueInDebug,                      \
    74.8 -            "generate checks in encoding/decoding code")                    \
    74.9 -                                                                            \
   74.10 -  product(bool, UseImplicitNullCheckForNarrowOop, true,                     \
   74.11 -            "generate implicit null check in indexed addressing mode.")     \
   74.12 +  notproduct(bool, CheckCompressedOops, true,                               \
   74.13 +            "generate checks in encoding/decoding code in debug VM")        \
   74.14 +                                                                            \
   74.15 +  product_pd(uintx, HeapBaseMinAddress,                                     \
   74.16 +            "OS specific low limit for heap base address")                  \
   74.17 +                                                                            \
   74.18 +  diagnostic(bool, PrintCompressedOopsMode, false,                          \
   74.19 +            "Print compressed oops base address and encoding mode")         \
   74.20                                                                              \
   74.21    /* UseMembar is theoretically a temp flag used for memory barrier         \
   74.22     * removal testing.  It was supposed to be removed before FCS but has     \
   74.23 @@ -2169,6 +2172,9 @@
   74.24    diagnostic(bool, PrintIntrinsics, false,                                  \
   74.25            "prints attempted and successful inlining of intrinsics")         \
   74.26                                                                              \
   74.27 +  product(bool, UsePopCountInstruction, false,                              \
   74.28 +          "Use population count instruction")                               \
   74.29 +                                                                            \
   74.30    diagnostic(ccstrlist, DisableIntrinsic, "",                               \
   74.31            "do not expand intrinsics whose (internal) names appear here")    \
   74.32                                                                              \
    75.1 --- a/src/share/vm/runtime/os.hpp	Wed Mar 18 11:37:48 2009 -0400
    75.2 +++ b/src/share/vm/runtime/os.hpp	Thu Mar 19 09:13:24 2009 -0700
    75.3 @@ -243,7 +243,7 @@
    75.4  
    75.5    static char*  non_memory_address_word();
    75.6    // reserve, commit and pin the entire memory region
    75.7 -  static char*  reserve_memory_special(size_t size);
    75.8 +  static char*  reserve_memory_special(size_t size, char* addr = NULL);
    75.9    static bool   release_memory_special(char* addr, size_t bytes);
   75.10    static bool   large_page_init();
   75.11    static size_t large_page_size();
    76.1 --- a/src/share/vm/runtime/virtualspace.cpp	Wed Mar 18 11:37:48 2009 -0400
    76.2 +++ b/src/share/vm/runtime/virtualspace.cpp	Thu Mar 19 09:13:24 2009 -0700
    76.3 @@ -109,6 +109,7 @@
    76.4                               const size_t prefix_align,
    76.5                               const size_t suffix_size,
    76.6                               const size_t suffix_align,
    76.7 +                             char* requested_address,
    76.8                               const size_t noaccess_prefix)
    76.9  {
   76.10    assert(prefix_size != 0, "sanity");
   76.11 @@ -131,7 +132,7 @@
   76.12    const bool try_reserve_special = UseLargePages &&
   76.13      prefix_align == os::large_page_size();
   76.14    if (!os::can_commit_large_page_memory() && try_reserve_special) {
   76.15 -    initialize(size, prefix_align, true, NULL, noaccess_prefix);
   76.16 +    initialize(size, prefix_align, true, requested_address, noaccess_prefix);
   76.17      return;
   76.18    }
   76.19  
   76.20 @@ -146,7 +147,13 @@
   76.21           noaccess_prefix == prefix_align, "noaccess prefix wrong");
   76.22  
   76.23    // Optimistically try to reserve the exact size needed.
   76.24 -  char* addr = os::reserve_memory(size, NULL, prefix_align);
   76.25 +  char* addr;
   76.26 +  if (requested_address != 0) {
   76.27 +    addr = os::attempt_reserve_memory_at(size,
   76.28 +                                         requested_address-noaccess_prefix);
   76.29 +  } else {
   76.30 +    addr = os::reserve_memory(size, NULL, prefix_align);
   76.31 +  }
   76.32    if (addr == NULL) return;
   76.33  
   76.34    // Check whether the result has the needed alignment (unlikely unless
   76.35 @@ -206,12 +213,8 @@
   76.36    char* base = NULL;
   76.37  
   76.38    if (special) {
   76.39 -    // It's not hard to implement reserve_memory_special() such that it can
   76.40 -    // allocate at fixed address, but there seems no use of this feature
   76.41 -    // for now, so it's not implemented.
   76.42 -    assert(requested_address == NULL, "not implemented");
   76.43  
   76.44 -    base = os::reserve_memory_special(size);
   76.45 +    base = os::reserve_memory_special(size, requested_address);
   76.46  
   76.47      if (base != NULL) {
   76.48        // Check alignment constraints
   76.49 @@ -372,7 +375,8 @@
   76.50                                       bool large, char* requested_address) :
   76.51    ReservedSpace(size, alignment, large,
   76.52                  requested_address,
   76.53 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
   76.54 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
   76.55 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
   76.56                    lcm(os::vm_page_size(), alignment) : 0) {
   76.57    // Only reserved space for the java heap should have a noaccess_prefix
   76.58    // if using compressed oops.
   76.59 @@ -382,9 +386,12 @@
   76.60  ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
   76.61                                       const size_t prefix_align,
   76.62                                       const size_t suffix_size,
   76.63 -                                     const size_t suffix_align) :
   76.64 +                                     const size_t suffix_align,
   76.65 +                                     char* requested_address) :
   76.66    ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
   76.67 -                UseCompressedOops && UseImplicitNullCheckForNarrowOop ?
   76.68 +                requested_address,
   76.69 +                (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
   76.70 +                 Universe::narrow_oop_use_implicit_null_checks()) ?
   76.71                    lcm(os::vm_page_size(), prefix_align) : 0) {
   76.72    protect_noaccess_prefix(prefix_size+suffix_size);
   76.73  }
    77.1 --- a/src/share/vm/runtime/virtualspace.hpp	Wed Mar 18 11:37:48 2009 -0400
    77.2 +++ b/src/share/vm/runtime/virtualspace.hpp	Thu Mar 19 09:13:24 2009 -0700
    77.3 @@ -73,7 +73,8 @@
    77.4                  const size_t noaccess_prefix = 0);
    77.5    ReservedSpace(const size_t prefix_size, const size_t prefix_align,
    77.6                  const size_t suffix_size, const size_t suffix_align,
    77.7 -                const size_t noaccess_prefix);
    77.8 +                char* requested_address,
    77.9 +                const size_t noaccess_prefix = 0);
   77.10  
   77.11    // Accessors
   77.12    char*  base()      const { return _base;      }
   77.13 @@ -121,7 +122,8 @@
   77.14    ReservedHeapSpace(size_t size, size_t forced_base_alignment,
   77.15                      bool large, char* requested_address);
   77.16    ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
   77.17 -                    const size_t suffix_size, const size_t suffix_align);
   77.18 +                    const size_t suffix_size, const size_t suffix_align,
   77.19 +                    char* requested_address);
   77.20  };
   77.21  
   77.22  // VirtualSpace is data structure for committing a previously reserved address range in smaller chunks.
    78.1 --- a/src/share/vm/runtime/vmStructs.cpp	Wed Mar 18 11:37:48 2009 -0400
    78.2 +++ b/src/share/vm/runtime/vmStructs.cpp	Thu Mar 19 09:13:24 2009 -0700
    78.3 @@ -263,7 +263,9 @@
    78.4       static_field(Universe,                    _bootstrapping,                                bool)                                  \
    78.5       static_field(Universe,                    _fully_initialized,                            bool)                                  \
    78.6       static_field(Universe,                    _verify_count,                                 int)                                   \
    78.7 -     static_field(Universe,                    _heap_base,                                    address)                                   \
    78.8 +     static_field(Universe,                    _narrow_oop._base,                             address)                               \
    78.9 +     static_field(Universe,                    _narrow_oop._shift,                            int)                                   \
   78.10 +     static_field(Universe,                    _narrow_oop._use_implicit_null_checks,         bool)                                  \
   78.11                                                                                                                                       \
   78.12    /**********************************************************************************/                                               \
   78.13    /* Generation and Space hierarchies                                               */                                               \
    79.1 --- a/src/share/vm/runtime/vm_version.cpp	Wed Mar 18 11:37:48 2009 -0400
    79.2 +++ b/src/share/vm/runtime/vm_version.cpp	Thu Mar 19 09:13:24 2009 -0700
    79.3 @@ -163,9 +163,11 @@
    79.4        #elif _MSC_VER == 1200
    79.5          #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
    79.6        #elif _MSC_VER == 1310
    79.7 -        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1"
    79.8 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
    79.9        #elif _MSC_VER == 1400
   79.10 -        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0"
   79.11 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
   79.12 +      #elif _MSC_VER == 1500
   79.13 +        #define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
   79.14        #else
   79.15          #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
   79.16        #endif
    80.1 --- a/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Wed Mar 18 11:37:48 2009 -0400
    80.2 +++ b/src/share/vm/utilities/globalDefinitions_visCPP.hpp	Thu Mar 19 09:13:24 2009 -0700
    80.3 @@ -162,7 +162,7 @@
    80.4  }
    80.5  
    80.6  // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
    80.7 -#if _MSC_VER >= 1400 && !defined(_WIN64)
    80.8 +#if _MSC_VER >= 1400
    80.9  #define open _open
   80.10  #define close _close
   80.11  #define read  _read
    81.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    81.2 +++ b/test/compiler/6378821/Test6378821.java	Thu Mar 19 09:13:24 2009 -0700
    81.3 @@ -0,0 +1,75 @@
    81.4 +/*
    81.5 + * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
    81.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    81.7 + *
    81.8 + * This code is free software; you can redistribute it and/or modify it
    81.9 + * under the terms of the GNU General Public License version 2 only, as
   81.10 + * published by the Free Software Foundation.
   81.11 + *
   81.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
   81.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   81.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   81.15 + * version 2 for more details (a copy is included in the LICENSE file that
   81.16 + * accompanied this code).
   81.17 + *
   81.18 + * You should have received a copy of the GNU General Public License version
   81.19 + * 2 along with this work; if not, write to the Free Software Foundation,
   81.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
   81.21 + *
   81.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
   81.23 + * CA 95054 USA or visit www.sun.com if you need additional information or
   81.24 + * have any questions.
   81.25 + */
   81.26 +
   81.27 +/**
   81.28 + * @test
   81.29 + * @bug 6378821
   81.30 + * @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
   81.31 + *
   81.32 + * @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
   81.33 + */
   81.34 +
   81.35 +public class Test6378821 {
   81.36 +    static final int[]  ia = new int[]  { 0x12345678 };
   81.37 +    static final long[] la = new long[] { 0x12345678abcdefL };
   81.38 +
   81.39 +    public static void main(String [] args) {
   81.40 +        // Resolve the class and the method.
   81.41 +        Integer.bitCount(1);
   81.42 +        Long.bitCount(1);
   81.43 +
   81.44 +        sub(ia[0]);
   81.45 +        sub(la[0]);
   81.46 +        sub(ia);
   81.47 +        sub(la);
   81.48 +    }
   81.49 +
   81.50 +    static void check(int i, int expected, int result) {
   81.51 +        if (result != expected) {
   81.52 +            throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
   81.53 +        }
   81.54 +    }
   81.55 +
   81.56 +    static void check(long l, int expected, int result) {
   81.57 +        if (result != expected) {
   81.58 +            throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
   81.59 +        }
   81.60 +    }
   81.61 +
   81.62 +    static void sub(int i)     { check(i,     fint(i),  fcomp(i) ); }
   81.63 +    static void sub(int[] ia)  { check(ia[0], fint(ia), fcomp(ia)); }
   81.64 +    static void sub(long l)    { check(l,     fint(l),  fcomp(l) ); }
   81.65 +    static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
   81.66 +
   81.67 +    static int fint (int i)     { return Integer.bitCount(i); }
   81.68 +    static int fcomp(int i)     { return Integer.bitCount(i); }
   81.69 +
   81.70 +    static int fint (int[] ia)  { return Integer.bitCount(ia[0]); }
   81.71 +    static int fcomp(int[] ia)  { return Integer.bitCount(ia[0]); }
   81.72 +
   81.73 +    static int fint (long l)    { return Long.bitCount(l); }
   81.74 +    static int fcomp(long l)    { return Long.bitCount(l); }
   81.75 +
   81.76 +    static int fint (long[] la) { return Long.bitCount(la[0]); }
   81.77 +    static int fcomp(long[] la) { return Long.bitCount(la[0]); }
   81.78 +}

mercurial