aoqi@0: /* aoqi@0: * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: */ aoqi@0: /* aoqi@0: * Licensed Materials - Property of IBM aoqi@0: * RMI-IIOP v1.0 aoqi@0: * Copyright IBM Corp. 1998 1999 All Rights Reserved aoqi@0: * aoqi@0: */ aoqi@0: aoqi@0: package sun.rmi.rmic.iiop; aoqi@0: aoqi@0: /** aoqi@0: * StaticStringsHash takes an array of constant strings and aoqi@0: * uses several different hash methods to try to find the aoqi@0: * 'best' one for that set. The set of methods is currently aoqi@0: * fixed, but with a little work could be made extensible thru aoqi@0: * subclassing. aoqi@0: *
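aoqi@0: * The current set of methods is <code>length()</code>,
aoqi@0: * <code>charAt(n)</code> and <code>hashCode()</code>.
aoqi@0: * <p>
aoqi@0: * After constructing an instance over the set of strings, the
aoqi@0: * <code>getKey(String)</code> method can be used to apply the selected hash
aoqi@0: * method to produce a key. The <code>method</code> string will contain
aoqi@0: * "length()", "charAt(n)", or "hashCode()", and is intended for use by
aoqi@0: * code generators.
aoqi@0: * <p>
aoqi@0: * The <code>keys</code> array will contain the full set of unique keys.
aoqi@0: * <p>
aoqi@0: * The <code>buckets</code> array will contain a set of arrays, one for
aoqi@0: * each key in the <code>keys</code> array, where <code>buckets[x][y]</code>
aoqi@0: * is an index into the <code>strings</code> array.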
aoqi@0: * @author Bryan Atsatt
aoqi@0: */
public class StaticStringsHash {

    /** The set of strings upon which the hash info is created. */
    public String[] strings = null;

    /** Unique hash keys, sorted in ascending order. */
    public int[] keys = null;

    /**
     * Buckets for each key, where buckets[x][y] is an index
     * into the strings[] array. buckets[x] belongs to keys[x], and
     * the indices inside one bucket are in ascending order.
     */
    public int[][] buckets = null;

    /** The method to invoke on String to produce the hash key. */
    public String method = null;

    /**
     * Get a key for the given string using the selected hash method.
     *
     * @param str the string to return a key for.
     * @return the key.
     */
    public int getKey(String str) {
        switch (keyKind) {
        case LENGTH:
            return str.length();
        case CHAR_AT:
            return str.charAt(charAt);
        case HASH_CODE:
            return str.hashCode();
        }
        throw new Error("Bad keyKind");
    }

    /**
     * Constructor. Tries length(), then charAt(i), then hashCode(), and
     * keeps the method with the smallest maximum bucket depth (hashCode()
     * only when it is substantially better), then fills in
     * {@link #keys}, {@link #buckets} and {@link #method}.
     *
     * @param strings the set of strings upon which to
     * find an optimal hash method. Must not contain
     * duplicates.
     */
    public StaticStringsHash(String[] strings) {
        this.strings = strings;
        length = strings.length;
        tempKeys = new int[length];
        bucketSizes = new int[length];
        setMinStringLength();

        // Decide on the best algorithm based on which one has the
        // smallest maximum bucket depth. First, try length()...

        int currentMaxDepth = getKeys(LENGTH);
        int useCharAt = -1;
        boolean useHashCode = false;

        if (currentMaxDepth > 1) {

            // At least one bucket had more than one entry, so try
            // charAt(i) for each candidate position, stopping as soon
            // as a position yields a perfect (depth 1) hash.

            int searchLimit = charAtSearchLimit();
            for (int i = 0; i < searchLimit && currentMaxDepth > 1; i++) {
                charAt = i;
                int charAtDepth = getKeys(CHAR_AT);
                if (charAtDepth < currentMaxDepth) {
                    currentMaxDepth = charAtDepth;
                    useCharAt = i;
                }
            }
            charAt = useCharAt;

            if (currentMaxDepth > 1) {

                // Still not perfect, try hashCode().
                //
                // Since the cost of computing a full hashCode (for the
                // runtime target string) is much higher than the previous
                // methods, use it only if it is substantially better:
                // more than 3 fewer entries in the worst bucket. A smaller
                // threshold could select hashCode() when there are only
                // 2 strings in the array, which would surely be a poor
                // performance choice.

                int hashCodeDepth = getKeys(HASH_CODE);
                if (hashCodeDepth < currentMaxDepth - 3) {
                    useHashCode = true;
                }
            }

            // The scratch arrays currently describe the last method tried;
            // recompute them for the method actually selected.

            if (!useHashCode) {
                if (useCharAt >= 0) {
                    getKeys(CHAR_AT);
                } else {
                    getKeys(LENGTH);
                }
            }
        }

        // Publish the unique keys in ascending order...

        keys = new int[bucketCount];
        System.arraycopy(tempKeys, 0, keys, 0, bucketCount);
        java.util.Arrays.sort(keys);

        // Work out which bucket every string belongs to and how large
        // each bucket must be...

        int[] bucketOf = new int[length];
        int[] sizes = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            int b = java.util.Arrays.binarySearch(keys, getKey(strings[i]));
            bucketOf[i] = b;
            sizes[b]++;
        }

        // Allocate the buckets and fill them in. A per-bucket fill count
        // is used rather than a sentinel value, because any sentinel must
        // be distinguishable from every valid string index (including 0).

        buckets = new int[bucketCount][];
        for (int b = 0; b < bucketCount; b++) {
            buckets[b] = new int[sizes[b]];
        }
        int[] filled = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            int b = bucketOf[i];
            buckets[b][filled[b]++] = i;
        }
    }

    /**
     * Print an optimized 'contains' method for the argument strings.
     *
     * @param args the strings the generated method should recognize.
     */
    public static void main (String[] args) {
        StaticStringsHash hash = new StaticStringsHash(args);
        System.out.println();
        System.out.println(" public boolean contains(String key) {");
        System.out.println(" switch (key."+hash.method+") {");
        for (int i = 0; i < hash.buckets.length; i++) {
            System.out.println(" case "+hash.keys[i]+": ");
            for (int j = 0; j < hash.buckets[i].length; j++) {
                if (j > 0) {
                    System.out.print(" } else ");
                } else {
                    System.out.print(" ");
                }
                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
                System.out.println(" return true;");
            }
            System.out.println(" }");
        }
        System.out.println(" }");
        System.out.println(" return false;");
        System.out.println(" }");
    }

    /** Number of input strings. */
    private int length;
    /** Scratch: unique keys seen by the last getKeys() call, in first-seen order. */
    private int[] tempKeys;
    /** Scratch: number of strings per entry of tempKeys. */
    private int[] bucketSizes;
    /** Number of valid entries in tempKeys / bucketSizes. */
    private int bucketCount;
    /** Largest bucket size seen by the last getKeys() call. */
    private int maxDepth;
    /** Length of the shortest input string. */
    private int minStringLength = Integer.MAX_VALUE;
    /** Currently selected hash method: LENGTH, CHAR_AT or HASH_CODE. */
    private int keyKind;
    /** Character position used when keyKind is CHAR_AT. */
    private int charAt;

    private static final int LENGTH = 0;
    private static final int CHAR_AT = 1;
    private static final int HASH_CODE = 2;

    /* Determines the maximum number of charAt(i)
     * tests that will be done. The search is
     * limited because if the number of characters
     * is large enough, the likelihood of finding
     * a good hash key based on this method is
     * low. The CHAR_AT_MAX_CHARS limit only
     * applies if there are more strings than
     * CHAR_AT_MAX_LINES.
     */
    private static final int CHAR_AT_MAX_LINES = 50;
    private static final int CHAR_AT_MAX_CHARS = 1000;

    /**
     * Returns how many leading character positions the charAt(i) search
     * should examine. Normally that is every position of the shortest
     * string. When there are many strings and examining every position
     * would touch more than CHAR_AT_MAX_CHARS characters, the search is
     * cut down to roughly CHAR_AT_MAX_CHARS characters in total, but
     * never below one position and never beyond the shortest string.
     */
    private int charAtSearchLimit() {
        int limit = minStringLength;
        if (length > CHAR_AT_MAX_LINES
                && (long) length * limit > CHAR_AT_MAX_CHARS) {
            limit = Math.min(limit, Math.max(1, CHAR_AT_MAX_CHARS / length));
        }
        return limit;
    }

    /**
     * Selects the given hash method, updates the method string
     * accordingly, and clears the scratch key/bucket bookkeeping.
     */
    private void resetKeys(int keyKind) {
        this.keyKind = keyKind;
        switch (keyKind) {
        case LENGTH:
            method = "length()";
            break;
        case CHAR_AT:
            method = "charAt("+charAt+")";
            break;
        case HASH_CODE:
            method = "hashCode()";
            break;
        }
        maxDepth = 1;
        bucketCount = 0;
        for (int i = 0; i < length; i++) {
            tempKeys[i] = 0;
            bucketSizes[i] = 0;
        }
    }

    /** Records the length of the shortest input string. */
    private void setMinStringLength() {
        for (int i = 0; i < length; i++) {
            if (strings[i].length() < minStringLength) {
                minStringLength = strings[i].length();
            }
        }
    }

    /**
     * Hashes every input string with the given method, rebuilding the
     * scratch key/bucket bookkeeping.
     *
     * @return the size of the largest resulting bucket (1 if there are
     * no strings).
     */
    private int getKeys(int methodKind) {
        resetKeys(methodKind);
        for (int i = 0; i < length; i++) {
            addKey(getKey(strings[i]));
        }
        return maxDepth;
    }

    /**
     * Counts one occurrence of the given key, either growing the bucket
     * of a key seen before or registering a new key.
     */
    private void addKey(int key) {

        // Have we seen this one before?

        for (int j = 0; j < bucketCount; j++) {
            if (tempKeys[j] == key) {
                bucketSizes[j]++;
                if (bucketSizes[j] > maxDepth) {
                    maxDepth = bucketSizes[j];
                }
                return;
            }
        }

        // No, so start a new bucket for it.

        tempKeys[bucketCount] = key;
        bucketSizes[bucketCount] = 1;
        bucketCount++;
    }
}