diff -r 000000000000 -r 55540e827aef src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java Sat Dec 01 00:00:00 2007 +0000 @@ -0,0 +1,372 @@ +/* + * Portions Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ +/* + * Licensed Materials - Property of IBM + * RMI-IIOP v1.0 + * Copyright IBM Corp. 1998 1999 All Rights Reserved + * + */ + +package sun.rmi.rmic.iiop; + +/** + * StaticStringsHash takes an array of constant strings and + * uses several different hash methods to try to find the + * 'best' one for that set. The set of methods is currently + * fixed, but with a little work could be made extensible thru + * subclassing. + *

+ * The current set of methods is: + *

    + *
  1. length() - works well when all strings are different length.
  2. + *
  3. charAt(n) - works well when one offset into all strings is different.
  4. + *
  5. hashCode() - works well with larger arrays.
  6. + *
+ * After constructing an instance over the set of strings, the + * getKey(String) method can be used to use the selected hash + * method to produce a key. The method string will contain + * "length()", "charAt(n)", or "hashCode()", and is intended for use by + * code generators. + *

+ * The keys array will contain the full set of unique keys. + *

+ * The buckets array will contain a set of arrays, one for + * each key in the keys, where buckets[x][y] + * is an index into the strings array. + * @author Bryan Atsatt + */ +public class StaticStringsHash { + + /** The set of strings upon which the hash info is created */ + public String[] strings = null; + + /** Unique hash keys */ + public int[] keys = null; + + /** Buckets for each key, where buckets[x][y] is an index + * into the strings[] array. */ + public int[][] buckets = null; + + /** The method to invoke on String to produce the hash key */ + public String method = null; + + /** Get a key for the given string using the + * selected hash method. + * @param str the string to return a key for. + * @return the key. + */ + public int getKey(String str) { + switch (keyKind) { + case LENGTH: return str.length(); + case CHAR_AT: return str.charAt(charAt); + case HASH_CODE: return str.hashCode(); + } + throw new Error("Bad keyKind"); + } + + /** Constructor + * @param strings the set of strings upon which to + * find an optimal hash method. Must not contain + * duplicates. + */ + public StaticStringsHash(String[] strings) { + this.strings = strings; + length = strings.length; + tempKeys = new int[length]; + bucketSizes = new int[length]; + setMinStringLength(); + + // Decide on the best algorithm based on + // which one has the smallest maximum + // bucket depth. First, try length()... + + int currentMaxDepth = getKeys(LENGTH); + int useCharAt = -1; + boolean useHashCode = false; + + if (currentMaxDepth > 1) { + + // At least one bucket had more than one + // entry, so try charAt(i). If there + // are a lot of strings in the array, + // and minStringLength is large, limit + // the search to a smaller number of + // characters to avoid spending a lot + // of time here that is most likely to + // be pointless... + + int minLength = minStringLength; + if (length > CHAR_AT_MAX_LINES && + length * minLength > CHAR_AT_MAX_CHARS) { + minLength = length/CHAR_AT_MAX_CHARS; + } + + charAt = 0; + for (int i = 0; i < minLength; i++) { + int charAtDepth = getKeys(CHAR_AT); + if (charAtDepth < currentMaxDepth) { + currentMaxDepth = charAtDepth; + useCharAt = i; + if (currentMaxDepth == 1) { + break; + } + } + charAt++; + } + charAt = useCharAt; + + + if (currentMaxDepth > 1) { + + // At least one bucket had more than one + // entry, try hashCode(). + // + // Since the cost of computing a full hashCode + // (for the runtime target string) is much higher + // than the previous methods, use it only if it is + // substantially better. The definition of 'substantial' + // here is not very well founded, and could be improved + // with some further analysis ;^) + + int hashCodeDepth = getKeys(HASH_CODE); + if (hashCodeDepth < currentMaxDepth-3) { + + // Using the full hashCode results in at least + // 3 fewer entries in the worst bucket, so will + // therefore avoid at least 3 calls to equals() + // in the worst case. + // + // Note that using a number smaller than 3 could + // result in using a hashCode when there are only + // 2 strings in the array, and that would surely + // be a poor performance choice. + + useHashCode = true; + } + } + + // Reset keys if needed... + + if (!useHashCode) { + if (useCharAt >= 0) { + + // Use the charAt(i) method... + + getKeys(CHAR_AT); + + } else { + + // Use length method... + + getKeys(LENGTH); + } + } + } + + // Now allocate and fill our real hashKeys array... + + keys = new int[bucketCount]; + System.arraycopy(tempKeys,0,keys,0,bucketCount); + + // Sort keys and bucketSizes arrays... + + boolean didSwap; + do { + didSwap = false; + for (int i = 0; i < bucketCount - 1; i++) { + if (keys[i] > keys[i+1]) { + int temp = keys[i]; + keys[i] = keys[i+1]; + keys[i+1] = temp; + temp = bucketSizes[i]; + bucketSizes[i] = bucketSizes[i+1]; + bucketSizes[i+1] = temp; + didSwap = true; + } + } + } + while (didSwap == true); + + // Allocate our buckets array. Fill the string + // index slot with an unused key so we can + // determine which are free... + + int unused = findUnusedKey(); + buckets = new int[bucketCount][]; + for (int i = 0; i < bucketCount; i++) { + buckets[i] = new int[bucketSizes[i]]; + for (int j = 0; j < bucketSizes[i]; j++) { + buckets[i][j] = unused; + } + } + + // And fill it in... + + for(int i = 0; i < strings.length; i++) { + int key = getKey(strings[i]); + for (int j = 0; j < bucketCount; j++) { + if (keys[j] == key) { + int k = 0; + while (buckets[j][k] != unused) { + k++; + } + buckets[j][k] = i; + break; + } + } + } + } + + /** Print an optimized 'contains' method for the + * argument strings + */ + public static void main (String[] args) { + StaticStringsHash hash = new StaticStringsHash(args); + System.out.println(); + System.out.println(" public boolean contains(String key) {"); + System.out.println(" switch (key."+hash.method+") {"); + for (int i = 0; i < hash.buckets.length; i++) { + System.out.println(" case "+hash.keys[i]+": "); + for (int j = 0; j < hash.buckets[i].length; j++) { + if (j > 0) { + System.out.print(" } else "); + } else { + System.out.print(" "); + } + System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {"); + System.out.println(" return true;"); + } + System.out.println(" }"); + } + System.out.println(" }"); + System.out.println(" return false;"); + System.out.println(" }"); + } + + private int length; + private int[] tempKeys; + private int[] bucketSizes; + private int bucketCount; + private int maxDepth; + private int minStringLength = Integer.MAX_VALUE; + private int keyKind; + private int charAt; + + private static final int LENGTH = 0; + private static final int CHAR_AT = 1; + private static final int HASH_CODE = 2; + + /* Determines the maximum number of charAt(i) + * tests that will be done. The search is + * limited because if the number of characters + * is large enough, the likelyhood of finding + * a good hash key based on this method is + * low. The CHAR_AT_MAX_CHARS limit only + * applies f there are more strings than + * CHAR_AT_MAX_LINES. + */ + private static final int CHAR_AT_MAX_LINES = 50; + private static final int CHAR_AT_MAX_CHARS = 1000; + + private void resetKeys(int keyKind) { + this.keyKind = keyKind; + switch (keyKind) { + case LENGTH: method = "length()"; break; + case CHAR_AT: method = "charAt("+charAt+")"; break; + case HASH_CODE: method = "hashCode()"; break; + } + maxDepth = 1; + bucketCount = 0; + for (int i = 0; i < length; i++) { + tempKeys[i] = 0; + bucketSizes[i] = 0; + } + } + + private void setMinStringLength() { + for (int i = 0; i < length; i++) { + if (strings[i].length() < minStringLength) { + minStringLength = strings[i].length(); + } + } + } + + private int findUnusedKey() { + int unused = 0; + int keysLength = keys.length; + + // Note that we just assume that resource + // exhaustion will occur rather than an + // infinite loop here if the set of keys + // is very large. + + while (true) { + boolean match = false; + for (int i = 0; i < keysLength; i++) { + if (keys[i] == unused) { + match = true; + break; + } + } + if (match) { + unused--; + } else { + break; + } + } + return unused; + } + + private int getKeys(int methodKind) { + resetKeys(methodKind); + for(int i = 0; i < strings.length; i++) { + addKey(getKey(strings[i])); + } + return maxDepth; + } + + private void addKey(int key) { + + // Have we seen this one before? + + boolean addIt = true; + for (int j = 0; j < bucketCount; j++) { + if (tempKeys[j] == key) { + addIt = false; + bucketSizes[j]++; + if (bucketSizes[j] > maxDepth) { + maxDepth = bucketSizes[j]; + } + break; + } + } + + if (addIt) { + tempKeys[bucketCount] = key; + bucketSizes[bucketCount] = 1; + bucketCount++; + } + } +}