1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java Sat Dec 01 00:00:00 2007 +0000 1.3 @@ -0,0 +1,372 @@ 1.4 +/* 1.5 + * Portions Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. Sun designates this 1.11 + * particular file as subject to the "Classpath" exception as provided 1.12 + * by Sun in the LICENSE file that accompanied this code. 1.13 + * 1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.17 + * version 2 for more details (a copy is included in the LICENSE file that 1.18 + * accompanied this code). 1.19 + * 1.20 + * You should have received a copy of the GNU General Public License version 1.21 + * 2 along with this work; if not, write to the Free Software Foundation, 1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.23 + * 1.24 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.25 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.26 + * have any questions. 1.27 + */ 1.28 +/* 1.29 + * Licensed Materials - Property of IBM 1.30 + * RMI-IIOP v1.0 1.31 + * Copyright IBM Corp. 1998 1999 All Rights Reserved 1.32 + * 1.33 + */ 1.34 + 1.35 +package sun.rmi.rmic.iiop; 1.36 + 1.37 +/** 1.38 + * StaticStringsHash takes an array of constant strings and 1.39 + * uses several different hash methods to try to find the 1.40 + * 'best' one for that set. The set of methods is currently 1.41 + * fixed, but with a little work could be made extensible thru 1.42 + * subclassing. 1.43 + * <p> 1.44 + * The current set of methods is: 1.45 + * <ol> 1.46 + * <li> length() - works well when all strings are different length.</li> 1.47 + * <li> charAt(n) - works well when one offset into all strings is different.</li> 1.48 + * <li> hashCode() - works well with larger arrays.</li> 1.49 + * </ol> 1.50 + * After constructing an instance over the set of strings, the 1.51 + * <code>getKey(String)</code> method can be used to use the selected hash 1.52 + * method to produce a key. The <code>method</code> string will contain 1.53 + * "length()", "charAt(n)", or "hashCode()", and is intended for use by 1.54 + * code generators. 1.55 + * <p> 1.56 + * The <code>keys</code> array will contain the full set of unique keys. 1.57 + * <p> 1.58 + * The <code>buckets</code> array will contain a set of arrays, one for 1.59 + * each key in the <code>keys</code>, where <code>buckets[x][y]</code> 1.60 + * is an index into the <code>strings</code> array. 1.61 + * @author Bryan Atsatt 1.62 + */ 1.63 +public class StaticStringsHash { 1.64 + 1.65 + /** The set of strings upon which the hash info is created */ 1.66 + public String[] strings = null; 1.67 + 1.68 + /** Unique hash keys */ 1.69 + public int[] keys = null; 1.70 + 1.71 + /** Buckets for each key, where buckets[x][y] is an index 1.72 + * into the strings[] array. */ 1.73 + public int[][] buckets = null; 1.74 + 1.75 + /** The method to invoke on String to produce the hash key */ 1.76 + public String method = null; 1.77 + 1.78 + /** Get a key for the given string using the 1.79 + * selected hash method. 1.80 + * @param str the string to return a key for. 1.81 + * @return the key. 1.82 + */ 1.83 + public int getKey(String str) { 1.84 + switch (keyKind) { 1.85 + case LENGTH: return str.length(); 1.86 + case CHAR_AT: return str.charAt(charAt); 1.87 + case HASH_CODE: return str.hashCode(); 1.88 + } 1.89 + throw new Error("Bad keyKind"); 1.90 + } 1.91 + 1.92 + /** Constructor 1.93 + * @param strings the set of strings upon which to 1.94 + * find an optimal hash method. Must not contain 1.95 + * duplicates. 1.96 + */ 1.97 + public StaticStringsHash(String[] strings) { 1.98 + this.strings = strings; 1.99 + length = strings.length; 1.100 + tempKeys = new int[length]; 1.101 + bucketSizes = new int[length]; 1.102 + setMinStringLength(); 1.103 + 1.104 + // Decide on the best algorithm based on 1.105 + // which one has the smallest maximum 1.106 + // bucket depth. First, try length()... 1.107 + 1.108 + int currentMaxDepth = getKeys(LENGTH); 1.109 + int useCharAt = -1; 1.110 + boolean useHashCode = false; 1.111 + 1.112 + if (currentMaxDepth > 1) { 1.113 + 1.114 + // At least one bucket had more than one 1.115 + // entry, so try charAt(i). If there 1.116 + // are a lot of strings in the array, 1.117 + // and minStringLength is large, limit 1.118 + // the search to a smaller number of 1.119 + // characters to avoid spending a lot 1.120 + // of time here that is most likely to 1.121 + // be pointless... 1.122 + 1.123 + int minLength = minStringLength; 1.124 + if (length > CHAR_AT_MAX_LINES && 1.125 + length * minLength > CHAR_AT_MAX_CHARS) { 1.126 + minLength = length/CHAR_AT_MAX_CHARS; 1.127 + } 1.128 + 1.129 + charAt = 0; 1.130 + for (int i = 0; i < minLength; i++) { 1.131 + int charAtDepth = getKeys(CHAR_AT); 1.132 + if (charAtDepth < currentMaxDepth) { 1.133 + currentMaxDepth = charAtDepth; 1.134 + useCharAt = i; 1.135 + if (currentMaxDepth == 1) { 1.136 + break; 1.137 + } 1.138 + } 1.139 + charAt++; 1.140 + } 1.141 + charAt = useCharAt; 1.142 + 1.143 + 1.144 + if (currentMaxDepth > 1) { 1.145 + 1.146 + // At least one bucket had more than one 1.147 + // entry, try hashCode(). 1.148 + // 1.149 + // Since the cost of computing a full hashCode 1.150 + // (for the runtime target string) is much higher 1.151 + // than the previous methods, use it only if it is 1.152 + // substantially better. The definition of 'substantial' 1.153 + // here is not very well founded, and could be improved 1.154 + // with some further analysis ;^) 1.155 + 1.156 + int hashCodeDepth = getKeys(HASH_CODE); 1.157 + if (hashCodeDepth < currentMaxDepth-3) { 1.158 + 1.159 + // Using the full hashCode results in at least 1.160 + // 3 fewer entries in the worst bucket, so will 1.161 + // therefore avoid at least 3 calls to equals() 1.162 + // in the worst case. 1.163 + // 1.164 + // Note that using a number smaller than 3 could 1.165 + // result in using a hashCode when there are only 1.166 + // 2 strings in the array, and that would surely 1.167 + // be a poor performance choice. 1.168 + 1.169 + useHashCode = true; 1.170 + } 1.171 + } 1.172 + 1.173 + // Reset keys if needed... 1.174 + 1.175 + if (!useHashCode) { 1.176 + if (useCharAt >= 0) { 1.177 + 1.178 + // Use the charAt(i) method... 1.179 + 1.180 + getKeys(CHAR_AT); 1.181 + 1.182 + } else { 1.183 + 1.184 + // Use length method... 1.185 + 1.186 + getKeys(LENGTH); 1.187 + } 1.188 + } 1.189 + } 1.190 + 1.191 + // Now allocate and fill our real hashKeys array... 1.192 + 1.193 + keys = new int[bucketCount]; 1.194 + System.arraycopy(tempKeys,0,keys,0,bucketCount); 1.195 + 1.196 + // Sort keys and bucketSizes arrays... 1.197 + 1.198 + boolean didSwap; 1.199 + do { 1.200 + didSwap = false; 1.201 + for (int i = 0; i < bucketCount - 1; i++) { 1.202 + if (keys[i] > keys[i+1]) { 1.203 + int temp = keys[i]; 1.204 + keys[i] = keys[i+1]; 1.205 + keys[i+1] = temp; 1.206 + temp = bucketSizes[i]; 1.207 + bucketSizes[i] = bucketSizes[i+1]; 1.208 + bucketSizes[i+1] = temp; 1.209 + didSwap = true; 1.210 + } 1.211 + } 1.212 + } 1.213 + while (didSwap == true); 1.214 + 1.215 + // Allocate our buckets array. Fill the string 1.216 + // index slot with an unused key so we can 1.217 + // determine which are free... 1.218 + 1.219 + int unused = findUnusedKey(); 1.220 + buckets = new int[bucketCount][]; 1.221 + for (int i = 0; i < bucketCount; i++) { 1.222 + buckets[i] = new int[bucketSizes[i]]; 1.223 + for (int j = 0; j < bucketSizes[i]; j++) { 1.224 + buckets[i][j] = unused; 1.225 + } 1.226 + } 1.227 + 1.228 + // And fill it in... 1.229 + 1.230 + for(int i = 0; i < strings.length; i++) { 1.231 + int key = getKey(strings[i]); 1.232 + for (int j = 0; j < bucketCount; j++) { 1.233 + if (keys[j] == key) { 1.234 + int k = 0; 1.235 + while (buckets[j][k] != unused) { 1.236 + k++; 1.237 + } 1.238 + buckets[j][k] = i; 1.239 + break; 1.240 + } 1.241 + } 1.242 + } 1.243 + } 1.244 + 1.245 + /** Print an optimized 'contains' method for the 1.246 + * argument strings 1.247 + */ 1.248 + public static void main (String[] args) { 1.249 + StaticStringsHash hash = new StaticStringsHash(args); 1.250 + System.out.println(); 1.251 + System.out.println(" public boolean contains(String key) {"); 1.252 + System.out.println(" switch (key."+hash.method+") {"); 1.253 + for (int i = 0; i < hash.buckets.length; i++) { 1.254 + System.out.println(" case "+hash.keys[i]+": "); 1.255 + for (int j = 0; j < hash.buckets[i].length; j++) { 1.256 + if (j > 0) { 1.257 + System.out.print(" } else "); 1.258 + } else { 1.259 + System.out.print(" "); 1.260 + } 1.261 + System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {"); 1.262 + System.out.println(" return true;"); 1.263 + } 1.264 + System.out.println(" }"); 1.265 + } 1.266 + System.out.println(" }"); 1.267 + System.out.println(" return false;"); 1.268 + System.out.println(" }"); 1.269 + } 1.270 + 1.271 + private int length; 1.272 + private int[] tempKeys; 1.273 + private int[] bucketSizes; 1.274 + private int bucketCount; 1.275 + private int maxDepth; 1.276 + private int minStringLength = Integer.MAX_VALUE; 1.277 + private int keyKind; 1.278 + private int charAt; 1.279 + 1.280 + private static final int LENGTH = 0; 1.281 + private static final int CHAR_AT = 1; 1.282 + private static final int HASH_CODE = 2; 1.283 + 1.284 + /* Determines the maximum number of charAt(i) 1.285 + * tests that will be done. The search is 1.286 + * limited because if the number of characters 1.287 + * is large enough, the likelyhood of finding 1.288 + * a good hash key based on this method is 1.289 + * low. The CHAR_AT_MAX_CHARS limit only 1.290 + * applies f there are more strings than 1.291 + * CHAR_AT_MAX_LINES. 1.292 + */ 1.293 + private static final int CHAR_AT_MAX_LINES = 50; 1.294 + private static final int CHAR_AT_MAX_CHARS = 1000; 1.295 + 1.296 + private void resetKeys(int keyKind) { 1.297 + this.keyKind = keyKind; 1.298 + switch (keyKind) { 1.299 + case LENGTH: method = "length()"; break; 1.300 + case CHAR_AT: method = "charAt("+charAt+")"; break; 1.301 + case HASH_CODE: method = "hashCode()"; break; 1.302 + } 1.303 + maxDepth = 1; 1.304 + bucketCount = 0; 1.305 + for (int i = 0; i < length; i++) { 1.306 + tempKeys[i] = 0; 1.307 + bucketSizes[i] = 0; 1.308 + } 1.309 + } 1.310 + 1.311 + private void setMinStringLength() { 1.312 + for (int i = 0; i < length; i++) { 1.313 + if (strings[i].length() < minStringLength) { 1.314 + minStringLength = strings[i].length(); 1.315 + } 1.316 + } 1.317 + } 1.318 + 1.319 + private int findUnusedKey() { 1.320 + int unused = 0; 1.321 + int keysLength = keys.length; 1.322 + 1.323 + // Note that we just assume that resource 1.324 + // exhaustion will occur rather than an 1.325 + // infinite loop here if the set of keys 1.326 + // is very large. 1.327 + 1.328 + while (true) { 1.329 + boolean match = false; 1.330 + for (int i = 0; i < keysLength; i++) { 1.331 + if (keys[i] == unused) { 1.332 + match = true; 1.333 + break; 1.334 + } 1.335 + } 1.336 + if (match) { 1.337 + unused--; 1.338 + } else { 1.339 + break; 1.340 + } 1.341 + } 1.342 + return unused; 1.343 + } 1.344 + 1.345 + private int getKeys(int methodKind) { 1.346 + resetKeys(methodKind); 1.347 + for(int i = 0; i < strings.length; i++) { 1.348 + addKey(getKey(strings[i])); 1.349 + } 1.350 + return maxDepth; 1.351 + } 1.352 + 1.353 + private void addKey(int key) { 1.354 + 1.355 + // Have we seen this one before? 1.356 + 1.357 + boolean addIt = true; 1.358 + for (int j = 0; j < bucketCount; j++) { 1.359 + if (tempKeys[j] == key) { 1.360 + addIt = false; 1.361 + bucketSizes[j]++; 1.362 + if (bucketSizes[j] > maxDepth) { 1.363 + maxDepth = bucketSizes[j]; 1.364 + } 1.365 + break; 1.366 + } 1.367 + } 1.368 + 1.369 + if (addIt) { 1.370 + tempKeys[bucketCount] = key; 1.371 + bucketSizes[bucketCount] = 1; 1.372 + bucketCount++; 1.373 + } 1.374 + } 1.375 +}