src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java

Thu, 24 May 2018 16:41:12 +0800

author
aoqi
date
Thu, 24 May 2018 16:41:12 +0800
changeset 1410
9c913ea7e4a1
parent 748
6845b95cba6b
permissions
-rw-r--r--

Merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25 /*
aoqi@0 26 * Licensed Materials - Property of IBM
aoqi@0 27 * RMI-IIOP v1.0
aoqi@0 28 * Copyright IBM Corp. 1998 1999 All Rights Reserved
aoqi@0 29 *
aoqi@0 30 */
aoqi@0 31
aoqi@0 32 package sun.rmi.rmic.iiop;
aoqi@0 33
/**
 * StaticStringsHash takes an array of constant strings and
 * uses several different hash methods to try to find the
 * 'best' one for that set. The set of methods is currently
 * fixed, but with a little work could be made extensible thru
 * subclassing.
 * <p>
 * The current set of methods is:
 * <ol>
 * <li> length() - works well when all strings are different length.</li>
 * <li> charAt(n) - works well when one offset into all strings is different.</li>
 * <li> hashCode() - works well with larger arrays.</li>
 * </ol>
 * After constructing an instance over the set of strings, the
 * <code>getKey(String)</code> method can be used to apply the selected hash
 * method and produce a key. The <code>method</code> string will contain
 * "length()", "charAt(n)", or "hashCode()", and is intended for use by
 * code generators.
 * <p>
 * The <code>keys</code> array will contain the full set of unique keys,
 * sorted in ascending order.
 * <p>
 * The <code>buckets</code> array will contain a set of arrays, one for
 * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
 * is an index into the <code>strings</code> array. Within a bucket the
 * indices appear in ascending order.
 * @author Bryan Atsatt
 */
public class StaticStringsHash {

    /** The set of strings upon which the hash info is created */
    public String[] strings = null;

    /** Unique hash keys, sorted in ascending order */
    public int[] keys = null;

    /** Buckets for each key, where buckets[x][y] is an index
     * into the strings[] array. buckets[x] belongs to keys[x]. */
    public int[][] buckets = null;

    /** The method to invoke on String to produce the hash key */
    public String method = null;

    /** Get a key for the given string using the
     * selected hash method.
     * @param str the string to return a key for.
     * @return the key.
     * @throws StringIndexOutOfBoundsException if the selected method is
     * charAt(n) and <code>str</code> has n or fewer characters.
     */
    public int getKey(String str) {
        switch (keyKind) {
            case LENGTH:    return str.length();
            case CHAR_AT:   return str.charAt(charAt);
            case HASH_CODE: return str.hashCode();
            default:        throw new Error("Bad keyKind");
        }
    }

    /** Constructor
     * @param strings the set of strings upon which to
     * find an optimal hash method. Must not be null, must not
     * contain null elements and must not contain duplicates.
     */
    public StaticStringsHash(String[] strings) {
        this.strings = strings;
        length = strings.length;
        tempKeys = new int[length];
        bucketSizes = new int[length];
        setMinStringLength();

        selectHashMethod();
        buildBuckets();
    }

    /** Print an optimized 'contains' method for the
     * argument strings
     */
    public static void main (String[] args) {
        StaticStringsHash hash = new StaticStringsHash(args);
        System.out.println();
        System.out.println(" public boolean contains(String key) {");
        System.out.println(" switch (key."+hash.method+") {");
        for (int i = 0; i < hash.buckets.length; i++) {
            System.out.println(" case "+hash.keys[i]+": ");
            for (int j = 0; j < hash.buckets[i].length; j++) {
                if (j > 0) {
                    System.out.print(" } else ");
                } else {
                    System.out.print(" ");
                }
                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
                System.out.println(" return true;");
            }
            System.out.println(" }");
        }
        System.out.println(" }");
        System.out.println(" return false;");
        System.out.println(" }");
    }

    private int length;
    private int[] tempKeys;
    private int[] bucketSizes;
    private int bucketCount;
    private int maxDepth;
    private int minStringLength = Integer.MAX_VALUE;
    private int keyKind;
    private int charAt;

    private static final int LENGTH = 0;
    private static final int CHAR_AT = 1;
    private static final int HASH_CODE = 2;

    /* Determines the maximum number of charAt(i)
     * tests that will be done. The search is
     * limited because if the number of characters
     * is large enough, the likelihood of finding
     * a good hash key based on this method is
     * low. The CHAR_AT_MAX_CHARS limit only
     * applies if there are more strings than
     * CHAR_AT_MAX_LINES.
     */
    private static final int CHAR_AT_MAX_LINES = 50;
    private static final int CHAR_AT_MAX_CHARS = 1000;

    /* Decide on the best algorithm based on which one has the
     * smallest maximum bucket depth. On return keyKind, charAt,
     * method, tempKeys, bucketSizes and bucketCount all describe
     * the selected method.
     */
    private void selectHashMethod() {

        // First, try length()...

        int currentMaxDepth = getKeys(LENGTH);
        if (currentMaxDepth <= 1) {
            return;
        }

        // At least one bucket had more than one entry, so try
        // charAt(i). If there are a lot of strings in the array,
        // and minStringLength is large, limit the search to a
        // smaller number of characters to avoid spending a lot
        // of time here that is most likely to be pointless...

        int minLength = minStringLength;
        if (length > CHAR_AT_MAX_LINES &&
            (long) length * minLength > CHAR_AT_MAX_CHARS) {

            // Keep length * minLength within the budget. The result
            // is always smaller than minStringLength, so charAt(i)
            // stays in range for every string.

            minLength = CHAR_AT_MAX_CHARS / length;
        }

        int useCharAt = -1;
        for (int i = 0; i < minLength; i++) {
            charAt = i;
            int charAtDepth = getKeys(CHAR_AT);
            if (charAtDepth < currentMaxDepth) {
                currentMaxDepth = charAtDepth;
                useCharAt = i;
                if (currentMaxDepth == 1) {
                    break;
                }
            }
        }
        charAt = useCharAt;

        if (currentMaxDepth > 1) {

            // At least one bucket had more than one
            // entry, try hashCode().
            //
            // Since the cost of computing a full hashCode
            // (for the runtime target string) is much higher
            // than the previous methods, use it only if it
            // results in at least 3 fewer entries in the worst
            // bucket, and so avoids at least 3 calls to equals()
            // in the worst case.
            //
            // Note that using a number smaller than 3 could
            // result in using a hashCode when there are only
            // 2 strings in the array, and that would surely
            // be a poor performance choice.

            int hashCodeDepth = getKeys(HASH_CODE);
            if (hashCodeDepth < currentMaxDepth - 3) {
                return;
            }
        }

        // The temp arrays currently hold the keys of the last method
        // tried; recompute them for the method actually selected.

        getKeys(useCharAt >= 0 ? CHAR_AT : LENGTH);
    }

    /* Publish the sorted unique keys and fill one bucket per key
     * with the indices of the strings that produce that key.
     */
    private void buildBuckets() {

        keys = new int[bucketCount];
        System.arraycopy(tempKeys, 0, keys, 0, bucketCount);
        java.util.Arrays.sort(keys);

        // Map every string to the position of its key. The keys are
        // unique and were computed from these same strings, so the
        // binary search always finds an exact match.

        int[] bucketOf = new int[length];
        int[] sizes = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            int bucket = java.util.Arrays.binarySearch(keys, getKey(strings[i]));
            bucketOf[i] = bucket;
            sizes[bucket]++;
        }

        buckets = new int[bucketCount][];
        for (int b = 0; b < bucketCount; b++) {
            buckets[b] = new int[sizes[b]];
        }

        // A per-bucket cursor tracks the next free slot, so no
        // sentinel value is needed to tell used slots from free ones.

        int[] next = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            int bucket = bucketOf[i];
            buckets[bucket][next[bucket]++] = i;
        }
    }

    /* Select the given key kind, update the method string to match
     * (using the current charAt offset) and clear the temp arrays.
     */
    private void resetKeys(int keyKind) {
        this.keyKind = keyKind;
        switch (keyKind) {
            case LENGTH:    method = "length()"; break;
            case CHAR_AT:   method = "charAt("+charAt+")"; break;
            case HASH_CODE: method = "hashCode()"; break;
        }
        maxDepth = 1;
        bucketCount = 0;
        for (int i = 0; i < length; i++) {
            tempKeys[i] = 0;
            bucketSizes[i] = 0;
        }
    }

    /* Record the length of the shortest string; it bounds the
     * offsets that charAt(n) may be tried with.
     */
    private void setMinStringLength() {
        for (int i = 0; i < length; i++) {
            if (strings[i].length() < minStringLength) {
                minStringLength = strings[i].length();
            }
        }
    }

    /* Compute the keys of all strings with the given method kind.
     * Returns the size of the fullest bucket (1 for an empty array).
     */
    private int getKeys(int methodKind) {
        resetKeys(methodKind);
        for (int i = 0; i < strings.length; i++) {
            addKey(getKey(strings[i]));
        }
        return maxDepth;
    }

    /* Count one more string for the given key, appending the key
     * to tempKeys if it has not been seen before.
     */
    private void addKey(int key) {

        // Have we seen this one before?

        for (int j = 0; j < bucketCount; j++) {
            if (tempKeys[j] == key) {
                bucketSizes[j]++;
                if (bucketSizes[j] > maxDepth) {
                    maxDepth = bucketSizes[j];
                }
                return;
            }
        }

        tempKeys[bucketCount] = key;
        bucketSizes[bucketCount] = 1;
        bucketCount++;
    }
}

mercurial