Thu, 24 May 2018 16:41:12 +0800
Merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | /* |
aoqi@0 | 26 | * Licensed Materials - Property of IBM |
aoqi@0 | 27 | * RMI-IIOP v1.0 |
aoqi@0 | 28 | * Copyright IBM Corp. 1998 1999 All Rights Reserved |
aoqi@0 | 29 | * |
aoqi@0 | 30 | */ |
aoqi@0 | 31 | |
aoqi@0 | 32 | package sun.rmi.rmic.iiop; |
aoqi@0 | 33 | |
/**
 * StaticStringsHash takes an array of constant strings and
 * uses several different hash methods to try to find the
 * 'best' one for that set, where 'best' means the smallest
 * maximum bucket depth. The set of methods is currently
 * fixed, but with a little work could be made extensible thru
 * subclassing.
 * <p>
 * The current set of methods is:
 * <ol>
 * <li> length() - works well when all strings are different length.</li>
 * <li> charAt(n) - works well when one offset into all strings is different.</li>
 * <li> hashCode() - works well with larger arrays.</li>
 * </ol>
 * After constructing an instance over the set of strings, the
 * <code>getKey(String)</code> method can be used to apply the selected hash
 * method to produce a key. The <code>method</code> string will contain
 * "length()", "charAt(n)", or "hashCode()", and is intended for use by
 * code generators.
 * <p>
 * The <code>keys</code> array will contain the full set of unique keys,
 * sorted in ascending order.
 * <p>
 * The <code>buckets</code> array will contain a set of arrays, one for
 * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
 * is an index into the <code>strings</code> array. Within a bucket the
 * indices appear in ascending order.
 * @author Bryan Atsatt
 */
public class StaticStringsHash {

    /** The set of strings upon which the hash info is created */
    public String[] strings = null;

    /** Unique hash keys, sorted in ascending order */
    public int[] keys = null;

    /** Buckets for each key, where buckets[x][y] is an index
     * into the strings[] array. buckets[x] belongs to keys[x]. */
    public int[][] buckets = null;

    /** The method to invoke on String to produce the hash key */
    public String method = null;

    /** Get a key for the given string using the
     * selected hash method.
     * <p>
     * When the selected method is charAt(n), a string shorter than
     * n+1 characters makes <code>String.charAt</code> throw
     * <code>StringIndexOutOfBoundsException</code>; callers passing
     * arbitrary strings must guard against that themselves.
     * @param str the string to return a key for.
     * @return the key.
     */
    public int getKey(String str) {
        switch (keyKind) {
            case LENGTH:    return str.length();
            case CHAR_AT:   return str.charAt(charAt);
            case HASH_CODE: return str.hashCode();
        }
        throw new Error("Bad keyKind");
    }

    /** Constructor
     * @param strings the set of strings upon which to
     * find an optimal hash method. Must not contain
     * duplicates or null elements.
     * @throws NullPointerException if <code>strings</code> is null.
     */
    public StaticStringsHash(String[] strings) {
        if (strings == null) {
            throw new NullPointerException("strings");
        }
        this.strings = strings;
        length = strings.length;
        tempKeys = new int[length];
        bucketSizes = new int[length];
        setMinStringLength();

        selectHashMethod();
        buildBuckets();
    }

    /** Print an optimized 'contains' method for the
     * argument strings
     * @param args the strings the generated method should recognize.
     */
    public static void main (String[] args) {
        StaticStringsHash hash = new StaticStringsHash(args);
        System.out.println();
        System.out.println(" public boolean contains(String key) {");
        System.out.println(" switch (key."+hash.method+") {");
        for (int i = 0; i < hash.buckets.length; i++) {
            System.out.println(" case "+hash.keys[i]+": ");
            for (int j = 0; j < hash.buckets[i].length; j++) {
                if (j > 0) {
                    System.out.print(" } else ");
                } else {
                    System.out.print(" ");
                }
                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
                System.out.println(" return true;");
            }
            System.out.println(" }");
        }
        System.out.println(" }");
        System.out.println(" return false;");
        System.out.println(" }");
    }

    /** Number of input strings. */
    private int length;
    /** Scratch: unique keys found by the last getKeys() call, in discovery order. */
    private int[] tempKeys;
    /** Scratch: bucketSizes[i] is the number of strings that produced tempKeys[i]. */
    private int[] bucketSizes;
    /** Scratch: number of valid entries in tempKeys/bucketSizes. */
    private int bucketCount;
    /** Scratch: largest bucket size seen by the last getKeys() call (never below 1). */
    private int maxDepth;
    /** Length of the shortest input string; Integer.MAX_VALUE when there are none. */
    private int minStringLength = Integer.MAX_VALUE;
    /** Currently selected hash method: LENGTH, CHAR_AT or HASH_CODE. */
    private int keyKind;
    /** Offset used by the CHAR_AT method. */
    private int charAt;

    private static final int LENGTH = 0;
    private static final int CHAR_AT = 1;
    private static final int HASH_CODE = 2;

    /* Determines the maximum number of charAt(i)
     * tests that will be done. The search is
     * limited because if the number of characters
     * is large enough, the likelihood of finding
     * a good hash key based on this method is
     * low. The CHAR_AT_MAX_CHARS limit only
     * applies if there are more strings than
     * CHAR_AT_MAX_LINES.
     */
    private static final int CHAR_AT_MAX_LINES = 50;
    private static final int CHAR_AT_MAX_CHARS = 1000;

    /**
     * Decide on the best algorithm based on which one has the
     * smallest maximum bucket depth. On return, keyKind, charAt,
     * method, tempKeys, bucketSizes and bucketCount all describe
     * the chosen method.
     */
    private void selectHashMethod() {

        // First, try length()...

        int currentMaxDepth = getKeys(LENGTH);
        if (currentMaxDepth <= 1) {
            return;
        }

        // At least one bucket had more than one
        // entry, so try charAt(i) for each offset
        // that every string is long enough to have.

        int useCharAt = -1;
        int searchLimit = charAtSearchLimit();
        for (int i = 0; i < searchLimit; i++) {
            charAt = i;
            int charAtDepth = getKeys(CHAR_AT);
            if (charAtDepth < currentMaxDepth) {
                currentMaxDepth = charAtDepth;
                useCharAt = i;
                if (currentMaxDepth == 1) {
                    break;
                }
            }
        }
        charAt = useCharAt;

        // If at least one bucket still has more than one entry,
        // try hashCode().
        //
        // Since the cost of computing a full hashCode
        // (for the runtime target string) is much higher
        // than the previous methods, use it only if it is
        // substantially better: at least 3 fewer entries in
        // the worst bucket, which avoids at least 3 calls to
        // equals() in the worst case. A smaller threshold could
        // select hashCode() when there are only 2 strings in the
        // array, and that would surely be a poor performance choice.

        if (currentMaxDepth > 1 && getKeys(HASH_CODE) < currentMaxDepth - 3) {
            return;     // The hashCode() keys are already in place.
        }

        // The scratch arrays hold keys from the last method tried,
        // so recompute them for the method we are actually using.

        getKeys(useCharAt >= 0 ? CHAR_AT : LENGTH);
    }

    /**
     * Compute how many leading character offsets the charAt(i)
     * search may examine. Normally this is the shortest string
     * length; when there are more than CHAR_AT_MAX_LINES strings
     * the offsets are capped so that the total number of charAt
     * tests (strings * offsets) stays within CHAR_AT_MAX_CHARS.
     */
    private int charAtSearchLimit() {
        int limit = minStringLength;
        // Multiply as long so a very long shortest string cannot overflow.
        if (length > CHAR_AT_MAX_LINES &&
            (long) length * limit > CHAR_AT_MAX_CHARS) {
            limit = CHAR_AT_MAX_CHARS / length;
        }
        return limit;
    }

    /**
     * Publish the sorted keys array and fill the buckets array
     * from the currently selected hash method.
     */
    private void buildBuckets() {

        // Allocate and sort our real keys array...

        keys = java.util.Arrays.copyOf(tempKeys, bucketCount);
        java.util.Arrays.sort(keys);

        // Find the bucket of every string and count bucket sizes.
        // Every key is present in keys[], since both come from getKey().

        int[] bucketOf = new int[length];
        int[] sizes = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            bucketOf[i] = java.util.Arrays.binarySearch(keys, getKey(strings[i]));
            sizes[bucketOf[i]]++;
        }

        buckets = new int[bucketCount][];
        for (int b = 0; b < bucketCount; b++) {
            buckets[b] = new int[sizes[b]];
        }

        // And fill it in, tracking the next free slot of each bucket
        // explicitly rather than marking free slots with a sentinel
        // value (which could collide with a valid string index)...

        int[] filled = new int[bucketCount];
        for (int i = 0; i < length; i++) {
            int b = bucketOf[i];
            buckets[b][filled[b]++] = i;
        }
    }

    /** Select the given hash method and clear the scratch key state. */
    private void resetKeys(int keyKind) {
        this.keyKind = keyKind;
        switch (keyKind) {
            case LENGTH:    method = "length()"; break;
            case CHAR_AT:   method = "charAt("+charAt+")"; break;
            case HASH_CODE: method = "hashCode()"; break;
        }
        maxDepth = 1;
        bucketCount = 0;
        java.util.Arrays.fill(tempKeys, 0);
        java.util.Arrays.fill(bucketSizes, 0);
    }

    /** Record the length of the shortest input string in minStringLength. */
    private void setMinStringLength() {
        for (int i = 0; i < length; i++) {
            if (strings[i].length() < minStringLength) {
                minStringLength = strings[i].length();
            }
        }
    }

    /**
     * Compute the keys of all strings using the given method.
     * @param methodKind LENGTH, CHAR_AT (at the current charAt offset) or HASH_CODE.
     * @return the size of the largest resulting bucket (1 when there are no strings).
     */
    private int getKeys(int methodKind) {
        resetKeys(methodKind);
        for (int i = 0; i < length; i++) {
            addKey(getKey(strings[i]));
        }
        return maxDepth;
    }

    /** Add one key to the scratch arrays, updating bucket sizes and maxDepth. */
    private void addKey(int key) {

        // Have we seen this one before?

        for (int j = 0; j < bucketCount; j++) {
            if (tempKeys[j] == key) {
                bucketSizes[j]++;
                if (bucketSizes[j] > maxDepth) {
                    maxDepth = bucketSizes[j];
                }
                return;
            }
        }

        // No, so start a new bucket for it.

        tempKeys[bucketCount] = key;
        bucketSizes[bucketCount] = 1;
        bucketCount++;
    }
}