src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java

changeset 1
55540e827aef
child 158
91006f157c46
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java	Sat Dec 01 00:00:00 2007 +0000
     1.3 @@ -0,0 +1,372 @@
     1.4 +/*
     1.5 + * Portions Copyright 1999-2007 Sun Microsystems, Inc.  All Rights Reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Sun designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Sun in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
    1.25 + * CA 95054 USA or visit www.sun.com if you need additional information or
    1.26 + * have any questions.
    1.27 + */
    1.28 +/*
    1.29 + * Licensed Materials - Property of IBM
    1.30 + * RMI-IIOP v1.0
    1.31 + * Copyright IBM Corp. 1998 1999  All Rights Reserved
    1.32 + *
    1.33 + */
    1.34 +
    1.35 +package sun.rmi.rmic.iiop;
    1.36 +
    1.37 +/**
    1.38 + * StaticStringsHash takes an array of constant strings and
    1.39 + * uses several different hash methods to try to find the
    1.40 + * 'best' one for that set. The set of methods is currently
    1.41 + * fixed, but with a little work could be made extensible thru
    1.42 + * subclassing.
    1.43 + * <p>
    1.44 + * The current set of methods is:
    1.45 + * <ol>
    1.46 + * <li> length() - works well when all strings are different length.</li>
    1.47 + * <li> charAt(n) - works well when one offset into all strings is different.</li>
    1.48 + * <li> hashCode() - works well with larger arrays.</li>
    1.49 + * </ol>
    1.50 + * After constructing an instance over the set of strings, the
    1.51 + * <code>getKey(String)</code> method can be used to use the selected hash
    1.52 + * method to produce a key.  The <code>method</code> string will contain
    1.53 + * "length()", "charAt(n)", or "hashCode()", and is intended for use by
    1.54 + * code generators.
    1.55 + * <p>
    1.56 + * The <code>keys</code> array will contain the full set of unique keys.
    1.57 + * <p>
    1.58 + * The <code>buckets</code> array will contain a set of arrays, one for
    1.59 + * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
    1.60 + * is an index into the <code>strings</code> array.
    1.61 + * @author      Bryan Atsatt
    1.62 + */
    1.63 +public class StaticStringsHash {
    1.64 +
    1.65 +    /** The set of strings upon which the hash info is created */
    1.66 +    public String[] strings = null;
    1.67 +
    1.68 +    /** Unique hash keys */
    1.69 +    public int[] keys = null;
    1.70 +
    1.71 +    /** Buckets for each key, where buckets[x][y] is an index
    1.72 +     * into the strings[] array. */
    1.73 +    public int[][] buckets = null;
    1.74 +
    1.75 +    /** The method to invoke on String to produce the hash key */
    1.76 +    public String method = null;
    1.77 +
    1.78 +    /** Get a key for the given string using the
    1.79 +     * selected hash method.
    1.80 +     * @param str the string to return a key for.
    1.81 +     * @return the key.
    1.82 +     */
    1.83 +    public int getKey(String str) {
    1.84 +        switch (keyKind) {
    1.85 +        case LENGTH: return str.length();
    1.86 +        case CHAR_AT: return str.charAt(charAt);
    1.87 +        case HASH_CODE: return str.hashCode();
    1.88 +        }
    1.89 +        throw new Error("Bad keyKind");
    1.90 +    }
    1.91 +
    1.92 +    /** Constructor
    1.93 +     * @param strings the set of strings upon which to
    1.94 +     * find an optimal hash method. Must not contain
    1.95 +     * duplicates.
    1.96 +     */
    1.97 +    public StaticStringsHash(String[] strings) {
    1.98 +        this.strings = strings;
    1.99 +        length = strings.length;
   1.100 +        tempKeys = new int[length];
   1.101 +        bucketSizes = new int[length];
   1.102 +        setMinStringLength();
   1.103 +
   1.104 +        // Decide on the best algorithm based on
   1.105 +        // which one has the smallest maximum
   1.106 +        // bucket depth. First, try length()...
   1.107 +
   1.108 +        int currentMaxDepth = getKeys(LENGTH);
   1.109 +        int useCharAt = -1;
   1.110 +        boolean useHashCode = false;
   1.111 +
   1.112 +        if (currentMaxDepth > 1) {
   1.113 +
   1.114 +            // At least one bucket had more than one
   1.115 +            // entry, so try charAt(i).  If there
   1.116 +            // are a lot of strings in the array,
   1.117 +            // and minStringLength is large, limit
   1.118 +            // the search to a smaller number of
   1.119 +            // characters to avoid spending a lot
   1.120 +            // of time here that is most likely to
   1.121 +            // be pointless...
   1.122 +
   1.123 +            int minLength = minStringLength;
   1.124 +            if (length > CHAR_AT_MAX_LINES &&
   1.125 +                length * minLength > CHAR_AT_MAX_CHARS) {
   1.126 +                minLength = length/CHAR_AT_MAX_CHARS;
   1.127 +            }
   1.128 +
   1.129 +            charAt = 0;
   1.130 +            for (int i = 0; i < minLength; i++) {
   1.131 +                int charAtDepth = getKeys(CHAR_AT);
   1.132 +                if (charAtDepth < currentMaxDepth) {
   1.133 +                    currentMaxDepth = charAtDepth;
   1.134 +                    useCharAt = i;
   1.135 +                    if (currentMaxDepth == 1) {
   1.136 +                        break;
   1.137 +                    }
   1.138 +                }
   1.139 +                charAt++;
   1.140 +            }
   1.141 +            charAt = useCharAt;
   1.142 +
   1.143 +
   1.144 +            if (currentMaxDepth > 1) {
   1.145 +
   1.146 +                // At least one bucket had more than one
   1.147 +                // entry, try hashCode().
   1.148 +                //
   1.149 +                // Since the cost of computing a full hashCode
   1.150 +                // (for the runtime target string) is much higher
   1.151 +                // than the previous methods, use it only if it is
   1.152 +                // substantially better. The definition of 'substantial'
   1.153 +                // here is not very well founded, and could be improved
   1.154 +                // with some further analysis ;^)
   1.155 +
   1.156 +                int hashCodeDepth = getKeys(HASH_CODE);
   1.157 +                if (hashCodeDepth < currentMaxDepth-3) {
   1.158 +
   1.159 +                    // Using the full hashCode results in at least
   1.160 +                    // 3 fewer entries in the worst bucket, so will
   1.161 +                    // therefore avoid at least 3 calls to equals()
   1.162 +                    // in the worst case.
   1.163 +                    //
   1.164 +                    // Note that using a number smaller than 3 could
   1.165 +                    // result in using a hashCode when there are only
   1.166 +                    // 2 strings in the array, and that would surely
   1.167 +                    // be a poor performance choice.
   1.168 +
   1.169 +                    useHashCode = true;
   1.170 +                }
   1.171 +            }
   1.172 +
   1.173 +            // Reset keys if needed...
   1.174 +
   1.175 +            if (!useHashCode) {
   1.176 +                if (useCharAt >= 0) {
   1.177 +
   1.178 +                    // Use the charAt(i) method...
   1.179 +
   1.180 +                    getKeys(CHAR_AT);
   1.181 +
   1.182 +                } else {
   1.183 +
   1.184 +                    // Use length method...
   1.185 +
   1.186 +                    getKeys(LENGTH);
   1.187 +                }
   1.188 +            }
   1.189 +        }
   1.190 +
   1.191 +        // Now allocate and fill our real hashKeys array...
   1.192 +
   1.193 +        keys = new int[bucketCount];
   1.194 +        System.arraycopy(tempKeys,0,keys,0,bucketCount);
   1.195 +
   1.196 +        // Sort keys and bucketSizes arrays...
   1.197 +
   1.198 +        boolean didSwap;
   1.199 +        do {
   1.200 +            didSwap = false;
   1.201 +            for (int i = 0; i < bucketCount - 1; i++) {
   1.202 +                if (keys[i] > keys[i+1]) {
   1.203 +                    int temp = keys[i];
   1.204 +                    keys[i] = keys[i+1];
   1.205 +                    keys[i+1] = temp;
   1.206 +                    temp = bucketSizes[i];
   1.207 +                    bucketSizes[i] = bucketSizes[i+1];
   1.208 +                    bucketSizes[i+1] = temp;
   1.209 +                    didSwap = true;
   1.210 +                }
   1.211 +            }
   1.212 +        }
   1.213 +        while (didSwap == true);
   1.214 +
   1.215 +        // Allocate our buckets array. Fill the string
   1.216 +        // index slot with an unused key so we can
   1.217 +        // determine which are free...
   1.218 +
   1.219 +        int unused = findUnusedKey();
   1.220 +        buckets = new int[bucketCount][];
   1.221 +        for (int i = 0; i < bucketCount; i++) {
   1.222 +            buckets[i] = new int[bucketSizes[i]];
   1.223 +            for (int j = 0; j < bucketSizes[i]; j++) {
   1.224 +                buckets[i][j] = unused;
   1.225 +            }
   1.226 +        }
   1.227 +
   1.228 +        // And fill it in...
   1.229 +
   1.230 +        for(int i = 0; i < strings.length; i++) {
   1.231 +            int key = getKey(strings[i]);
   1.232 +            for (int j = 0; j < bucketCount; j++) {
   1.233 +                if (keys[j] == key) {
   1.234 +                    int k = 0;
   1.235 +                    while (buckets[j][k] != unused) {
   1.236 +                        k++;
   1.237 +                    }
   1.238 +                    buckets[j][k] = i;
   1.239 +                    break;
   1.240 +                }
   1.241 +            }
   1.242 +        }
   1.243 +    }
   1.244 +
   1.245 +    /** Print an optimized 'contains' method for the
   1.246 +     * argument strings
   1.247 +     */
   1.248 +    public static void main (String[] args) {
   1.249 +        StaticStringsHash hash = new StaticStringsHash(args);
   1.250 +        System.out.println();
   1.251 +        System.out.println("    public boolean contains(String key) {");
   1.252 +        System.out.println("        switch (key."+hash.method+") {");
   1.253 +        for (int i = 0; i < hash.buckets.length; i++) {
   1.254 +            System.out.println("            case "+hash.keys[i]+": ");
   1.255 +            for (int j = 0; j < hash.buckets[i].length; j++) {
   1.256 +                if (j > 0) {
   1.257 +                    System.out.print("                } else ");
   1.258 +                } else {
   1.259 +                    System.out.print("                ");
   1.260 +                }
   1.261 +                System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
   1.262 +                System.out.println("                    return true;");
   1.263 +            }
   1.264 +            System.out.println("                }");
   1.265 +        }
   1.266 +        System.out.println("        }");
   1.267 +        System.out.println("        return false;");
   1.268 +        System.out.println("    }");
   1.269 +    }
   1.270 +
   1.271 +    private int length;
   1.272 +    private int[] tempKeys;
   1.273 +    private int[] bucketSizes;
   1.274 +    private int bucketCount;
   1.275 +    private int maxDepth;
   1.276 +    private int minStringLength = Integer.MAX_VALUE;
   1.277 +    private int keyKind;
   1.278 +    private int charAt;
   1.279 +
   1.280 +    private static final int LENGTH = 0;
   1.281 +    private static final int CHAR_AT = 1;
   1.282 +    private static final int HASH_CODE = 2;
   1.283 +
   1.284 +    /* Determines the maximum number of charAt(i)
   1.285 +     * tests that will be done. The search is
   1.286 +     * limited because if the number of characters
   1.287 +     * is large enough, the likelyhood of finding
   1.288 +     * a good hash key  based on this method is
   1.289 +     * low. The CHAR_AT_MAX_CHARS limit only
   1.290 +     * applies f there are more strings than
   1.291 +     * CHAR_AT_MAX_LINES.
   1.292 +     */
   1.293 +    private static final int CHAR_AT_MAX_LINES = 50;
   1.294 +    private static final int CHAR_AT_MAX_CHARS = 1000;
   1.295 +
   1.296 +    private void resetKeys(int keyKind) {
   1.297 +        this.keyKind = keyKind;
   1.298 +        switch (keyKind) {
   1.299 +        case LENGTH: method = "length()"; break;
   1.300 +        case CHAR_AT: method = "charAt("+charAt+")"; break;
   1.301 +        case HASH_CODE: method = "hashCode()"; break;
   1.302 +        }
   1.303 +        maxDepth = 1;
   1.304 +        bucketCount = 0;
   1.305 +        for (int i = 0; i < length; i++) {
   1.306 +            tempKeys[i] = 0;
   1.307 +            bucketSizes[i] = 0;
   1.308 +        }
   1.309 +    }
   1.310 +
   1.311 +    private void setMinStringLength() {
   1.312 +        for (int i = 0; i < length; i++) {
   1.313 +            if (strings[i].length() < minStringLength) {
   1.314 +                minStringLength = strings[i].length();
   1.315 +            }
   1.316 +        }
   1.317 +    }
   1.318 +
   1.319 +    private int findUnusedKey() {
   1.320 +        int unused = 0;
   1.321 +        int keysLength = keys.length;
   1.322 +
   1.323 +        // Note that we just assume that resource
   1.324 +        // exhaustion will occur rather than an
   1.325 +        // infinite loop here if the set of keys
   1.326 +        // is very large.
   1.327 +
   1.328 +        while (true) {
   1.329 +            boolean match = false;
   1.330 +            for (int i = 0; i < keysLength; i++) {
   1.331 +                if (keys[i] == unused) {
   1.332 +                    match = true;
   1.333 +                    break;
   1.334 +                }
   1.335 +            }
   1.336 +            if (match) {
   1.337 +                unused--;
   1.338 +            } else {
   1.339 +                break;
   1.340 +            }
   1.341 +        }
   1.342 +        return unused;
   1.343 +    }
   1.344 +
   1.345 +    private int getKeys(int methodKind) {
   1.346 +        resetKeys(methodKind);
   1.347 +        for(int i = 0; i < strings.length; i++) {
   1.348 +            addKey(getKey(strings[i]));
   1.349 +        }
   1.350 +        return maxDepth;
   1.351 +    }
   1.352 +
   1.353 +    private void addKey(int key) {
   1.354 +
   1.355 +        // Have we seen this one before?
   1.356 +
   1.357 +        boolean addIt = true;
   1.358 +        for (int j = 0; j < bucketCount; j++) {
   1.359 +            if (tempKeys[j] == key) {
   1.360 +                addIt = false;
   1.361 +                bucketSizes[j]++;
   1.362 +                if (bucketSizes[j] > maxDepth) {
   1.363 +                    maxDepth = bucketSizes[j];
   1.364 +                }
   1.365 +                break;
   1.366 +            }
   1.367 +        }
   1.368 +
   1.369 +        if (addIt) {
   1.370 +            tempKeys[bucketCount] = key;
   1.371 +            bucketSizes[bucketCount] = 1;
   1.372 +            bucketCount++;
   1.373 +        }
   1.374 +    }
   1.375 +}

mercurial