src/share/classes/sun/rmi/rmic/iiop/StaticStringsHash.java

changeset 1
55540e827aef
child 158
91006f157c46
equal deleted inserted replaced
-1:000000000000 1:55540e827aef
1 /*
2 * Portions Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25 /*
26 * Licensed Materials - Property of IBM
27 * RMI-IIOP v1.0
28 * Copyright IBM Corp. 1998 1999 All Rights Reserved
29 *
30 */
31
32 package sun.rmi.rmic.iiop;
33
34 /**
35 * StaticStringsHash takes an array of constant strings and
36 * uses several different hash methods to try to find the
37 * 'best' one for that set. The set of methods is currently
38 * fixed, but with a little work could be made extensible thru
39 * subclassing.
40 * <p>
41 * The current set of methods is:
42 * <ol>
43 * <li> length() - works well when all strings are different length.</li>
44 * <li> charAt(n) - works well when one offset into all strings is different.</li>
45 * <li> hashCode() - works well with larger arrays.</li>
46 * </ol>
47 * After constructing an instance over the set of strings, the
48 * <code>getKey(String)</code> method can be used to use the selected hash
49 * method to produce a key. The <code>method</code> string will contain
50 * "length()", "charAt(n)", or "hashCode()", and is intended for use by
51 * code generators.
52 * <p>
53 * The <code>keys</code> array will contain the full set of unique keys.
54 * <p>
55 * The <code>buckets</code> array will contain a set of arrays, one for
56 * each key in the <code>keys</code>, where <code>buckets[x][y]</code>
57 * is an index into the <code>strings</code> array.
58 * @author Bryan Atsatt
59 */
60 public class StaticStringsHash {
61
62 /** The set of strings upon which the hash info is created */
63 public String[] strings = null;
64
65 /** Unique hash keys */
66 public int[] keys = null;
67
68 /** Buckets for each key, where buckets[x][y] is an index
69 * into the strings[] array. */
70 public int[][] buckets = null;
71
72 /** The method to invoke on String to produce the hash key */
73 public String method = null;
74
75 /** Get a key for the given string using the
76 * selected hash method.
77 * @param str the string to return a key for.
78 * @return the key.
79 */
80 public int getKey(String str) {
81 switch (keyKind) {
82 case LENGTH: return str.length();
83 case CHAR_AT: return str.charAt(charAt);
84 case HASH_CODE: return str.hashCode();
85 }
86 throw new Error("Bad keyKind");
87 }
88
89 /** Constructor
90 * @param strings the set of strings upon which to
91 * find an optimal hash method. Must not contain
92 * duplicates.
93 */
94 public StaticStringsHash(String[] strings) {
95 this.strings = strings;
96 length = strings.length;
97 tempKeys = new int[length];
98 bucketSizes = new int[length];
99 setMinStringLength();
100
101 // Decide on the best algorithm based on
102 // which one has the smallest maximum
103 // bucket depth. First, try length()...
104
105 int currentMaxDepth = getKeys(LENGTH);
106 int useCharAt = -1;
107 boolean useHashCode = false;
108
109 if (currentMaxDepth > 1) {
110
111 // At least one bucket had more than one
112 // entry, so try charAt(i). If there
113 // are a lot of strings in the array,
114 // and minStringLength is large, limit
115 // the search to a smaller number of
116 // characters to avoid spending a lot
117 // of time here that is most likely to
118 // be pointless...
119
120 int minLength = minStringLength;
121 if (length > CHAR_AT_MAX_LINES &&
122 length * minLength > CHAR_AT_MAX_CHARS) {
123 minLength = length/CHAR_AT_MAX_CHARS;
124 }
125
126 charAt = 0;
127 for (int i = 0; i < minLength; i++) {
128 int charAtDepth = getKeys(CHAR_AT);
129 if (charAtDepth < currentMaxDepth) {
130 currentMaxDepth = charAtDepth;
131 useCharAt = i;
132 if (currentMaxDepth == 1) {
133 break;
134 }
135 }
136 charAt++;
137 }
138 charAt = useCharAt;
139
140
141 if (currentMaxDepth > 1) {
142
143 // At least one bucket had more than one
144 // entry, try hashCode().
145 //
146 // Since the cost of computing a full hashCode
147 // (for the runtime target string) is much higher
148 // than the previous methods, use it only if it is
149 // substantially better. The definition of 'substantial'
150 // here is not very well founded, and could be improved
151 // with some further analysis ;^)
152
153 int hashCodeDepth = getKeys(HASH_CODE);
154 if (hashCodeDepth < currentMaxDepth-3) {
155
156 // Using the full hashCode results in at least
157 // 3 fewer entries in the worst bucket, so will
158 // therefore avoid at least 3 calls to equals()
159 // in the worst case.
160 //
161 // Note that using a number smaller than 3 could
162 // result in using a hashCode when there are only
163 // 2 strings in the array, and that would surely
164 // be a poor performance choice.
165
166 useHashCode = true;
167 }
168 }
169
170 // Reset keys if needed...
171
172 if (!useHashCode) {
173 if (useCharAt >= 0) {
174
175 // Use the charAt(i) method...
176
177 getKeys(CHAR_AT);
178
179 } else {
180
181 // Use length method...
182
183 getKeys(LENGTH);
184 }
185 }
186 }
187
188 // Now allocate and fill our real hashKeys array...
189
190 keys = new int[bucketCount];
191 System.arraycopy(tempKeys,0,keys,0,bucketCount);
192
193 // Sort keys and bucketSizes arrays...
194
195 boolean didSwap;
196 do {
197 didSwap = false;
198 for (int i = 0; i < bucketCount - 1; i++) {
199 if (keys[i] > keys[i+1]) {
200 int temp = keys[i];
201 keys[i] = keys[i+1];
202 keys[i+1] = temp;
203 temp = bucketSizes[i];
204 bucketSizes[i] = bucketSizes[i+1];
205 bucketSizes[i+1] = temp;
206 didSwap = true;
207 }
208 }
209 }
210 while (didSwap == true);
211
212 // Allocate our buckets array. Fill the string
213 // index slot with an unused key so we can
214 // determine which are free...
215
216 int unused = findUnusedKey();
217 buckets = new int[bucketCount][];
218 for (int i = 0; i < bucketCount; i++) {
219 buckets[i] = new int[bucketSizes[i]];
220 for (int j = 0; j < bucketSizes[i]; j++) {
221 buckets[i][j] = unused;
222 }
223 }
224
225 // And fill it in...
226
227 for(int i = 0; i < strings.length; i++) {
228 int key = getKey(strings[i]);
229 for (int j = 0; j < bucketCount; j++) {
230 if (keys[j] == key) {
231 int k = 0;
232 while (buckets[j][k] != unused) {
233 k++;
234 }
235 buckets[j][k] = i;
236 break;
237 }
238 }
239 }
240 }
241
242 /** Print an optimized 'contains' method for the
243 * argument strings
244 */
245 public static void main (String[] args) {
246 StaticStringsHash hash = new StaticStringsHash(args);
247 System.out.println();
248 System.out.println(" public boolean contains(String key) {");
249 System.out.println(" switch (key."+hash.method+") {");
250 for (int i = 0; i < hash.buckets.length; i++) {
251 System.out.println(" case "+hash.keys[i]+": ");
252 for (int j = 0; j < hash.buckets[i].length; j++) {
253 if (j > 0) {
254 System.out.print(" } else ");
255 } else {
256 System.out.print(" ");
257 }
258 System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {");
259 System.out.println(" return true;");
260 }
261 System.out.println(" }");
262 }
263 System.out.println(" }");
264 System.out.println(" return false;");
265 System.out.println(" }");
266 }
267
268 private int length;
269 private int[] tempKeys;
270 private int[] bucketSizes;
271 private int bucketCount;
272 private int maxDepth;
273 private int minStringLength = Integer.MAX_VALUE;
274 private int keyKind;
275 private int charAt;
276
277 private static final int LENGTH = 0;
278 private static final int CHAR_AT = 1;
279 private static final int HASH_CODE = 2;
280
281 /* Determines the maximum number of charAt(i)
282 * tests that will be done. The search is
283 * limited because if the number of characters
284 * is large enough, the likelyhood of finding
285 * a good hash key based on this method is
286 * low. The CHAR_AT_MAX_CHARS limit only
287 * applies f there are more strings than
288 * CHAR_AT_MAX_LINES.
289 */
290 private static final int CHAR_AT_MAX_LINES = 50;
291 private static final int CHAR_AT_MAX_CHARS = 1000;
292
293 private void resetKeys(int keyKind) {
294 this.keyKind = keyKind;
295 switch (keyKind) {
296 case LENGTH: method = "length()"; break;
297 case CHAR_AT: method = "charAt("+charAt+")"; break;
298 case HASH_CODE: method = "hashCode()"; break;
299 }
300 maxDepth = 1;
301 bucketCount = 0;
302 for (int i = 0; i < length; i++) {
303 tempKeys[i] = 0;
304 bucketSizes[i] = 0;
305 }
306 }
307
308 private void setMinStringLength() {
309 for (int i = 0; i < length; i++) {
310 if (strings[i].length() < minStringLength) {
311 minStringLength = strings[i].length();
312 }
313 }
314 }
315
316 private int findUnusedKey() {
317 int unused = 0;
318 int keysLength = keys.length;
319
320 // Note that we just assume that resource
321 // exhaustion will occur rather than an
322 // infinite loop here if the set of keys
323 // is very large.
324
325 while (true) {
326 boolean match = false;
327 for (int i = 0; i < keysLength; i++) {
328 if (keys[i] == unused) {
329 match = true;
330 break;
331 }
332 }
333 if (match) {
334 unused--;
335 } else {
336 break;
337 }
338 }
339 return unused;
340 }
341
342 private int getKeys(int methodKind) {
343 resetKeys(methodKind);
344 for(int i = 0; i < strings.length; i++) {
345 addKey(getKey(strings[i]));
346 }
347 return maxDepth;
348 }
349
350 private void addKey(int key) {
351
352 // Have we seen this one before?
353
354 boolean addIt = true;
355 for (int j = 0; j < bucketCount; j++) {
356 if (tempKeys[j] == key) {
357 addIt = false;
358 bucketSizes[j]++;
359 if (bucketSizes[j] > maxDepth) {
360 maxDepth = bucketSizes[j];
361 }
362 break;
363 }
364 }
365
366 if (addIt) {
367 tempKeys[bucketCount] = key;
368 bucketSizes[bucketCount] = 1;
369 bucketCount++;
370 }
371 }
372 }

mercurial