|
1 /* |
|
2 * Portions Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Sun designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Sun in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
|
22 * CA 95054 USA or visit www.sun.com if you need additional information or |
|
23 * have any questions. |
|
24 */ |
|
25 /* |
|
26 * Licensed Materials - Property of IBM |
|
27 * RMI-IIOP v1.0 |
|
28 * Copyright IBM Corp. 1998 1999 All Rights Reserved |
|
29 * |
|
30 */ |
|
31 |
|
32 package sun.rmi.rmic.iiop; |
|
33 |
|
34 /** |
|
35 * StaticStringsHash takes an array of constant strings and |
|
36 * uses several different hash methods to try to find the |
|
37 * 'best' one for that set. The set of methods is currently |
|
38 * fixed, but with a little work could be made extensible thru |
|
39 * subclassing. |
|
40 * <p> |
|
41 * The current set of methods is: |
|
42 * <ol> |
|
43 * <li> length() - works well when all strings are different length.</li> |
|
44 * <li> charAt(n) - works well when one offset into all strings is different.</li> |
|
45 * <li> hashCode() - works well with larger arrays.</li> |
|
46 * </ol> |
|
47 * After constructing an instance over the set of strings, the |
|
48 * <code>getKey(String)</code> method can be used to use the selected hash |
|
49 * method to produce a key. The <code>method</code> string will contain |
|
50 * "length()", "charAt(n)", or "hashCode()", and is intended for use by |
|
51 * code generators. |
|
52 * <p> |
|
53 * The <code>keys</code> array will contain the full set of unique keys. |
|
54 * <p> |
|
55 * The <code>buckets</code> array will contain a set of arrays, one for |
|
56 * each key in the <code>keys</code>, where <code>buckets[x][y]</code> |
|
57 * is an index into the <code>strings</code> array. |
|
58 * @author Bryan Atsatt |
|
59 */ |
|
60 public class StaticStringsHash { |
|
61 |
|
62 /** The set of strings upon which the hash info is created */ |
|
63 public String[] strings = null; |
|
64 |
|
65 /** Unique hash keys */ |
|
66 public int[] keys = null; |
|
67 |
|
68 /** Buckets for each key, where buckets[x][y] is an index |
|
69 * into the strings[] array. */ |
|
70 public int[][] buckets = null; |
|
71 |
|
72 /** The method to invoke on String to produce the hash key */ |
|
73 public String method = null; |
|
74 |
|
75 /** Get a key for the given string using the |
|
76 * selected hash method. |
|
77 * @param str the string to return a key for. |
|
78 * @return the key. |
|
79 */ |
|
80 public int getKey(String str) { |
|
81 switch (keyKind) { |
|
82 case LENGTH: return str.length(); |
|
83 case CHAR_AT: return str.charAt(charAt); |
|
84 case HASH_CODE: return str.hashCode(); |
|
85 } |
|
86 throw new Error("Bad keyKind"); |
|
87 } |
|
88 |
|
89 /** Constructor |
|
90 * @param strings the set of strings upon which to |
|
91 * find an optimal hash method. Must not contain |
|
92 * duplicates. |
|
93 */ |
|
94 public StaticStringsHash(String[] strings) { |
|
95 this.strings = strings; |
|
96 length = strings.length; |
|
97 tempKeys = new int[length]; |
|
98 bucketSizes = new int[length]; |
|
99 setMinStringLength(); |
|
100 |
|
101 // Decide on the best algorithm based on |
|
102 // which one has the smallest maximum |
|
103 // bucket depth. First, try length()... |
|
104 |
|
105 int currentMaxDepth = getKeys(LENGTH); |
|
106 int useCharAt = -1; |
|
107 boolean useHashCode = false; |
|
108 |
|
109 if (currentMaxDepth > 1) { |
|
110 |
|
111 // At least one bucket had more than one |
|
112 // entry, so try charAt(i). If there |
|
113 // are a lot of strings in the array, |
|
114 // and minStringLength is large, limit |
|
115 // the search to a smaller number of |
|
116 // characters to avoid spending a lot |
|
117 // of time here that is most likely to |
|
118 // be pointless... |
|
119 |
|
120 int minLength = minStringLength; |
|
121 if (length > CHAR_AT_MAX_LINES && |
|
122 length * minLength > CHAR_AT_MAX_CHARS) { |
|
123 minLength = length/CHAR_AT_MAX_CHARS; |
|
124 } |
|
125 |
|
126 charAt = 0; |
|
127 for (int i = 0; i < minLength; i++) { |
|
128 int charAtDepth = getKeys(CHAR_AT); |
|
129 if (charAtDepth < currentMaxDepth) { |
|
130 currentMaxDepth = charAtDepth; |
|
131 useCharAt = i; |
|
132 if (currentMaxDepth == 1) { |
|
133 break; |
|
134 } |
|
135 } |
|
136 charAt++; |
|
137 } |
|
138 charAt = useCharAt; |
|
139 |
|
140 |
|
141 if (currentMaxDepth > 1) { |
|
142 |
|
143 // At least one bucket had more than one |
|
144 // entry, try hashCode(). |
|
145 // |
|
146 // Since the cost of computing a full hashCode |
|
147 // (for the runtime target string) is much higher |
|
148 // than the previous methods, use it only if it is |
|
149 // substantially better. The definition of 'substantial' |
|
150 // here is not very well founded, and could be improved |
|
151 // with some further analysis ;^) |
|
152 |
|
153 int hashCodeDepth = getKeys(HASH_CODE); |
|
154 if (hashCodeDepth < currentMaxDepth-3) { |
|
155 |
|
156 // Using the full hashCode results in at least |
|
157 // 3 fewer entries in the worst bucket, so will |
|
158 // therefore avoid at least 3 calls to equals() |
|
159 // in the worst case. |
|
160 // |
|
161 // Note that using a number smaller than 3 could |
|
162 // result in using a hashCode when there are only |
|
163 // 2 strings in the array, and that would surely |
|
164 // be a poor performance choice. |
|
165 |
|
166 useHashCode = true; |
|
167 } |
|
168 } |
|
169 |
|
170 // Reset keys if needed... |
|
171 |
|
172 if (!useHashCode) { |
|
173 if (useCharAt >= 0) { |
|
174 |
|
175 // Use the charAt(i) method... |
|
176 |
|
177 getKeys(CHAR_AT); |
|
178 |
|
179 } else { |
|
180 |
|
181 // Use length method... |
|
182 |
|
183 getKeys(LENGTH); |
|
184 } |
|
185 } |
|
186 } |
|
187 |
|
188 // Now allocate and fill our real hashKeys array... |
|
189 |
|
190 keys = new int[bucketCount]; |
|
191 System.arraycopy(tempKeys,0,keys,0,bucketCount); |
|
192 |
|
193 // Sort keys and bucketSizes arrays... |
|
194 |
|
195 boolean didSwap; |
|
196 do { |
|
197 didSwap = false; |
|
198 for (int i = 0; i < bucketCount - 1; i++) { |
|
199 if (keys[i] > keys[i+1]) { |
|
200 int temp = keys[i]; |
|
201 keys[i] = keys[i+1]; |
|
202 keys[i+1] = temp; |
|
203 temp = bucketSizes[i]; |
|
204 bucketSizes[i] = bucketSizes[i+1]; |
|
205 bucketSizes[i+1] = temp; |
|
206 didSwap = true; |
|
207 } |
|
208 } |
|
209 } |
|
210 while (didSwap == true); |
|
211 |
|
212 // Allocate our buckets array. Fill the string |
|
213 // index slot with an unused key so we can |
|
214 // determine which are free... |
|
215 |
|
216 int unused = findUnusedKey(); |
|
217 buckets = new int[bucketCount][]; |
|
218 for (int i = 0; i < bucketCount; i++) { |
|
219 buckets[i] = new int[bucketSizes[i]]; |
|
220 for (int j = 0; j < bucketSizes[i]; j++) { |
|
221 buckets[i][j] = unused; |
|
222 } |
|
223 } |
|
224 |
|
225 // And fill it in... |
|
226 |
|
227 for(int i = 0; i < strings.length; i++) { |
|
228 int key = getKey(strings[i]); |
|
229 for (int j = 0; j < bucketCount; j++) { |
|
230 if (keys[j] == key) { |
|
231 int k = 0; |
|
232 while (buckets[j][k] != unused) { |
|
233 k++; |
|
234 } |
|
235 buckets[j][k] = i; |
|
236 break; |
|
237 } |
|
238 } |
|
239 } |
|
240 } |
|
241 |
|
242 /** Print an optimized 'contains' method for the |
|
243 * argument strings |
|
244 */ |
|
245 public static void main (String[] args) { |
|
246 StaticStringsHash hash = new StaticStringsHash(args); |
|
247 System.out.println(); |
|
248 System.out.println(" public boolean contains(String key) {"); |
|
249 System.out.println(" switch (key."+hash.method+") {"); |
|
250 for (int i = 0; i < hash.buckets.length; i++) { |
|
251 System.out.println(" case "+hash.keys[i]+": "); |
|
252 for (int j = 0; j < hash.buckets[i].length; j++) { |
|
253 if (j > 0) { |
|
254 System.out.print(" } else "); |
|
255 } else { |
|
256 System.out.print(" "); |
|
257 } |
|
258 System.out.println("if (key.equals(\""+ hash.strings[hash.buckets[i][j]] +"\")) {"); |
|
259 System.out.println(" return true;"); |
|
260 } |
|
261 System.out.println(" }"); |
|
262 } |
|
263 System.out.println(" }"); |
|
264 System.out.println(" return false;"); |
|
265 System.out.println(" }"); |
|
266 } |
|
267 |
|
268 private int length; |
|
269 private int[] tempKeys; |
|
270 private int[] bucketSizes; |
|
271 private int bucketCount; |
|
272 private int maxDepth; |
|
273 private int minStringLength = Integer.MAX_VALUE; |
|
274 private int keyKind; |
|
275 private int charAt; |
|
276 |
|
277 private static final int LENGTH = 0; |
|
278 private static final int CHAR_AT = 1; |
|
279 private static final int HASH_CODE = 2; |
|
280 |
|
281 /* Determines the maximum number of charAt(i) |
|
282 * tests that will be done. The search is |
|
283 * limited because if the number of characters |
|
284 * is large enough, the likelyhood of finding |
|
285 * a good hash key based on this method is |
|
286 * low. The CHAR_AT_MAX_CHARS limit only |
|
287 * applies f there are more strings than |
|
288 * CHAR_AT_MAX_LINES. |
|
289 */ |
|
290 private static final int CHAR_AT_MAX_LINES = 50; |
|
291 private static final int CHAR_AT_MAX_CHARS = 1000; |
|
292 |
|
293 private void resetKeys(int keyKind) { |
|
294 this.keyKind = keyKind; |
|
295 switch (keyKind) { |
|
296 case LENGTH: method = "length()"; break; |
|
297 case CHAR_AT: method = "charAt("+charAt+")"; break; |
|
298 case HASH_CODE: method = "hashCode()"; break; |
|
299 } |
|
300 maxDepth = 1; |
|
301 bucketCount = 0; |
|
302 for (int i = 0; i < length; i++) { |
|
303 tempKeys[i] = 0; |
|
304 bucketSizes[i] = 0; |
|
305 } |
|
306 } |
|
307 |
|
308 private void setMinStringLength() { |
|
309 for (int i = 0; i < length; i++) { |
|
310 if (strings[i].length() < minStringLength) { |
|
311 minStringLength = strings[i].length(); |
|
312 } |
|
313 } |
|
314 } |
|
315 |
|
316 private int findUnusedKey() { |
|
317 int unused = 0; |
|
318 int keysLength = keys.length; |
|
319 |
|
320 // Note that we just assume that resource |
|
321 // exhaustion will occur rather than an |
|
322 // infinite loop here if the set of keys |
|
323 // is very large. |
|
324 |
|
325 while (true) { |
|
326 boolean match = false; |
|
327 for (int i = 0; i < keysLength; i++) { |
|
328 if (keys[i] == unused) { |
|
329 match = true; |
|
330 break; |
|
331 } |
|
332 } |
|
333 if (match) { |
|
334 unused--; |
|
335 } else { |
|
336 break; |
|
337 } |
|
338 } |
|
339 return unused; |
|
340 } |
|
341 |
|
342 private int getKeys(int methodKind) { |
|
343 resetKeys(methodKind); |
|
344 for(int i = 0; i < strings.length; i++) { |
|
345 addKey(getKey(strings[i])); |
|
346 } |
|
347 return maxDepth; |
|
348 } |
|
349 |
|
350 private void addKey(int key) { |
|
351 |
|
352 // Have we seen this one before? |
|
353 |
|
354 boolean addIt = true; |
|
355 for (int j = 0; j < bucketCount; j++) { |
|
356 if (tempKeys[j] == key) { |
|
357 addIt = false; |
|
358 bucketSizes[j]++; |
|
359 if (bucketSizes[j] > maxDepth) { |
|
360 maxDepth = bucketSizes[j]; |
|
361 } |
|
362 break; |
|
363 } |
|
364 } |
|
365 |
|
366 if (addIt) { |
|
367 tempKeys[bucketCount] = key; |
|
368 bucketSizes[bucketCount] = 1; |
|
369 bucketCount++; |
|
370 } |
|
371 } |
|
372 } |