src/share/jaxws_classes/com/sun/xml/internal/bind/api/impl/NameUtil.java

changeset 0
373ffda63c9a
child 637
9c07ef4934dd
equal deleted inserted replaced
-1:000000000000 0:373ffda63c9a
1 /*
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package com.sun.xml.internal.bind.api.impl;
27
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Locale;
33
/**
 * Methods that convert strings into various name formats.
 *
 * <p>
 * JAX-RPC name binding shows that even a basic predicate such as
 * "isLetter" can differ depending on the situation. For this reason
 * many of the methods below are instance methods (so that a subclass
 * can override them), even though they look like they could be static.
 */
class NameUtil {

    // ------------------------------------------------------------------
    // character predicates
    // ------------------------------------------------------------------

    /**
     * Tells whether the character is one of the punctuation characters
     * that {@link #toWordList(String)} skips between words.
     */
    protected boolean isPunct(char c) {
        return c == '-' || c == '.' || c == ':' || c == '_' || c == '\u00b7' || c == '\u0387' || c == '\u06dd' || c == '\u06de';
    }

    /**
     * Tells whether the character is a digit: an ASCII digit, or anything
     * accepted by {@link Character#isDigit(char)}.
     */
    protected static boolean isDigit(char c) {
        return c >= '0' && c <= '9' || Character.isDigit(c);
    }

    /**
     * Tells whether the character is upper case: an ASCII capital, or anything
     * accepted by {@link Character#isUpperCase(char)}.
     */
    protected static boolean isUpper(char c) {
        return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
    }

    /**
     * Tells whether the character is lower case: an ASCII small letter, or
     * anything accepted by {@link Character#isLowerCase(char)}.
     */
    protected static boolean isLower(char c) {
        return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
    }

    /**
     * Tells whether the character is a letter: an ASCII letter, or anything
     * accepted by {@link Character#isLetter(char)}.
     */
    protected boolean isLetter(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || Character.isLetter(c);
    }

    // ------------------------------------------------------------------
    // case conversion helpers (always performed with Locale.ENGLISH so the
    // result does not depend on the default locale of the running JVM)
    // ------------------------------------------------------------------

    private String toLowerCase(String s) {
        return s.toLowerCase(Locale.ENGLISH);
    }

    private String toUpperCase(char c) {
        return String.valueOf(c).toUpperCase(Locale.ENGLISH);
    }

    private String toUpperCase(String s) {
        return s.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Capitalizes a word.
     *
     * <p>
     * If the first character is lower case, it is converted to upper case
     * and the remaining characters are converted to lower case. Otherwise
     * (including for the empty string) the argument is returned unchanged;
     * in particular a word that already starts with an upper case letter
     * keeps the case of its remaining characters.
     *
     * @param s
     *      the word to capitalize. Must not be null.
     * @return
     *      the capitalized word.
     */
    public String capitalize(String s) {
        // An empty word has no first character to inspect; without this
        // guard charAt(0) would throw StringIndexOutOfBoundsException.
        if (s.length() == 0 || !isLower(s.charAt(0))) {
            return s;
        }
        StringBuilder sb = new StringBuilder(s.length());
        sb.append(toUpperCase(s.charAt(0)));
        sb.append(toLowerCase(s.substring(1)));
        return sb.toString();
    }

    /**
     * Finds the index at which the word starting at <code>start</code> ends.
     *
     * <p>
     * Precondition: the character at <code>start</code> is not punctuation.
     *
     * @return
     *      the index of the first character that does not belong to the
     *      word, or -1 if the word runs to the end of the string.
     */
    private int nextBreak(String s, int start) {
        int n = s.length();
        int prevType = classify(s.charAt(start));

        for (int i = start + 1; i < n; i++) {
            char cur = s.charAt(i);
            int curType = classify(cur);

            switch (actionTable[prevType * 5 + curType]) {
            case ACTION_CHECK_PUNCT:
                // break only if the current character is punctuation
                if (isPunct(cur)) {
                    return i;
                }
                break;
            case ACTION_CHECK_C2:
                // two capitals in a row: break only when the character after
                // the current one is lower case, so that "XMLParser" is split
                // before the 'P' rather than inside "XML"
                if (i < n - 1 && isLower(s.charAt(i + 1))) {
                    return i;
                }
                break;
            case ACTION_BREAK:
                return i;
            default:
                // ACTION_NOBREAK: keep scanning
                break;
            }

            // the current character becomes the preceding one
            prevType = curType;
        }
        return -1;
    }

    // the 5-category classification that we use in this code
    // to find word breaks
    static protected final int UPPER_LETTER = 0;
    static protected final int LOWER_LETTER = 1;
    static protected final int OTHER_LETTER = 2;
    static protected final int DIGIT = 3;
    static protected final int OTHER = 4;

    // action constants. see nextBreak for the meaning
    static private final byte ACTION_CHECK_PUNCT = 0;
    static private final byte ACTION_CHECK_C2 = 1;
    static private final byte ACTION_BREAK = 2;
    static private final byte ACTION_NOBREAK = 3;

    /**
     * Look up table for actions.
     * type0*5+type1 would yield the action to be taken.
     */
    private static final byte[] actionTable = new byte[5 * 5];

    /**
     * Decide the action to be taken given
     * the classification of the preceding character 't0' and
     * the classification of the next character 't1'.
     */
    private static byte decideAction(int t0, int t1) {
        if (t0 == OTHER && t1 == OTHER) {
            return ACTION_CHECK_PUNCT;
        }
        // a digit next to a non-digit
        if (xor(t0 == DIGIT, t1 == DIGIT)) {
            return ACTION_BREAK;
        }
        // a lower case letter followed by anything else
        if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
            return ACTION_BREAK;
        }
        // a letter (categories 0..2) next to a non-letter
        if (xor(t0 <= OTHER_LETTER, t1 <= OTHER_LETTER)) {
            return ACTION_BREAK;
        }
        // an "other" letter next to a cased letter
        if (xor(t0 == OTHER_LETTER, t1 == OTHER_LETTER)) {
            return ACTION_BREAK;
        }

        if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
            return ACTION_CHECK_C2;
        }

        return ACTION_NOBREAK;
    }

    /**
     * Exclusive or: true when exactly one of the two arguments is true.
     */
    private static boolean xor(boolean x, boolean y) {
        return x != y;
    }

    static {
        // initialize the action table
        for (int t0 = 0; t0 < 5; t0++) {
            for (int t1 = 0; t1 < 5; t1++) {
                actionTable[t0 * 5 + t1] = decideAction(t0, t1);
            }
        }
    }

    /**
     * Classify a character into 5 categories that determine the word break.
     *
     * @return
     *      one of {@link #UPPER_LETTER}, {@link #LOWER_LETTER},
     *      {@link #OTHER_LETTER}, {@link #DIGIT} or {@link #OTHER},
     *      derived from {@link Character#getType(char)}.
     */
    protected int classify(char c0) {
        switch (Character.getType(c0)) {
        case Character.UPPERCASE_LETTER:
            return UPPER_LETTER;
        case Character.LOWERCASE_LETTER:
            return LOWER_LETTER;
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
            return OTHER_LETTER;
        case Character.DECIMAL_DIGIT_NUMBER:
            return DIGIT;
        default:
            return OTHER;
        }
    }

    /**
     * Tokenizes a string into words and capitalizes the first
     * character of each word.
     *
     * <p>
     * This method uses a change in character type as a splitter
     * of two words. For example, "abc100ghi" will be split into
     * {"Abc", "100","Ghi"}. Punctuation (see {@link #isPunct(char)})
     * in front of a word is dropped, and each word is escaped so that it
     * only contains Java identifier part characters.
     *
     * @return
     *      a new list of words; empty if the string contains no word.
     */
    public List<String> toWordList(String s) {
        List<String> words = new ArrayList<String>();
        int n = s.length();
        int i = 0;
        while (i < n) {
            // Skip punctuation
            while (i < n && isPunct(s.charAt(i))) {
                i++;
            }
            if (i >= n) {
                break;
            }

            // Find next break and collect word
            int b = nextBreak(s, i);
            String w = (b == -1) ? s.substring(i) : s.substring(i, b);
            words.add(escape(capitalize(w)));
            if (b == -1) {
                break;
            }
            i = b;
        }

        // An empty list is deliberately not rejected: we can't guarantee
        // a valid Java identifier anyway, so there's not much point in
        // throwing for a zero-length identifier here.
        return words;
    }

    /**
     * Concatenates the words into a single mixed-case name.
     *
     * @param ss
     *      the words, typically produced by {@link #toWordList(String)}.
     * @param startUpper
     *      if true the first word is used as is; if false the whole
     *      first word is converted to lower case.
     * @return
     *      the concatenated name; the empty string for an empty list.
     */
    protected String toMixedCaseName(List<String> ss, boolean startUpper) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(startUpper ? ss.get(0) : toLowerCase(ss.get(0)));
            for (int i = 1; i < ss.size(); i++) {
                sb.append(ss.get(i));
            }
        }
        return sb.toString();
    }

    /**
     * Concatenates the words into a single mixed-case name.
     *
     * <p>
     * Note that when <code>cdrUpper</code> is true the given array is
     * modified in place: every element but the first is replaced by its
     * capitalized form.
     *
     * @param ss
     *      the words.
     * @param startUpper
     *      if true the first word is used as is; if false the whole
     *      first word is converted to lower case.
     * @param cdrUpper
     *      if true every word but the first is capitalized
     *      (see {@link #capitalize(String)}).
     * @return
     *      the concatenated name; the empty string for an empty array.
     */
    protected String toMixedCaseVariableName(String[] ss,
                                             boolean startUpper,
                                             boolean cdrUpper) {
        if (cdrUpper) {
            for (int i = 1; i < ss.length; i++) {
                ss[i] = capitalize(ss[i]);
            }
        }
        StringBuilder sb = new StringBuilder();
        if (ss.length > 0) {
            sb.append(startUpper ? ss[0] : toLowerCase(ss[0]));
            for (int i = 1; i < ss.length; i++) {
                sb.append(ss[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(String s) {
        return toConstantName(toWordList(s));
    }

    /**
     * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF":
     * every word is converted to upper case and the words are joined
     * with underscores.
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(toUpperCase(ss.get(0)));
            for (int i = 1; i < ss.size(); i++) {
                sb.append('_');
                sb.append(toUpperCase(ss.get(i)));
            }
        }
        return sb.toString();
    }

    /**
     * Escapes the characters in the given string that cannot be part of
     * a Java identifier.
     *
     * <p>
     * Characters accepted by {@link Character#isJavaIdentifierPart(char)}
     * are copied as they are (this includes non-ASCII letters). Every other
     * character is replaced by an underscore followed by its code as four
     * lower case hexadecimal digits; for example a space becomes "_0020".
     *
     * The escaped characters will be appended to the given
     * StringBuilder.
     *
     * @param sb
     *      StringBuilder that receives escaped string.
     * @param s
     *      String to be escaped. <code>s.substring(start)</code>
     *      will be escaped and copied to the string builder.
     * @param start
     *      index of the first character of <code>s</code> to process.
     */
    public static void escape(StringBuilder sb, String s, int start) {
        int n = s.length();
        for (int i = start; i < n; i++) {
            char c = s.charAt(i);
            if (Character.isJavaIdentifierPart(c)) {
                sb.append(c);
            } else {
                sb.append('_');
                // left-pad the hexadecimal code with zeros to four digits
                if (c <= '\u000f') {
                    sb.append("000");
                } else if (c <= '\u00ff') {
                    sb.append("00");
                } else if (c <= '\u0fff') {
                    sb.append('0');
                }
                sb.append(Integer.toString(c, 16));
            }
        }
    }

    /**
     * Escapes characters that are unusable as Java identifiers
     * by replacing unsafe characters with safe characters.
     *
     * @return
     *      the same string instance if nothing needs escaping,
     *      otherwise a new escaped string.
     */
    private static String escape(String s) {
        int n = s.length();
        for (int i = 0; i < n; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                // everything before i is known to be safe; escape the rest
                StringBuilder sb = new StringBuilder(s.substring(0, i));
                escape(sb, s, i);
                return sb.toString();
            }
        }
        return s;
    }
}

mercurial