Thu, 31 Aug 2017 15:18:52 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.bind.api.impl; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import java.util.ArrayList; |
aoqi@0 | 29 | import java.util.Collections; |
aoqi@0 | 30 | import java.util.HashSet; |
aoqi@0 | 31 | import java.util.List; |
aoqi@0 | 32 | import java.util.Locale; |
aoqi@0 | 33 | |
/**
 * Methods that convert strings into various formats.
 *
 * <p>
 * What JAX-RPC name binding tells us is that even such a basic method
 * as "isLetter" can be different depending on the situation.
 *
 * For this reason, several methods are instance methods (so that a
 * subclass can override them) even though they look like they should
 * be static.
 */
class NameUtil {

    // ------------------------------------------------------------------
    // The 5-category classification that this code uses to find
    // word breaks. The numeric values are used as indices into
    // actionTable, so they must stay in the range [0, 5).
    // ------------------------------------------------------------------
    static protected final int UPPER_LETTER = 0;
    static protected final int LOWER_LETTER = 1;
    static protected final int OTHER_LETTER = 2;
    static protected final int DIGIT = 3;
    static protected final int OTHER = 4;

    /** Number of character categories; the action table is CATEGORY_COUNT x CATEGORY_COUNT. */
    private static final int CATEGORY_COUNT = 5;

    // Action constants. See nextBreak for how each one is interpreted.
    static private final byte ACTION_CHECK_PUNCT = 0;
    static private final byte ACTION_CHECK_C2 = 1;
    static private final byte ACTION_BREAK = 2;
    static private final byte ACTION_NOBREAK = 3;

    /**
     * Look up table for actions.
     * type0*5+type1 would yield the action to be taken.
     */
    private static final byte[] actionTable = new byte[CATEGORY_COUNT * CATEGORY_COUNT];

    static {
        // Pre-compute the action for every (previous, next) category pair.
        for (int t0 = 0; t0 < CATEGORY_COUNT; t0++) {
            for (int t1 = 0; t1 < CATEGORY_COUNT; t1++) {
                actionTable[t0 * CATEGORY_COUNT + t1] = decideAction(t0, t1);
            }
        }
    }

    /**
     * Tells whether the character is treated as a word-separating
     * punctuation mark: one of '-', '.', ':', '_', or the code points
     * U+00B7, U+0387, U+06DD and U+06DE.
     */
    protected boolean isPunct(char c) {
        return c == '-' || c == '.' || c == ':' || c == '_' || c == '\u00b7' || c == '\u0387' || c == '\u06dd' || c == '\u06de';
    }

    /** Tells whether the character is an ASCII digit or a digit per {@link Character#isDigit(char)}. */
    protected static boolean isDigit(char c) {
        return c >= '0' && c <= '9' || Character.isDigit(c);
    }

    /** Tells whether the character is an ASCII upper-case letter or upper case per {@link Character#isUpperCase(char)}. */
    protected static boolean isUpper(char c) {
        return c >= 'A' && c <= 'Z' || Character.isUpperCase(c);
    }

    /** Tells whether the character is an ASCII lower-case letter or lower case per {@link Character#isLowerCase(char)}. */
    protected static boolean isLower(char c) {
        return c >= 'a' && c <= 'z' || Character.isLowerCase(c);
    }

    /** Tells whether the character is an ASCII letter or a letter per {@link Character#isLetter(char)}. */
    protected boolean isLetter(char c) {
        return c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || Character.isLetter(c);
    }

    // Case conversion always uses Locale.ENGLISH so that the result does
    // not depend on the default locale of the running JVM.
    private String toLowerCase(String s) {
        return s.toLowerCase(Locale.ENGLISH);
    }

    private String toUpperCase(char c) {
        return String.valueOf(c).toUpperCase(Locale.ENGLISH);
    }

    private String toUpperCase(String s) {
        return s.toUpperCase(Locale.ENGLISH);
    }

    /**
     * Capitalizes the first character of the specified string
     * and de-capitalizes the remaining characters.
     *
     * <p>
     * The conversion is only applied when the first character is a
     * lower-case letter; otherwise (including for the empty string)
     * the string is returned unchanged.
     *
     * @param s
     *      String to be capitalized. Must not be null.
     * @return
     *      The converted string, or <code>s</code> itself when no
     *      conversion applies.
     */
    public String capitalize(String s) {
        // Guard the empty string: there is no first character to inspect.
        if (s.length() == 0 || !isLower(s.charAt(0))) {
            return s;
        }
        StringBuilder sb = new StringBuilder(s.length());
        sb.append(toUpperCase(s.charAt(0)));
        sb.append(toLowerCase(s.substring(1)));
        return sb.toString();
    }

    /**
     * Finds the index at which the word that begins at <code>start</code> ends.
     *
     * Precondition: s[start] is not punctuation.
     *
     * @return
     *      The index of the first character that does not belong to the
     *      word, or -1 if the word extends to the end of the string.
     */
    private int nextBreak(String s, int start) {
        int n = s.length();

        char c1 = s.charAt(start);
        int t1 = classify(c1);

        for (int i = start + 1; i < n; i++) {
            // shift t1 into t0; the previous character itself is not needed
            int t0 = t1;

            c1 = s.charAt(i);
            t1 = classify(c1);

            switch (actionTable[t0 * CATEGORY_COUNT + t1]) {
            case ACTION_CHECK_PUNCT:
                // two "other" characters in a row: break only at punctuation
                if (isPunct(c1)) {
                    return i;
                }
                break;
            case ACTION_CHECK_C2:
                // two upper-case letters in a row: break before the second
                // one only if it is followed by a lower-case letter
                if (i < n - 1) {
                    char c2 = s.charAt(i + 1);
                    if (isLower(c2)) {
                        return i;
                    }
                }
                break;
            case ACTION_BREAK:
                return i;
            // ACTION_NOBREAK: keep scanning
            }
        }
        return -1;
    }

    /**
     * Decide the action to be taken given
     * the classification of the preceding character 't0' and
     * the classification of the next character 't1'.
     */
    private static byte decideAction(int t0, int t1) {
        if (t0 == OTHER && t1 == OTHER) {
            return ACTION_CHECK_PUNCT;
        }
        // exactly one of the two is a digit
        if ((t0 == DIGIT) != (t1 == DIGIT)) {
            return ACTION_BREAK;
        }
        if (t0 == LOWER_LETTER && t1 != LOWER_LETTER) {
            return ACTION_BREAK;
        }
        // exactly one of the two is a letter of any kind
        if ((t0 <= OTHER_LETTER) != (t1 <= OTHER_LETTER)) {
            return ACTION_BREAK;
        }
        // exactly one of the two is an "other letter"
        if ((t0 == OTHER_LETTER) != (t1 == OTHER_LETTER)) {
            return ACTION_BREAK;
        }

        if (t0 == UPPER_LETTER && t1 == UPPER_LETTER) {
            return ACTION_CHECK_C2;
        }

        return ACTION_NOBREAK;
    }

    /**
     * Classify a character into 5 categories that determine the word break.
     *
     * @return
     *      One of {@link #UPPER_LETTER}, {@link #LOWER_LETTER},
     *      {@link #OTHER_LETTER}, {@link #DIGIT} or {@link #OTHER}.
     */
    protected int classify(char c0) {
        switch (Character.getType(c0)) {
        case Character.UPPERCASE_LETTER:
            return UPPER_LETTER;
        case Character.LOWERCASE_LETTER:
            return LOWER_LETTER;
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
            return OTHER_LETTER;
        case Character.DECIMAL_DIGIT_NUMBER:
            return DIGIT;
        default:
            return OTHER;
        }
    }

    /**
     * Tokenizes a string into words and capitalizes the first
     * character of each word.
     *
     * <p>
     * This method uses a change in character type as a splitter
     * of two words. For example, "abc100ghi" will be split into
     * {"Abc", "100","Ghi"}.
     *
     * <p>
     * Punctuation (see {@link #isPunct(char)}) between words is dropped,
     * and characters that cannot be part of a Java identifier are escaped.
     *
     * @return
     *      A possibly empty list of words; never null.
     */
    public List<String> toWordList(String s) {
        List<String> ss = new ArrayList<String>();
        int n = s.length();
        for (int i = 0; i < n;) {

            // Skip punctuation
            while (i < n && isPunct(s.charAt(i))) {
                i++;
            }
            if (i >= n) {
                break;
            }

            // Find next break and collect word
            int b = nextBreak(s, i);
            String w = (b == -1) ? s.substring(i) : s.substring(i, b);
            ss.add(escape(capitalize(w)));
            if (b == -1) {
                break;
            }
            i = b;
        }

        // We can't guarantee a valid Java identifier anyway, so an empty
        // word list is returned as-is rather than rejected.
        return ss;
    }

    /**
     * Concatenates the words into a single mixed-case name.
     *
     * @param ss
     *      Words to be concatenated.
     * @param startUpper
     *      If true the first word is used as-is; if false the whole
     *      first word is converted to lower case.
     * @return
     *      The concatenated name; empty if <code>ss</code> is empty.
     */
    protected String toMixedCaseName(List<String> ss, boolean startUpper) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(startUpper ? ss.get(0) : toLowerCase(ss.get(0)));
            for (int i = 1; i < ss.size(); i++) {
                sb.append(ss.get(i));
            }
        }
        return sb.toString();
    }

    /**
     * Concatenates the words into a single mixed-case name.
     *
     * <p>
     * The given array is not modified.
     *
     * @param ss
     *      Words to be concatenated.
     * @param startUpper
     *      If true the first word is used as-is; if false the whole
     *      first word is converted to lower case.
     * @param cdrUpper
     *      If true every word after the first one is passed through
     *      {@link #capitalize(String)} before being appended.
     * @return
     *      The concatenated name; empty if <code>ss</code> is empty.
     */
    protected String toMixedCaseVariableName(String[] ss,
                                             boolean startUpper,
                                             boolean cdrUpper) {
        StringBuilder sb = new StringBuilder();
        if (ss.length > 0) {
            sb.append(startUpper ? ss[0] : toLowerCase(ss[0]));
            for (int i = 1; i < ss.length; i++) {
                // capitalize on the fly instead of overwriting the caller's array
                sb.append(cdrUpper ? capitalize(ss[i]) : ss[i]);
            }
        }
        return sb.toString();
    }

    /**
     * Formats a string into "THIS_KIND_OF_FORMAT_ABC_DEF".
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(String s) {
        return toConstantName(toWordList(s));
    }

    /**
     * Formats a list of words into "THIS_KIND_OF_FORMAT_ABC_DEF":
     * each word is upper-cased and the words are joined with '_'.
     *
     * @return
     *      Always return a string but there's no guarantee that
     *      the generated code is a valid Java identifier.
     */
    public String toConstantName(List<String> ss) {
        StringBuilder sb = new StringBuilder();
        if (!ss.isEmpty()) {
            sb.append(toUpperCase(ss.get(0)));
            for (int i = 1; i < ss.size(); i++) {
                sb.append('_');
                sb.append(toUpperCase(ss.get(i)));
            }
        }
        return sb.toString();
    }

    /**
     * Escapes characters in the given string so that the result
     * consists only of characters usable in a Java identifier.
     *
     * <p>
     * Every character that is not a Java identifier part is replaced
     * by '_' followed by its code unit as four lower-case hexadecimal
     * digits. The escaped characters are appended to the given
     * StringBuilder.
     *
     * @param sb
     *      StringBuilder that receives escaped string.
     * @param s
     *      String to be escaped. <code>s.substring(start)</code>
     *      will be escaped and copied to the string builder.
     * @param start
     *      Index of the first character of <code>s</code> to process.
     */
    public static void escape(StringBuilder sb, String s, int start) {
        int n = s.length();
        for (int i = start; i < n; i++) {
            char c = s.charAt(i);
            if (Character.isJavaIdentifierPart(c)) {
                sb.append(c);
            } else {
                sb.append('_');
                // left-pad the hex value with zeros to four digits
                if (c <= '\u000f') {
                    sb.append("000");
                } else if (c <= '\u00ff') {
                    sb.append("00");
                } else if (c <= '\u0fff') {
                    sb.append('0');
                }
                sb.append(Integer.toString(c, 16));
            }
        }
    }

    /**
     * Escapes characters that are unusable as Java identifiers
     * by replacing unsafe characters with safe characters.
     *
     * @return
     *      <code>s</code> itself when nothing needs escaping,
     *      otherwise a new escaped string.
     */
    private static String escape(String s) {
        int n = s.length();
        for (int i = 0; i < n; i++) {
            if (!Character.isJavaIdentifierPart(s.charAt(i))) {
                // everything before i is already safe; escape the remainder
                StringBuilder sb = new StringBuilder(s.substring(0, i));
                escape(sb, s, i);
                return sb.toString();
            }
        }
        return s;
    }
}