Mon, 03 Nov 2014 11:47:41 +0100
8060204: Fix warnings in Joni and tests
Reviewed-by: hannesw, sundar, attila
1 /*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20 package jdk.nashorn.internal.runtime.regexp.joni;
22 import java.util.Arrays;
23 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
24 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
26 @SuppressWarnings("javadoc")
27 public final class EncodingHelper {
29 final static int NEW_LINE = 0x000a;
30 final static int RETURN = 0x000d;
31 final static int LINE_SEPARATOR = 0x2028;
32 final static int PARAGRAPH_SEPARATOR = 0x2029;
34 final static char[] EMPTYCHARS = new char[0];
35 final static int[][] codeRanges = new int[15][];
37 public static int digitVal(final int code) {
38 return code - '0';
39 }
41 public static int odigitVal(final int code) {
42 return digitVal(code);
43 }
45 public static boolean isXDigit(final int code) {
46 return Character.isDigit(code) || (code >= 'a' && code <= 'f') || (code >= 'A' && code <= 'F');
47 }
49 public static int xdigitVal(final int code) {
50 if (Character.isDigit(code)) {
51 return code - '0';
52 } else if (code >= 'a' && code <= 'f') {
53 return code - 'a' + 10;
54 } else {
55 return code - 'A' + 10;
56 }
57 }
59 public static boolean isDigit(final int code) {
60 return code >= '0' && code <= '9';
61 }
63 public static boolean isWord(final int code) {
64 // letter, digit, or '_'
65 return (1 << Character.getType(code) & CharacterType.WORD_MASK) != 0;
66 }
68 public static boolean isNewLine(final int code) {
69 return code == NEW_LINE || code == RETURN || code == LINE_SEPARATOR || code == PARAGRAPH_SEPARATOR;
70 }
72 public static boolean isNewLine(final char[] chars, final int p, final int end) {
73 return p < end && isNewLine(chars[p]);
74 }
76 // Encoding.prevCharHead
77 public static int prevCharHead(final int p, final int s) {
78 return s <= p ? -1 : s - 1;
79 }
81 /* onigenc_get_right_adjust_char_head_with_prev */
82 public static int rightAdjustCharHeadWithPrev(final int s, final IntHolder prev) {
83 if (prev != null) {
84 prev.value = -1; /* Sorry */
85 }
86 return s;
87 }
89 // Encoding.stepBack
90 public static int stepBack(final int p, final int sp, final int np) {
91 int s = sp, n = np;
92 while (s != -1 && n-- > 0) {
93 if (s <= p) {
94 return -1;
95 }
96 s--;
97 }
98 return s;
99 }
101 public static int mbcodeStartPosition() {
102 return 0x80;
103 }
105 public static char[] caseFoldCodesByString(final int flag, final char c) {
106 char[] codes = EMPTYCHARS;
107 final char upper = toUpperCase(c);
109 if (upper != toLowerCase(upper)) {
110 int count = 0;
111 char ch = 0;
113 do {
114 final char u = toUpperCase(ch);
115 if (u == upper && ch != c) {
116 // Almost all characters will return array of length 1, very few 2 or 3, so growing by one is fine.
117 codes = count == 0 ? new char[1] : Arrays.copyOf(codes, count + 1);
118 codes[count++] = ch;
119 }
120 } while (ch++ < 0xffff);
121 }
122 return codes;
123 }
125 public static void applyAllCaseFold(final int flag, final ApplyCaseFold fun, final Object arg) {
126 for (int c = 0; c < 0xffff; c++) {
127 if (Character.isLowerCase(c)) {
128 final int upper = toUpperCase(c);
130 if (upper != c) {
131 ApplyCaseFold.apply(c, upper, arg);
132 }
133 }
134 }
136 // Some characters have multiple lower case variants, hence we need to do a second run
137 for (int c = 0; c < 0xffff; c++) {
138 if (Character.isLowerCase(c)) {
139 final int upper = toUpperCase(c);
141 if (upper != c) {
142 ApplyCaseFold.apply(upper, c, arg);
143 }
144 }
145 }
146 }
148 public static char toLowerCase(final char c) {
149 return (char)toLowerCase((int)c);
150 }
152 public static int toLowerCase(final int c) {
153 if (c < 128) {
154 return ('A' <= c && c <= 'Z') ? (c + ('a' - 'A')) : c;
155 }
156 // Do not convert non-ASCII upper case character to ASCII lower case.
157 final int lower = Character.toLowerCase(c);
158 return (lower < 128) ? c : lower;
160 }
162 public static char toUpperCase(final char c) {
163 return (char)toUpperCase((int)c);
164 }
166 public static int toUpperCase(final int c) {
167 if (c < 128) {
168 return ('a' <= c && c <= 'z') ? c + ('A' - 'a') : c;
169 }
170 // Do not convert non-ASCII lower case character to ASCII upper case.
171 final int upper = Character.toUpperCase(c);
172 return (upper < 128) ? c : upper;
173 }
175 public static int[] ctypeCodeRange(final int ctype, final IntHolder sbOut) {
176 sbOut.value = 0x100; // use bitset for codes smaller than 256
177 int[] range = null;
179 if (ctype < codeRanges.length) {
180 range = codeRanges[ctype];
182 if (range == null) {
183 // format: [numberOfRanges, rangeStart, rangeEnd, ...]
184 range = new int[16];
185 int rangeCount = 0;
186 int lastCode = -2;
188 for (int code = 0; code <= 0xffff; code++) {
189 if (isCodeCType(code, ctype)) {
190 if (lastCode < code -1) {
191 if (rangeCount * 2 + 2 >= range.length) {
192 range = Arrays.copyOf(range, range.length * 2);
193 }
194 range[rangeCount * 2 + 1] = code;
195 rangeCount++;
196 }
197 range[rangeCount * 2] = lastCode = code;
198 }
199 }
201 if (rangeCount * 2 + 1 < range.length) {
202 range = Arrays.copyOf(range, rangeCount * 2 + 1);
203 }
205 range[0] = rangeCount;
206 codeRanges[ctype] = range;
207 }
208 }
210 return range;
211 }
213 // CodeRange.isInCodeRange
214 public static boolean isInCodeRange(final int[] p, final int offset, final int code) {
215 int low = 0;
216 final int n = p[offset];
217 int high = n ;
219 while (low < high) {
220 final int x = (low + high) >> 1;
221 if (code > p[(x << 1) + 2 + offset]) {
222 low = x + 1;
223 } else {
224 high = x;
225 }
226 }
227 return low < n && code >= p[(low << 1) + 1 + offset];
228 }
230 /**
231 * @see <a href="http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt">http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt</a>
232 */
233 public static boolean isCodeCType(final int code, final int ctype) {
234 int type;
235 switch (ctype) {
236 case CharacterType.NEWLINE:
237 return isNewLine(code);
238 case CharacterType.ALPHA:
239 return (1 << Character.getType(code) & CharacterType.ALPHA_MASK) != 0;
240 case CharacterType.BLANK:
241 return code == 0x09 || Character.getType(code) == Character.SPACE_SEPARATOR;
242 case CharacterType.CNTRL:
243 type = Character.getType(code);
244 return (1 << type & CharacterType.CNTRL_MASK) != 0 || type == Character.UNASSIGNED;
245 case CharacterType.DIGIT:
246 return EncodingHelper.isDigit(code);
247 case CharacterType.GRAPH:
248 switch (code) {
249 case 0x09:
250 case 0x0a:
251 case 0x0b:
252 case 0x0c:
253 case 0x0d:
254 return false;
255 default:
256 type = Character.getType(code);
257 return (1 << type & CharacterType.GRAPH_MASK) == 0 && type != Character.UNASSIGNED;
258 }
259 case CharacterType.LOWER:
260 return Character.isLowerCase(code);
261 case CharacterType.PRINT:
262 type = Character.getType(code);
263 return (1 << type & CharacterType.PRINT_MASK) == 0 && type != Character.UNASSIGNED;
264 case CharacterType.PUNCT:
265 return (1 << Character.getType(code) & CharacterType.PUNCT_MASK) != 0;
266 case CharacterType.SPACE:
267 // ECMA 7.2 and 7.3
268 switch (code) {
269 case 0x09:
270 case 0x0a:
271 case 0x0b:
272 case 0x0c:
273 case 0x0d:
274 return true;
275 default:
276 // true if Unicode separator or BOM
277 return (1 << Character.getType(code) & CharacterType.SPACE_MASK) != 0 || code == 0xfeff;
278 }
279 case CharacterType.UPPER:
280 return Character.isUpperCase(code);
281 case CharacterType.XDIGIT:
282 return EncodingHelper.isXDigit(code);
283 case CharacterType.WORD:
284 return (1 << Character.getType(code) & CharacterType.WORD_MASK) != 0;
285 case CharacterType.ALNUM:
286 return (1 << Character.getType(code) & CharacterType.ALNUM_MASK) != 0;
287 case CharacterType.ASCII:
288 return code < 0x80;
289 default:
290 throw new RuntimeException("illegal character type: " + ctype);
291 }
292 }
293 }