/*
 * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
 */

package com.sun.xml.internal.fastinfoset;

/**
 * Static lookup tables that classify a single octet (0..255) into a decoder
 * state.
 *
 * <p>Each table is described by a compact list of {@code { end, state }}
 * pairs. A pair covers every octet from one past the previous pair's
 * {@code end} (or 0 for the first pair) up to and including its own
 * {@code end}. The pairs are expanded once, in the static initializer, into
 * 256-entry arrays that are read through the static accessor methods.
 *
 * <p>NOTE(review): the prefixes EII, CII, AII and DII presumably abbreviate
 * element, character, attribute and document information item; confirm
 * against the Fast Infoset specification.
 *
 * <p>State numbers are only unique within one table: the EII/DII, AII,
 * NISTRING, ISTRING and UTF-8 groups each start again at 0.
 */
public final class DecoderStateTables {
    /** Position of the inclusive range end within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_END = 0;
    /** Position of the state value within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State stored for octets that the range tables mark as ILLEGAL. */
    public static final int STATE_ILLEGAL = 255;
    /** Reserved state value; no table in this class stores it. */
    public static final int STATE_UNSUPPORTED = 254;

    // EII child states (shared by the DII and EII tables)
    public static final int EII_NO_AIIS_INDEX_SMALL = 0;
    public static final int EII_AIIS_INDEX_SMALL = 1;
    public static final int EII_INDEX_MEDIUM = 2;
    public static final int EII_INDEX_LARGE = 3;
    public static final int EII_NAMESPACES = 4;
    public static final int EII_LITERAL = 5;
    public static final int CII_UTF8_SMALL_LENGTH = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH = 7;
    public static final int CII_UTF8_LARGE_LENGTH = 8;
    public static final int CII_UTF16_SMALL_LENGTH = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH = 10;
    public static final int CII_UTF16_LARGE_LENGTH = 11;
    public static final int CII_RA = 12;
    public static final int CII_EA = 13;
    public static final int CII_INDEX_SMALL = 14;
    public static final int CII_INDEX_MEDIUM = 15;
    public static final int CII_INDEX_LARGE = 16;
    public static final int CII_INDEX_LARGE_LARGE = 17;
    public static final int COMMENT_II = 18;
    public static final int PROCESSING_INSTRUCTION_II = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II = 21;
    public static final int TERMINATOR_SINGLE = 22;
    public static final int TERMINATOR_DOUBLE = 23;

    /** Expanded DII table; filled from {@link #DII_RANGES}. */
    private static final int[] DII = new int[256];

    /** Range description of the DII table. */
    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },

        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },

        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },

        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },

        // %00111000  EII namespaces
        { 0x38, EII_NAMESPACES },

        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },

        // %00111100  EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },

        // %00111101  EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },

        // %00111110  ILLEGAL
        { 0x3E, STATE_ILLEGAL },

        // %00111111  EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },

        // %01100000 to %01100111  EII medium index
        { 0x67, EII_INDEX_MEDIUM },

        // %01101000 to %01101111  EII large index
        // %01110000               EII very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },

        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  EII attributes namespaces
        { 0x78, EII_NAMESPACES },

        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },

        // %01111100  EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },

        // %01111101  EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },

        // %01111110  ILLEGAL
        { 0x7E, STATE_ILLEGAL },

        // %01111111  EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011  ILLEGAL
        { 0xC3, STATE_ILLEGAL },

        // %11000100 to %11000111  document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },

        // %11001000 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },

        // %11100001  processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },

        // %11100010  comment
        { 0xE2, COMMENT_II },

        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    /** Expanded EII table; filled from {@link #EII_RANGES}. */
    private static final int[] EII = new int[256];

    /**
     * Range description of the EII table. The first half (up to 0x7F) is
     * identical to {@link #DII_RANGES}; the upper half adds the CII states.
     */
    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },

        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },

        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },

        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },

        // %00111000  EII namespaces
        { 0x38, EII_NAMESPACES },

        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },

        // %00111100  EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },

        // %00111101  EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },

        // %00111110  ILLEGAL
        { 0x3E, STATE_ILLEGAL },

        // %00111111  EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },

        // %01100000 to %01100111  EII medium index
        { 0x67, EII_INDEX_MEDIUM },

        // %01101000 to %01101111  EII large index
        // %01110000               EII very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },

        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  EII attributes namespaces
        { 0x78, EII_NAMESPACES },

        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },

        // %01111100  EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },

        // %01111101  EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },

        // %01111110  ILLEGAL
        { 0x7E, STATE_ILLEGAL },

        // %01111111  EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001  CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },

        // %10000010  CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },

        // %10000011  CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101  CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },

        // %10000110  CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },

        // %10000111  CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011  CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111  CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001  CII UTF-8 add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },

        // %10010010  CII UTF-8 add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },

        // %10010011  CII UTF-8 add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101  CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },

        // %10010110  CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },

        // %10010111  CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011  CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111  CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111  CII small index
        { 0xAF, CII_INDEX_SMALL },

        // %10110000 to %10110011  CII medium index
        { 0xB3, CII_INDEX_MEDIUM },

        // %10110100 to %10110111  CII large index
        { 0xB7, CII_INDEX_LARGE },

        // %10111000  CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111  ILLEGAL
        { 0xC7, STATE_ILLEGAL },

        // %11001000 to %11001011  unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },

        // %11001100 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },

        // %11100001  processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },

        // %11100010  comment
        { 0xE2, COMMENT_II },

        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };


    // AII states
    public static final int AII_INDEX_SMALL = 0;
    public static final int AII_INDEX_MEDIUM = 1;
    public static final int AII_INDEX_LARGE = 2;
    public static final int AII_LITERAL = 3;
    public static final int AII_TERMINATOR_SINGLE = 4;
    public static final int AII_TERMINATOR_DOUBLE = 5;

    /** Expanded AII table; filled from {@link #AII_RANGES}. */
    private static final int[] AII = new int[256];

    /** Range description of the AII table. */
    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111  AII small index
        { 0x3F, AII_INDEX_SMALL },

        // %01000000 to %01011111  AII medium index
        { 0x5F, AII_INDEX_MEDIUM },

        // %01100000 to %01101111  AII large index
        { 0x6F, AII_INDEX_LARGE },

        // %01110000 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  AII literal (no prefix, no namespace)
        // %01111001  AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },

        // %01111010  ILLEGAL
        { 0x7A, STATE_ILLEGAL },

        // %01111011  AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },

        // %01111100 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE }
    };


    // AII value states
    public static final int NISTRING_UTF8_SMALL_LENGTH = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH = 5;
    public static final int NISTRING_RA = 6;
    public static final int NISTRING_EA = 7;
    public static final int NISTRING_INDEX_SMALL = 8;
    public static final int NISTRING_INDEX_MEDIUM = 9;
    public static final int NISTRING_INDEX_LARGE = 10;
    public static final int NISTRING_EMPTY = 11;

    /** Expanded NISTRING table; filled from {@link #NISTRING_RANGES}. */
    private static final int[] NISTRING = new int[256];

    /** Range description of the NISTRING (AII value) table. */
    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111  UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },

        // %00001000  UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },

        // %00001001 to %00001011  ILLEGAL
        { 0x0B, STATE_ILLEGAL },

        // %00001100  UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },

        // %00001101 to %00001111  ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111  UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },

        // %00011000  UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },

        // %00011001 to %00011011  ILLEGAL
        { 0x1B, STATE_ILLEGAL },

        // %00011100  UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },

        // %00011101 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111  RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111  EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111  UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },

        // %01001000  UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },

        // %01001001 to %01001011  ILLEGAL
        { 0x4B, STATE_ILLEGAL },

        // %01001100  UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },

        // %01001101 to %01001111  ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111  UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },

        // %01011000  UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },

        // %01011001 to %01011011  ILLEGAL
        { 0x5B, STATE_ILLEGAL },

        // %01011100  UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },

        // %01011101 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111  RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111  EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111  index small
        { 0xBF, NISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, NISTRING_INDEX_LARGE },

        // %11110000 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  Empty value
        { 0xFF, NISTRING_EMPTY },
    };


    // Identifying string states
    /* package */ static final int ISTRING_SMALL_LENGTH = 0;
    /* package */ static final int ISTRING_MEDIUM_LENGTH = 1;
    /* package */ static final int ISTRING_LARGE_LENGTH = 2;
    /* package */ static final int ISTRING_INDEX_SMALL = 3;
    /* package */ static final int ISTRING_INDEX_MEDIUM = 4;
    /* package */ static final int ISTRING_INDEX_LARGE = 5;

    /** Expanded ISTRING table; filled from {@link #ISTRING_RANGES}. */
    private static final int[] ISTRING = new int[256];

    /** Range description of the ISTRING (identifying string) table. */
    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000  medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },

        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // %01100000  large length
        { 0x60, ISTRING_LARGE_LENGTH },

        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },

        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };


    // Additional states used only by the prefix/namespace variant of the
    // identifying string table; they continue the ISTRING_* numbering.
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /**
     * Expanded prefix/namespace identifying string table; filled from
     * {@link #ISTRING_PREFIX_NAMESPACE_RANGES}.
     */
    private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    /**
     * Range description of the prefix/namespace identifying string table.
     * It matches {@link #ISTRING_RANGES} except that four individual
     * small-length octets and the octet 0x80 get dedicated states.
     */
    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001  small length
        { 0x01, ISTRING_SMALL_LENGTH },

        // %00000010  small length, dedicated LENGTH_3 state
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },

        // %00000011  small length
        { 0x03, ISTRING_SMALL_LENGTH },

        // %00000100  small length, dedicated LENGTH_5 state
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },

        // %00000101 to %00011011  small length
        { 0x1B, ISTRING_SMALL_LENGTH },

        // %00011100  small length, dedicated LENGTH_29 state
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },

        // %00011101 to %00100010  small length
        { 0x22, ISTRING_SMALL_LENGTH },

        // %00100011  small length, dedicated LENGTH_36 state
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },

        // %00100100 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000  medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },

        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // %01100000  large length
        { 0x60, ISTRING_LARGE_LENGTH },

        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000  index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },

        // %10000001 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },

        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // UTF-8 states
    // NOTE(review): UTF8_NCNAME_NCNAME is assigned to letters and '_',
    // UTF8_NCNAME_NCNAME_CHAR to '-', '.' and digits; presumably "may start
    // an NCName" versus "may only continue one" - confirm against the caller.
    /* package */ static final int UTF8_NCNAME_NCNAME = 0;
    /* package */ static final int UTF8_NCNAME_NCNAME_CHAR = 1;
    /* package */ static final int UTF8_TWO_BYTES = 2;
    /* package */ static final int UTF8_THREE_BYTES = 3;
    /* package */ static final int UTF8_FOUR_BYTES = 4;

    /** Expanded UTF-8 NCName table; filled from {@link #UTF8_NCNAME_RANGES}. */
    private static final int[] UTF8_NCNAME = new int[256];

    /** Range description of the UTF-8 NCName first-octet table. */
    private static final int[][] UTF8_NCNAME_RANGES = {

        // Basic Latin

        // %00000000 to %00101100  ILLEGAL
        { 0x2C, STATE_ILLEGAL },

        // '-' '.'
        // %00101101 to %00101110  [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },

        // %00101111  ILLEGAL
        { 0x2F, STATE_ILLEGAL },

        // [0-9]
        // %00110000 to %00111001  [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },

        // %00111010 to %01000000  ILLEGAL
        { 0x40, STATE_ILLEGAL },

        // [A-Z]
        // %01000001 to %01011010  [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },

        // %01011011 to %01011110  ILLEGAL
        { 0x5E, STATE_ILLEGAL },

        // '_'
        // %01011111  [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },

        // %01100000  ILLEGAL
        { 0x60, STATE_ILLEGAL },

        // [a-z]
        // %01100001 to %01111010  [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },

        // %01111011 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },


        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },

        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },


        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },


        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },


        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    // Only stored in the UTF8 table; it shares the value 1 with
    // UTF8_NCNAME_NCNAME_CHAR, which is only stored in the UTF8_NCNAME table.
    /* package */ static final int UTF8_ONE_BYTE = 1;

    /** Expanded UTF-8 table; filled from {@link #UTF8_RANGES}. */
    private static final int[] UTF8 = new int[256];

    /** Range description of the UTF-8 first-octet table. */
    private static final int[][] UTF8_RANGES = {

        // Basic Latin

        // %00000000 to %00001000  ILLEGAL
        { 0x08, STATE_ILLEGAL },

        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010  [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },

        // %00001011 to %00001100  ILLEGAL
        { 0x0C, STATE_ILLEGAL },

        // CARRIAGE RETURN
        // %00001101  [#x000D]
        { 0x0D, UTF8_ONE_BYTE },

        // %00001110 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },


        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },

        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },


        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },


        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },


        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Expands a list of {@code { end, state }} pairs into a flat lookup table.
     *
     * <p>The pairs must be in ascending order of {@code end} and the last
     * pair must end at {@code table.length - 1}. A table left partly
     * unfilled would read as state 0 for the missing octets, so a malformed
     * description is rejected rather than silently accepted.
     *
     * @param table  the array to fill; every element is overwritten
     * @param ranges the {@code { end, state }} pairs describing the table
     * @throws IllegalArgumentException if a range end is below the start of
     *         its range or beyond the table, or if the ranges do not cover
     *         the whole table
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (int range = 0; range < ranges.length; range++) {
            final int end = ranges[range][RANGE_INDEX_END];
            final int value = ranges[range][RANGE_INDEX_VALUE];
            if (end < start || end >= table.length) {
                throw new IllegalArgumentException(
                        "Range " + range + " ends at " + end
                        + " but must lie between " + start
                        + " and " + (table.length - 1));
            }
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            start = end + 1;
        }
        if (start != table.length) {
            throw new IllegalArgumentException(
                    "Ranges cover only " + start + " of "
                    + table.length + " table entries");
        }
    }

    /**
     * Looks up the DII state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the EII child states, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int DII(final int index) {
        return DII[index];
    }

    /**
     * Looks up the EII state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the EII child states, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int EII(final int index) {
        return EII[index];
    }

    /**
     * Looks up the AII state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the {@code AII_*} states, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int AII(final int index) {
        return AII[index];
    }

    /**
     * Looks up the NISTRING (AII value) state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the {@code NISTRING_*} states, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int NISTRING(final int index) {
        return NISTRING[index];
    }

    /**
     * Looks up the ISTRING (identifying string) state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the {@code ISTRING_*} states, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING(final int index) {
        return ISTRING[index];
    }

    /**
     * Looks up the prefix/namespace identifying string state for an octet.
     *
     * @param index the octet value, 0..255
     * @return one of the {@code ISTRING_*} or
     *         {@code ISTRING_PREFIX_NAMESPACE_*} states, or
     *         {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING_PREFIX_NAMESPACE(final int index) {
        return ISTRING_PREFIX_NAMESPACE[index];
    }

    /**
     * Looks up the UTF-8 state for an octet.
     *
     * @param index the octet value, 0..255
     * @return {@code UTF8_ONE_BYTE}, {@code UTF8_TWO_BYTES},
     *         {@code UTF8_THREE_BYTES}, {@code UTF8_FOUR_BYTES} or
     *         {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8(final int index) {
        return UTF8[index];
    }

    /**
     * Looks up the UTF-8 NCName state for an octet.
     *
     * @param index the octet value, 0..255
     * @return {@code UTF8_NCNAME_NCNAME}, {@code UTF8_NCNAME_NCNAME_CHAR},
     *         {@code UTF8_TWO_BYTES}, {@code UTF8_THREE_BYTES},
     *         {@code UTF8_FOUR_BYTES} or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8_NCNAME(final int index) {
        return UTF8_NCNAME[index];
    }

    // Expand every range description once, at class initialization.
    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII Value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string, prefix/namespace variant
        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: this class only holds static tables and accessors. */
    private DecoderStateTables() {
    }
}