src/share/jaxws_classes/com/sun/xml/internal/fastinfoset/DecoderStateTables.java

changeset 0
373ffda63c9a
equal deleted inserted replaced
-1:000000000000 0:373ffda63c9a
1 /*
2 * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
26 */
27
28 package com.sun.xml.internal.fastinfoset;
29
/**
 * Static lookup tables that classify a single octet (0..255) into a decoder
 * state.
 *
 * <p>Each table is an {@code int[256]} that is filled once, in the static
 * initializer at the bottom of this class, from a companion {@code *_RANGES}
 * list. A range list is a sequence of {@code { inclusiveEnd, state }} pairs in
 * ascending order; every pair implicitly starts one past the end of the
 * previous pair (the first pair starts at 0). The binary patterns in the
 * comments (written {@code %bbbbbbbb}) spell out the octets each pair covers.
 *
 * <p>The class cannot be instantiated; all members are static.
 */
public final class DecoderStateTables {
    /** Position of the inclusive range end within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_END = 0;
    /** Position of the state value within a {@code { end, state }} pair. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State stored for octets that no table entry accepts. */
    public static final int STATE_ILLEGAL = 255;
    /** Reserved state value; none of the tables in this class assigns it. */
    public static final int STATE_UNSUPPORTED = 254;

    // EII child states (used by both the DII and the EII tables)
    public static final int EII_NO_AIIS_INDEX_SMALL = 0;
    public static final int EII_AIIS_INDEX_SMALL = 1;
    public static final int EII_INDEX_MEDIUM = 2;
    public static final int EII_INDEX_LARGE = 3;
    public static final int EII_NAMESPACES = 4;
    public static final int EII_LITERAL = 5;
    public static final int CII_UTF8_SMALL_LENGTH = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH = 7;
    public static final int CII_UTF8_LARGE_LENGTH = 8;
    public static final int CII_UTF16_SMALL_LENGTH = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH = 10;
    public static final int CII_UTF16_LARGE_LENGTH = 11;
    public static final int CII_RA = 12;
    public static final int CII_EA = 13;
    public static final int CII_INDEX_SMALL = 14;
    public static final int CII_INDEX_MEDIUM = 15;
    public static final int CII_INDEX_LARGE = 16;
    public static final int CII_INDEX_LARGE_LARGE = 17;
    public static final int COMMENT_II = 18;
    public static final int PROCESSING_INSTRUCTION_II = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II = 21;
    public static final int TERMINATOR_SINGLE = 22;
    public static final int TERMINATOR_DOUBLE = 23;

    /** State table returned by {@link #DII(int)}; filled from {@link #DII_RANGES}. */
    private static final int[] DII = new int[256];

    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111 EII large index
        // %00110000 EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111 EII large index
        // %01110000 EII very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011 ILLEGAL
        { 0xC3, STATE_ILLEGAL },
        // %11000100 to %11000111 document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },
        // %11001000 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    /** State table returned by {@link #EII(int)}; filled from {@link #EII_RANGES}. */
    private static final int[] EII = new int[256];

    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111 EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
        // %00100000 to %00100111 EII medium index
        { 0x27, EII_INDEX_MEDIUM },
        // %00101000 to %00101111 EII large index
        // %00110000 EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },
        // %00110001 to %00110111 ILLEGAL
        { 0x37, STATE_ILLEGAL },
        // %00111000 EII namespaces
        { 0x38, EII_NAMESPACES },
        // %00111001 to %00111011 ILLEGAL
        { 0x3B, STATE_ILLEGAL },
        // %00111100 EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },
        // %00111101 EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },
        // %00111110 ILLEGAL
        { 0x3E, STATE_ILLEGAL },
        // %00111111 EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111 EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },
        // %01100000 to %01100111 EII medium index
        { 0x67, EII_INDEX_MEDIUM },
        // %01101000 to %01101111 EII large index
        // %01110000 EII very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },
        // %01110001 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 EII attributes namespaces
        { 0x78, EII_NAMESPACES },
        // %01111001 to %01111011 ILLEGAL
        { 0x7B, STATE_ILLEGAL },
        // %01111100 EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },
        // %01111101 EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },
        // %01111110 ILLEGAL
        { 0x7E, STATE_ILLEGAL },
        // %01111111 EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001 CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },
        // %10000010 CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },
        // %10000011 CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101 CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },
        // %10000110 CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },
        // %10000111 CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011 CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111 CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001 CII add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },
        // %10010010 CII add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },
        // %10010011 CII add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101 CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },
        // %10010110 CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },
        // %10010111 CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011 CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111 CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111 CII small index
        { 0xAF, CII_INDEX_SMALL },
        // %10110000 to %10110011 CII medium index
        { 0xB3, CII_INDEX_MEDIUM },
        // %10110100 to %10110111 CII large index
        { 0xB7, CII_INDEX_LARGE },
        // %10111000 CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111 ILLEGAL
        { 0xC7, STATE_ILLEGAL },
        // %11001000 to %11001011 unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },
        // %11001100 to %11100000 ILLEGAL
        { 0xE0, STATE_ILLEGAL },
        // %11100001 processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },
        // %11100010 comment
        { 0xE2, COMMENT_II },
        // %11100011 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };


    // AII states
    public static final int AII_INDEX_SMALL = 0;
    public static final int AII_INDEX_MEDIUM = 1;
    public static final int AII_INDEX_LARGE = 2;
    public static final int AII_LITERAL = 3;
    public static final int AII_TERMINATOR_SINGLE = 4;
    public static final int AII_TERMINATOR_DOUBLE = 5;

    /** State table returned by {@link #AII(int)}; filled from {@link #AII_RANGES}. */
    private static final int[] AII = new int[256];

    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111 AII small index
        { 0x3F, AII_INDEX_SMALL },
        // %01000000 to %01011111 AII medium index
        { 0x5F, AII_INDEX_MEDIUM },
        // %01100000 to %01101111 AII large index
        { 0x6F, AII_INDEX_LARGE },
        // %01110000 to %01110111 ILLEGAL
        { 0x77, STATE_ILLEGAL },
        // %01111000 AII literal (no prefix, no namespace)
        // %01111001 AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },
        // %01111010 ILLEGAL
        { 0x7A, STATE_ILLEGAL },
        // %01111011 AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },
        // %01111100 to %11101111 ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000 single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },
        // %11110001 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE }
    };


    // AII value states
    public static final int NISTRING_UTF8_SMALL_LENGTH = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH = 5;
    public static final int NISTRING_RA = 6;
    public static final int NISTRING_EA = 7;
    public static final int NISTRING_INDEX_SMALL = 8;
    public static final int NISTRING_INDEX_MEDIUM = 9;
    public static final int NISTRING_INDEX_LARGE = 10;
    public static final int NISTRING_EMPTY = 11;

    /** State table returned by {@link #NISTRING(int)}; filled from {@link #NISTRING_RANGES}. */
    private static final int[] NISTRING = new int[256];

    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111 UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },
        // %00001000 UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },
        // %00001001 to %00001011 ILLEGAL
        { 0x0B, STATE_ILLEGAL },
        // %00001100 UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },
        // %00001101 to %00001111 ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111 UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },
        // %00011000 UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },
        // %00011001 to %00011011 ILLEGAL
        { 0x1B, STATE_ILLEGAL },
        // %00011100 UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },
        // %00011101 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111 RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111 EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111 UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },
        // %01001000 UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },
        // %01001001 to %01001011 ILLEGAL
        { 0x4B, STATE_ILLEGAL },
        // %01001100 UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },
        // %01001101 to %01001111 ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111 UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },
        // %01011000 UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },
        // %01011001 to %01011011 ILLEGAL
        { 0x5B, STATE_ILLEGAL },
        // %01011100 UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },
        // %01011101 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111 RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111 EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111 index small
        { 0xBF, NISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, NISTRING_INDEX_LARGE },
        // %11110000 to %11111110 ILLEGAL
        { 0xFE, STATE_ILLEGAL },
        // %11111111 Empty value
        { 0xFF, NISTRING_EMPTY },
    };


    // Identifying string states
    /* package */ static final int ISTRING_SMALL_LENGTH = 0;
    /* package */ static final int ISTRING_MEDIUM_LENGTH = 1;
    /* package */ static final int ISTRING_LARGE_LENGTH = 2;
    /* package */ static final int ISTRING_INDEX_SMALL = 3;
    /* package */ static final int ISTRING_INDEX_MEDIUM = 4;
    /* package */ static final int ISTRING_INDEX_LARGE = 5;

    /** State table returned by {@link #ISTRING(int)}; filled from {@link #ISTRING_RANGES}. */
    private static final int[] ISTRING = new int[256];

    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },
        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },
        // %10000000 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };


    // Extra states for the prefix/namespace table; the values continue the
    // ISTRING_* numbering because that table stores both sets of states.
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /**
     * State table returned by {@link #ISTRING_PREFIX_NAMESPACE(int)}; filled
     * from {@link #ISTRING_PREFIX_NAMESPACE_RANGES}. Same layout as
     * {@link #ISTRING} except that five individual octets get dedicated states.
     */
    private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001 small length
        { 0x01, ISTRING_SMALL_LENGTH },
        // %00000010 small length, dedicated state
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },
        // %00000011 small length
        { 0x03, ISTRING_SMALL_LENGTH },
        // %00000100 small length, dedicated state
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },
        // %00000101 to %00011011 small length
        { 0x1B, ISTRING_SMALL_LENGTH },
        // %00011100 small length, dedicated state
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },
        // %00011101 to %00100010 small length
        { 0x22, ISTRING_SMALL_LENGTH },
        // %00100011 small length, dedicated state
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },
        // %00100100 to %00111111 small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000 medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },
        // %01000001 to %01011111 ILLEGAL
        { 0x5F, STATE_ILLEGAL },
        // %01100000 large length
        { 0x60, ISTRING_LARGE_LENGTH },
        // %01100001 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000 index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },
        // %10000001 to %10111111 index small
        { 0xBF, ISTRING_INDEX_SMALL },
        // %11000000 to %11011111 index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },
        // %11100000 to %11101111 index large
        { 0xEF, ISTRING_INDEX_LARGE },
        // %11110000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // UTF-8 states
    /* package */ static final int UTF8_NCNAME_NCNAME = 0;
    /* package */ static final int UTF8_NCNAME_NCNAME_CHAR = 1;
    /* package */ static final int UTF8_TWO_BYTES = 2;
    /* package */ static final int UTF8_THREE_BYTES = 3;
    /* package */ static final int UTF8_FOUR_BYTES = 4;

    /** State table returned by {@link #UTF8_NCNAME(int)}; filled from {@link #UTF8_NCNAME_RANGES}. */
    private static final int[] UTF8_NCNAME = new int[256];

    private static final int[][] UTF8_NCNAME_RANGES = {

        // Basic Latin

        // %00000000 to %00101100 ILLEGAL
        { 0x2C, STATE_ILLEGAL },
        // '-' '.'
        // %00101101 to %00101110 [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },
        // %00101111 ILLEGAL
        { 0x2F, STATE_ILLEGAL },
        // [0-9]
        // %00110000 to %00111001 [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },
        // %00111010 to %01000000 ILLEGAL
        { 0x40, STATE_ILLEGAL },
        // [A-Z]
        // %01000001 to %01011010 [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },
        // %01011011 to %01011110 ILLEGAL
        { 0x5E, STATE_ILLEGAL },
        // '_'
        // %01011111 [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },
        // %01100000 ILLEGAL
        { 0x60, STATE_ILLEGAL },
        // [a-z]
        // %01100001 to %01111010 [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },
        // %01111011 to %01111111 ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    // Shares the value 1 with UTF8_NCNAME_NCNAME_CHAR; the UTF8 table reuses
    // UTF8_TWO_BYTES, UTF8_THREE_BYTES and UTF8_FOUR_BYTES for longer sequences.
    /* package */ static final int UTF8_ONE_BYTE = 1;

    /** State table returned by {@link #UTF8(int)}; filled from {@link #UTF8_RANGES}. */
    private static final int[] UTF8 = new int[256];

    private static final int[][] UTF8_RANGES = {

        // Basic Latin

        // %00000000 to %00001000 ILLEGAL
        { 0x08, STATE_ILLEGAL },
        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010 [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },
        // %00001011 to %00001100 ILLEGAL
        { 0x0C, STATE_ILLEGAL },
        // CARRIAGE RETURN
        // %00001101 [#x000D]
        { 0x0D, UTF8_ONE_BYTE },
        // %00001110 to %00011111 ILLEGAL
        { 0x1F, STATE_ILLEGAL },
        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },

        // Two bytes

        // %10000000 to %11000001 ILLEGAL
        { 0xC1, STATE_ILLEGAL },
        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },

        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },

        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },

        // %11111000 to %11111111 ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Expands a list of {@code { inclusiveEnd, state }} pairs into a flat
     * lookup table.
     *
     * <p>The first pair covers indices {@code 0..end}; every later pair covers
     * the indices from one past the previous end up to its own end. The pairs
     * must therefore be in strictly ascending order and the last one must end
     * at {@code table.length - 1}. This is verified because an unfilled slot
     * would silently keep the array default {@code 0}, which is a valid state
     * in every table of this class.
     *
     * @param table  the table to fill; every element is overwritten
     * @param ranges the {@code { inclusiveEnd, state }} pairs describing it
     * @throws IllegalArgumentException if a range ends before it starts, ends
     *         outside the table, or the ranges do not cover the whole table
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (int[] range : ranges) {
            final int end = range[RANGE_INDEX_END];
            final int value = range[RANGE_INDEX_VALUE];
            if (end < start || end >= table.length) {
                throw new IllegalArgumentException(
                        "Range end " + end + " is not within " + start + ".." + (table.length - 1));
            }
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            start = end + 1;
        }
        if (start != table.length) {
            throw new IllegalArgumentException(
                    "Ranges cover " + start + " entries of a table of " + table.length);
        }
    }

    /**
     * Looks up the state stored for an octet in the DII table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the EII child state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int DII(final int index) {
        return DII[index];
    }

    /**
     * Looks up the state stored for an octet in the EII table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the EII child state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int EII(final int index) {
        return EII[index];
    }

    /**
     * Looks up the state stored for an octet in the AII table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the {@code AII_*} constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int AII(final int index) {
        return AII[index];
    }

    /**
     * Looks up the state stored for an octet in the NISTRING (AII value) table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the {@code NISTRING_*} constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int NISTRING(final int index) {
        return NISTRING[index];
    }

    /**
     * Looks up the state stored for an octet in the ISTRING table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the {@code ISTRING_*} constants (values 0 to 5) or
     *         {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING(final int index) {
        return ISTRING[index];
    }

    /**
     * Looks up the state stored for an octet in the prefix/namespace variant
     * of the ISTRING table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return one of the {@code ISTRING_*} or {@code ISTRING_PREFIX_NAMESPACE_*}
     *         constants, or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int ISTRING_PREFIX_NAMESPACE(final int index) {
        return ISTRING_PREFIX_NAMESPACE[index];
    }

    /**
     * Looks up the state stored for an octet in the UTF8 table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return {@code UTF8_ONE_BYTE}, {@code UTF8_TWO_BYTES},
     *         {@code UTF8_THREE_BYTES}, {@code UTF8_FOUR_BYTES} or
     *         {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8(final int index) {
        return UTF8[index];
    }

    /**
     * Looks up the state stored for an octet in the UTF8_NCNAME table.
     *
     * @param index octet value, 0 to 255 inclusive
     * @return {@code UTF8_NCNAME_NCNAME}, {@code UTF8_NCNAME_NCNAME_CHAR},
     *         {@code UTF8_TWO_BYTES}, {@code UTF8_THREE_BYTES},
     *         {@code UTF8_FOUR_BYTES} or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
     */
    public static final int UTF8_NCNAME(final int index) {
        return UTF8_NCNAME[index];
    }

    // Must stay below every table and range declaration: static initializers
    // run in textual order, and the arrays have to exist before they are filled.
    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII Value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string, prefix/namespace variant
        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: the class only holds static tables. */
    private DecoderStateTables() {
    }
}

mercurial