src/share/jaxws_classes/com/sun/xml/internal/fastinfoset/DecoderStateTables.java

Thu, 12 Oct 2017 19:44:07 +0800

author
aoqi
date
Thu, 12 Oct 2017 19:44:07 +0800
changeset 760
e530533619ec
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

     1 /*
     2  * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     4  *
     5  * This code is free software; you can redistribute it and/or modify it
     6  * under the terms of the GNU General Public License version 2 only, as
     7  * published by the Free Software Foundation.  Oracle designates this
     8  * particular file as subject to the "Classpath" exception as provided
     9  * by Oracle in the LICENSE file that accompanied this code.
    10  *
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    14  * version 2 for more details (a copy is included in the LICENSE file that
    15  * accompanied this code).
    16  *
    17  * You should have received a copy of the GNU General Public License version
    18  * 2 along with this work; if not, write to the Free Software Foundation,
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    20  *
    21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    22  * or visit www.oracle.com if you need additional information or have any
    23  * questions.
    24  *
    25  * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
    26  */
    28 package com.sun.xml.internal.fastinfoset;
    /**
     * Static lookup tables that classify one octet (0x00-0xFF) of an encoded
     * Fast Infoset stream into a decoder state constant.
     *
     * <p>Every table has exactly 256 entries, one per possible octet value. The
     * tables are filled once, in the static initializer at the end of this class,
     * from compact {@code *_RANGES} descriptions. Each range entry is a pair
     * {@code { inclusiveEnd, state }}; a range implicitly starts one past the end
     * of the previous entry (the first range starts at 0x00).
     *
     * <p>Octets that do not map to a meaningful state are assigned
     * {@link #STATE_ILLEGAL}. State constants are only unique within the table(s)
     * that use them; different tables reuse the same small integers.
     *
     * <p>This class only exposes static members and cannot be instantiated.
     */
    public final class DecoderStateTables {
        /** Position of the inclusive upper bound inside a range pair. */
        private static final int RANGE_INDEX_END      = 0;
        /** Position of the state value inside a range pair. */
        private static final int RANGE_INDEX_VALUE    = 1;

        /** State stored for octets that are not valid in the given context. */
        public static final int STATE_ILLEGAL                   = 255;
        /** Reserved state value; no table in this class assigns it. */
        public static final int STATE_UNSUPPORTED               = 254;

        // EII child states (used by the DII and EII tables)
        public static final int EII_NO_AIIS_INDEX_SMALL         = 0;
        public static final int EII_AIIS_INDEX_SMALL            = 1;
        public static final int EII_INDEX_MEDIUM                = 2;
        public static final int EII_INDEX_LARGE                 = 3;
        public static final int EII_NAMESPACES                  = 4;
        public static final int EII_LITERAL                     = 5;
        public static final int CII_UTF8_SMALL_LENGTH           = 6;
        public static final int CII_UTF8_MEDIUM_LENGTH          = 7;
        public static final int CII_UTF8_LARGE_LENGTH           = 8;
        public static final int CII_UTF16_SMALL_LENGTH          = 9;
        public static final int CII_UTF16_MEDIUM_LENGTH         = 10;
        public static final int CII_UTF16_LARGE_LENGTH          = 11;
        public static final int CII_RA                          = 12;
        public static final int CII_EA                          = 13;
        public static final int CII_INDEX_SMALL                 = 14;
        public static final int CII_INDEX_MEDIUM                = 15;
        public static final int CII_INDEX_LARGE                 = 16;
        public static final int CII_INDEX_LARGE_LARGE           = 17;
        public static final int COMMENT_II                      = 18;
        public static final int PROCESSING_INSTRUCTION_II       = 19;
        public static final int DOCUMENT_TYPE_DECLARATION_II    = 20;
        public static final int UNEXPANDED_ENTITY_REFERENCE_II  = 21;
        public static final int TERMINATOR_SINGLE               = 22;
        public static final int TERMINATOR_DOUBLE               = 23;

        /** Octet -> state table built from {@link #DII_RANGES}. */
        private static final int[] DII = new int[256];

        /**
         * Ranges for the DII table. The 0x00-0x7F half is identical to
         * {@link #EII_RANGES}; in the upper half only the document type
         * declaration, processing instruction, comment and terminator octets
         * are legal (no character content states).
         */
        private static final int[][] DII_RANGES = {
            // EII

            // %00000000 to %00011111  EII no attributes small index
            { 0x1F, EII_NO_AIIS_INDEX_SMALL },

            // %00100000 to %00100111  EII medium index
            { 0x27, EII_INDEX_MEDIUM },

            // %00101000 to %00101111  EII large index
            // %00110000  EII very large index
            // %00101000 to %00110000
            { 0x30, EII_INDEX_LARGE },

            // %00110001 to %00110111  ILLEGAL
            { 0x37, STATE_ILLEGAL },

            // %00111000  EII namespaces
            { 0x38, EII_NAMESPACES },

            // %00111001 to %00111011  ILLEGAL
            { 0x3B, STATE_ILLEGAL },

            // %00111100  EII literal (no prefix, no namespace)
            { 0x3C, EII_LITERAL },

            // %00111101  EII literal (no prefix, namespace)
            { 0x3D, EII_LITERAL },

            // %00111110  ILLEGAL
            { 0x3E, STATE_ILLEGAL },

            // %00111111  EII literal (prefix, namespace)
            { 0x3F, EII_LITERAL },

            // %01000000 to %01011111  EII attributes small index
            { 0x5F, EII_AIIS_INDEX_SMALL },

            // %01100000 to %01100111  EII medium index
            { 0x67, EII_INDEX_MEDIUM },

            // %01101000 to %01101111  EII large index
            // %01110000  EII very large index
            // %01101000 to %01110000
            { 0x70, EII_INDEX_LARGE },

            // %01110001 to %01110111  ILLEGAL
            { 0x77, STATE_ILLEGAL },

            // %01111000  EII attributes namespaces
            { 0x78, EII_NAMESPACES },

            // %01111001 to %01111011  ILLEGAL
            { 0x7B, STATE_ILLEGAL },

            // %01111100  EII attributes literal (no prefix, no namespace)
            { 0x7C, EII_LITERAL },

            // %01111101  EII attributes literal (no prefix, namespace)
            { 0x7D, EII_LITERAL },

            // %01111110  ILLEGAL
            { 0x7E, STATE_ILLEGAL },

            // %01111111  EII attributes literal (prefix, namespace)
            { 0x7F, EII_LITERAL },

            // %10000000 to %11000011  ILLEGAL
            { 0xC3, STATE_ILLEGAL },

            // %11000100 to %11000111  document type declaration
            { 0xC7, DOCUMENT_TYPE_DECLARATION_II },

            // %11001000 to %11100000  ILLEGAL
            { 0xE0, STATE_ILLEGAL },

            // %11100001 processing instruction
            { 0xE1, PROCESSING_INSTRUCTION_II },

            // %11100010 comment
            { 0xE2, COMMENT_II},

            // %11100011 to %11101111  ILLEGAL
            { 0xEF, STATE_ILLEGAL },

            // Terminators

            // %11110000  single terminator
            { 0xF0, TERMINATOR_SINGLE },

            // %11110001 to %11111110 ILLEGAL
            { 0xFE, STATE_ILLEGAL },

            // %11111111  double terminator
            { 0xFF, TERMINATOR_DOUBLE }
        };

        /** Octet -> state table built from {@link #EII_RANGES}. */
        private static final int[] EII = new int[256];

        /**
         * Ranges for the EII table: element identifiers in 0x00-0x7F, character
         * content (CII) identifiers in 0x80-0xB8, followed by unexpanded entity
         * reference, processing instruction, comment and terminator octets.
         */
        private static final int[][] EII_RANGES = {
            // EII

            // %00000000 to %00011111  EII no attributes small index
            { 0x1F, EII_NO_AIIS_INDEX_SMALL },

            // %00100000 to %00100111  EII medium index
            { 0x27, EII_INDEX_MEDIUM },

            // %00101000 to %00101111  EII large index
            // %00110000  EII very large index
            // %00101000 to %00110000
            { 0x30, EII_INDEX_LARGE },

            // %00110001 to %00110111  ILLEGAL
            { 0x37, STATE_ILLEGAL },

            // %00111000  EII namespaces
            { 0x38, EII_NAMESPACES },

            // %00111001 to %00111011  ILLEGAL
            { 0x3B, STATE_ILLEGAL },

            // %00111100  EII literal (no prefix, no namespace)
            { 0x3C, EII_LITERAL },

            // %00111101  EII literal (no prefix, namespace)
            { 0x3D, EII_LITERAL },

            // %00111110  ILLEGAL
            { 0x3E, STATE_ILLEGAL },

            // %00111111  EII literal (prefix, namespace)
            { 0x3F, EII_LITERAL },

            // %01000000 to %01011111  EII attributes small index
            { 0x5F, EII_AIIS_INDEX_SMALL },

            // %01100000 to %01100111  EII medium index
            { 0x67, EII_INDEX_MEDIUM },

            // %01101000 to %01101111  EII large index
            // %01110000  EII very large index
            // %01101000 to %01110000
            { 0x70, EII_INDEX_LARGE },

            // %01110001 to %01110111  ILLEGAL
            { 0x77, STATE_ILLEGAL },

            // %01111000  EII attributes namespaces
            { 0x78, EII_NAMESPACES },

            // %01111001 to %01111011  ILLEGAL
            { 0x7B, STATE_ILLEGAL },

            // %01111100  EII attributes literal (no prefix, no namespace)
            { 0x7C, EII_LITERAL },

            // %01111101  EII attributes literal (no prefix, namespace)
            { 0x7D, EII_LITERAL },

            // %01111110  ILLEGAL
            { 0x7E, STATE_ILLEGAL },

            // %01111111  EII attributes literal (prefix, namespace)
            { 0x7F, EII_LITERAL },

            // CII

            // UTF-8 string

            // %10000000 to %10000001  CII UTF-8 no add to table small length
            { 0x81, CII_UTF8_SMALL_LENGTH },

            // %10000010  CII UTF-8 no add to table medium length
            { 0x82, CII_UTF8_MEDIUM_LENGTH },

            // %10000011  CII UTF-8 no add to table large length
            { 0x83, CII_UTF8_LARGE_LENGTH },

            // UTF-16 string

            // %10000100 to %10000101  CII UTF-16 no add to table small length
            { 0x85, CII_UTF16_SMALL_LENGTH },

            // %10000110  CII UTF-16 no add to table medium length
            { 0x86, CII_UTF16_MEDIUM_LENGTH },

            // %10000111  CII UTF-16 no add to table large length
            { 0x87, CII_UTF16_LARGE_LENGTH },

            // Restricted alphabet

            // %10001000 to %10001011  CII RA no add to table
            { 0x8B, CII_RA },

            // Encoding algorithm

            // %10001100 to %10001111  CII EA no add to table
            { 0x8F, CII_EA },

            // UTF-8 string, add to table

            // %10010000 to %10010001  CII UTF-8 add to table small length
            { 0x91, CII_UTF8_SMALL_LENGTH },

            // %10010010  CII UTF-8 add to table medium length
            { 0x92, CII_UTF8_MEDIUM_LENGTH },

            // %10010011  CII UTF-8 add to table large length
            { 0x93, CII_UTF8_LARGE_LENGTH },

            // UTF-16 string, add to table

            // %10010100 to %10010101  CII UTF-16 add to table small length
            { 0x95, CII_UTF16_SMALL_LENGTH },

            // %10010110  CII UTF-16 add to table medium length
            { 0x96, CII_UTF16_MEDIUM_LENGTH },

            // %10010111  CII UTF-16 add to table large length
            { 0x97, CII_UTF16_LARGE_LENGTH },

            // Restricted alphabet, add to table

            // %10011000 to %10011011  CII RA add to table
            { 0x9B, CII_RA },

            // Encoding algorithm, add to table

            // %10011100 to %10011111  CII EA add to table
            { 0x9F, CII_EA },

            // Index

            // %10100000 to %10101111  CII small index
            { 0xAF, CII_INDEX_SMALL },

            // %10110000 to %10110011  CII medium index
            { 0xB3, CII_INDEX_MEDIUM },

            // %10110100 to %10110111  CII large index
            { 0xB7, CII_INDEX_LARGE },

            // %10111000  CII very large index
            { 0xB8, CII_INDEX_LARGE_LARGE },

            // %10111001 to %11000111  ILLEGAL
            { 0xC7, STATE_ILLEGAL },

            // %11001000 to %11001011  unexpanded entity reference
            { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },

            // %11001100 to %11100000  ILLEGAL
            { 0xE0, STATE_ILLEGAL },

            // %11100001 processing instruction
            { 0xE1, PROCESSING_INSTRUCTION_II },

            // %11100010 comment
            { 0xE2, COMMENT_II},

            // %11100011 to %11101111  ILLEGAL
            { 0xEF, STATE_ILLEGAL },

            // Terminators

            // %11110000  single terminator
            { 0xF0, TERMINATOR_SINGLE },

            // %11110001 to %11111110 ILLEGAL
            { 0xFE, STATE_ILLEGAL },

            // %11111111  double terminator
            { 0xFF, TERMINATOR_DOUBLE }
        };


        // AII states
        public static final int AII_INDEX_SMALL                 = 0;
        public static final int AII_INDEX_MEDIUM                = 1;
        public static final int AII_INDEX_LARGE                 = 2;
        public static final int AII_LITERAL                     = 3;
        public static final int AII_TERMINATOR_SINGLE           = 4;
        public static final int AII_TERMINATOR_DOUBLE           = 5;

        /** Octet -> state table built from {@link #AII_RANGES}. */
        private static final int[] AII = new int[256];

        /** Ranges for the AII table (attribute identifiers and terminators). */
        private static final int[][] AII_RANGES = {
            // %00000000 to %00111111  AII small index
            { 0x3F, AII_INDEX_SMALL },

            // %01000000 to %01011111  AII medium index
            { 0x5F, AII_INDEX_MEDIUM },

            // %01100000 to %01101111  AII large index
            { 0x6F, AII_INDEX_LARGE },

            // %01110000 to %01110111  ILLEGAL
            { 0x77, STATE_ILLEGAL },

            // %01111000  AII literal (no prefix, no namespace)
            // %01111001  AII literal (no prefix, namespace)
            { 0x79, AII_LITERAL },

            // %01111010  ILLEGAL
            { 0x7A, STATE_ILLEGAL },

            // %01111011  AII literal (prefix, namespace)
            { 0x7B, AII_LITERAL },

            // %01111100 to %11101111  ILLEGAL
            { 0xEF, STATE_ILLEGAL },

            // Terminators

            // %11110000  single terminator
            { 0xF0, AII_TERMINATOR_SINGLE },

            // %11110001 to %11111110 ILLEGAL
            { 0xFE, STATE_ILLEGAL },

            // %11111111  double terminator
            { 0xFF, AII_TERMINATOR_DOUBLE }
        };


        // AII value states
        public static final int NISTRING_UTF8_SMALL_LENGTH     = 0;
        public static final int NISTRING_UTF8_MEDIUM_LENGTH    = 1;
        public static final int NISTRING_UTF8_LARGE_LENGTH     = 2;
        public static final int NISTRING_UTF16_SMALL_LENGTH    = 3;
        public static final int NISTRING_UTF16_MEDIUM_LENGTH   = 4;
        public static final int NISTRING_UTF16_LARGE_LENGTH    = 5;
        public static final int NISTRING_RA                    = 6;
        public static final int NISTRING_EA                    = 7;
        public static final int NISTRING_INDEX_SMALL           = 8;
        public static final int NISTRING_INDEX_MEDIUM          = 9;
        public static final int NISTRING_INDEX_LARGE           = 10;
        public static final int NISTRING_EMPTY                 = 11;

        /** Octet -> state table built from {@link #NISTRING_RANGES}. */
        private static final int[] NISTRING = new int[256];

        /**
         * Ranges for the NISTRING (attribute value) table. The 0x40-0x7F block
         * ("add to table") maps to the same states as the 0x00-0x3F block.
         */
        private static final int[][] NISTRING_RANGES = {
            // UTF-8 string

            // %00000000 to %00000111  UTF-8 no add to table small length
            { 0x07, NISTRING_UTF8_SMALL_LENGTH },

            // %00001000  UTF-8 no add to table medium length
            { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },

            // %00001001 to %00001011 ILLEGAL
            { 0x0B, STATE_ILLEGAL },

            // %00001100  UTF-8 no add to table large length
            { 0x0C, NISTRING_UTF8_LARGE_LENGTH },

            // %00001101 to %00001111 ILLEGAL
            { 0x0F, STATE_ILLEGAL },

            // UTF-16 string

            // %00010000 to %00010111  UTF-16 no add to table small length
            { 0x17, NISTRING_UTF16_SMALL_LENGTH },

            // %00011000  UTF-16 no add to table medium length
            { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },

            // %00011001 to %00011011 ILLEGAL
            { 0x1B, STATE_ILLEGAL },

            // %00011100  UTF-16 no add to table large length
            { 0x1C, NISTRING_UTF16_LARGE_LENGTH },

            // %00011101 to %00011111 ILLEGAL
            { 0x1F, STATE_ILLEGAL },

            // Restricted alphabet

            // %00100000 to %00101111  RA no add to table
            { 0x2F, NISTRING_RA },

            // Encoding algorithm

            // %00110000 to %00111111  EA no add to table
            { 0x3F, NISTRING_EA },

            // UTF-8 string, add to table

            // %01000000 to %01000111  UTF-8 add to table small length
            { 0x47, NISTRING_UTF8_SMALL_LENGTH },

            // %01001000  UTF-8 add to table medium length
            { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },

            // %01001001 to %01001011 ILLEGAL
            { 0x4B, STATE_ILLEGAL },

            // %01001100  UTF-8 add to table large length
            { 0x4C, NISTRING_UTF8_LARGE_LENGTH },

            // %01001101 to %01001111 ILLEGAL
            { 0x4F, STATE_ILLEGAL },

            // UTF-16 string, add to table

            // %01010000 to %01010111  UTF-16 add to table small length
            { 0x57, NISTRING_UTF16_SMALL_LENGTH },

            // %01011000  UTF-16 add to table medium length
            { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },

            // %01011001 to %01011011 ILLEGAL
            { 0x5B, STATE_ILLEGAL },

            // %01011100  UTF-16 add to table large length
            { 0x5C, NISTRING_UTF16_LARGE_LENGTH },

            // %01011101 to %01011111 ILLEGAL
            { 0x5F, STATE_ILLEGAL },

            // Restricted alphabet, add to table

            // %01100000 to %01101111  RA add to table
            { 0x6F, NISTRING_RA },

            // Encoding algorithm, add to table

            // %01110000 to %01111111  EA add to table
            { 0x7F, NISTRING_EA },

            // Index

            // %10000000 to %10111111 index small
            { 0xBF, NISTRING_INDEX_SMALL },

            // %11000000 to %11011111 index medium
            { 0xDF, NISTRING_INDEX_MEDIUM },

            // %11100000 to %11101111 index large
            { 0xEF, NISTRING_INDEX_LARGE },

            // %11110000 to %11111110 ILLEGAL
            { 0xFE, STATE_ILLEGAL },

            // %11111111 Empty value
            { 0xFF, NISTRING_EMPTY },
        };


        // Identifying string states
        /* package */ static final int ISTRING_SMALL_LENGTH        = 0;
        /* package */ static final int ISTRING_MEDIUM_LENGTH       = 1;
        /* package */ static final int ISTRING_LARGE_LENGTH        = 2;
        /* package */ static final int ISTRING_INDEX_SMALL         = 3;
        /* package */ static final int ISTRING_INDEX_MEDIUM        = 4;
        /* package */ static final int ISTRING_INDEX_LARGE         = 5;

        /** Octet -> state table built from {@link #ISTRING_RANGES}. */
        private static final int[] ISTRING = new int[256];

        /** Ranges for the ISTRING (identifying string) table. */
        private static final int[][] ISTRING_RANGES = {
            // %00000000 to %00111111 small length
            { 0x3F, ISTRING_SMALL_LENGTH },

            // %01000000 medium length
            { 0x40, ISTRING_MEDIUM_LENGTH },

            // %01000001 to %01011111 ILLEGAL
            { 0x5F, STATE_ILLEGAL },

            // %01100000 large length
            { 0x60, ISTRING_LARGE_LENGTH },

            // %01100001 to %01111111 ILLEGAL
            { 0x7F, STATE_ILLEGAL },

            // %10000000 to %10111111 index small
            { 0xBF, ISTRING_INDEX_SMALL },

            // %11000000 to %11011111 index medium
            { 0xDF, ISTRING_INDEX_MEDIUM },

            // %11100000 to %11101111 index large
            { 0xEF, ISTRING_INDEX_LARGE },

            // %11110000 to %11111111 ILLEGAL
            { 0xFF, STATE_ILLEGAL },
        };


        // Extra states used only by the ISTRING_PREFIX_NAMESPACE table; the
        // numbering continues after the ISTRING_* states above. For the LENGTH_n
        // states the triggering octet value is n - 1 (0x02, 0x04, 0x1C, 0x23).
        /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3   = 6;
        /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5   = 7;
        /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29  = 8;
        /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36  = 9;
        /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

        /** Octet -> state table built from {@link #ISTRING_PREFIX_NAMESPACE_RANGES}. */
        private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

        /**
         * Ranges for the ISTRING_PREFIX_NAMESPACE table. Same layout as
         * {@link #ISTRING_RANGES} except that four specific small-length octets
         * and the index-zero octet (0x80) are given dedicated states.
         */
        private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
            // %00000000 to %00000001 small length
            { 0x01, ISTRING_SMALL_LENGTH },

            // %00000010 small length, dedicated state (length 3)
            { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },

            // %00000011 small length
            { 0x03, ISTRING_SMALL_LENGTH },

            // %00000100 small length, dedicated state (length 5)
            { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },

            // %00000101 to %00011011 small length
            { 0x1B, ISTRING_SMALL_LENGTH },

            // %00011100 small length, dedicated state (length 29)
            { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },

            // %00011101 to %00100010 small length
            { 0x22, ISTRING_SMALL_LENGTH },

            // %00100011 small length, dedicated state (length 36)
            { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },

            // %00100100 to %00111111 small length
            { 0x3F, ISTRING_SMALL_LENGTH },

            // %01000000 medium length
            { 0x40, ISTRING_MEDIUM_LENGTH },

            // %01000001 to %01011111 ILLEGAL
            { 0x5F, STATE_ILLEGAL },

            // %01100000 large length
            { 0x60, ISTRING_LARGE_LENGTH },

            // %01100001 to %01111111 ILLEGAL
            { 0x7F, STATE_ILLEGAL },

            // %10000000 index small, 0
            { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },

            // %10000001 to %10111111 index small
            { 0xBF, ISTRING_INDEX_SMALL },

            // %11000000 to %11011111 index medium
            { 0xDF, ISTRING_INDEX_MEDIUM },

            // %11100000 to %11101111 index large
            { 0xEF, ISTRING_INDEX_LARGE },

            // %11110000 to %11111111 ILLEGAL
            { 0xFF, STATE_ILLEGAL },
        };

        // UTF-8 states
        /* package */ static final int UTF8_NCNAME_NCNAME         = 0;
        /* package */ static final int UTF8_NCNAME_NCNAME_CHAR    = 1;
        /* package */ static final int UTF8_TWO_BYTES             = 2;
        /* package */ static final int UTF8_THREE_BYTES           = 3;
        /* package */ static final int UTF8_FOUR_BYTES            = 4;

        /** Octet -> state table built from {@link #UTF8_NCNAME_RANGES}. */
        private static final int[] UTF8_NCNAME = new int[256];

        /**
         * Ranges for the UTF8_NCNAME table, which classifies the lead octet of a
         * UTF-8 sequence inside a name: letters and '_' map to
         * {@link #UTF8_NCNAME_NCNAME}, digits, '-' and '.' map to
         * {@link #UTF8_NCNAME_NCNAME_CHAR}, other single-octet values are illegal.
         */
        private static final int[][] UTF8_NCNAME_RANGES = {

            // Basic Latin

            // %00000000 to %00101100
            { 0x2C, STATE_ILLEGAL },

            // '-' '.'
            // %00101101 to %00101110 [#x002D-#x002E]
            { 0x2E, UTF8_NCNAME_NCNAME_CHAR },

            // %00101111
            { 0x2F, STATE_ILLEGAL },

            // [0-9]
            // %00110000 to %00111001  [#x0030-#x0039]
            { 0x39, UTF8_NCNAME_NCNAME_CHAR },

            // %00111010 to %01000000
            { 0x40, STATE_ILLEGAL },

            // [A-Z]
            // %01000001 to %01011010 [#x0041-#x005A]
            { 0x5A, UTF8_NCNAME_NCNAME },

            // %01011011 to %01011110
            { 0x5E, STATE_ILLEGAL },

            // '_'
            // %01011111 [#x005F]
            { 0x5F, UTF8_NCNAME_NCNAME },

            // %01100000
            { 0x60, STATE_ILLEGAL },

            // [a-z]
            // %01100001 to %01111010 [#x0061-#x007A]
            { 0x7A, UTF8_NCNAME_NCNAME },

            // %01111011 to %01111111
            { 0x7F, STATE_ILLEGAL },


            // Two bytes

            // %10000000 to %11000001
            { 0xC1, STATE_ILLEGAL },

            // %11000010 to %11011111
            { 0xDF, UTF8_TWO_BYTES },


            // Three bytes

            // %11100000 to %11101111
            { 0xEF, UTF8_THREE_BYTES },


            // Four bytes

            // %11110000 to %11110111
            { 0xF7, UTF8_FOUR_BYTES },


            // %11111000 to %11111111
            { 0xFF, STATE_ILLEGAL }
        };

        /**
         * State for a legal single-octet character in the UTF8 table. Shares the
         * value 1 with {@link #UTF8_NCNAME_NCNAME_CHAR}, which belongs to the
         * separate UTF8_NCNAME table.
         */
        /* package */ static final int UTF8_ONE_BYTE = 1;

        /** Octet -> state table built from {@link #UTF8_RANGES}. */
        private static final int[] UTF8 = new int[256];

        /**
         * Ranges for the UTF8 table, which classifies the lead octet of a UTF-8
         * sequence in character data. Multi-octet lead bytes reuse the
         * UTF8_TWO_BYTES / UTF8_THREE_BYTES / UTF8_FOUR_BYTES states.
         */
        private static final int[][] UTF8_RANGES = {

            // Basic Latin

            // %00000000 to %00001000
            { 0x08, STATE_ILLEGAL },

            // CHARACTER TABULATION, LINE FEED
            // %00001001 to %00001010 [#x0009-#x000A]
            { 0x0A, UTF8_ONE_BYTE },

            // %00001011 to %00001100
            { 0x0C, STATE_ILLEGAL },

            // CARRIAGE RETURN
            // %00001101 [#x000D]
            { 0x0D, UTF8_ONE_BYTE },

            // %00001110 to %00011111
            { 0x1F, STATE_ILLEGAL },

            // %00100000 to %01111111
            { 0x7F, UTF8_ONE_BYTE },


            // Two bytes

            // %10000000 to %11000001
            { 0xC1, STATE_ILLEGAL },

            // %11000010 to %11011111
            { 0xDF, UTF8_TWO_BYTES },


            // Three bytes

            // %11100000 to %11101111
            { 0xEF, UTF8_THREE_BYTES },


            // Four bytes

            // %11110000 to %11110111
            { 0xF7, UTF8_FOUR_BYTES },


            // %11111000 to %11111111
            { 0xFF, STATE_ILLEGAL }
        };

        /**
         * Expands a list of {@code { inclusiveEnd, state }} pairs into a flat
         * lookup table.
         *
         * <p>The first range starts at index 0; every following range starts one
         * past the previous range's end. Callers are expected to pass ranges in
         * ascending order whose last end is {@code table.length - 1}; this is not
         * checked here.
         *
         * @param table  destination array, written in place
         * @param ranges range descriptions, each a two-element array
         */
        private static void constructTable(int[] table, int[][] ranges) {
            int start = 0x00;
            for (int range = 0; range < ranges.length; range++) {
                final int end = ranges[range][RANGE_INDEX_END];
                final int value = ranges[range][RANGE_INDEX_VALUE];
                for (int i = start; i <= end; i++) {
                    table[i] = value;
                }
                // The next range begins immediately after this one.
                start = end + 1;
            }
        }

        /**
         * Looks up the DII-table state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return the state constant, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int DII(final int index) {
            return DII[index];
        }

        /**
         * Looks up the EII-table state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return the state constant, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int EII(final int index) {
            return EII[index];
        }

        /**
         * Looks up the AII-table state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return one of the {@code AII_*} states, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int AII(final int index) {
            return AII[index];
        }

        /**
         * Looks up the NISTRING-table (attribute value) state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return one of the {@code NISTRING_*} states, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int NISTRING(final int index) {
            return NISTRING[index];
        }

        /**
         * Looks up the ISTRING-table (identifying string) state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return one of the {@code ISTRING_*} states, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int ISTRING(final int index) {
            return ISTRING[index];
        }

        /**
         * Looks up the ISTRING_PREFIX_NAMESPACE-table state for an octet.
         *
         * @param index octet value, 0 to 255
         * @return one of the {@code ISTRING_*} or
         *         {@code ISTRING_PREFIX_NAMESPACE_*} states, or
         *         {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int ISTRING_PREFIX_NAMESPACE(final int index) {
            return ISTRING_PREFIX_NAMESPACE[index];
        }

        /**
         * Looks up the UTF8-table state for a UTF-8 lead octet.
         *
         * @param index octet value, 0 to 255
         * @return {@link #UTF8_ONE_BYTE}, one of the multi-byte {@code UTF8_*}
         *         states, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int UTF8(final int index) {
            return UTF8[index];
        }

        /**
         * Looks up the UTF8_NCNAME-table state for a UTF-8 lead octet.
         *
         * @param index octet value, 0 to 255
         * @return one of the {@code UTF8_*} states, or {@link #STATE_ILLEGAL}
         * @throws ArrayIndexOutOfBoundsException if {@code index} is outside 0..255
         */
        public static int UTF8_NCNAME(final int index) {
            return UTF8_NCNAME[index];
        }

        // Populate every lookup table. This block must stay textually after the
        // table and range declarations so that they are initialized first.
        static {
            // DII
            constructTable(DII, DII_RANGES);

            // EII
            constructTable(EII, EII_RANGES);

            // AII
            constructTable(AII, AII_RANGES);

            // AII Value
            constructTable(NISTRING, NISTRING_RANGES);

            // Identifying string
            constructTable(ISTRING, ISTRING_RANGES);

            // Identifying string (prefix / namespace name variant)
            constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

            // UTF-8 NCNAME states
            constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

            // UTF-8 states
            constructTable(UTF8, UTF8_RANGES);
        }

        /** Not instantiable: this class only holds static tables. */
        private DecoderStateTables() {
        }
    }

mercurial