src/share/jaxws_classes/com/sun/xml/internal/fastinfoset/DecoderStateTables.java

changeset 0
373ffda63c9a
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/com/sun/xml/internal/fastinfoset/DecoderStateTables.java	Wed Apr 27 01:27:09 2016 +0800
     1.3 @@ -0,0 +1,824 @@
     1.4 +/*
     1.5 + * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + *
    1.28 + * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
    1.29 + */
    1.30 +
    1.31 +package com.sun.xml.internal.fastinfoset;
    1.32 +
    1.33 +public class DecoderStateTables {
    1.34 +    private static int RANGE_INDEX_END      = 0;
    1.35 +    private static int RANGE_INDEX_VALUE    = 1;
    1.36 +
    1.37 +    public final static int STATE_ILLEGAL                   = 255;
    1.38 +    public final static int STATE_UNSUPPORTED               = 254;
    1.39 +
    1.40 +    // EII child states
    1.41 +    public final static int EII_NO_AIIS_INDEX_SMALL         = 0;
    1.42 +    public final static int EII_AIIS_INDEX_SMALL            = 1;
    1.43 +    public final static int EII_INDEX_MEDIUM                = 2;
    1.44 +    public final static int EII_INDEX_LARGE                 = 3;
    1.45 +    public final static int EII_NAMESPACES                  = 4;
    1.46 +    public final static int EII_LITERAL                     = 5;
    1.47 +    public final static int CII_UTF8_SMALL_LENGTH           = 6;
    1.48 +    public final static int CII_UTF8_MEDIUM_LENGTH          = 7;
    1.49 +    public final static int CII_UTF8_LARGE_LENGTH           = 8;
    1.50 +    public final static int CII_UTF16_SMALL_LENGTH          = 9;
    1.51 +    public final static int CII_UTF16_MEDIUM_LENGTH         = 10;
    1.52 +    public final static int CII_UTF16_LARGE_LENGTH          = 11;
    1.53 +    public final static int CII_RA                          = 12;
    1.54 +    public final static int CII_EA                          = 13;
    1.55 +    public final static int CII_INDEX_SMALL                 = 14;
    1.56 +    public final static int CII_INDEX_MEDIUM                = 15;
    1.57 +    public final static int CII_INDEX_LARGE                 = 16;
    1.58 +    public final static int CII_INDEX_LARGE_LARGE           = 17;
    1.59 +    public final static int COMMENT_II                      = 18;
    1.60 +    public final static int PROCESSING_INSTRUCTION_II       = 19;
    1.61 +    public final static int DOCUMENT_TYPE_DECLARATION_II    = 20;
    1.62 +    public final static int UNEXPANDED_ENTITY_REFERENCE_II  = 21;
    1.63 +    public final static int TERMINATOR_SINGLE               = 22;
    1.64 +    public final static int TERMINATOR_DOUBLE               = 23;
    1.65 +
    1.66 +    private static final int[] DII = new int[256];
    1.67 +
    1.68 +    private static final int[][] DII_RANGES = {
    1.69 +        // EII
    1.70 +
    1.71 +        // %00000000 to %00011111  EII no attributes small index
    1.72 +        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
    1.73 +
    1.74 +        // %00100000 to %00100111  EII medium index
    1.75 +        { 0x27, EII_INDEX_MEDIUM },
    1.76 +
    1.77 +        // %00101000 to %00101111  EII large index
    1.78 +        // %00110000  EII very large index
    1.79 +        // %00101000 to %00110000
    1.80 +        { 0x30, EII_INDEX_LARGE },
    1.81 +
    1.82 +        // %00110001 to %00110111  ILLEGAL
    1.83 +        { 0x37, STATE_ILLEGAL },
    1.84 +
    1.85 +        // %00111000  EII namespaces
    1.86 +        { 0x38, EII_NAMESPACES },
    1.87 +
    1.88 +        // %00111001 to %00111011  ILLEGAL
    1.89 +        { 0x3B, STATE_ILLEGAL },
    1.90 +
    1.91 +        // %00111100  EII literal (no prefix, no namespace)
    1.92 +        { 0x3C, EII_LITERAL },
    1.93 +
    1.94 +        // %00111101  EII literal (no prefix, namespace)
    1.95 +        { 0x3D, EII_LITERAL },
    1.96 +
    1.97 +        // %00111110  ILLEGAL
    1.98 +        { 0x3E, STATE_ILLEGAL },
    1.99 +
   1.100 +        // %00111111  EII literal (prefix, namespace)
   1.101 +        { 0x3F, EII_LITERAL },
   1.102 +
   1.103 +        // %01000000 to %01011111  EII attributes small index
   1.104 +        { 0x5F, EII_AIIS_INDEX_SMALL },
   1.105 +
   1.106 +        // %01100000 to %01100111  EII medium index
   1.107 +        { 0x67, EII_INDEX_MEDIUM },
   1.108 +
   1.109 +        // %01101000 to %01101111  EII large index
   1.110 +        // %01110000  EII very large index
   1.111 +        // %01101000 to %01110000
   1.112 +        { 0x70, EII_INDEX_LARGE },
   1.113 +
   1.114 +        // %01110001 to %01110111  ILLEGAL
   1.115 +        { 0x77, STATE_ILLEGAL },
   1.116 +
   1.117 +        // %01111000  EII attributes namespaces
   1.118 +        { 0x78, EII_NAMESPACES },
   1.119 +
   1.120 +        // %01111001 to %01111011  ILLEGAL
   1.121 +        { 0x7B, STATE_ILLEGAL },
   1.122 +
   1.123 +        // %01111100  EII attributes literal (no prefix, no namespace)
   1.124 +        { 0x7C, EII_LITERAL },
   1.125 +
   1.126 +        // %01111101  EII attributes literal (no prefix, namespace)
   1.127 +        { 0x7D, EII_LITERAL },
   1.128 +
   1.129 +        // %01111110  ILLEGAL
   1.130 +        { 0x7E, STATE_ILLEGAL },
   1.131 +
   1.132 +        // %01111111  EII attributes literal (prefix, namespace)
   1.133 +        { 0x7F, EII_LITERAL },
   1.134 +
   1.135 +        // %10000000 to %11000011
   1.136 +        { 0xC3, STATE_ILLEGAL },
   1.137 +
   1.138 +        // %11000100 to %11000111
   1.139 +        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },
   1.140 +
   1.141 +        // %11001000 to %1110000
   1.142 +        { 0xE0, STATE_ILLEGAL },
   1.143 +
   1.144 +        // %11100001 processing instruction
   1.145 +        { 0xE1, PROCESSING_INSTRUCTION_II },
   1.146 +
   1.147 +        // %11100010 comment
   1.148 +        { 0xE2, COMMENT_II},
   1.149 +
   1.150 +        // %111000011 to %11101111
   1.151 +        { 0xEF, STATE_ILLEGAL },
   1.152 +
   1.153 +        // Terminators
   1.154 +
   1.155 +        // %11110000  single terminator
   1.156 +        { 0xF0, TERMINATOR_SINGLE },
   1.157 +
   1.158 +        // %11110000 to %11111110 ILLEGAL
   1.159 +        { 0xFE, STATE_ILLEGAL },
   1.160 +
   1.161 +        // %11111111  double terminator
   1.162 +        { 0xFF, TERMINATOR_DOUBLE }
   1.163 +    };
   1.164 +
   1.165 +    private static final int[] EII = new int[256];
   1.166 +
   1.167 +    private static final int[][] EII_RANGES = {
   1.168 +        // EII
   1.169 +
   1.170 +        // %00000000 to %00011111  EII no attributes small index
   1.171 +        { 0x1F, EII_NO_AIIS_INDEX_SMALL },
   1.172 +
   1.173 +        // %00100000 to %00100111  EII medium index
   1.174 +        { 0x27, EII_INDEX_MEDIUM },
   1.175 +
   1.176 +        // %00101000 to %00101111  EII large index
   1.177 +        // %00110000  EII very large index
   1.178 +        // %00101000 to %00110000
   1.179 +        { 0x30, EII_INDEX_LARGE },
   1.180 +
   1.181 +        // %00110001 to %00110111  ILLEGAL
   1.182 +        { 0x37, STATE_ILLEGAL },
   1.183 +
   1.184 +        // %00111000  EII namespaces
   1.185 +        { 0x38, EII_NAMESPACES },
   1.186 +
   1.187 +        // %00111001 to %00111011  ILLEGAL
   1.188 +        { 0x3B, STATE_ILLEGAL },
   1.189 +
   1.190 +        // %00111100  EII literal (no prefix, no namespace)
   1.191 +        { 0x3C, EII_LITERAL },
   1.192 +
   1.193 +        // %00111101  EII literal (no prefix, namespace)
   1.194 +        { 0x3D, EII_LITERAL },
   1.195 +
   1.196 +        // %00111110  ILLEGAL
   1.197 +        { 0x3E, STATE_ILLEGAL },
   1.198 +
   1.199 +        // %00111111  EII literal (prefix, namespace)
   1.200 +        { 0x3F, EII_LITERAL },
   1.201 +
   1.202 +        // %01000000 to %01011111  EII attributes small index
   1.203 +        { 0x5F, EII_AIIS_INDEX_SMALL },
   1.204 +
   1.205 +        // %01100000 to %01100111  EII medium index
   1.206 +        { 0x67, EII_INDEX_MEDIUM },
   1.207 +
   1.208 +        // %01101000 to %01101111  EII large index
   1.209 +        // %01110000  EII very large index
   1.210 +        // %01101000 to %01110000
   1.211 +        { 0x70, EII_INDEX_LARGE },
   1.212 +
   1.213 +        // %01110001 to %01110111  ILLEGAL
   1.214 +        { 0x77, STATE_ILLEGAL },
   1.215 +
   1.216 +        // %01111000  EII attributes namespaces
   1.217 +        { 0x78, EII_NAMESPACES },
   1.218 +
   1.219 +        // %01111001 to %01111011  ILLEGAL
   1.220 +        { 0x7B, STATE_ILLEGAL },
   1.221 +
   1.222 +        // %01111100  EII attributes literal (no prefix, no namespace)
   1.223 +        { 0x7C, EII_LITERAL },
   1.224 +
   1.225 +        // %01111101  EII attributes literal (no prefix, namespace)
   1.226 +        { 0x7D, EII_LITERAL },
   1.227 +
   1.228 +        // %01111110  ILLEGAL
   1.229 +        { 0x7E, STATE_ILLEGAL },
   1.230 +
   1.231 +        // %01111111  EII attributes literal (prefix, namespace)
   1.232 +        { 0x7F, EII_LITERAL },
   1.233 +
   1.234 +        // CII
   1.235 +
   1.236 +        // UTF-8 string
   1.237 +
   1.238 +        // %10000000 to %10000001  CII UTF-8 no add to table small length
   1.239 +        { 0x81, CII_UTF8_SMALL_LENGTH },
   1.240 +
   1.241 +        // %10000010  CII UTF-8 no add to table medium length
   1.242 +        { 0x82, CII_UTF8_MEDIUM_LENGTH },
   1.243 +
   1.244 +        // %10000011  CII UTF-8 no add to table large length
   1.245 +        { 0x83, CII_UTF8_LARGE_LENGTH },
   1.246 +
   1.247 +        // UTF-16 string
   1.248 +
   1.249 +        // %10000100 to %10000101  CII UTF-16 no add to table small length
   1.250 +        { 0x85, CII_UTF16_SMALL_LENGTH },
   1.251 +
   1.252 +        // %10000110  CII UTF-16 no add to table medium length
   1.253 +        { 0x86, CII_UTF16_MEDIUM_LENGTH },
   1.254 +
   1.255 +        // %10000111  CII UTF-16 no add to table large length
   1.256 +        { 0x87, CII_UTF16_LARGE_LENGTH },
   1.257 +
   1.258 +        // Resitricted alphabet
   1.259 +
   1.260 +        // %10001000 to %10001011  CII RA no add to table
   1.261 +        { 0x8B, CII_RA },
   1.262 +
   1.263 +        // Encoding algorithm
   1.264 +
   1.265 +        // %10001100 to %10001111  CII EA no add to table
   1.266 +        { 0x8F, CII_EA },
   1.267 +
   1.268 +        // UTF-8 string, add to table
   1.269 +
   1.270 +        // %10010000 to %10010001  CII add to table small length
   1.271 +        { 0x91, CII_UTF8_SMALL_LENGTH },
   1.272 +
   1.273 +        // %10010010  CII add to table medium length
   1.274 +        { 0x92, CII_UTF8_MEDIUM_LENGTH },
   1.275 +
   1.276 +        // %10010011  CII add to table large length
   1.277 +        { 0x93, CII_UTF8_LARGE_LENGTH },
   1.278 +
   1.279 +        // UTF-16 string, add to table
   1.280 +
   1.281 +        // %10010100 to %10010101  CII UTF-16 add to table small length
   1.282 +        { 0x95, CII_UTF16_SMALL_LENGTH },
   1.283 +
   1.284 +        // %10010110  CII UTF-16 add to table medium length
   1.285 +        { 0x96, CII_UTF16_MEDIUM_LENGTH },
   1.286 +
   1.287 +        // %10010111  CII UTF-16 add to table large length
   1.288 +        { 0x97, CII_UTF16_LARGE_LENGTH },
   1.289 +
   1.290 +        // Restricted alphabet, add to table
   1.291 +
   1.292 +        // %10011000 to %10011011  CII RA add to table
   1.293 +        { 0x9B, CII_RA },
   1.294 +
   1.295 +        // Encoding algorithm, add to table
   1.296 +
   1.297 +        // %10011100 to %10011111  CII EA add to table
   1.298 +        { 0x9F, CII_EA },
   1.299 +
   1.300 +        // Index
   1.301 +
   1.302 +        // %10100000 to %10101111  CII small index
   1.303 +        { 0xAF, CII_INDEX_SMALL },
   1.304 +
   1.305 +        // %10110000 to %10110011  CII medium index
   1.306 +        { 0xB3, CII_INDEX_MEDIUM },
   1.307 +
   1.308 +        // %10110100 to %10110111  CII large index
   1.309 +        { 0xB7, CII_INDEX_LARGE },
   1.310 +
   1.311 +        // %10111000  CII very large index
   1.312 +        { 0xB8, CII_INDEX_LARGE_LARGE },
   1.313 +
   1.314 +        // %10111001 to %11000111  ILLEGAL
   1.315 +        { 0xC7, STATE_ILLEGAL },
   1.316 +
   1.317 +        // %11001000 to %11001011
   1.318 +        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },
   1.319 +
   1.320 +        // %11001100 to %11100000  ILLEGAL
   1.321 +        { 0xE0, STATE_ILLEGAL },
   1.322 +
   1.323 +        // %11100001 processing instruction
   1.324 +        { 0xE1, PROCESSING_INSTRUCTION_II },
   1.325 +
   1.326 +        // %11100010 comment
   1.327 +        { 0xE2, COMMENT_II},
   1.328 +
   1.329 +        // %111000011 to %11101111
   1.330 +        { 0xEF, STATE_ILLEGAL },
   1.331 +
   1.332 +        // Terminators
   1.333 +
   1.334 +        // %11110000  single terminator
   1.335 +        { 0xF0, TERMINATOR_SINGLE },
   1.336 +
   1.337 +        // %11110000 to %11111110 ILLEGAL
   1.338 +        { 0xFE, STATE_ILLEGAL },
   1.339 +
   1.340 +        // %11111111  double terminator
   1.341 +        { 0xFF, TERMINATOR_DOUBLE }
   1.342 +    };
   1.343 +
   1.344 +
   1.345 +    // AII states
   1.346 +    public final static int AII_INDEX_SMALL                 = 0;
   1.347 +    public final static int AII_INDEX_MEDIUM                = 1;
   1.348 +    public final static int AII_INDEX_LARGE                 = 2;
   1.349 +    public final static int AII_LITERAL                     = 3;
   1.350 +    public final static int AII_TERMINATOR_SINGLE           = 4;
   1.351 +    public final static int AII_TERMINATOR_DOUBLE           = 5;
   1.352 +
   1.353 +    private static final int[] AII = new int[256];
   1.354 +
   1.355 +    private static final int[][] AII_RANGES = {
   1.356 +        // %00000000 to %00111111  AII small index
   1.357 +        { 0x3F, AII_INDEX_SMALL },
   1.358 +
   1.359 +        // %01000000 to %01011111  AII medium index
   1.360 +        { 0x5F, AII_INDEX_MEDIUM },
   1.361 +
   1.362 +        // %01100000 to %01101111  AII large index
   1.363 +        { 0x6F, AII_INDEX_LARGE },
   1.364 +
   1.365 +        // %01110000 to %01110111  ILLEGAL
   1.366 +        { 0x77, STATE_ILLEGAL },
   1.367 +
   1.368 +        // %01111000  AII literal (no prefix, no namespace)
   1.369 +        // %01111001  AII literal (no prefix, namespace)
   1.370 +        { 0x79, AII_LITERAL },
   1.371 +
   1.372 +        // %01111010  ILLEGAL
   1.373 +        { 0x7A, STATE_ILLEGAL },
   1.374 +
   1.375 +        // %01111011  AII literal (prefix, namespace)
   1.376 +        { 0x7B, AII_LITERAL },
   1.377 +
   1.378 +        // %10000000 to %11101111  ILLEGAL
   1.379 +        { 0xEF, STATE_ILLEGAL },
   1.380 +
   1.381 +        // Terminators
   1.382 +
   1.383 +        // %11110000  single terminator
   1.384 +        { 0xF0, AII_TERMINATOR_SINGLE },
   1.385 +
   1.386 +        // %11110000 to %11111110 ILLEGAL
   1.387 +        { 0xFE, STATE_ILLEGAL },
   1.388 +
   1.389 +        // %11111111  double terminator
   1.390 +        { 0xFF, AII_TERMINATOR_DOUBLE }
   1.391 +    };
   1.392 +
   1.393 +
   1.394 +    // AII value states
   1.395 +    public final static int NISTRING_UTF8_SMALL_LENGTH     = 0;
   1.396 +    public final static int NISTRING_UTF8_MEDIUM_LENGTH    = 1;
   1.397 +    public final static int NISTRING_UTF8_LARGE_LENGTH     = 2;
   1.398 +    public final static int NISTRING_UTF16_SMALL_LENGTH    = 3;
   1.399 +    public final static int NISTRING_UTF16_MEDIUM_LENGTH   = 4;
   1.400 +    public final static int NISTRING_UTF16_LARGE_LENGTH    = 5;
   1.401 +    public final static int NISTRING_RA                    = 6;
   1.402 +    public final static int NISTRING_EA                    = 7;
   1.403 +    public final static int NISTRING_INDEX_SMALL           = 8;
   1.404 +    public final static int NISTRING_INDEX_MEDIUM          = 9;
   1.405 +    public final static int NISTRING_INDEX_LARGE           = 10;
   1.406 +    public final static int NISTRING_EMPTY                 = 11;
   1.407 +
   1.408 +    private static final int[] NISTRING = new int[256];
   1.409 +
   1.410 +    private static final int[][] NISTRING_RANGES = {
   1.411 +        // UTF-8 string
   1.412 +
   1.413 +        // %00000000 to %00000111  UTF-8 no add to table small length
   1.414 +        { 0x07, NISTRING_UTF8_SMALL_LENGTH },
   1.415 +
   1.416 +        // %00001000  UTF-8 no add to table medium length
   1.417 +        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },
   1.418 +
   1.419 +        // %00001001 to %00001011 ILLEGAL
   1.420 +        { 0x0B, STATE_ILLEGAL },
   1.421 +
   1.422 +        // %00001100  UTF-8 no add to table large length
   1.423 +        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },
   1.424 +
   1.425 +        // %00001101 to %00001111 ILLEGAL
   1.426 +        { 0x0F, STATE_ILLEGAL },
   1.427 +
   1.428 +        // UTF-16 string
   1.429 +
   1.430 +        // %00010000 to %00010111  UTF-16 no add to table small length
   1.431 +        { 0x17, NISTRING_UTF16_SMALL_LENGTH },
   1.432 +
   1.433 +        // %00001000  UTF-16 no add to table medium length
   1.434 +        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },
   1.435 +
   1.436 +        // %00011001 to %00011011 ILLEGAL
   1.437 +        { 0x1B, STATE_ILLEGAL },
   1.438 +
   1.439 +        // %00011100  UTF-16 no add to table large length
   1.440 +        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },
   1.441 +
   1.442 +        // %00011101 to %00011111 ILLEGAL
   1.443 +        { 0x1F, STATE_ILLEGAL },
   1.444 +
   1.445 +        // Restricted alphabet
   1.446 +
   1.447 +        // %00100000 to %00101111  RA no add to table small length
   1.448 +        { 0x2F, NISTRING_RA },
   1.449 +
   1.450 +        // Encoding algorithm
   1.451 +
   1.452 +        // %00110000 to %00111111  EA no add to table
   1.453 +        { 0x3F, NISTRING_EA },
   1.454 +
   1.455 +        // UTF-8 string, add to table
   1.456 +
   1.457 +        // %01000000 to %01000111  UTF-8 add to table small length
   1.458 +        { 0x47, NISTRING_UTF8_SMALL_LENGTH },
   1.459 +
   1.460 +        // %01001000  UTF-8 add to table medium length
   1.461 +        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },
   1.462 +
   1.463 +        // %01001001 to %01001011 ILLEGAL
   1.464 +        { 0x4B, STATE_ILLEGAL },
   1.465 +
   1.466 +        // %01001100  UTF-8 add to table large length
   1.467 +        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },
   1.468 +
   1.469 +        // %01001101 to %01001111 ILLEGAL
   1.470 +        { 0x4F, STATE_ILLEGAL },
   1.471 +
   1.472 +        // UTF-16 string, add to table
   1.473 +
   1.474 +        // %01010000 to %01010111  UTF-16 add to table small length
   1.475 +        { 0x57, NISTRING_UTF16_SMALL_LENGTH },
   1.476 +
   1.477 +        // %01001000  UTF-16 add to table medium length
   1.478 +        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },
   1.479 +
   1.480 +        // %01011001 to %01011011 ILLEGAL
   1.481 +        { 0x5B, STATE_ILLEGAL },
   1.482 +
   1.483 +        // %01011100  UTF-16 add to table large length
   1.484 +        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },
   1.485 +
   1.486 +        // %01011101 to %01011111 ILLEGAL
   1.487 +        { 0x5F, STATE_ILLEGAL },
   1.488 +
   1.489 +        // Restricted alphabet, add to table
   1.490 +
   1.491 +        // %01100000 to %01101111  RA no add to table small length
   1.492 +        { 0x6F, NISTRING_RA },
   1.493 +
   1.494 +        // Encoding algorithm, add to table
   1.495 +
   1.496 +        // %01110000 to %01111111  EA add to table
   1.497 +        { 0x7F, NISTRING_EA },
   1.498 +
   1.499 +        // Index
   1.500 +
   1.501 +        // %10000000 to %10111111 index small
   1.502 +        { 0xBF, NISTRING_INDEX_SMALL },
   1.503 +
   1.504 +        // %11000000 to %11011111 index medium
   1.505 +        { 0xDF, NISTRING_INDEX_MEDIUM },
   1.506 +
   1.507 +        // %11100000 to %11101111 index large
   1.508 +        { 0xEF, NISTRING_INDEX_LARGE },
   1.509 +
   1.510 +        // %11110000 to %11111110 ILLEGAL
   1.511 +        { 0xFE, STATE_ILLEGAL },
   1.512 +
   1.513 +        // %11111111 Empty value
   1.514 +        { 0xFF, NISTRING_EMPTY },
   1.515 +    };
   1.516 +
   1.517 +
   1.518 +    /* package */ final static int ISTRING_SMALL_LENGTH        = 0;
   1.519 +    /* package */ final static int ISTRING_MEDIUM_LENGTH       = 1;
   1.520 +    /* package */ final static int ISTRING_LARGE_LENGTH        = 2;
   1.521 +    /* package */ final static int ISTRING_INDEX_SMALL         = 3;
   1.522 +    /* package */ final static int ISTRING_INDEX_MEDIUM        = 4;
   1.523 +    /* package */ final static int ISTRING_INDEX_LARGE         = 5;
   1.524 +
   1.525 +    private static final int[] ISTRING = new int[256];
   1.526 +
   1.527 +    private static final int[][] ISTRING_RANGES = {
   1.528 +        // %00000000 to %00111111 small length
   1.529 +        { 0x3F, ISTRING_SMALL_LENGTH },
   1.530 +
   1.531 +        // %01000000 medium length
   1.532 +        { 0x40, ISTRING_MEDIUM_LENGTH },
   1.533 +
   1.534 +        // %01000001 to %01011111 ILLEGAL
   1.535 +        { 0x5F, STATE_ILLEGAL },
   1.536 +
   1.537 +        // %01100000 large length
   1.538 +        { 0x60, ISTRING_LARGE_LENGTH },
   1.539 +
   1.540 +        // %01100001 to %01111111 ILLEGAL
   1.541 +        { 0x7F, STATE_ILLEGAL },
   1.542 +
   1.543 +        // %10000000 to %10111111 index small
   1.544 +        { 0xBF, ISTRING_INDEX_SMALL },
   1.545 +
   1.546 +        // %11000000 to %11011111 index medium
   1.547 +        { 0xDF, ISTRING_INDEX_MEDIUM },
   1.548 +
   1.549 +        // %11100000 to %11101111 index large
   1.550 +        { 0xEF, ISTRING_INDEX_LARGE },
   1.551 +
   1.552 +        // %11110000 to %11111111 ILLEGAL
   1.553 +        { 0xFF, STATE_ILLEGAL },
   1.554 +    };
   1.555 +
   1.556 +
   1.557 +    /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_3   = 6;
   1.558 +    /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_5   = 7;
   1.559 +    /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_29  = 8;
   1.560 +    /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_36  = 9;
   1.561 +    /* package */ final static int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;
   1.562 +
   1.563 +    private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];
   1.564 +
   1.565 +    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
   1.566 +        // %00000000 to %00000001 small length
   1.567 +        { 0x01, ISTRING_SMALL_LENGTH },
   1.568 +
   1.569 +        // %00000010 small length
   1.570 +        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },
   1.571 +
   1.572 +        // %00000011 small length
   1.573 +        { 0x03, ISTRING_SMALL_LENGTH },
   1.574 +
   1.575 +        // %00000100 small length
   1.576 +        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },
   1.577 +
   1.578 +        // %00011011 small length
   1.579 +        { 0x1B, ISTRING_SMALL_LENGTH },
   1.580 +
   1.581 +        // %00011100 small length
   1.582 +        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },
   1.583 +
   1.584 +        // %00100010 small length
   1.585 +        { 0x22, ISTRING_SMALL_LENGTH },
   1.586 +
   1.587 +        // %00100011 small length
   1.588 +        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },
   1.589 +
   1.590 +        // %00000101 to %00111111 small length
   1.591 +        { 0x3F, ISTRING_SMALL_LENGTH },
   1.592 +
   1.593 +
   1.594 +
   1.595 +
   1.596 +        // %01000000 medium length
   1.597 +        { 0x40, ISTRING_MEDIUM_LENGTH },
   1.598 +
   1.599 +        // %01000001 to %01011111 ILLEGAL
   1.600 +        { 0x5F, STATE_ILLEGAL },
   1.601 +
   1.602 +        // %01100000 large length
   1.603 +        { 0x60, ISTRING_LARGE_LENGTH },
   1.604 +
   1.605 +        // %01100001 to %01111111 ILLEGAL
   1.606 +        { 0x7F, STATE_ILLEGAL },
   1.607 +
   1.608 +        // %10000000 index small, 0
   1.609 +        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },
   1.610 +
   1.611 +        // %10000000 to %10111111 index small
   1.612 +        { 0xBF, ISTRING_INDEX_SMALL },
   1.613 +
   1.614 +        // %11000000 to %11011111 index medium
   1.615 +        { 0xDF, ISTRING_INDEX_MEDIUM },
   1.616 +
   1.617 +        // %11100000 to %11101111 index large
   1.618 +        { 0xEF, ISTRING_INDEX_LARGE },
   1.619 +
   1.620 +        // %11110000 to %11111111 ILLEGAL
   1.621 +        { 0xFF, STATE_ILLEGAL },
   1.622 +    };
   1.623 +
   1.624 +    // UTF-8 states
   1.625 +    /* package */ final static int UTF8_NCNAME_NCNAME         = 0;
   1.626 +    /* package */ final static int UTF8_NCNAME_NCNAME_CHAR    = 1;
   1.627 +    /* package */ final static int UTF8_TWO_BYTES             = 2;
   1.628 +    /* package */ final static int UTF8_THREE_BYTES           = 3;
   1.629 +    /* package */ final static int UTF8_FOUR_BYTES            = 4;
   1.630 +
   1.631 +    private static final int[] UTF8_NCNAME = new int[256];
   1.632 +
   1.633 +    private static final int[][] UTF8_NCNAME_RANGES = {
   1.634 +
   1.635 +        // Basic Latin
   1.636 +
   1.637 +        // %00000000 to %00101100
   1.638 +        { 0x2C, STATE_ILLEGAL },
   1.639 +
   1.640 +        // '-' '.'
   1.641 +        // %%00101101 to %00101110 [#x002D-#x002E]
   1.642 +        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },
   1.643 +
   1.644 +        // %00101111
   1.645 +        { 0x2F, STATE_ILLEGAL },
   1.646 +
   1.647 +        // [0-9]
   1.648 +        // %0011000 to %00111001  [#x0030-#x0039]
   1.649 +        { 0x39, UTF8_NCNAME_NCNAME_CHAR },
   1.650 +
   1.651 +        // %01000000
   1.652 +        { 0x40, STATE_ILLEGAL },
   1.653 +
   1.654 +        // [A-Z]
   1.655 +        // %01000001 to %01011010 [#x0041-#x005A]
   1.656 +        { 0x5A, UTF8_NCNAME_NCNAME },
   1.657 +
   1.658 +        // %01011110
   1.659 +        { 0x5E, STATE_ILLEGAL },
   1.660 +
   1.661 +        // '_'
   1.662 +        // %01011111 [#x005F]
   1.663 +        { 0x5F, UTF8_NCNAME_NCNAME },
   1.664 +
   1.665 +        // %01100000
   1.666 +        { 0x60, STATE_ILLEGAL },
   1.667 +
   1.668 +        // [a-z]
   1.669 +        // %01100001 to %01111010 [#x0061-#x007A]
   1.670 +        { 0x7A, UTF8_NCNAME_NCNAME },
   1.671 +
   1.672 +        // %01111011 to %01111111
   1.673 +        { 0x7F, STATE_ILLEGAL },
   1.674 +
   1.675 +
   1.676 +        // Two bytes
   1.677 +
   1.678 +        // %10000000 to %11000001
   1.679 +        { 0xC1, STATE_ILLEGAL },
   1.680 +
   1.681 +        // %11000010 to %11011111
   1.682 +        { 0xDF, UTF8_TWO_BYTES },
   1.683 +
   1.684 +
   1.685 +        // Three bytes
   1.686 +
   1.687 +        // %11100000 to %11101111
   1.688 +        { 0xEF, UTF8_THREE_BYTES },
   1.689 +
   1.690 +
   1.691 +        // Four bytes
   1.692 +
   1.693 +        // %11110000 to %11110111
   1.694 +        { 0xF7, UTF8_FOUR_BYTES },
   1.695 +
   1.696 +
   1.697 +        // %11111000 to %11111111
   1.698 +        { 0xFF, STATE_ILLEGAL }
   1.699 +    };
   1.700 +
   1.701 +    /* package */ final static int UTF8_ONE_BYTE = 1;
   1.702 +
   1.703 +    private static final int[] UTF8 = new int[256];
   1.704 +
   1.705 +    private static final int[][] UTF8_RANGES = {
   1.706 +
   1.707 +        // Basic Latin
   1.708 +
   1.709 +        // %00000000 to %00001000
   1.710 +        { 0x08, STATE_ILLEGAL },
   1.711 +
   1.712 +        // CHARACTER TABULATION, LINE FEED
   1.713 +        // %%00001001 to %00001010 [#x0009-#x000A]
   1.714 +        { 0x0A, UTF8_ONE_BYTE },
   1.715 +
   1.716 +        // %00001011 to %00001100
   1.717 +        { 0x0C, STATE_ILLEGAL },
   1.718 +
   1.719 +        // CARRIAGE RETURN
   1.720 +        // %00001101 [#x000D]
   1.721 +        { 0x0D, UTF8_ONE_BYTE },
   1.722 +
   1.723 +        // %00001110 to %00011111
   1.724 +        { 0x1F, STATE_ILLEGAL },
   1.725 +
   1.726 +        // %0010000 to %01111111
   1.727 +        { 0x7F, UTF8_ONE_BYTE },
   1.728 +
   1.729 +
   1.730 +        // Two bytes
   1.731 +
   1.732 +        // %10000000 to %11000001
   1.733 +        { 0xC1, STATE_ILLEGAL },
   1.734 +
   1.735 +        // %11000010 to %11011111
   1.736 +        { 0xDF, UTF8_TWO_BYTES },
   1.737 +
   1.738 +
   1.739 +        // Three bytes
   1.740 +
   1.741 +        // %11100000 to %11101111
   1.742 +        { 0xEF, UTF8_THREE_BYTES },
   1.743 +
   1.744 +
   1.745 +        // Four bytes
   1.746 +
   1.747 +        // %11110000 to %11110111
   1.748 +        { 0xF7, UTF8_FOUR_BYTES },
   1.749 +
   1.750 +
   1.751 +        // %11111000 to %11111111
   1.752 +        { 0xFF, STATE_ILLEGAL }
   1.753 +    };
   1.754 +
   1.755 +    private static void constructTable(int[] table, int[][] ranges) {
   1.756 +        int start = 0x00;
   1.757 +        for (int range = 0; range < ranges.length; range++) {
   1.758 +            int end = ranges[range][RANGE_INDEX_END];
   1.759 +            int value = ranges[range][RANGE_INDEX_VALUE];
   1.760 +            for (int i = start; i<= end; i++) {
   1.761 +                table[i] = value;
   1.762 +            }
   1.763 +            start = end + 1;
   1.764 +        }
   1.765 +    }
   1.766 +
   1.767 +    public static final int DII(final int index) {
   1.768 +        return DII[index];
   1.769 +    }
   1.770 +
   1.771 +    public static final int EII(final int index) {
   1.772 +        return EII[index];
   1.773 +    }
   1.774 +
   1.775 +    public static final int AII(final int index) {
   1.776 +        return AII[index];
   1.777 +    }
   1.778 +
   1.779 +    public static final int NISTRING(final int index) {
   1.780 +        return NISTRING[index];
   1.781 +    }
   1.782 +
   1.783 +    public static final int ISTRING(final int index) {
   1.784 +        return ISTRING[index];
   1.785 +    }
   1.786 +
   1.787 +    public static final int ISTRING_PREFIX_NAMESPACE(final int index) {
   1.788 +        return ISTRING_PREFIX_NAMESPACE[index];
   1.789 +    }
   1.790 +
   1.791 +    public static final int UTF8(final int index) {
   1.792 +        return UTF8[index];
   1.793 +    }
   1.794 +
   1.795 +    public static final int UTF8_NCNAME(final int index) {
   1.796 +        return UTF8_NCNAME[index];
   1.797 +    }
   1.798 +
   1.799 +    static {
   1.800 +        // DII
   1.801 +        constructTable(DII, DII_RANGES);
   1.802 +
   1.803 +        // EII
   1.804 +        constructTable(EII, EII_RANGES);
   1.805 +
   1.806 +        // AII
   1.807 +        constructTable(AII, AII_RANGES);
   1.808 +
   1.809 +        // AII Value
   1.810 +        constructTable(NISTRING, NISTRING_RANGES);
   1.811 +
   1.812 +        // Identifying string
   1.813 +        constructTable(ISTRING, ISTRING_RANGES);
   1.814 +
   1.815 +        // Identifying string
   1.816 +        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);
   1.817 +
   1.818 +        // UTF-8 NCNAME states
   1.819 +        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);
   1.820 +
   1.821 +        // UTF-8 states
   1.822 +        constructTable(UTF8, UTF8_RANGES);
   1.823 +    }
   1.824 +
   1.825 +    private DecoderStateTables() {
   1.826 +    }
   1.827 +}

mercurial