aoqi@0: /* aoqi@0: * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: * aoqi@0: * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC. aoqi@0: */ aoqi@0: aoqi@0: package com.sun.xml.internal.fastinfoset; aoqi@0: aoqi@0: public class DecoderStateTables { aoqi@0: private static int RANGE_INDEX_END = 0; aoqi@0: private static int RANGE_INDEX_VALUE = 1; aoqi@0: aoqi@0: public final static int STATE_ILLEGAL = 255; aoqi@0: public final static int STATE_UNSUPPORTED = 254; aoqi@0: aoqi@0: // EII child states aoqi@0: public final static int EII_NO_AIIS_INDEX_SMALL = 0; aoqi@0: public final static int EII_AIIS_INDEX_SMALL = 1; aoqi@0: public final static int EII_INDEX_MEDIUM = 2; aoqi@0: public final static int EII_INDEX_LARGE = 3; aoqi@0: public final static int EII_NAMESPACES = 4; aoqi@0: public final static int EII_LITERAL = 5; aoqi@0: public final static int CII_UTF8_SMALL_LENGTH = 6; aoqi@0: public final static int CII_UTF8_MEDIUM_LENGTH = 7; aoqi@0: public final static int CII_UTF8_LARGE_LENGTH = 8; aoqi@0: public final static int CII_UTF16_SMALL_LENGTH = 9; aoqi@0: public final static int CII_UTF16_MEDIUM_LENGTH = 10; aoqi@0: public final static int CII_UTF16_LARGE_LENGTH = 11; aoqi@0: public final static int CII_RA = 12; aoqi@0: public final static int CII_EA = 13; aoqi@0: public final static int CII_INDEX_SMALL = 14; aoqi@0: public final static int CII_INDEX_MEDIUM = 15; aoqi@0: public final static int CII_INDEX_LARGE = 16; aoqi@0: public final static int CII_INDEX_LARGE_LARGE = 17; aoqi@0: public final static int COMMENT_II = 18; aoqi@0: public final static int PROCESSING_INSTRUCTION_II = 19; aoqi@0: public final static int DOCUMENT_TYPE_DECLARATION_II = 20; aoqi@0: public final static int UNEXPANDED_ENTITY_REFERENCE_II = 21; aoqi@0: public final static int TERMINATOR_SINGLE = 22; aoqi@0: public final static int TERMINATOR_DOUBLE = 23; aoqi@0: aoqi@0: private static final int[] DII = new int[256]; aoqi@0: aoqi@0: private static final int[][] DII_RANGES = { aoqi@0: // EII aoqi@0: aoqi@0: // %00000000 to %00011111 EII no attributes small index aoqi@0: { 0x1F, EII_NO_AIIS_INDEX_SMALL }, aoqi@0: aoqi@0: // %00100000 to %00100111 EII medium index aoqi@0: { 0x27, EII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %00101000 to %00101111 EII large index aoqi@0: // %00110000 EII very large index aoqi@0: // %00101000 to %00110000 aoqi@0: { 0x30, EII_INDEX_LARGE }, aoqi@0: aoqi@0: // %00110001 to %00110111 ILLEGAL aoqi@0: { 0x37, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111000 EII namespaces aoqi@0: { 0x38, EII_NAMESPACES }, aoqi@0: aoqi@0: // %00111001 to %00111011 ILLEGAL aoqi@0: { 0x3B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111100 EII literal (no prefix, no namespace) aoqi@0: { 0x3C, EII_LITERAL }, aoqi@0: aoqi@0: // %00111101 EII literal (no prefix, namespace) aoqi@0: { 0x3D, EII_LITERAL }, aoqi@0: aoqi@0: // %00111110 ILLEGAL aoqi@0: { 0x3E, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111111 EII literal (prefix, namespace) aoqi@0: { 0x3F, EII_LITERAL }, aoqi@0: aoqi@0: // %01000000 to %01011111 EII attributes small index aoqi@0: { 0x5F, EII_AIIS_INDEX_SMALL }, aoqi@0: aoqi@0: // %01100000 to %01100111 EII medium index aoqi@0: { 0x67, EII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %01101000 to %01101111 EII large index aoqi@0: // %01110000 EII very large index aoqi@0: // %01101000 to %01110000 aoqi@0: { 0x70, EII_INDEX_LARGE }, aoqi@0: aoqi@0: // %01110001 to %01110111 ILLEGAL aoqi@0: { 0x77, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111000 EII attributes namespaces aoqi@0: { 0x78, EII_NAMESPACES }, aoqi@0: aoqi@0: // %01111001 to %01111011 ILLEGAL aoqi@0: { 0x7B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111100 EII attributes literal (no prefix, no namespace) aoqi@0: { 0x7C, EII_LITERAL }, aoqi@0: aoqi@0: // %01111101 EII attributes literal (no prefix, namespace) aoqi@0: { 0x7D, EII_LITERAL }, aoqi@0: aoqi@0: // %01111110 ILLEGAL aoqi@0: { 0x7E, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111111 EII attributes literal (prefix, namespace) aoqi@0: { 0x7F, EII_LITERAL }, aoqi@0: aoqi@0: // %10000000 to %11000011 aoqi@0: { 0xC3, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11000100 to %11000111 aoqi@0: { 0xC7, DOCUMENT_TYPE_DECLARATION_II }, aoqi@0: aoqi@0: // %11001000 to %1110000 aoqi@0: { 0xE0, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11100001 processing instruction aoqi@0: { 0xE1, PROCESSING_INSTRUCTION_II }, aoqi@0: aoqi@0: // %11100010 comment aoqi@0: { 0xE2, COMMENT_II}, aoqi@0: aoqi@0: // %111000011 to %11101111 aoqi@0: { 0xEF, STATE_ILLEGAL }, aoqi@0: aoqi@0: // Terminators aoqi@0: aoqi@0: // %11110000 single terminator aoqi@0: { 0xF0, TERMINATOR_SINGLE }, aoqi@0: aoqi@0: // %11110000 to %11111110 ILLEGAL aoqi@0: { 0xFE, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11111111 double terminator aoqi@0: { 0xFF, TERMINATOR_DOUBLE } aoqi@0: }; aoqi@0: aoqi@0: private static final int[] EII = new int[256]; aoqi@0: aoqi@0: private static final int[][] EII_RANGES = { aoqi@0: // EII aoqi@0: aoqi@0: // %00000000 to %00011111 EII no attributes small index aoqi@0: { 0x1F, EII_NO_AIIS_INDEX_SMALL }, aoqi@0: aoqi@0: // %00100000 to %00100111 EII medium index aoqi@0: { 0x27, EII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %00101000 to %00101111 EII large index aoqi@0: // %00110000 EII very large index aoqi@0: // %00101000 to %00110000 aoqi@0: { 0x30, EII_INDEX_LARGE }, aoqi@0: aoqi@0: // %00110001 to %00110111 ILLEGAL aoqi@0: { 0x37, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111000 EII namespaces aoqi@0: { 0x38, EII_NAMESPACES }, aoqi@0: aoqi@0: // %00111001 to %00111011 ILLEGAL aoqi@0: { 0x3B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111100 EII literal (no prefix, no namespace) aoqi@0: { 0x3C, EII_LITERAL }, aoqi@0: aoqi@0: // %00111101 EII literal (no prefix, namespace) aoqi@0: { 0x3D, EII_LITERAL }, aoqi@0: aoqi@0: // %00111110 ILLEGAL aoqi@0: { 0x3E, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00111111 EII literal (prefix, namespace) aoqi@0: { 0x3F, EII_LITERAL }, aoqi@0: aoqi@0: // %01000000 to %01011111 EII attributes small index aoqi@0: { 0x5F, EII_AIIS_INDEX_SMALL }, aoqi@0: aoqi@0: // %01100000 to %01100111 EII medium index aoqi@0: { 0x67, EII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %01101000 to %01101111 EII large index aoqi@0: // %01110000 EII very large index aoqi@0: // %01101000 to %01110000 aoqi@0: { 0x70, EII_INDEX_LARGE }, aoqi@0: aoqi@0: // %01110001 to %01110111 ILLEGAL aoqi@0: { 0x77, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111000 EII attributes namespaces aoqi@0: { 0x78, EII_NAMESPACES }, aoqi@0: aoqi@0: // %01111001 to %01111011 ILLEGAL aoqi@0: { 0x7B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111100 EII attributes literal (no prefix, no namespace) aoqi@0: { 0x7C, EII_LITERAL }, aoqi@0: aoqi@0: // %01111101 EII attributes literal (no prefix, namespace) aoqi@0: { 0x7D, EII_LITERAL }, aoqi@0: aoqi@0: // %01111110 ILLEGAL aoqi@0: { 0x7E, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111111 EII attributes literal (prefix, namespace) aoqi@0: { 0x7F, EII_LITERAL }, aoqi@0: aoqi@0: // CII aoqi@0: aoqi@0: // UTF-8 string aoqi@0: aoqi@0: // %10000000 to %10000001 CII UTF-8 no add to table small length aoqi@0: { 0x81, CII_UTF8_SMALL_LENGTH }, aoqi@0: aoqi@0: // %10000010 CII UTF-8 no add to table medium length aoqi@0: { 0x82, CII_UTF8_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %10000011 CII UTF-8 no add to table large length aoqi@0: { 0x83, CII_UTF8_LARGE_LENGTH }, aoqi@0: aoqi@0: // UTF-16 string aoqi@0: aoqi@0: // %10000100 to %10000101 CII UTF-16 no add to table small length aoqi@0: { 0x85, CII_UTF16_SMALL_LENGTH }, aoqi@0: aoqi@0: // %10000110 CII UTF-16 no add to table medium length aoqi@0: { 0x86, CII_UTF16_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %10000111 CII UTF-16 no add to table large length aoqi@0: { 0x87, CII_UTF16_LARGE_LENGTH }, aoqi@0: aoqi@0: // Resitricted alphabet aoqi@0: aoqi@0: // %10001000 to %10001011 CII RA no add to table aoqi@0: { 0x8B, CII_RA }, aoqi@0: aoqi@0: // Encoding algorithm aoqi@0: aoqi@0: // %10001100 to %10001111 CII EA no add to table aoqi@0: { 0x8F, CII_EA }, aoqi@0: aoqi@0: // UTF-8 string, add to table aoqi@0: aoqi@0: // %10010000 to %10010001 CII add to table small length aoqi@0: { 0x91, CII_UTF8_SMALL_LENGTH }, aoqi@0: aoqi@0: // %10010010 CII add to table medium length aoqi@0: { 0x92, CII_UTF8_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %10010011 CII add to table large length aoqi@0: { 0x93, CII_UTF8_LARGE_LENGTH }, aoqi@0: aoqi@0: // UTF-16 string, add to table aoqi@0: aoqi@0: // %10010100 to %10010101 CII UTF-16 add to table small length aoqi@0: { 0x95, CII_UTF16_SMALL_LENGTH }, aoqi@0: aoqi@0: // %10010110 CII UTF-16 add to table medium length aoqi@0: { 0x96, CII_UTF16_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %10010111 CII UTF-16 add to table large length aoqi@0: { 0x97, CII_UTF16_LARGE_LENGTH }, aoqi@0: aoqi@0: // Restricted alphabet, add to table aoqi@0: aoqi@0: // %10011000 to %10011011 CII RA add to table aoqi@0: { 0x9B, CII_RA }, aoqi@0: aoqi@0: // Encoding algorithm, add to table aoqi@0: aoqi@0: // %10011100 to %10011111 CII EA add to table aoqi@0: { 0x9F, CII_EA }, aoqi@0: aoqi@0: // Index aoqi@0: aoqi@0: // %10100000 to %10101111 CII small index aoqi@0: { 0xAF, CII_INDEX_SMALL }, aoqi@0: aoqi@0: // %10110000 to %10110011 CII medium index aoqi@0: { 0xB3, CII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %10110100 to %10110111 CII large index aoqi@0: { 0xB7, CII_INDEX_LARGE }, aoqi@0: aoqi@0: // %10111000 CII very large index aoqi@0: { 0xB8, CII_INDEX_LARGE_LARGE }, aoqi@0: aoqi@0: // %10111001 to %11000111 ILLEGAL aoqi@0: { 0xC7, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11001000 to %11001011 aoqi@0: { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II }, aoqi@0: aoqi@0: // %11001100 to %11100000 ILLEGAL aoqi@0: { 0xE0, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11100001 processing instruction aoqi@0: { 0xE1, PROCESSING_INSTRUCTION_II }, aoqi@0: aoqi@0: // %11100010 comment aoqi@0: { 0xE2, COMMENT_II}, aoqi@0: aoqi@0: // %111000011 to %11101111 aoqi@0: { 0xEF, STATE_ILLEGAL }, aoqi@0: aoqi@0: // Terminators aoqi@0: aoqi@0: // %11110000 single terminator aoqi@0: { 0xF0, TERMINATOR_SINGLE }, aoqi@0: aoqi@0: // %11110000 to %11111110 ILLEGAL aoqi@0: { 0xFE, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11111111 double terminator aoqi@0: { 0xFF, TERMINATOR_DOUBLE } aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: // AII states aoqi@0: public final static int AII_INDEX_SMALL = 0; aoqi@0: public final static int AII_INDEX_MEDIUM = 1; aoqi@0: public final static int AII_INDEX_LARGE = 2; aoqi@0: public final static int AII_LITERAL = 3; aoqi@0: public final static int AII_TERMINATOR_SINGLE = 4; aoqi@0: public final static int AII_TERMINATOR_DOUBLE = 5; aoqi@0: aoqi@0: private static final int[] AII = new int[256]; aoqi@0: aoqi@0: private static final int[][] AII_RANGES = { aoqi@0: // %00000000 to %00111111 AII small index aoqi@0: { 0x3F, AII_INDEX_SMALL }, aoqi@0: aoqi@0: // %01000000 to %01011111 AII medium index aoqi@0: { 0x5F, AII_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %01100000 to %01101111 AII large index aoqi@0: { 0x6F, AII_INDEX_LARGE }, aoqi@0: aoqi@0: // %01110000 to %01110111 ILLEGAL aoqi@0: { 0x77, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111000 AII literal (no prefix, no namespace) aoqi@0: // %01111001 AII literal (no prefix, namespace) aoqi@0: { 0x79, AII_LITERAL }, aoqi@0: aoqi@0: // %01111010 ILLEGAL aoqi@0: { 0x7A, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01111011 AII literal (prefix, namespace) aoqi@0: { 0x7B, AII_LITERAL }, aoqi@0: aoqi@0: // %10000000 to %11101111 ILLEGAL aoqi@0: { 0xEF, STATE_ILLEGAL }, aoqi@0: aoqi@0: // Terminators aoqi@0: aoqi@0: // %11110000 single terminator aoqi@0: { 0xF0, AII_TERMINATOR_SINGLE }, aoqi@0: aoqi@0: // %11110000 to %11111110 ILLEGAL aoqi@0: { 0xFE, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11111111 double terminator aoqi@0: { 0xFF, AII_TERMINATOR_DOUBLE } aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: // AII value states aoqi@0: public final static int NISTRING_UTF8_SMALL_LENGTH = 0; aoqi@0: public final static int NISTRING_UTF8_MEDIUM_LENGTH = 1; aoqi@0: public final static int NISTRING_UTF8_LARGE_LENGTH = 2; aoqi@0: public final static int NISTRING_UTF16_SMALL_LENGTH = 3; aoqi@0: public final static int NISTRING_UTF16_MEDIUM_LENGTH = 4; aoqi@0: public final static int NISTRING_UTF16_LARGE_LENGTH = 5; aoqi@0: public final static int NISTRING_RA = 6; aoqi@0: public final static int NISTRING_EA = 7; aoqi@0: public final static int NISTRING_INDEX_SMALL = 8; aoqi@0: public final static int NISTRING_INDEX_MEDIUM = 9; aoqi@0: public final static int NISTRING_INDEX_LARGE = 10; aoqi@0: public final static int NISTRING_EMPTY = 11; aoqi@0: aoqi@0: private static final int[] NISTRING = new int[256]; aoqi@0: aoqi@0: private static final int[][] NISTRING_RANGES = { aoqi@0: // UTF-8 string aoqi@0: aoqi@0: // %00000000 to %00000111 UTF-8 no add to table small length aoqi@0: { 0x07, NISTRING_UTF8_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00001000 UTF-8 no add to table medium length aoqi@0: { 0x08, NISTRING_UTF8_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %00001001 to %00001011 ILLEGAL aoqi@0: { 0x0B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00001100 UTF-8 no add to table large length aoqi@0: { 0x0C, NISTRING_UTF8_LARGE_LENGTH }, aoqi@0: aoqi@0: // %00001101 to %00001111 ILLEGAL aoqi@0: { 0x0F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // UTF-16 string aoqi@0: aoqi@0: // %00010000 to %00010111 UTF-16 no add to table small length aoqi@0: { 0x17, NISTRING_UTF16_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00001000 UTF-16 no add to table medium length aoqi@0: { 0x18, NISTRING_UTF16_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %00011001 to %00011011 ILLEGAL aoqi@0: { 0x1B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %00011100 UTF-16 no add to table large length aoqi@0: { 0x1C, NISTRING_UTF16_LARGE_LENGTH }, aoqi@0: aoqi@0: // %00011101 to %00011111 ILLEGAL aoqi@0: { 0x1F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // Restricted alphabet aoqi@0: aoqi@0: // %00100000 to %00101111 RA no add to table small length aoqi@0: { 0x2F, NISTRING_RA }, aoqi@0: aoqi@0: // Encoding algorithm aoqi@0: aoqi@0: // %00110000 to %00111111 EA no add to table aoqi@0: { 0x3F, NISTRING_EA }, aoqi@0: aoqi@0: // UTF-8 string, add to table aoqi@0: aoqi@0: // %01000000 to %01000111 UTF-8 add to table small length aoqi@0: { 0x47, NISTRING_UTF8_SMALL_LENGTH }, aoqi@0: aoqi@0: // %01001000 UTF-8 add to table medium length aoqi@0: { 0x48, NISTRING_UTF8_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %01001001 to %01001011 ILLEGAL aoqi@0: { 0x4B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01001100 UTF-8 add to table large length aoqi@0: { 0x4C, NISTRING_UTF8_LARGE_LENGTH }, aoqi@0: aoqi@0: // %01001101 to %01001111 ILLEGAL aoqi@0: { 0x4F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // UTF-16 string, add to table aoqi@0: aoqi@0: // %01010000 to %01010111 UTF-16 add to table small length aoqi@0: { 0x57, NISTRING_UTF16_SMALL_LENGTH }, aoqi@0: aoqi@0: // %01001000 UTF-16 add to table medium length aoqi@0: { 0x58, NISTRING_UTF16_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %01011001 to %01011011 ILLEGAL aoqi@0: { 0x5B, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01011100 UTF-16 add to table large length aoqi@0: { 0x5C, NISTRING_UTF16_LARGE_LENGTH }, aoqi@0: aoqi@0: // %01011101 to %01011111 ILLEGAL aoqi@0: { 0x5F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // Restricted alphabet, add to table aoqi@0: aoqi@0: // %01100000 to %01101111 RA no add to table small length aoqi@0: { 0x6F, NISTRING_RA }, aoqi@0: aoqi@0: // Encoding algorithm, add to table aoqi@0: aoqi@0: // %01110000 to %01111111 EA add to table aoqi@0: { 0x7F, NISTRING_EA }, aoqi@0: aoqi@0: // Index aoqi@0: aoqi@0: // %10000000 to %10111111 index small aoqi@0: { 0xBF, NISTRING_INDEX_SMALL }, aoqi@0: aoqi@0: // %11000000 to %11011111 index medium aoqi@0: { 0xDF, NISTRING_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %11100000 to %11101111 index large aoqi@0: { 0xEF, NISTRING_INDEX_LARGE }, aoqi@0: aoqi@0: // %11110000 to %11111110 ILLEGAL aoqi@0: { 0xFE, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11111111 Empty value aoqi@0: { 0xFF, NISTRING_EMPTY }, aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: /* package */ final static int ISTRING_SMALL_LENGTH = 0; aoqi@0: /* package */ final static int ISTRING_MEDIUM_LENGTH = 1; aoqi@0: /* package */ final static int ISTRING_LARGE_LENGTH = 2; aoqi@0: /* package */ final static int ISTRING_INDEX_SMALL = 3; aoqi@0: /* package */ final static int ISTRING_INDEX_MEDIUM = 4; aoqi@0: /* package */ final static int ISTRING_INDEX_LARGE = 5; aoqi@0: aoqi@0: private static final int[] ISTRING = new int[256]; aoqi@0: aoqi@0: private static final int[][] ISTRING_RANGES = { aoqi@0: // %00000000 to %00111111 small length aoqi@0: { 0x3F, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: // %01000000 medium length aoqi@0: { 0x40, ISTRING_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %01000001 to %01011111 ILLEGAL aoqi@0: { 0x5F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01100000 large length aoqi@0: { 0x60, ISTRING_LARGE_LENGTH }, aoqi@0: aoqi@0: // %01100001 to %01111111 ILLEGAL aoqi@0: { 0x7F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %10000000 to %10111111 index small aoqi@0: { 0xBF, ISTRING_INDEX_SMALL }, aoqi@0: aoqi@0: // %11000000 to %11011111 index medium aoqi@0: { 0xDF, ISTRING_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %11100000 to %11101111 index large aoqi@0: { 0xEF, ISTRING_INDEX_LARGE }, aoqi@0: aoqi@0: // %11110000 to %11111111 ILLEGAL aoqi@0: { 0xFF, STATE_ILLEGAL }, aoqi@0: }; aoqi@0: aoqi@0: aoqi@0: /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6; aoqi@0: /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7; aoqi@0: /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8; aoqi@0: /* package */ final static int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9; aoqi@0: /* package */ final static int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10; aoqi@0: aoqi@0: private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256]; aoqi@0: aoqi@0: private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = { aoqi@0: // %00000000 to %00000001 small length aoqi@0: { 0x01, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00000010 small length aoqi@0: { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 }, aoqi@0: aoqi@0: // %00000011 small length aoqi@0: { 0x03, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00000100 small length aoqi@0: { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 }, aoqi@0: aoqi@0: // %00011011 small length aoqi@0: { 0x1B, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00011100 small length aoqi@0: { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 }, aoqi@0: aoqi@0: // %00100010 small length aoqi@0: { 0x22, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: // %00100011 small length aoqi@0: { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 }, aoqi@0: aoqi@0: // %00000101 to %00111111 small length aoqi@0: { 0x3F, ISTRING_SMALL_LENGTH }, aoqi@0: aoqi@0: aoqi@0: aoqi@0: aoqi@0: // %01000000 medium length aoqi@0: { 0x40, ISTRING_MEDIUM_LENGTH }, aoqi@0: aoqi@0: // %01000001 to %01011111 ILLEGAL aoqi@0: { 0x5F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %01100000 large length aoqi@0: { 0x60, ISTRING_LARGE_LENGTH }, aoqi@0: aoqi@0: // %01100001 to %01111111 ILLEGAL aoqi@0: { 0x7F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %10000000 index small, 0 aoqi@0: { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO }, aoqi@0: aoqi@0: // %10000000 to %10111111 index small aoqi@0: { 0xBF, ISTRING_INDEX_SMALL }, aoqi@0: aoqi@0: // %11000000 to %11011111 index medium aoqi@0: { 0xDF, ISTRING_INDEX_MEDIUM }, aoqi@0: aoqi@0: // %11100000 to %11101111 index large aoqi@0: { 0xEF, ISTRING_INDEX_LARGE }, aoqi@0: aoqi@0: // %11110000 to %11111111 ILLEGAL aoqi@0: { 0xFF, STATE_ILLEGAL }, aoqi@0: }; aoqi@0: aoqi@0: // UTF-8 states aoqi@0: /* package */ final static int UTF8_NCNAME_NCNAME = 0; aoqi@0: /* package */ final static int UTF8_NCNAME_NCNAME_CHAR = 1; aoqi@0: /* package */ final static int UTF8_TWO_BYTES = 2; aoqi@0: /* package */ final static int UTF8_THREE_BYTES = 3; aoqi@0: /* package */ final static int UTF8_FOUR_BYTES = 4; aoqi@0: aoqi@0: private static final int[] UTF8_NCNAME = new int[256]; aoqi@0: aoqi@0: private static final int[][] UTF8_NCNAME_RANGES = { aoqi@0: aoqi@0: // Basic Latin aoqi@0: aoqi@0: // %00000000 to %00101100 aoqi@0: { 0x2C, STATE_ILLEGAL }, aoqi@0: aoqi@0: // '-' '.' aoqi@0: // %%00101101 to %00101110 [#x002D-#x002E] aoqi@0: { 0x2E, UTF8_NCNAME_NCNAME_CHAR }, aoqi@0: aoqi@0: // %00101111 aoqi@0: { 0x2F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // [0-9] aoqi@0: // %0011000 to %00111001 [#x0030-#x0039] aoqi@0: { 0x39, UTF8_NCNAME_NCNAME_CHAR }, aoqi@0: aoqi@0: // %01000000 aoqi@0: { 0x40, STATE_ILLEGAL }, aoqi@0: aoqi@0: // [A-Z] aoqi@0: // %01000001 to %01011010 [#x0041-#x005A] aoqi@0: { 0x5A, UTF8_NCNAME_NCNAME }, aoqi@0: aoqi@0: // %01011110 aoqi@0: { 0x5E, STATE_ILLEGAL }, aoqi@0: aoqi@0: // '_' aoqi@0: // %01011111 [#x005F] aoqi@0: { 0x5F, UTF8_NCNAME_NCNAME }, aoqi@0: aoqi@0: // %01100000 aoqi@0: { 0x60, STATE_ILLEGAL }, aoqi@0: aoqi@0: // [a-z] aoqi@0: // %01100001 to %01111010 [#x0061-#x007A] aoqi@0: { 0x7A, UTF8_NCNAME_NCNAME }, aoqi@0: aoqi@0: // %01111011 to %01111111 aoqi@0: { 0x7F, STATE_ILLEGAL }, aoqi@0: aoqi@0: aoqi@0: // Two bytes aoqi@0: aoqi@0: // %10000000 to %11000001 aoqi@0: { 0xC1, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11000010 to %11011111 aoqi@0: { 0xDF, UTF8_TWO_BYTES }, aoqi@0: aoqi@0: aoqi@0: // Three bytes aoqi@0: aoqi@0: // %11100000 to %11101111 aoqi@0: { 0xEF, UTF8_THREE_BYTES }, aoqi@0: aoqi@0: aoqi@0: // Four bytes aoqi@0: aoqi@0: // %11110000 to %11110111 aoqi@0: { 0xF7, UTF8_FOUR_BYTES }, aoqi@0: aoqi@0: aoqi@0: // %11111000 to %11111111 aoqi@0: { 0xFF, STATE_ILLEGAL } aoqi@0: }; aoqi@0: aoqi@0: /* package */ final static int UTF8_ONE_BYTE = 1; aoqi@0: aoqi@0: private static final int[] UTF8 = new int[256]; aoqi@0: aoqi@0: private static final int[][] UTF8_RANGES = { aoqi@0: aoqi@0: // Basic Latin aoqi@0: aoqi@0: // %00000000 to %00001000 aoqi@0: { 0x08, STATE_ILLEGAL }, aoqi@0: aoqi@0: // CHARACTER TABULATION, LINE FEED aoqi@0: // %%00001001 to %00001010 [#x0009-#x000A] aoqi@0: { 0x0A, UTF8_ONE_BYTE }, aoqi@0: aoqi@0: // %00001011 to %00001100 aoqi@0: { 0x0C, STATE_ILLEGAL }, aoqi@0: aoqi@0: // CARRIAGE RETURN aoqi@0: // %00001101 [#x000D] aoqi@0: { 0x0D, UTF8_ONE_BYTE }, aoqi@0: aoqi@0: // %00001110 to %00011111 aoqi@0: { 0x1F, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %0010000 to %01111111 aoqi@0: { 0x7F, UTF8_ONE_BYTE }, aoqi@0: aoqi@0: aoqi@0: // Two bytes aoqi@0: aoqi@0: // %10000000 to %11000001 aoqi@0: { 0xC1, STATE_ILLEGAL }, aoqi@0: aoqi@0: // %11000010 to %11011111 aoqi@0: { 0xDF, UTF8_TWO_BYTES }, aoqi@0: aoqi@0: aoqi@0: // Three bytes aoqi@0: aoqi@0: // %11100000 to %11101111 aoqi@0: { 0xEF, UTF8_THREE_BYTES }, aoqi@0: aoqi@0: aoqi@0: // Four bytes aoqi@0: aoqi@0: // %11110000 to %11110111 aoqi@0: { 0xF7, UTF8_FOUR_BYTES }, aoqi@0: aoqi@0: aoqi@0: // %11111000 to %11111111 aoqi@0: { 0xFF, STATE_ILLEGAL } aoqi@0: }; aoqi@0: aoqi@0: private static void constructTable(int[] table, int[][] ranges) { aoqi@0: int start = 0x00; aoqi@0: for (int range = 0; range < ranges.length; range++) { aoqi@0: int end = ranges[range][RANGE_INDEX_END]; aoqi@0: int value = ranges[range][RANGE_INDEX_VALUE]; aoqi@0: for (int i = start; i<= end; i++) { aoqi@0: table[i] = value; aoqi@0: } aoqi@0: start = end + 1; aoqi@0: } aoqi@0: } aoqi@0: aoqi@0: public static final int DII(final int index) { aoqi@0: return DII[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int EII(final int index) { aoqi@0: return EII[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int AII(final int index) { aoqi@0: return AII[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int NISTRING(final int index) { aoqi@0: return NISTRING[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int ISTRING(final int index) { aoqi@0: return ISTRING[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int ISTRING_PREFIX_NAMESPACE(final int index) { aoqi@0: return ISTRING_PREFIX_NAMESPACE[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int UTF8(final int index) { aoqi@0: return UTF8[index]; aoqi@0: } aoqi@0: aoqi@0: public static final int UTF8_NCNAME(final int index) { aoqi@0: return UTF8_NCNAME[index]; aoqi@0: } aoqi@0: aoqi@0: static { aoqi@0: // DII aoqi@0: constructTable(DII, DII_RANGES); aoqi@0: aoqi@0: // EII aoqi@0: constructTable(EII, EII_RANGES); aoqi@0: aoqi@0: // AII aoqi@0: constructTable(AII, AII_RANGES); aoqi@0: aoqi@0: // AII Value aoqi@0: constructTable(NISTRING, NISTRING_RANGES); aoqi@0: aoqi@0: // Identifying string aoqi@0: constructTable(ISTRING, ISTRING_RANGES); aoqi@0: aoqi@0: // Identifying string aoqi@0: constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES); aoqi@0: aoqi@0: // UTF-8 NCNAME states aoqi@0: constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES); aoqi@0: aoqi@0: // UTF-8 states aoqi@0: constructTable(UTF8, UTF8_RANGES); aoqi@0: } aoqi@0: aoqi@0: private DecoderStateTables() { aoqi@0: } aoqi@0: }