src/share/jaxws_classes/com/sun/xml/internal/fastinfoset/DecoderStateTables.java

Thu, 12 Oct 2017 19:44:07 +0800

author
aoqi
date
Thu, 12 Oct 2017 19:44:07 +0800
changeset 760
e530533619ec
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 2004, 2011, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 *
aoqi@0 25 * THIS FILE WAS MODIFIED BY SUN MICROSYSTEMS, INC.
aoqi@0 26 */
aoqi@0 27
aoqi@0 28 package com.sun.xml.internal.fastinfoset;
aoqi@0 29
/**
 * Static lookup tables used while decoding a fast infoset document.
 *
 * <p>Each table has exactly 256 entries and maps an unsigned octet value
 * (0x00 to 0xFF) to one of the state constants declared in this class, or to
 * {@link #STATE_ILLEGAL} when the octet is not valid in that context.
 *
 * <p>The tables are described compactly as lists of <em>ranges</em>. A range is
 * a two element array <code>{ end, state }</code>: every octet from the end of
 * the previous range plus one (or from 0x00 for the first range) up to and
 * including <code>end</code> is assigned <code>state</code>. The ranges of a
 * table must therefore be listed in ascending order of <code>end</code> and
 * the last range must end at 0xFF. The ranges are expanded into the 256 entry
 * tables once, when the class is initialized.
 *
 * <p>In the comments of the range lists a leading <code>%</code> introduces a
 * bit pattern, e.g. <code>%00011111</code> is 0x1F.
 */
public class DecoderStateTables {
    /** Position, within a range entry, of the inclusive upper bound of the range. */
    private static final int RANGE_INDEX_END = 0;

    /** Position, within a range entry, of the state assigned to the range. */
    private static final int RANGE_INDEX_VALUE = 1;

    /** State assigned to octets that are not legal in the context of a table. */
    public static final int STATE_ILLEGAL = 255;

    /** State reserved for unsupported octets; no table in this class assigns it. */
    public static final int STATE_UNSUPPORTED = 254;

    // EII child states (shared by the DII and EII tables)
    public static final int EII_NO_AIIS_INDEX_SMALL = 0;
    public static final int EII_AIIS_INDEX_SMALL = 1;
    public static final int EII_INDEX_MEDIUM = 2;
    public static final int EII_INDEX_LARGE = 3;
    public static final int EII_NAMESPACES = 4;
    public static final int EII_LITERAL = 5;
    public static final int CII_UTF8_SMALL_LENGTH = 6;
    public static final int CII_UTF8_MEDIUM_LENGTH = 7;
    public static final int CII_UTF8_LARGE_LENGTH = 8;
    public static final int CII_UTF16_SMALL_LENGTH = 9;
    public static final int CII_UTF16_MEDIUM_LENGTH = 10;
    public static final int CII_UTF16_LARGE_LENGTH = 11;
    public static final int CII_RA = 12;
    public static final int CII_EA = 13;
    public static final int CII_INDEX_SMALL = 14;
    public static final int CII_INDEX_MEDIUM = 15;
    public static final int CII_INDEX_LARGE = 16;
    public static final int CII_INDEX_LARGE_LARGE = 17;
    public static final int COMMENT_II = 18;
    public static final int PROCESSING_INSTRUCTION_II = 19;
    public static final int DOCUMENT_TYPE_DECLARATION_II = 20;
    public static final int UNEXPANDED_ENTITY_REFERENCE_II = 21;
    public static final int TERMINATOR_SINGLE = 22;
    public static final int TERMINATOR_DOUBLE = 23;

    /** Expanded DII table, filled from {@link #DII_RANGES} at class initialization. */
    private static final int[] DII = new int[256];

    private static final int[][] DII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },

        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },

        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },

        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },

        // %00111000  EII namespaces
        { 0x38, EII_NAMESPACES },

        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },

        // %00111100  EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },

        // %00111101  EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },

        // %00111110  ILLEGAL
        { 0x3E, STATE_ILLEGAL },

        // %00111111  EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },

        // %01100000 to %01100111  EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },

        // %01101000 to %01101111  EII attributes large index
        // %01110000               EII attributes very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },

        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  EII attributes namespaces
        { 0x78, EII_NAMESPACES },

        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },

        // %01111100  EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },

        // %01111101  EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },

        // %01111110  ILLEGAL
        { 0x7E, STATE_ILLEGAL },

        // %01111111  EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // %10000000 to %11000011  ILLEGAL
        { 0xC3, STATE_ILLEGAL },

        // %11000100 to %11000111  document type declaration
        { 0xC7, DOCUMENT_TYPE_DECLARATION_II },

        // %11001000 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },

        // %11100001  processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },

        // %11100010  comment
        { 0xE2, COMMENT_II },

        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };

    /** Expanded EII table, filled from {@link #EII_RANGES} at class initialization. */
    private static final int[] EII = new int[256];

    private static final int[][] EII_RANGES = {
        // EII

        // %00000000 to %00011111  EII no attributes small index
        { 0x1F, EII_NO_AIIS_INDEX_SMALL },

        // %00100000 to %00100111  EII medium index
        { 0x27, EII_INDEX_MEDIUM },

        // %00101000 to %00101111  EII large index
        // %00110000               EII very large index
        // %00101000 to %00110000
        { 0x30, EII_INDEX_LARGE },

        // %00110001 to %00110111  ILLEGAL
        { 0x37, STATE_ILLEGAL },

        // %00111000  EII namespaces
        { 0x38, EII_NAMESPACES },

        // %00111001 to %00111011  ILLEGAL
        { 0x3B, STATE_ILLEGAL },

        // %00111100  EII literal (no prefix, no namespace)
        { 0x3C, EII_LITERAL },

        // %00111101  EII literal (no prefix, namespace)
        { 0x3D, EII_LITERAL },

        // %00111110  ILLEGAL
        { 0x3E, STATE_ILLEGAL },

        // %00111111  EII literal (prefix, namespace)
        { 0x3F, EII_LITERAL },

        // %01000000 to %01011111  EII attributes small index
        { 0x5F, EII_AIIS_INDEX_SMALL },

        // %01100000 to %01100111  EII attributes medium index
        { 0x67, EII_INDEX_MEDIUM },

        // %01101000 to %01101111  EII attributes large index
        // %01110000               EII attributes very large index
        // %01101000 to %01110000
        { 0x70, EII_INDEX_LARGE },

        // %01110001 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  EII attributes namespaces
        { 0x78, EII_NAMESPACES },

        // %01111001 to %01111011  ILLEGAL
        { 0x7B, STATE_ILLEGAL },

        // %01111100  EII attributes literal (no prefix, no namespace)
        { 0x7C, EII_LITERAL },

        // %01111101  EII attributes literal (no prefix, namespace)
        { 0x7D, EII_LITERAL },

        // %01111110  ILLEGAL
        { 0x7E, STATE_ILLEGAL },

        // %01111111  EII attributes literal (prefix, namespace)
        { 0x7F, EII_LITERAL },

        // CII

        // UTF-8 string

        // %10000000 to %10000001  CII UTF-8 no add to table small length
        { 0x81, CII_UTF8_SMALL_LENGTH },

        // %10000010  CII UTF-8 no add to table medium length
        { 0x82, CII_UTF8_MEDIUM_LENGTH },

        // %10000011  CII UTF-8 no add to table large length
        { 0x83, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string

        // %10000100 to %10000101  CII UTF-16 no add to table small length
        { 0x85, CII_UTF16_SMALL_LENGTH },

        // %10000110  CII UTF-16 no add to table medium length
        { 0x86, CII_UTF16_MEDIUM_LENGTH },

        // %10000111  CII UTF-16 no add to table large length
        { 0x87, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet

        // %10001000 to %10001011  CII RA no add to table
        { 0x8B, CII_RA },

        // Encoding algorithm

        // %10001100 to %10001111  CII EA no add to table
        { 0x8F, CII_EA },

        // UTF-8 string, add to table

        // %10010000 to %10010001  CII UTF-8 add to table small length
        { 0x91, CII_UTF8_SMALL_LENGTH },

        // %10010010  CII UTF-8 add to table medium length
        { 0x92, CII_UTF8_MEDIUM_LENGTH },

        // %10010011  CII UTF-8 add to table large length
        { 0x93, CII_UTF8_LARGE_LENGTH },

        // UTF-16 string, add to table

        // %10010100 to %10010101  CII UTF-16 add to table small length
        { 0x95, CII_UTF16_SMALL_LENGTH },

        // %10010110  CII UTF-16 add to table medium length
        { 0x96, CII_UTF16_MEDIUM_LENGTH },

        // %10010111  CII UTF-16 add to table large length
        { 0x97, CII_UTF16_LARGE_LENGTH },

        // Restricted alphabet, add to table

        // %10011000 to %10011011  CII RA add to table
        { 0x9B, CII_RA },

        // Encoding algorithm, add to table

        // %10011100 to %10011111  CII EA add to table
        { 0x9F, CII_EA },

        // Index

        // %10100000 to %10101111  CII small index
        { 0xAF, CII_INDEX_SMALL },

        // %10110000 to %10110011  CII medium index
        { 0xB3, CII_INDEX_MEDIUM },

        // %10110100 to %10110111  CII large index
        { 0xB7, CII_INDEX_LARGE },

        // %10111000  CII very large index
        { 0xB8, CII_INDEX_LARGE_LARGE },

        // %10111001 to %11000111  ILLEGAL
        { 0xC7, STATE_ILLEGAL },

        // %11001000 to %11001011  unexpanded entity reference
        { 0xCB, UNEXPANDED_ENTITY_REFERENCE_II },

        // %11001100 to %11100000  ILLEGAL
        { 0xE0, STATE_ILLEGAL },

        // %11100001  processing instruction
        { 0xE1, PROCESSING_INSTRUCTION_II },

        // %11100010  comment
        { 0xE2, COMMENT_II },

        // %11100011 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, TERMINATOR_DOUBLE }
    };


    // AII states
    public static final int AII_INDEX_SMALL = 0;
    public static final int AII_INDEX_MEDIUM = 1;
    public static final int AII_INDEX_LARGE = 2;
    public static final int AII_LITERAL = 3;
    public static final int AII_TERMINATOR_SINGLE = 4;
    public static final int AII_TERMINATOR_DOUBLE = 5;

    /** Expanded AII table, filled from {@link #AII_RANGES} at class initialization. */
    private static final int[] AII = new int[256];

    private static final int[][] AII_RANGES = {
        // %00000000 to %00111111  AII small index
        { 0x3F, AII_INDEX_SMALL },

        // %01000000 to %01011111  AII medium index
        { 0x5F, AII_INDEX_MEDIUM },

        // %01100000 to %01101111  AII large index
        { 0x6F, AII_INDEX_LARGE },

        // %01110000 to %01110111  ILLEGAL
        { 0x77, STATE_ILLEGAL },

        // %01111000  AII literal (no prefix, no namespace)
        // %01111001  AII literal (no prefix, namespace)
        { 0x79, AII_LITERAL },

        // %01111010  ILLEGAL
        { 0x7A, STATE_ILLEGAL },

        // %01111011  AII literal (prefix, namespace)
        { 0x7B, AII_LITERAL },

        // %01111100 to %11101111  ILLEGAL
        { 0xEF, STATE_ILLEGAL },

        // Terminators

        // %11110000  single terminator
        { 0xF0, AII_TERMINATOR_SINGLE },

        // %11110001 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  double terminator
        { 0xFF, AII_TERMINATOR_DOUBLE }
    };


    // AII value states
    public static final int NISTRING_UTF8_SMALL_LENGTH = 0;
    public static final int NISTRING_UTF8_MEDIUM_LENGTH = 1;
    public static final int NISTRING_UTF8_LARGE_LENGTH = 2;
    public static final int NISTRING_UTF16_SMALL_LENGTH = 3;
    public static final int NISTRING_UTF16_MEDIUM_LENGTH = 4;
    public static final int NISTRING_UTF16_LARGE_LENGTH = 5;
    public static final int NISTRING_RA = 6;
    public static final int NISTRING_EA = 7;
    public static final int NISTRING_INDEX_SMALL = 8;
    public static final int NISTRING_INDEX_MEDIUM = 9;
    public static final int NISTRING_INDEX_LARGE = 10;
    public static final int NISTRING_EMPTY = 11;

    /** Expanded NISTRING table, filled from {@link #NISTRING_RANGES} at class initialization. */
    private static final int[] NISTRING = new int[256];

    private static final int[][] NISTRING_RANGES = {
        // UTF-8 string

        // %00000000 to %00000111  UTF-8 no add to table small length
        { 0x07, NISTRING_UTF8_SMALL_LENGTH },

        // %00001000  UTF-8 no add to table medium length
        { 0x08, NISTRING_UTF8_MEDIUM_LENGTH },

        // %00001001 to %00001011  ILLEGAL
        { 0x0B, STATE_ILLEGAL },

        // %00001100  UTF-8 no add to table large length
        { 0x0C, NISTRING_UTF8_LARGE_LENGTH },

        // %00001101 to %00001111  ILLEGAL
        { 0x0F, STATE_ILLEGAL },

        // UTF-16 string

        // %00010000 to %00010111  UTF-16 no add to table small length
        { 0x17, NISTRING_UTF16_SMALL_LENGTH },

        // %00011000  UTF-16 no add to table medium length
        { 0x18, NISTRING_UTF16_MEDIUM_LENGTH },

        // %00011001 to %00011011  ILLEGAL
        { 0x1B, STATE_ILLEGAL },

        // %00011100  UTF-16 no add to table large length
        { 0x1C, NISTRING_UTF16_LARGE_LENGTH },

        // %00011101 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // Restricted alphabet

        // %00100000 to %00101111  RA no add to table
        { 0x2F, NISTRING_RA },

        // Encoding algorithm

        // %00110000 to %00111111  EA no add to table
        { 0x3F, NISTRING_EA },

        // UTF-8 string, add to table

        // %01000000 to %01000111  UTF-8 add to table small length
        { 0x47, NISTRING_UTF8_SMALL_LENGTH },

        // %01001000  UTF-8 add to table medium length
        { 0x48, NISTRING_UTF8_MEDIUM_LENGTH },

        // %01001001 to %01001011  ILLEGAL
        { 0x4B, STATE_ILLEGAL },

        // %01001100  UTF-8 add to table large length
        { 0x4C, NISTRING_UTF8_LARGE_LENGTH },

        // %01001101 to %01001111  ILLEGAL
        { 0x4F, STATE_ILLEGAL },

        // UTF-16 string, add to table

        // %01010000 to %01010111  UTF-16 add to table small length
        { 0x57, NISTRING_UTF16_SMALL_LENGTH },

        // %01011000  UTF-16 add to table medium length
        { 0x58, NISTRING_UTF16_MEDIUM_LENGTH },

        // %01011001 to %01011011  ILLEGAL
        { 0x5B, STATE_ILLEGAL },

        // %01011100  UTF-16 add to table large length
        { 0x5C, NISTRING_UTF16_LARGE_LENGTH },

        // %01011101 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // Restricted alphabet, add to table

        // %01100000 to %01101111  RA add to table
        { 0x6F, NISTRING_RA },

        // Encoding algorithm, add to table

        // %01110000 to %01111111  EA add to table
        { 0x7F, NISTRING_EA },

        // Index

        // %10000000 to %10111111  index small
        { 0xBF, NISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, NISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, NISTRING_INDEX_LARGE },

        // %11110000 to %11111110  ILLEGAL
        { 0xFE, STATE_ILLEGAL },

        // %11111111  Empty value
        { 0xFF, NISTRING_EMPTY },
    };


    // Identifying string states
    /* package */ static final int ISTRING_SMALL_LENGTH = 0;
    /* package */ static final int ISTRING_MEDIUM_LENGTH = 1;
    /* package */ static final int ISTRING_LARGE_LENGTH = 2;
    /* package */ static final int ISTRING_INDEX_SMALL = 3;
    /* package */ static final int ISTRING_INDEX_MEDIUM = 4;
    /* package */ static final int ISTRING_INDEX_LARGE = 5;

    /** Expanded ISTRING table, filled from {@link #ISTRING_RANGES} at class initialization. */
    private static final int[] ISTRING = new int[256];

    private static final int[][] ISTRING_RANGES = {
        // %00000000 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000  medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },

        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // %01100000  large length
        { 0x60, ISTRING_LARGE_LENGTH },

        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },

        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };


    // Additional identifying string states used only by the prefix/namespace
    // table; they continue the numbering of the ISTRING_* states above.
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_3 = 6;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_5 = 7;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_29 = 8;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_LENGTH_36 = 9;
    /* package */ static final int ISTRING_PREFIX_NAMESPACE_INDEX_ZERO = 10;

    /**
     * Expanded prefix/namespace identifying string table, filled from
     * {@link #ISTRING_PREFIX_NAMESPACE_RANGES} at class initialization.
     */
    private static final int[] ISTRING_PREFIX_NAMESPACE = new int[256];

    private static final int[][] ISTRING_PREFIX_NAMESPACE_RANGES = {
        // %00000000 to %00000001  small length
        { 0x01, ISTRING_SMALL_LENGTH },

        // %00000010  small length, singled out as LENGTH_3
        { 0x02, ISTRING_PREFIX_NAMESPACE_LENGTH_3 },

        // %00000011  small length
        { 0x03, ISTRING_SMALL_LENGTH },

        // %00000100  small length, singled out as LENGTH_5
        { 0x04, ISTRING_PREFIX_NAMESPACE_LENGTH_5 },

        // %00000101 to %00011011  small length
        { 0x1B, ISTRING_SMALL_LENGTH },

        // %00011100  small length, singled out as LENGTH_29
        { 0x1C, ISTRING_PREFIX_NAMESPACE_LENGTH_29 },

        // %00011101 to %00100010  small length
        { 0x22, ISTRING_SMALL_LENGTH },

        // %00100011  small length, singled out as LENGTH_36
        { 0x23, ISTRING_PREFIX_NAMESPACE_LENGTH_36 },

        // %00100100 to %00111111  small length
        { 0x3F, ISTRING_SMALL_LENGTH },

        // %01000000  medium length
        { 0x40, ISTRING_MEDIUM_LENGTH },

        // %01000001 to %01011111  ILLEGAL
        { 0x5F, STATE_ILLEGAL },

        // %01100000  large length
        { 0x60, ISTRING_LARGE_LENGTH },

        // %01100001 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },

        // %10000000  index small, 0
        { 0x80, ISTRING_PREFIX_NAMESPACE_INDEX_ZERO },

        // %10000001 to %10111111  index small
        { 0xBF, ISTRING_INDEX_SMALL },

        // %11000000 to %11011111  index medium
        { 0xDF, ISTRING_INDEX_MEDIUM },

        // %11100000 to %11101111  index large
        { 0xEF, ISTRING_INDEX_LARGE },

        // %11110000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL },
    };

    // UTF-8 states
    /* package */ static final int UTF8_NCNAME_NCNAME = 0;
    /* package */ static final int UTF8_NCNAME_NCNAME_CHAR = 1;
    /* package */ static final int UTF8_TWO_BYTES = 2;
    /* package */ static final int UTF8_THREE_BYTES = 3;
    /* package */ static final int UTF8_FOUR_BYTES = 4;

    /** Expanded UTF-8 NCName table, filled from {@link #UTF8_NCNAME_RANGES} at class initialization. */
    private static final int[] UTF8_NCNAME = new int[256];

    private static final int[][] UTF8_NCNAME_RANGES = {

        // Basic Latin

        // %00000000 to %00101100  ILLEGAL
        { 0x2C, STATE_ILLEGAL },

        // '-' '.'
        // %00101101 to %00101110  [#x002D-#x002E]
        { 0x2E, UTF8_NCNAME_NCNAME_CHAR },

        // %00101111  ILLEGAL
        { 0x2F, STATE_ILLEGAL },

        // [0-9]
        // %00110000 to %00111001  [#x0030-#x0039]
        { 0x39, UTF8_NCNAME_NCNAME_CHAR },

        // %00111010 to %01000000  ILLEGAL
        { 0x40, STATE_ILLEGAL },

        // [A-Z]
        // %01000001 to %01011010  [#x0041-#x005A]
        { 0x5A, UTF8_NCNAME_NCNAME },

        // %01011011 to %01011110  ILLEGAL
        { 0x5E, STATE_ILLEGAL },

        // '_'
        // %01011111  [#x005F]
        { 0x5F, UTF8_NCNAME_NCNAME },

        // %01100000  ILLEGAL
        { 0x60, STATE_ILLEGAL },

        // [a-z]
        // %01100001 to %01111010  [#x0061-#x007A]
        { 0x7A, UTF8_NCNAME_NCNAME },

        // %01111011 to %01111111  ILLEGAL
        { 0x7F, STATE_ILLEGAL },


        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },

        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },


        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },


        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },


        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    // Used by the UTF8 table only; it has the same numeric value as
    // UTF8_NCNAME_NCNAME_CHAR, which belongs to the UTF8_NCNAME table.
    /* package */ static final int UTF8_ONE_BYTE = 1;

    /** Expanded UTF-8 table, filled from {@link #UTF8_RANGES} at class initialization. */
    private static final int[] UTF8 = new int[256];

    private static final int[][] UTF8_RANGES = {

        // Basic Latin

        // %00000000 to %00001000  ILLEGAL
        { 0x08, STATE_ILLEGAL },

        // CHARACTER TABULATION, LINE FEED
        // %00001001 to %00001010  [#x0009-#x000A]
        { 0x0A, UTF8_ONE_BYTE },

        // %00001011 to %00001100  ILLEGAL
        { 0x0C, STATE_ILLEGAL },

        // CARRIAGE RETURN
        // %00001101  [#x000D]
        { 0x0D, UTF8_ONE_BYTE },

        // %00001110 to %00011111  ILLEGAL
        { 0x1F, STATE_ILLEGAL },

        // %00100000 to %01111111
        { 0x7F, UTF8_ONE_BYTE },


        // Two bytes

        // %10000000 to %11000001  ILLEGAL
        { 0xC1, STATE_ILLEGAL },

        // %11000010 to %11011111
        { 0xDF, UTF8_TWO_BYTES },


        // Three bytes

        // %11100000 to %11101111
        { 0xEF, UTF8_THREE_BYTES },


        // Four bytes

        // %11110000 to %11110111
        { 0xF7, UTF8_FOUR_BYTES },


        // %11111000 to %11111111  ILLEGAL
        { 0xFF, STATE_ILLEGAL }
    };

    /**
     * Expands a list of ranges into a lookup table.
     *
     * <p>Each entry of <code>ranges</code> is <code>{ end, state }</code>. The
     * first range starts at index 0; every following range starts one past the
     * end of the range before it. All indexes of a range, up to and including
     * its <code>end</code>, are set to <code>state</code>.
     *
     * @param table the table to fill; must be large enough to hold the largest
     *              <code>end</code> found in <code>ranges</code>
     * @param ranges the ranges, in ascending order of <code>end</code>
     */
    private static void constructTable(int[] table, int[][] ranges) {
        int start = 0x00;
        for (int[] range : ranges) {
            final int end = range[RANGE_INDEX_END];
            final int value = range[RANGE_INDEX_VALUE];
            for (int i = start; i <= end; i++) {
                table[i] = value;
            }
            // The next range begins immediately after this one.
            start = end + 1;
        }
    }

    /**
     * Looks up the state of an octet in the DII table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the EII child state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int DII(final int index) {
        return DII[index];
    }

    /**
     * Looks up the state of an octet in the EII table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the EII child state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int EII(final int index) {
        return EII[index];
    }

    /**
     * Looks up the state of an octet in the AII table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the AII state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int AII(final int index) {
        return AII[index];
    }

    /**
     * Looks up the state of an octet in the NISTRING (AII value) table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the NISTRING state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int NISTRING(final int index) {
        return NISTRING[index];
    }

    /**
     * Looks up the state of an octet in the ISTRING (identifying string) table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the ISTRING state constants or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int ISTRING(final int index) {
        return ISTRING[index];
    }

    /**
     * Looks up the state of an octet in the prefix/namespace identifying
     * string table.
     *
     * @param index the octet value, 0 to 255
     * @return one of the ISTRING or ISTRING_PREFIX_NAMESPACE state constants,
     *         or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int ISTRING_PREFIX_NAMESPACE(final int index) {
        return ISTRING_PREFIX_NAMESPACE[index];
    }

    /**
     * Looks up the state of an octet in the UTF-8 table.
     *
     * @param index the octet value, 0 to 255
     * @return UTF8_ONE_BYTE, UTF8_TWO_BYTES, UTF8_THREE_BYTES, UTF8_FOUR_BYTES
     *         or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int UTF8(final int index) {
        return UTF8[index];
    }

    /**
     * Looks up the state of an octet in the UTF-8 NCName table.
     *
     * @param index the octet value, 0 to 255
     * @return UTF8_NCNAME_NCNAME, UTF8_NCNAME_NCNAME_CHAR, UTF8_TWO_BYTES,
     *         UTF8_THREE_BYTES, UTF8_FOUR_BYTES or {@link #STATE_ILLEGAL}
     * @throws ArrayIndexOutOfBoundsException if index is outside 0 to 255
     */
    public static final int UTF8_NCNAME(final int index) {
        return UTF8_NCNAME[index];
    }

    // Expand every range list into its 256 entry table. This block must stay
    // below the table and range declarations: static initializers run in
    // textual order and the arrays have to exist before they are filled.
    static {
        // DII
        constructTable(DII, DII_RANGES);

        // EII
        constructTable(EII, EII_RANGES);

        // AII
        constructTable(AII, AII_RANGES);

        // AII Value
        constructTable(NISTRING, NISTRING_RANGES);

        // Identifying string
        constructTable(ISTRING, ISTRING_RANGES);

        // Identifying string (prefix and namespace)
        constructTable(ISTRING_PREFIX_NAMESPACE, ISTRING_PREFIX_NAMESPACE_RANGES);

        // UTF-8 NCNAME states
        constructTable(UTF8_NCNAME, UTF8_NCNAME_RANGES);

        // UTF-8 states
        constructTable(UTF8, UTF8_RANGES);
    }

    /** Not instantiable: the class only holds static tables and accessors. */
    private DecoderStateTables() {
    }
}

mercurial