jdk8-mips64-public/jaxws: comparison src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlChars.java

--1:000000000000
+:373ffda63c9a
+/*
+* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.  Oracle designates this
+* particular file as subject to the "Classpath" exception as provided
+* by Oracle in the LICENSE file that accompanied this code.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*/
+package com.sun.xml.internal.dtdparser;
/**
 * Character-classification predicates for XML documents.  Each method
 * reports whether a character may appear in a particular role (document
 * character, name character, whitespace, letter).  Such methods are used
 * both to parse and to create XML documents.
 *
 * <p>This is a static utility class; it cannot be instantiated.
 *
 * @author David Brownell
 * @version 1.1, 00/08/05
 */
public final class XmlChars {

    // can't construct instances
    private XmlChars() {
    }

    /**
     * Returns true if the argument, a UCS-4 character code, is valid in
     * XML documents.  Unicode characters fit into the low sixteen
     * bits of a UCS-4 character, and pairs of Unicode <em>surrogate
     * characters</em> can be combined to encode UCS-4 characters in
     * documents containing only Unicode.  (The <code>char</code> datatype
     * in the Java Programming Language represents Unicode characters,
     * including unpaired surrogates.)
     *
     * <p>In XML, UCS-4 characters can also be encoded by the use of
     * <em>character references</em> such as <b>&amp;#x12345678;</b>, which
     * happens to refer to a character that is disallowed in XML documents.
     * UCS-4 characters allowed in XML documents can be expressed with
     * one or two Unicode characters.
     *
     * @param ucs4char The 32-bit UCS-4 character being tested.
     * @return true if the code falls in one of the ranges of production
     *         [2] below; false otherwise (including for negative values
     *         and for the surrogate range).
     */
    public static boolean isChar(int ucs4char) {
        // [2] Char ::= #x0009 | #x000A | #x000D
        //            | [#x0020-#xD7FF]
        //    ... surrogates excluded!
        //            | [#xE000-#xFFFD]
        //            | [#x10000-#x10ffff]
        return (ucs4char >= 0x0020 && ucs4char <= 0xD7FF)
                || ucs4char == 0x000A
                || ucs4char == 0x0009
                || ucs4char == 0x000D
                || (ucs4char >= 0xE000 && ucs4char <= 0xFFFD)
                || (ucs4char >= 0x10000 && ucs4char <= 0x10ffff);
    }

    /**
     * Returns true if the character is allowed to be a non-initial
     * character in names according to the XML recommendation.
     *
     * @param c the character being tested.
     * @return true if <code>c</code> may appear after the first character
     *         of an XML name.
     * @see #isNCNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNameChar(char c) {
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
        //            | CombiningChar | Extender
        //
        // isLetter2 handles the Letter, Digit and CombiningChar
        // alternatives; punctuation and extenders are checked here.
        return isLetter2(c)
                || c == '.' || c == '-' || c == '_' || c == ':'
                || isExtender(c);
    }

    /**
     * Returns true if the character is allowed to be a non-initial
     * character in unscoped names according to the rules of the XML
     * Namespaces proposed recommendation.  Except for precluding
     * the colon (used to separate names from their scopes) these
     * characters are just as allowed by the XML recommendation.
     *
     * @param c the character being tested.
     * @return true if <code>c</code> is a name character other than ':'.
     * @see #isNameChar(char)
     * @see #isLetter(char)
     */
    public static boolean isNCNameChar(char c) {
        // [NC 5] NCNameChar ::= Letter | Digit | '.' | '-' | '_'
        //            | CombiningChar | Extender
        return c != ':' && isNameChar(c);
    }

    /**
     * Returns true if the character is allowed where XML supports
     * whitespace characters, false otherwise.
     *
     * @param c the character being tested.
     * @return true for space, tab, line feed and carriage return only.
     */
    public static boolean isSpace(char c) {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    /*
     * NOTE:  java.lang.Character.getType() values are:
     *
     * UNASSIGNED                    = 0,
     *
     * UPPERCASE_LETTER            = 1,    // Lu
     * LOWERCASE_LETTER            = 2,    // Ll
     * TITLECASE_LETTER            = 3,    // Lt
     * MODIFIER_LETTER             = 4,    // Lm
     * OTHER_LETTER                = 5,    // Lo
     * NON_SPACING_MARK            = 6,    // Mn
     * ENCLOSING_MARK              = 7,    // Me
     * COMBINING_SPACING_MARK      = 8,    // Mc
     * DECIMAL_DIGIT_NUMBER        = 9,    // Nd
     * LETTER_NUMBER               = 10,   // Nl
     * OTHER_NUMBER                = 11,   // No
     * SPACE_SEPARATOR             = 12,   // Zs
     * LINE_SEPARATOR              = 13,   // Zl
     * PARAGRAPH_SEPARATOR         = 14,   // Zp
     * CONTROL                     = 15,   // Cc
     * FORMAT                      = 16,   // Cf
     *                         // 17 reserved for proposed Ci category
     * PRIVATE_USE                 = 18,   // Co
     * SURROGATE                   = 19,   // Cs
     * DASH_PUNCTUATION            = 20,   // Pd
     * START_PUNCTUATION           = 21,   // Ps
     * END_PUNCTUATION             = 22,   // Pe
     * CONNECTOR_PUNCTUATION       = 23,   // Pc
     * OTHER_PUNCTUATION           = 24,   // Po
     * MATH_SYMBOL                 = 25,   // Sm
     * CURRENCY_SYMBOL             = 26,   // Sc
     * MODIFIER_SYMBOL             = 27,   // Sk
     * OTHER_SYMBOL                = 28;   // So
     */

    /**
     * Returns true if the character is an XML "letter".  XML Names must
     * start with Letters or a few other characters, but other characters
     * in names must only satisfy the <em>isNameChar</em> predicate.
     *
     * @param c the character being tested.
     * @return true if <code>c</code> is classified as a Letter.
     * @see #isNameChar(char)
     * @see #isNCNameChar(char)
     */
    public static boolean isLetter(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat

        // Optimize the typical case: ASCII letters, plus an early
        // rejection of '/' before consulting Character.getType().
        if (c >= 'a' && c <= 'z') {
            return true;
        }
        if (c == '/') {
            return false;
        }
        if (c >= 'A' && c <= 'Z') {
            return true;
        }

        // Since the tables are too ridiculous to use in code,
        // we're using the footnotes here to drive this test.
        switch (Character.getType(c)) {
            // app. B footnote says these are 'name start' chars ...
            case Character.LOWERCASE_LETTER:        // Ll
            case Character.UPPERCASE_LETTER:        // Lu
            case Character.OTHER_LETTER:            // Lo
            case Character.TITLECASE_LETTER:        // Lt
            case Character.LETTER_NUMBER:           // Nl
                // OK, here we just have some exceptions to check...
                return passesNameExceptions(c);

            default:
                // check for some exceptions:  these are "alphabetic"
                return (c >= 0x02bb && c <= 0x02c1)
                        || c == 0x0559 || c == 0x06e5 || c == 0x06e6;
        }
    }

    // Shared exception filter applied by isLetter and isLetter2 once the
    // general category has qualified a character: rejects "compatibility"
    // characters and the combiners 0x20dd-0x20e0.
    private static boolean passesNameExceptions(char c) {
        return !isCompatibilityChar(c)
                // per "5.14 of Unicode", rule out some combiners
                && !(c >= 0x20dd && c <= 0x20e0);
    }

    //
    // XML 1.0 discourages "compatibility" characters in names; these
    // were defined to permit passing through some information stored in
    // older non-Unicode character sets.  These always have alternative
    // representations in Unicode, e.g. using combining chars.
    //
    private static boolean isCompatibilityChar(char c) {
        // the numerous comparisons here seem unavoidable,
        // but the switch (on the high byte of the char) can reduce
        // the number which must actually be executed.
        switch ((c >> 8) & 0x0ff) {
            case 0x00:
                // ISO Latin/1 has a few compatibility characters
                return c == 0x00aa || c == 0x00b5 || c == 0x00ba;

            case 0x01:
                // as do Latin Extended A and (parts of) B
                return (c >= 0x0132 && c <= 0x0133)
                        || (c >= 0x013f && c <= 0x0140)
                        || c == 0x0149
                        || c == 0x017f
                        || (c >= 0x01c4 && c <= 0x01cc)
                        || (c >= 0x01f1 && c <= 0x01f3);

            case 0x02:
                // some spacing modifiers
                return (c >= 0x02b0 && c <= 0x02b8)
                        || (c >= 0x02e0 && c <= 0x02e4);

            case 0x03:
                return c == 0x037a;                     // Greek

            case 0x05:
                return c == 0x0587;                     // Armenian

            case 0x0e:
                return c >= 0x0edc && c <= 0x0edd;      // Laotian

            case 0x11:
                // big chunks of Hangul Jamo are all "compatibility"
                return c == 0x1101
                        || c == 0x1104
                        || c == 0x1108
                        || c == 0x110a
                        || c == 0x110d
                        || (c >= 0x1113 && c <= 0x113b)
                        || c == 0x113d
                        || c == 0x113f
                        || (c >= 0x1141 && c <= 0x114b)
                        || c == 0x114d
                        || c == 0x114f
                        || (c >= 0x1151 && c <= 0x1153)
                        || (c >= 0x1156 && c <= 0x1158)
                        || c == 0x1162
                        || c == 0x1164
                        || c == 0x1166
                        || c == 0x1168
                        || (c >= 0x116a && c <= 0x116c)
                        || (c >= 0x116f && c <= 0x1171)
                        || c == 0x1174
                        || (c >= 0x1176 && c <= 0x119d)
                        || (c >= 0x119f && c <= 0x11a2)
                        || (c >= 0x11a9 && c <= 0x11aa)
                        || (c >= 0x11ac && c <= 0x11ad)
                        || (c >= 0x11b0 && c <= 0x11b6)
                        || c == 0x11b9
                        || c == 0x11bb
                        || (c >= 0x11c3 && c <= 0x11ea)
                        || (c >= 0x11ec && c <= 0x11ef)
                        || (c >= 0x11f1 && c <= 0x11f8);

            case 0x20:
                return c == 0x207f;                     // superscript

            case 0x21:
                // various letterlike symbols
                return c == 0x2102
                        || c == 0x2107
                        || (c >= 0x210a && c <= 0x2113)
                        || c == 0x2115
                        || (c >= 0x2118 && c <= 0x211d)
                        || c == 0x2124
                        || c == 0x2128
                        || (c >= 0x212c && c <= 0x212d)
                        || (c >= 0x212f && c <= 0x2138)
                        // most Roman numerals (less 1K, 5K, 10K)
                        || (c >= 0x2160 && c <= 0x217f);

            case 0x30:
                // some Hiragana
                return c >= 0x309b && c <= 0x309c;

            case 0x31:
                // all Hangul Compatibility Jamo
                return c >= 0x3131 && c <= 0x318e;

            case 0xf9:
            case 0xfa:
            case 0xfb:
            case 0xfc:
            case 0xfd:
            case 0xfe:
            case 0xff:
                // the whole "compatibility" area is for that purpose!
                return true;

            default:
                // most of Unicode isn't flagged as being for compatibility
                return false;
        }
    }

    // guts of isNameChar/isNCNameChar
    private static boolean isLetter2(char c) {
        // [84] Letter ::= BaseChar | Ideographic
        // [85] BaseChar ::= ... too much to repeat
        // [86] Ideographic ::= ... too much to repeat
        // [87] CombiningChar ::= ... too much to repeat

        // Optimize the typical case: ASCII letters, plus an early
        // rejection of '>' before consulting Character.getType().
        if (c >= 'a' && c <= 'z') {
            return true;
        }
        if (c == '>') {
            return false;
        }
        if (c >= 'A' && c <= 'Z') {
            return true;
        }

        // Since the tables are too ridiculous to use in code,
        // we're using the footnotes here to drive this test.
        switch (Character.getType(c)) {
            // app. B footnote says these are 'name start' chars ...
            case Character.LOWERCASE_LETTER:        // Ll
            case Character.UPPERCASE_LETTER:        // Lu
            case Character.OTHER_LETTER:            // Lo
            case Character.TITLECASE_LETTER:        // Lt
            case Character.LETTER_NUMBER:           // Nl
            // ... and these are name characters 'other
            // than name start characters'
            case Character.COMBINING_SPACING_MARK:  // Mc
            case Character.ENCLOSING_MARK:          // Me
            case Character.NON_SPACING_MARK:        // Mn
            case Character.MODIFIER_LETTER:         // Lm
            case Character.DECIMAL_DIGIT_NUMBER:    // Nd
                // OK, here we just have some exceptions to check...
                return passesNameExceptions(c);

            default:
                // added a character ...
                return c == 0x0387;
        }
    }

    // NOTE: not referenced by any other method in this class; digits in
    // names are accepted through the DECIMAL_DIGIT_NUMBER case of
    // isLetter2.  Retained as the direct rendering of production [88].
    private static boolean isDigit(char c) {
        // [88] Digit ::= ...
        //
        // java.lang.Character.isDigit is correct from the XML point
        // of view except that it allows "fullwidth" digits.
        //
        return Character.isDigit(c)
                && !((c >= 0xff10) && (c <= 0xff19));
    }

    private static boolean isExtender(char c) {
        // [89] Extender ::= ...
        return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387
                || c == 0x0640 || c == 0x0e46 || c == 0x0ec6
                || c == 0x3005 || (c >= 0x3031 && c <= 0x3035)
                || (c >= 0x309d && c <= 0x309e)
                || (c >= 0x30fc && c <= 0x30fe);
    }
}

Mercurial > jdk8-mips64-public > jaxws / file comparison

comparison: src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlChars.java

src/share/jaxws_classes/com/sun/xml/internal/dtdparser/XmlChars.java