src/share/jaxws_classes/javax/xml/bind/WhiteSpaceProcessor.java

changeset 286
f50545b5e2f1
child 397
b99d7e355d4b
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/share/jaxws_classes/javax/xml/bind/WhiteSpaceProcessor.java	Tue Mar 06 16:09:35 2012 -0800
     1.3 @@ -0,0 +1,198 @@
     1.4 +/*
     1.5 + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
     1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.7 + *
     1.8 + * This code is free software; you can redistribute it and/or modify it
     1.9 + * under the terms of the GNU General Public License version 2 only, as
    1.10 + * published by the Free Software Foundation.  Oracle designates this
    1.11 + * particular file as subject to the "Classpath" exception as provided
    1.12 + * by Oracle in the LICENSE file that accompanied this code.
    1.13 + *
    1.14 + * This code is distributed in the hope that it will be useful, but WITHOUT
    1.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    1.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    1.17 + * version 2 for more details (a copy is included in the LICENSE file that
    1.18 + * accompanied this code).
    1.19 + *
    1.20 + * You should have received a copy of the GNU General Public License version
    1.21 + * 2 along with this work; if not, write to the Free Software Foundation,
    1.22 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    1.23 + *
    1.24 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    1.25 + * or visit www.oracle.com if you need additional information or have any
    1.26 + * questions.
    1.27 + */
    1.28 +
    1.29 +package javax.xml.bind;
    1.30 +
    1.31 +/**
    1.32 + * Processes white space normalization.
    1.33 + *
    1.34 + * @since 1.0
    1.35 + */
    1.36 +abstract class WhiteSpaceProcessor {
    1.37 +
    1.38 +// benchmarking (see test/src/ReplaceTest.java in the CVS Attic)
    1.39 +// showed that this code is slower than the current code.
    1.40 +//
    1.41 +//    public static String replace(String text) {
    1.42 +//        final int len = text.length();
    1.43 +//        StringBuffer result = new StringBuffer(len);
    1.44 +//
    1.45 +//        for (int i = 0; i < len; i++) {
    1.46 +//            char ch = text.charAt(i);
    1.47 +//            if (isWhiteSpace(ch))
    1.48 +//                result.append(' ');
    1.49 +//            else
    1.50 +//                result.append(ch);
    1.51 +//        }
    1.52 +//
    1.53 +//        return result.toString();
    1.54 +//    }
    1.55 +
    1.56 +    public static String replace(String text) {
    1.57 +        return replace( (CharSequence)text ).toString();
    1.58 +    }
    1.59 +
    1.60 +    /**
    1.61 +     * @since 2.0
    1.62 +     */
    1.63 +    public static CharSequence replace(CharSequence text) {
    1.64 +        int i=text.length()-1;
    1.65 +
    1.66 +        // look for the first whitespace char.
    1.67 +        while( i>=0 && !isWhiteSpaceExceptSpace(text.charAt(i)) )
    1.68 +            i--;
    1.69 +
    1.70 +        if( i<0 )
    1.71 +            // no such whitespace. replace(text)==text.
    1.72 +            return text;
    1.73 +
    1.74 +        // we now know that we need to modify the text.
    1.75 +        // allocate a char array to do it.
    1.76 +        StringBuilder buf = new StringBuilder(text);
    1.77 +
    1.78 +        buf.setCharAt(i--,' ');
    1.79 +        for( ; i>=0; i-- )
    1.80 +            if( isWhiteSpaceExceptSpace(buf.charAt(i)))
    1.81 +                buf.setCharAt(i,' ');
    1.82 +
    1.83 +        return new String(buf);
    1.84 +    }
    1.85 +
    1.86 +    /**
    1.87 +     * Equivalent of {@link String#trim()}.
    1.88 +     * @since 2.0
    1.89 +     */
    1.90 +    public static CharSequence trim(CharSequence text) {
    1.91 +        int len = text.length();
    1.92 +        int start = 0;
    1.93 +
    1.94 +        while( start<len && isWhiteSpace(text.charAt(start)) )
    1.95 +            start++;
    1.96 +
    1.97 +        int end = len-1;
    1.98 +
    1.99 +        while( end>start && isWhiteSpace(text.charAt(end)) )
   1.100 +            end--;
   1.101 +
   1.102 +        if(start==0 && end==len-1)
   1.103 +            return text;    // no change
   1.104 +        else
   1.105 +            return text.subSequence(start,end+1);
   1.106 +    }
   1.107 +
   1.108 +    public static String collapse(String text) {
   1.109 +        return collapse( (CharSequence)text ).toString();
   1.110 +    }
   1.111 +
   1.112 +    /**
   1.113 +     * This is usually the biggest processing bottleneck.
   1.114 +     *
   1.115 +     * @since 2.0
   1.116 +     */
   1.117 +    public static CharSequence collapse(CharSequence text) {
   1.118 +        int len = text.length();
   1.119 +
   1.120 +        // most of the texts are already in the collapsed form.
   1.121 +        // so look for the first whitespace in the hope that we will
   1.122 +        // never see it.
   1.123 +        int s=0;
   1.124 +        while(s<len) {
   1.125 +            if(isWhiteSpace(text.charAt(s)))
   1.126 +                break;
   1.127 +            s++;
   1.128 +        }
   1.129 +        if(s==len)
   1.130 +            // the input happens to be already collapsed.
   1.131 +            return text;
   1.132 +
   1.133 +        // we now know that the input contains spaces.
   1.134 +        // let's sit down and do the collapsing normally.
   1.135 +
   1.136 +        StringBuilder result = new StringBuilder(len /*allocate enough size to avoid re-allocation*/ );
   1.137 +
   1.138 +        if(s!=0) {
   1.139 +            for( int i=0; i<s; i++ )
   1.140 +                result.append(text.charAt(i));
   1.141 +            result.append(' ');
   1.142 +        }
   1.143 +
   1.144 +        boolean inStripMode = true;
   1.145 +        for (int i = s+1; i < len; i++) {
   1.146 +            char ch = text.charAt(i);
   1.147 +            boolean b = isWhiteSpace(ch);
   1.148 +            if (inStripMode && b)
   1.149 +                continue; // skip this character
   1.150 +
   1.151 +            inStripMode = b;
   1.152 +            if (inStripMode)
   1.153 +                result.append(' ');
   1.154 +            else
   1.155 +                result.append(ch);
   1.156 +        }
   1.157 +
   1.158 +        // remove trailing whitespaces
   1.159 +        len = result.length();
   1.160 +        if (len > 0 && result.charAt(len - 1) == ' ')
   1.161 +            result.setLength(len - 1);
   1.162 +        // whitespaces are already collapsed,
   1.163 +        // so all we have to do is to remove the last one character
   1.164 +        // if it's a whitespace.
   1.165 +
   1.166 +        return result;
   1.167 +    }
   1.168 +
   1.169 +    /**
   1.170 +     * Returns true if the specified string is all whitespace.
   1.171 +     */
   1.172 +    public static final boolean isWhiteSpace(CharSequence s) {
   1.173 +        for( int i=s.length()-1; i>=0; i-- )
   1.174 +            if(!isWhiteSpace(s.charAt(i)))
   1.175 +                return false;
   1.176 +        return true;
   1.177 +    }
   1.178 +
   1.179 +    /** returns true if the specified char is a white space character. */
   1.180 +    public static final boolean isWhiteSpace(char ch) {
   1.181 +        // most of the characters are non-control characters.
   1.182 +        // so check that first to quickly return false for most of the cases.
   1.183 +        if( ch>0x20 )   return false;
   1.184 +
   1.185 +        // other than we have to do four comparisons.
   1.186 +        return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
   1.187 +    }
   1.188 +
   1.189 +    /**
   1.190 +     * Returns true if the specified char is a white space character
   1.191 +     * but not 0x20.
   1.192 +     */
   1.193 +    protected static final boolean isWhiteSpaceExceptSpace(char ch) {
   1.194 +        // most of the characters are non-control characters.
   1.195 +        // so check that first to quickly return false for most of the cases.
   1.196 +        if( ch>=0x20 )   return false;
   1.197 +
   1.198 +        // other than we have to do four comparisons.
   1.199 +        return ch == 0x9 || ch == 0xA || ch == 0xD;
   1.200 +    }
   1.201 +}

mercurial