Thu, 31 Aug 2017 15:18:52 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.bind; |
aoqi@0 | 27 | |
/**
 * Processes white space normalization.
 *
 * <p>
 * Throughout this class "white space" means exactly one of the four
 * characters tab (0x9), line feed (0xA), carriage return (0xD) and
 * space (0x20). No other control or Unicode space character is treated
 * as white space.
 *
 * <p>
 * All methods are static and keep no state. Every method that takes a
 * text argument dereferences it immediately, so passing {@code null}
 * results in a {@link NullPointerException}.
 *
 * @since 1.0
 */
public abstract class WhiteSpaceProcessor {

    // Historical note: a simpler replace(String) that copied the input into
    // a StringBuffer one character at a time (appending ' ' for every white
    // space character) was benchmarked -- see test/src/ReplaceTest.java in
    // the CVS Attic -- and turned out to be slower than the implementation
    // below, which only allocates when a replacement is actually needed.

    /**
     * Replaces every tab, line feed and carriage return in the text with a
     * space.
     *
     * @param text the text to process
     * @return the text with each tab, line feed and carriage return turned
     *         into a single space; the length is always preserved
     */
    public static String replace(String text) {
        return replace((CharSequence) text).toString();
    }

    /**
     * Replaces every tab, line feed and carriage return in the text with a
     * space.
     *
     * @param text the text to process
     * @return {@code text} itself when it contains no tab, line feed or
     *         carriage return; otherwise a new {@link String} of the same
     *         length with those characters replaced by spaces
     * @since 2.0
     */
    public static CharSequence replace(CharSequence text) {
        // Scan backwards for the last character that needs replacing. Most
        // inputs contain none, in which case nothing is allocated.
        int i = text.length() - 1;
        while (i >= 0 && !isWhiteSpaceExceptSpace(text.charAt(i))) {
            i--;
        }

        if (i < 0) {
            // nothing to replace: replace(text) == text
            return text;
        }

        // At least one character has to change, so work on a mutable copy.
        StringBuilder buf = new StringBuilder(text);

        // Position i is already known to need replacing; everything after
        // it was checked by the scan above, so only the prefix remains.
        buf.setCharAt(i--, ' ');
        for (; i >= 0; i--) {
            if (isWhiteSpaceExceptSpace(buf.charAt(i))) {
                buf.setCharAt(i, ' ');
            }
        }

        return buf.toString();
    }

    /**
     * Strips leading and trailing white space from the text.
     *
     * <p>
     * This is similar to {@link String#trim()} but not identical:
     * {@code String.trim()} strips every character up to and including
     * U+0020, whereas this method strips only the four characters accepted
     * by {@link #isWhiteSpace(char)}.
     *
     * @param text the text to trim
     * @return {@code text} itself when there is nothing to strip; otherwise
     *         the sub-sequence without the leading and trailing white space
     *         (empty when the text consists of white space only)
     * @since 2.0
     */
    public static CharSequence trim(CharSequence text) {
        int len = text.length();

        int start = 0;
        while (start < len && isWhiteSpace(text.charAt(start))) {
            start++;
        }

        // The "end > start" bound keeps the index valid for all-white-space
        // input, where start has already run up to len.
        int end = len - 1;
        while (end > start && isWhiteSpace(text.charAt(end))) {
            end--;
        }

        if (start == 0 && end == len - 1) {
            return text; // no change
        }
        return text.subSequence(start, end + 1);
    }

    /**
     * Collapses the white space in the text: leading and trailing white
     * space is removed and every inner run of white space characters is
     * replaced by a single space.
     *
     * @param text the text to collapse
     * @return the collapsed text
     */
    public static String collapse(String text) {
        return collapse((CharSequence) text).toString();
    }

    /**
     * Collapses the white space in the text: leading and trailing white
     * space is removed and every inner run of white space characters is
     * replaced by a single space.
     *
     * <p>
     * This is usually the biggest processing bottleneck, hence the fast
     * path for input that contains no white space at all.
     *
     * @param text the text to collapse
     * @return {@code text} itself when it contains no white space character;
     *         otherwise a new {@link StringBuilder} holding the collapsed
     *         text
     * @since 2.0
     */
    public static CharSequence collapse(CharSequence text) {
        int len = text.length();

        // Most texts are already in the collapsed form, so look for the
        // first white space character in the hope that there is none.
        int s = 0;
        while (s < len && !isWhiteSpace(text.charAt(s))) {
            s++;
        }
        if (s == len) {
            // the input happens to be already collapsed.
            return text;
        }

        // The input contains white space; do the collapsing normally.
        // The result can never be longer than the input, so len is enough
        // capacity to avoid re-allocation.
        StringBuilder result = new StringBuilder(len);

        if (s != 0) {
            // Copy the white-space-free prefix, then one space standing in
            // for the white space character found at position s.
            result.append(text, 0, s);
            result.append(' ');
        }

        // In strip mode the previous character was white space (or we are
        // still at the very beginning), so further white space is dropped.
        boolean inStripMode = true;
        for (int i = s + 1; i < len; i++) {
            char ch = text.charAt(i);
            boolean b = isWhiteSpace(ch);
            if (inStripMode && b) {
                continue; // skip this character
            }

            inStripMode = b;
            if (inStripMode) {
                result.append(' ');
            } else {
                result.append(ch);
            }
        }

        // White space is already collapsed, so at most one trailing space
        // can be left over; remove it if present.
        len = result.length();
        if (len > 0 && result.charAt(len - 1) == ' ') {
            result.setLength(len - 1);
        }

        return result;
    }

    /**
     * Returns true if the specified string is all whitespace.
     *
     * @param s the character sequence to test
     * @return {@code true} if every character of {@code s} is a white space
     *         character, which includes the case of an empty sequence
     */
    public static boolean isWhiteSpace(CharSequence s) {
        for (int i = s.length() - 1; i >= 0; i--) {
            if (!isWhiteSpace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the specified char is a white space character.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is tab (0x9), line feed (0xA),
     *         carriage return (0xD) or space (0x20)
     */
    public static boolean isWhiteSpace(char ch) {
        // Most characters are above 0x20, so check that first to return
        // false quickly in the common case.
        if (ch > 0x20) {
            return false;
        }

        // Otherwise four comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
    }

    /**
     * Returns true if the specified char is a white space character
     * but not 0x20.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is tab (0x9), line feed (0xA) or
     *         carriage return (0xD)
     */
    protected static boolean isWhiteSpaceExceptSpace(char ch) {
        // Most characters are at or above 0x20, so check that first to
        // return false quickly in the common case.
        if (ch >= 0x20) {
            return false;
        }

        // Otherwise three comparisons are needed.
        return ch == 0x9 || ch == 0xA || ch == 0xD;
    }
}