src/share/jaxws_classes/com/sun/xml/internal/bind/v2/runtime/output/Encoded.java

Thu, 12 Oct 2017 19:44:07 +0800

author
aoqi
date
Thu, 12 Oct 2017 19:44:07 +0800
changeset 760
e530533619ec
parent 0
373ffda63c9a
permissions
-rw-r--r--

merge

aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation. Oracle designates this
aoqi@0 8 * particular file as subject to the "Classpath" exception as provided
aoqi@0 9 * by Oracle in the LICENSE file that accompanied this code.
aoqi@0 10 *
aoqi@0 11 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 14 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 15 * accompanied this code).
aoqi@0 16 *
aoqi@0 17 * You should have received a copy of the GNU General Public License version
aoqi@0 18 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 20 *
aoqi@0 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 22 * or visit www.oracle.com if you need additional information or have any
aoqi@0 23 * questions.
aoqi@0 24 */
aoqi@0 25
aoqi@0 26 package com.sun.xml.internal.bind.v2.runtime.output;
aoqi@0 27
aoqi@0 28 import java.io.IOException;
aoqi@0 29
aoqi@0 30 /**
aoqi@0 31 * Buffer for UTF-8 encoded string.
aoqi@0 32 *
aoqi@0 33 * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
aoqi@0 34 *
aoqi@0 35 * @author Kohsuke Kawaguchi
aoqi@0 36 */
aoqi@0 37 public final class Encoded {
aoqi@0 38 public byte[] buf;
aoqi@0 39
aoqi@0 40 public int len;
aoqi@0 41
aoqi@0 42 public Encoded() {}
aoqi@0 43
aoqi@0 44 public Encoded(String text) {
aoqi@0 45 set(text);
aoqi@0 46 }
aoqi@0 47
aoqi@0 48 public void ensureSize(int size) {
aoqi@0 49 if(buf==null || buf.length<size)
aoqi@0 50 buf = new byte[size];
aoqi@0 51 }
aoqi@0 52
aoqi@0 53 public final void set( String text ) {
aoqi@0 54 int length = text.length();
aoqi@0 55
aoqi@0 56 ensureSize(length*3+1); // +1 for append
aoqi@0 57
aoqi@0 58 int ptr = 0;
aoqi@0 59
aoqi@0 60 for (int i = 0; i < length; i++) {
aoqi@0 61 final char chr = text.charAt(i);
aoqi@0 62 if (chr > 0x7F) {
aoqi@0 63 if (chr > 0x7FF) {
aoqi@0 64 if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
aoqi@0 65 // surrogate
aoqi@0 66 int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
aoqi@0 67
aoqi@0 68 buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
aoqi@0 69 buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
aoqi@0 70 buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
aoqi@0 71 buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
aoqi@0 72 continue;
aoqi@0 73 }
aoqi@0 74 buf[ptr++] = (byte)(0xE0 + (chr >> 12));
aoqi@0 75 buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
aoqi@0 76 } else {
aoqi@0 77 buf[ptr++] = (byte)(0xC0 + (chr >> 6));
aoqi@0 78 }
aoqi@0 79 buf[ptr++] = (byte)(0x80 + (chr & 0x3F));
aoqi@0 80 } else {
aoqi@0 81 buf[ptr++] = (byte)chr;
aoqi@0 82 }
aoqi@0 83 }
aoqi@0 84
aoqi@0 85 len = ptr;
aoqi@0 86 }
aoqi@0 87
aoqi@0 88 /**
aoqi@0 89 * Fill in the buffer by encoding the specified characters
aoqi@0 90 * while escaping characters like &lt;
aoqi@0 91 *
aoqi@0 92 * @param isAttribute
aoqi@0 93 * if true, characters like \t, \r, and \n are also escaped.
aoqi@0 94 */
aoqi@0 95 public final void setEscape(String text, boolean isAttribute) {
aoqi@0 96 int length = text.length();
aoqi@0 97 ensureSize(length*6+1); // in the worst case the text is like """""", so we need 6 bytes per char
aoqi@0 98
aoqi@0 99 int ptr = 0;
aoqi@0 100
aoqi@0 101 for (int i = 0; i < length; i++) {
aoqi@0 102 final char chr = text.charAt(i);
aoqi@0 103
aoqi@0 104 int ptr1 = ptr;
aoqi@0 105 if (chr > 0x7F) {
aoqi@0 106 if (chr > 0x7FF) {
aoqi@0 107 if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
aoqi@0 108 // surrogate
aoqi@0 109 int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
aoqi@0 110
aoqi@0 111 buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
aoqi@0 112 buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
aoqi@0 113 buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
aoqi@0 114 buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
aoqi@0 115 continue;
aoqi@0 116 }
aoqi@0 117 buf[ptr1++] = (byte)(0xE0 + (chr >> 12));
aoqi@0 118 buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
aoqi@0 119 } else {
aoqi@0 120 buf[ptr1++] = (byte)(0xC0 + (chr >> 6));
aoqi@0 121 }
aoqi@0 122 buf[ptr1++] = (byte)(0x80 + (chr & 0x3F));
aoqi@0 123 } else {
aoqi@0 124 byte[] ent;
aoqi@0 125
aoqi@0 126 if((ent=attributeEntities[chr])!=null) {
aoqi@0 127 // the majority of the case is just printed as a char,
aoqi@0 128 // so it's very important to reject them as quickly as possible
aoqi@0 129
aoqi@0 130 // check again to see if this really needs to be escaped
aoqi@0 131 if(isAttribute || entities[chr]!=null)
aoqi@0 132 ptr1 = writeEntity(ent,ptr1);
aoqi@0 133 else
aoqi@0 134 buf[ptr1++] = (byte)chr;
aoqi@0 135 } else
aoqi@0 136 buf[ptr1++] = (byte)chr;
aoqi@0 137 }
aoqi@0 138 ptr = ptr1;
aoqi@0 139 }
aoqi@0 140 len = ptr;
aoqi@0 141 }
aoqi@0 142
aoqi@0 143 private int writeEntity( byte[] entity, int ptr ) {
aoqi@0 144 System.arraycopy(entity,0,buf,ptr,entity.length);
aoqi@0 145 return ptr+entity.length;
aoqi@0 146 }
aoqi@0 147
aoqi@0 148 /**
aoqi@0 149 * Writes the encoded bytes to the given output stream.
aoqi@0 150 */
aoqi@0 151 public final void write(UTF8XmlOutput out) throws IOException {
aoqi@0 152 out.write(buf,0,len);
aoqi@0 153 }
aoqi@0 154
aoqi@0 155 /**
aoqi@0 156 * Appends a new character to the end of the buffer.
aoqi@0 157 * This assumes that you have enough space in the buffer.
aoqi@0 158 */
aoqi@0 159 public void append(char b) {
aoqi@0 160 buf[len++] = (byte)b;
aoqi@0 161 }
aoqi@0 162
aoqi@0 163 /**
aoqi@0 164 * Reallocate the buffer to the exact size of the data
aoqi@0 165 * to reduce the memory footprint.
aoqi@0 166 */
aoqi@0 167 public void compact() {
aoqi@0 168 byte[] b = new byte[len];
aoqi@0 169 System.arraycopy(buf,0,b,0,len);
aoqi@0 170 buf = b;
aoqi@0 171 }
aoqi@0 172
aoqi@0 173 /**
aoqi@0 174 * UTF-8 encoded entities keyed by their character code.
aoqi@0 175 * e.g., entities['&'] == AMP_ENTITY.
aoqi@0 176 *
aoqi@0 177 * In attributes we need to encode more characters.
aoqi@0 178 */
aoqi@0 179 private static final byte[][] entities = new byte[0x80][];
aoqi@0 180 private static final byte[][] attributeEntities = new byte[0x80][];
aoqi@0 181
aoqi@0 182 static {
aoqi@0 183 add('&',"&amp;",false);
aoqi@0 184 add('<',"&lt;",false);
aoqi@0 185 add('>',"&gt;",false);
aoqi@0 186 add('"',"&quot;",true);
aoqi@0 187 add('\t',"&#x9;",true);
aoqi@0 188 add('\r',"&#xD;",false);
aoqi@0 189 add('\n',"&#xA;",true);
aoqi@0 190 }
aoqi@0 191
aoqi@0 192 private static void add(char c, String s, boolean attOnly) {
aoqi@0 193 byte[] image = UTF8XmlOutput.toBytes(s);
aoqi@0 194 attributeEntities[c] = image;
aoqi@0 195 if(!attOnly)
aoqi@0 196 entities[c] = image;
aoqi@0 197 }
aoqi@0 198 }

mercurial