/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.xml.internal.bind.v2.runtime.output;

import java.io.IOException;

/**
 * Buffer for UTF-8 encoded string.
 *
 * <p>
 * The buffer is a plain byte array ({@link #buf}) of which only the first
 * {@link #len} bytes are meaningful. Both fields are public and are written
 * by {@link #set(String)}, {@link #setEscape(String, boolean)},
 * {@link #append(char)} and {@link #compact()}.
 *
 * <p>
 * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
 *
 * @author Kohsuke Kawaguchi
 */
public final class Encoded {
    /** Backing storage; {@code null} until the first call that sizes the buffer. */
    public byte[] buf;

    /** Number of valid bytes at the start of {@link #buf}. */
    public int len;

    /** Creates an empty instance with no backing array allocated. */
    public Encoded() {}

    /**
     * Creates an instance holding the UTF-8 encoding of the given text.
     *
     * @param text
     *      the text to encode; must not be null.
     */
    public Encoded(String text) {
        set(text);
    }

    /**
     * Makes sure the backing array can hold at least {@code size} bytes.
     *
     * <p>
     * When the array has to be replaced the previous contents are NOT copied
     * over, so this is meant to be called before the buffer is (re)filled.
     *
     * @param size
     *      the minimum capacity required, in bytes.
     */
    public void ensureSize(int size) {
        if (buf == null || buf.length < size) {
            buf = new byte[size];
        }
    }

    /**
     * Fill in the buffer by encoding the specified characters as UTF-8,
     * without any escaping.
     *
     * <p>
     * An unpaired surrogate cannot be represented in UTF-8 and is written
     * as a single {@code '?'} byte.
     *
     * @param text
     *      the text to encode; must not be null.
     */
    public final void set(String text) {
        final int length = text.length();

        // A UTF-16 code unit never needs more than 3 bytes (a surrogate pair is
        // 2 units -> 4 bytes). One spare byte leaves room for a single append(char).
        ensureSize(length * 3 + 1);

        int ptr = 0;
        for (int i = 0; i < length; ) {
            // codePointAt joins a valid surrogate pair and otherwise returns the
            // single char, so it never reads past the end of the string.
            final int cp = text.codePointAt(i);
            i += Character.charCount(cp);

            if (cp > 0x7F) {
                ptr = writeMultiByte(cp, ptr);
            } else {
                buf[ptr++] = (byte) cp;
            }
        }

        len = ptr;
    }

    /**
     * Fill in the buffer by encoding the specified characters
     * while escaping characters like &lt;
     *
     * <p>
     * An unpaired surrogate cannot be represented in UTF-8 and is written
     * as a single {@code '?'} byte.
     *
     * @param text
     *      the text to encode; must not be null.
     * @param isAttribute
     *      if true, characters like \t, \r, and \n are also escaped.
     */
    public final void setEscape(String text, boolean isAttribute) {
        final int length = text.length();

        // Worst case is a run of '"' inside an attribute: every char becomes the
        // 6-byte sequence &quot;. One spare byte leaves room for append(char).
        ensureSize(length * 6 + 1);

        int ptr = 0;
        for (int i = 0; i < length; ) {
            final int cp = text.codePointAt(i);
            i += Character.charCount(cp);

            if (cp > 0x7F) {
                // the entity tables only cover ASCII, so nothing to escape here
                ptr = writeMultiByte(cp, ptr);
                continue;
            }

            // The vast majority of characters have no entry at all, so a single
            // lookup in the larger (attribute) table rejects them quickly.
            final byte[] ent = attributeEntities[cp];
            if (ent != null && (isAttribute || entities[cp] != null)) {
                ptr = writeEntity(ent, ptr);
            } else {
                buf[ptr++] = (byte) cp;
            }
        }

        len = ptr;
    }

    /**
     * Writes the UTF-8 form of a non-ASCII code point into {@link #buf}.
     *
     * @param cp
     *      a code point greater than 0x7F.
     * @param ptr
     *      the index in {@link #buf} to start writing at.
     * @return
     *      the index just past the last byte written.
     */
    private int writeMultiByte(int cp, int ptr) {
        if (cp > 0xFFFF) {
            // supplementary plane: 4 bytes
            buf[ptr++] = (byte) (0xF0 | (cp >> 18));
            buf[ptr++] = (byte) (0x80 | ((cp >> 12) & 0x3F));
            buf[ptr++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
        } else if (cp > 0x7FF) {
            if (Character.MIN_SURROGATE <= cp && cp <= Character.MAX_SURROGATE) {
                // Only an unpaired surrogate can reach this point (valid pairs
                // were already combined by the caller). It has no UTF-8 form.
                buf[ptr++] = (byte) '?';
                return ptr;
            }
            buf[ptr++] = (byte) (0xE0 | (cp >> 12));
            buf[ptr++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
        } else {
            buf[ptr++] = (byte) (0xC0 | (cp >> 6));
        }
        // every multi-byte form ends with the low six bits
        buf[ptr++] = (byte) (0x80 | (cp & 0x3F));
        return ptr;
    }

    /**
     * Copies a pre-encoded entity into {@link #buf}.
     *
     * @param entity
     *      the bytes of the entity reference.
     * @param ptr
     *      the index in {@link #buf} to start writing at.
     * @return
     *      the index just past the last byte written.
     */
    private int writeEntity(byte[] entity, int ptr) {
        System.arraycopy(entity, 0, buf, ptr, entity.length);
        return ptr + entity.length;
    }

    /**
     * Writes the encoded bytes to the given output stream.
     *
     * @param out
     *      the output that receives the first {@link #len} bytes of {@link #buf}.
     * @throws IOException
     *      if the underlying output fails.
     */
    public final void write(UTF8XmlOutput out) throws IOException {
        out.write(buf, 0, len);
    }

    /**
     * Appends a new character to the end of the buffer.
     * This assumes that you have enough space in the buffer.
     *
     * <p>
     * The character is narrowed to a single byte, so only ASCII characters
     * produce valid UTF-8.
     *
     * @param b
     *      the character to append.
     */
    public void append(char b) {
        buf[len++] = (byte) b;
    }

    /**
     * Reallocate the buffer to the exact size of the data
     * to reduce the memory footprint.
     */
    public void compact() {
        final byte[] exact = new byte[len];
        if (len > 0) {
            // guarded so that compacting a never-filled instance (buf == null)
            // yields an empty array instead of failing
            System.arraycopy(buf, 0, exact, 0, len);
        }
        buf = exact;
    }

    /**
     * UTF-8 encoded entities keyed by their character code.
     * e.g., entities['&amp;'] holds the bytes of "&amp;amp;".
     *
     * In attributes we need to encode more characters.
     */
    private static final byte[][] entities = new byte[0x80][];
    private static final byte[][] attributeEntities = new byte[0x80][];

    static {
        add('&', "&amp;", false);
        add('<', "&lt;", false);
        add('>', "&gt;", false);
        add('"', "&quot;", true);
        // Whitespace goes out as numeric character references so that a parser
        // does not normalize it away: tab and newline only matter inside
        // attribute values, while a literal carriage return is folded by
        // end-of-line handling everywhere and is therefore always escaped.
        // NOTE(review): hexadecimal form assumed for these three - confirm
        // against the upstream file.
        add('\t', "&#x9;", true);
        add('\r', "&#xD;", false);
        add('\n', "&#xA;", true);
    }

    /**
     * Registers the replacement text for one character.
     *
     * @param c
     *      the ASCII character to escape.
     * @param s
     *      the replacement text.
     * @param attOnly
     *      if true the character is escaped only inside attribute values;
     *      otherwise it is escaped in both attributes and text.
     */
    private static void add(char c, String s, boolean attOnly) {
        final byte[] image = UTF8XmlOutput.toBytes(s);
        attributeEntities[c] = image;
        if (!attOnly) {
            entities[c] = image;
        }
    }
}