Sun, 18 Jun 2017 23:18:45 +0100
8172297: In java 8, the marshalling with JAX-WS does not escape carriage return
Reviewed-by: lancea
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.bind.v2.runtime.output; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import java.io.IOException; |
aoqi@0 | 29 | |
aoqi@0 | 30 | /** |
aoqi@0 | 31 | * Buffer for UTF-8 encoded string. |
aoqi@0 | 32 | * |
aoqi@0 | 33 | * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding. |
aoqi@0 | 34 | * |
aoqi@0 | 35 | * @author Kohsuke Kawaguchi |
aoqi@0 | 36 | */ |
aoqi@0 | 37 | public final class Encoded { |
aoqi@0 | 38 | public byte[] buf; |
aoqi@0 | 39 | |
aoqi@0 | 40 | public int len; |
aoqi@0 | 41 | |
aoqi@0 | 42 | public Encoded() {} |
aoqi@0 | 43 | |
aoqi@0 | 44 | public Encoded(String text) { |
aoqi@0 | 45 | set(text); |
aoqi@0 | 46 | } |
aoqi@0 | 47 | |
aoqi@0 | 48 | public void ensureSize(int size) { |
aoqi@0 | 49 | if(buf==null || buf.length<size) |
aoqi@0 | 50 | buf = new byte[size]; |
aoqi@0 | 51 | } |
aoqi@0 | 52 | |
aoqi@0 | 53 | public final void set( String text ) { |
aoqi@0 | 54 | int length = text.length(); |
aoqi@0 | 55 | |
aoqi@0 | 56 | ensureSize(length*3+1); // +1 for append |
aoqi@0 | 57 | |
aoqi@0 | 58 | int ptr = 0; |
aoqi@0 | 59 | |
aoqi@0 | 60 | for (int i = 0; i < length; i++) { |
aoqi@0 | 61 | final char chr = text.charAt(i); |
aoqi@0 | 62 | if (chr > 0x7F) { |
aoqi@0 | 63 | if (chr > 0x7FF) { |
aoqi@0 | 64 | if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) { |
aoqi@0 | 65 | // surrogate |
aoqi@0 | 66 | int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000; |
aoqi@0 | 67 | |
aoqi@0 | 68 | buf[ptr++] = (byte)(0xF0 | ((uc >> 18))); |
aoqi@0 | 69 | buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F)); |
aoqi@0 | 70 | buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F)); |
aoqi@0 | 71 | buf[ptr++] = (byte)(0x80 + (uc & 0x3F)); |
aoqi@0 | 72 | continue; |
aoqi@0 | 73 | } |
aoqi@0 | 74 | buf[ptr++] = (byte)(0xE0 + (chr >> 12)); |
aoqi@0 | 75 | buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F)); |
aoqi@0 | 76 | } else { |
aoqi@0 | 77 | buf[ptr++] = (byte)(0xC0 + (chr >> 6)); |
aoqi@0 | 78 | } |
aoqi@0 | 79 | buf[ptr++] = (byte)(0x80 + (chr & 0x3F)); |
aoqi@0 | 80 | } else { |
aoqi@0 | 81 | buf[ptr++] = (byte)chr; |
aoqi@0 | 82 | } |
aoqi@0 | 83 | } |
aoqi@0 | 84 | |
aoqi@0 | 85 | len = ptr; |
aoqi@0 | 86 | } |
aoqi@0 | 87 | |
aoqi@0 | 88 | /** |
aoqi@0 | 89 | * Fill in the buffer by encoding the specified characters |
aoqi@0 | 90 | * while escaping characters like < |
aoqi@0 | 91 | * |
aoqi@0 | 92 | * @param isAttribute |
aoqi@0 | 93 | * if true, characters like \t, \r, and \n are also escaped. |
aoqi@0 | 94 | */ |
aoqi@0 | 95 | public final void setEscape(String text, boolean isAttribute) { |
aoqi@0 | 96 | int length = text.length(); |
aoqi@0 | 97 | ensureSize(length*6+1); // in the worst case the text is like """""", so we need 6 bytes per char |
aoqi@0 | 98 | |
aoqi@0 | 99 | int ptr = 0; |
aoqi@0 | 100 | |
aoqi@0 | 101 | for (int i = 0; i < length; i++) { |
aoqi@0 | 102 | final char chr = text.charAt(i); |
aoqi@0 | 103 | |
aoqi@0 | 104 | int ptr1 = ptr; |
aoqi@0 | 105 | if (chr > 0x7F) { |
aoqi@0 | 106 | if (chr > 0x7FF) { |
aoqi@0 | 107 | if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) { |
aoqi@0 | 108 | // surrogate |
aoqi@0 | 109 | int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000; |
aoqi@0 | 110 | |
aoqi@0 | 111 | buf[ptr++] = (byte)(0xF0 | ((uc >> 18))); |
aoqi@0 | 112 | buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F)); |
aoqi@0 | 113 | buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F)); |
aoqi@0 | 114 | buf[ptr++] = (byte)(0x80 + (uc & 0x3F)); |
aoqi@0 | 115 | continue; |
aoqi@0 | 116 | } |
aoqi@0 | 117 | buf[ptr1++] = (byte)(0xE0 + (chr >> 12)); |
aoqi@0 | 118 | buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F)); |
aoqi@0 | 119 | } else { |
aoqi@0 | 120 | buf[ptr1++] = (byte)(0xC0 + (chr >> 6)); |
aoqi@0 | 121 | } |
aoqi@0 | 122 | buf[ptr1++] = (byte)(0x80 + (chr & 0x3F)); |
aoqi@0 | 123 | } else { |
aoqi@0 | 124 | byte[] ent; |
aoqi@0 | 125 | |
aoqi@0 | 126 | if((ent=attributeEntities[chr])!=null) { |
aoqi@0 | 127 | // the majority of the case is just printed as a char, |
aoqi@0 | 128 | // so it's very important to reject them as quickly as possible |
aoqi@0 | 129 | |
aoqi@0 | 130 | // check again to see if this really needs to be escaped |
aoqi@0 | 131 | if(isAttribute || entities[chr]!=null) |
aoqi@0 | 132 | ptr1 = writeEntity(ent,ptr1); |
aoqi@0 | 133 | else |
aoqi@0 | 134 | buf[ptr1++] = (byte)chr; |
aoqi@0 | 135 | } else |
aoqi@0 | 136 | buf[ptr1++] = (byte)chr; |
aoqi@0 | 137 | } |
aoqi@0 | 138 | ptr = ptr1; |
aoqi@0 | 139 | } |
aoqi@0 | 140 | len = ptr; |
aoqi@0 | 141 | } |
aoqi@0 | 142 | |
aoqi@0 | 143 | private int writeEntity( byte[] entity, int ptr ) { |
aoqi@0 | 144 | System.arraycopy(entity,0,buf,ptr,entity.length); |
aoqi@0 | 145 | return ptr+entity.length; |
aoqi@0 | 146 | } |
aoqi@0 | 147 | |
aoqi@0 | 148 | /** |
aoqi@0 | 149 | * Writes the encoded bytes to the given output stream. |
aoqi@0 | 150 | */ |
aoqi@0 | 151 | public final void write(UTF8XmlOutput out) throws IOException { |
aoqi@0 | 152 | out.write(buf,0,len); |
aoqi@0 | 153 | } |
aoqi@0 | 154 | |
aoqi@0 | 155 | /** |
aoqi@0 | 156 | * Appends a new character to the end of the buffer. |
aoqi@0 | 157 | * This assumes that you have enough space in the buffer. |
aoqi@0 | 158 | */ |
aoqi@0 | 159 | public void append(char b) { |
aoqi@0 | 160 | buf[len++] = (byte)b; |
aoqi@0 | 161 | } |
aoqi@0 | 162 | |
aoqi@0 | 163 | /** |
aoqi@0 | 164 | * Reallocate the buffer to the exact size of the data |
aoqi@0 | 165 | * to reduce the memory footprint. |
aoqi@0 | 166 | */ |
aoqi@0 | 167 | public void compact() { |
aoqi@0 | 168 | byte[] b = new byte[len]; |
aoqi@0 | 169 | System.arraycopy(buf,0,b,0,len); |
aoqi@0 | 170 | buf = b; |
aoqi@0 | 171 | } |
aoqi@0 | 172 | |
aoqi@0 | 173 | /** |
aoqi@0 | 174 | * UTF-8 encoded entities keyed by their character code. |
aoqi@0 | 175 | * e.g., entities['&'] == AMP_ENTITY. |
aoqi@0 | 176 | * |
aoqi@0 | 177 | * In attributes we need to encode more characters. |
aoqi@0 | 178 | */ |
aoqi@0 | 179 | private static final byte[][] entities = new byte[0x80][]; |
aoqi@0 | 180 | private static final byte[][] attributeEntities = new byte[0x80][]; |
aoqi@0 | 181 | |
aoqi@0 | 182 | static { |
aoqi@0 | 183 | add('&',"&",false); |
aoqi@0 | 184 | add('<',"<",false); |
aoqi@0 | 185 | add('>',">",false); |
aoqi@0 | 186 | add('"',""",true); |
aoqi@0 | 187 | add('\t',"	",true); |
aoqi@0 | 188 | add('\r',"
",false); |
aoqi@0 | 189 | add('\n',"
",true); |
aoqi@0 | 190 | } |
aoqi@0 | 191 | |
aoqi@0 | 192 | private static void add(char c, String s, boolean attOnly) { |
aoqi@0 | 193 | byte[] image = UTF8XmlOutput.toBytes(s); |
aoqi@0 | 194 | attributeEntities[c] = image; |
aoqi@0 | 195 | if(!attOnly) |
aoqi@0 | 196 | entities[c] = image; |
aoqi@0 | 197 | } |
aoqi@0 | 198 | } |