1.1 --- a/src/share/vm/utilities/utf8.cpp Fri Nov 09 08:36:17 2012 -0800 1.2 +++ b/src/share/vm/utilities/utf8.cpp Mon Nov 12 14:03:53 2012 -0800 1.3 @@ -1,5 +1,5 @@ 1.4 /* 1.5 - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 1.6 + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 1.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.8 * 1.9 * This code is free software; you can redistribute it and/or modify it 1.10 @@ -147,7 +147,7 @@ 1.11 1.12 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) { 1.13 unsigned char ch; 1.14 - const char *ptr = (const char *)utf8_str; 1.15 + const char *ptr = utf8_str; 1.16 int index = 0; 1.17 1.18 /* ASCII case loop optimization */ 1.19 @@ -162,6 +162,119 @@ 1.20 } 1.21 } 1.22 1.23 +// returns the quoted ascii length of a 0-terminated utf8 string 1.24 +int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) { 1.25 + const char *ptr = utf8_str; 1.26 + const char* end = ptr + utf8_length; 1.27 + int result = 0; 1.28 + while (ptr < end) { 1.29 + jchar c; 1.30 + ptr = UTF8::next(ptr, &c); 1.31 + if (c >= 32 && c < 127) { 1.32 + result++; 1.33 + } else { 1.34 + result += 6; 1.35 + } 1.36 + } 1.37 + return result; 1.38 +} 1.39 + 1.40 +// converts a utf8 string to quoted ascii 1.41 +void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) { 1.42 + const char *ptr = utf8_str; 1.43 + char* p = buf; 1.44 + char* end = buf + buflen; 1.45 + while (*ptr != '\0') { 1.46 + jchar c; 1.47 + ptr = UTF8::next(ptr, &c); 1.48 + if (c >= 32 && c < 127) { 1.49 + if (p + 1 >= end) break; // string is truncated 1.50 + *p++ = (char)c; 1.51 + } else { 1.52 + if (p + 6 >= end) break; // string is truncated 1.53 + sprintf(p, "\\u%04x", c); 1.54 + p += 6; 1.55 + } 1.56 + } 1.57 + *p = '\0'; 1.58 +} 1.59 + 1.60 + 1.61 +const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) { 1.62 + const char *ptr = quoted_ascii_str; 1.63 + char* result = NULL; 1.64 + while (*ptr != '\0') { 1.65 + char c = *ptr; 1.66 + if (c < 32 || c >= 127) break; 1.67 + } 1.68 + if (*ptr == '\0') { 1.69 + // nothing to do so return original string 1.70 + return quoted_ascii_str; 1.71 + } 1.72 + // everything up to this point was ok. 1.73 + int length = ptr - quoted_ascii_str; 1.74 + char* buffer = NULL; 1.75 + for (int round = 0; round < 2; round++) { 1.76 + while (*ptr != '\0') { 1.77 + if (*ptr != '\\') { 1.78 + if (buffer != NULL) { 1.79 + buffer[length] = *ptr; 1.80 + } 1.81 + length++; 1.82 + } else { 1.83 + switch (ptr[1]) { 1.84 + case 'u': { 1.85 + ptr += 2; 1.86 + jchar value=0; 1.87 + for (int i=0; i<4; i++) { 1.88 + char c = *ptr++; 1.89 + switch (c) { 1.90 + case '0': case '1': case '2': case '3': case '4': 1.91 + case '5': case '6': case '7': case '8': case '9': 1.92 + value = (value << 4) + c - '0'; 1.93 + break; 1.94 + case 'a': case 'b': case 'c': 1.95 + case 'd': case 'e': case 'f': 1.96 + value = (value << 4) + 10 + c - 'a'; 1.97 + break; 1.98 + case 'A': case 'B': case 'C': 1.99 + case 'D': case 'E': case 'F': 1.100 + value = (value << 4) + 10 + c - 'A'; 1.101 + break; 1.102 + default: 1.103 + ShouldNotReachHere(); 1.104 + } 1.105 + } 1.106 + if (buffer == NULL) { 1.107 + char utf8_buffer[4]; 1.108 + char* next = (char*)utf8_write((u_char*)utf8_buffer, value); 1.109 + length += next - utf8_buffer; 1.110 + } else { 1.111 + char* next = (char*)utf8_write((u_char*)&buffer[length], value); 1.112 + length += next - &buffer[length]; 1.113 + } 1.114 + break; 1.115 + } 1.116 + case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break; 1.117 + case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break; 1.118 + case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break; 1.119 + case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break; 1.120 + default: 1.121 + ShouldNotReachHere(); 1.122 + } 1.123 + } 1.124 + } 1.125 + if (round == 0) { 1.126 + buffer = NEW_RESOURCE_ARRAY(char, length + 1); 1.127 + ptr = quoted_ascii_str; 1.128 + } else { 1.129 + buffer[length] = '\0'; 1.130 + } 1.131 + } 1.132 + return buffer; 1.133 +} 1.134 + 1.135 + 1.136 // Returns NULL if 'c' it not found. This only works as long 1.137 // as 'c' is an ASCII character 1.138 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) { 1.139 @@ -242,3 +355,35 @@ 1.140 } 1.141 *utf8_buffer = '\0'; 1.142 } 1.143 + 1.144 +// returns the quoted ascii length of a unicode string 1.145 +int UNICODE::quoted_ascii_length(jchar* base, int length) { 1.146 + int result = 0; 1.147 + for (int i = 0; i < length; i++) { 1.148 + jchar c = base[i]; 1.149 + if (c >= 32 && c < 127) { 1.150 + result++; 1.151 + } else { 1.152 + result += 6; 1.153 + } 1.154 + } 1.155 + return result; 1.156 +} 1.157 + 1.158 +// converts a utf8 string to quoted ascii 1.159 +void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) { 1.160 + char* p = buf; 1.161 + char* end = buf + buflen; 1.162 + for (int index = 0; index < length; index++) { 1.163 + jchar c = base[index]; 1.164 + if (c >= 32 && c < 127) { 1.165 + if (p + 1 >= end) break; // string is truncated 1.166 + *p++ = (char)c; 1.167 + } else { 1.168 + if (p + 6 >= end) break; // string is truncated 1.169 + sprintf(p, "\\u%04x", c); 1.170 + p += 6; 1.171 + } 1.172 + } 1.173 + *p = '\0'; 1.174 +}