Mon, 12 Nov 2012 14:03:53 -0800
6830717: replay of compilations would help with debugging
Summary: When a Java process crashes in a compiler thread, replaying the compilation helps to find the root cause. This is done by using the SA to dump application class data and replay data from the core dump, and then using a debug version of the JVM to recompile the problematic Java method.
Reviewed-by: kvn, twisti, sspitsyn
Contributed-by: yumin.qi@oracle.com
1 /*
2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "utilities/utf8.hpp"
28 // Assume the utf8 string is in legal form and has been
29 // checked in the class file parser/format checker.
30 char* UTF8::next(const char* str, jchar* value) {
31 unsigned const char *ptr = (const unsigned char *)str;
32 unsigned char ch, ch2, ch3;
33 int length = -1; /* bad length */
34 jchar result;
35 switch ((ch = ptr[0]) >> 4) {
36 default:
37 result = ch;
38 length = 1;
39 break;
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
42 /* Shouldn't happen. */
43 break;
45 case 0xC: case 0xD:
46 /* 110xxxxx 10xxxxxx */
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
48 unsigned char high_five = ch & 0x1F;
49 unsigned char low_six = ch2 & 0x3F;
50 result = (high_five << 6) + low_six;
51 length = 2;
52 break;
53 }
54 break;
56 case 0xE:
57 /* 1110xxxx 10xxxxxx 10xxxxxx */
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
60 unsigned char high_four = ch & 0x0f;
61 unsigned char mid_six = ch2 & 0x3f;
62 unsigned char low_six = ch3 & 0x3f;
63 result = (((high_four << 6) + mid_six) << 6) + low_six;
64 length = 3;
65 }
66 }
67 break;
68 } /* end of switch */
70 if (length <= 0) {
71 *value = ptr[0]; /* default bad result; */
72 return (char*)(ptr + 1); // make progress somehow
73 }
75 *value = result;
77 // The assert is correct but the .class file is wrong
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
79 return (char *)(ptr + length);
80 }
82 char* UTF8::next_character(const char* str, jint* value) {
83 unsigned const char *ptr = (const unsigned char *)str;
84 /* See if it's legal supplementary character:
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
86 if (is_supplementary_character(ptr)) {
87 *value = get_supplementary_character(ptr);
88 return (char *)(ptr + 6);
89 }
90 jchar result;
91 char* next_ch = next(str, &result);
92 *value = result;
93 return next_ch;
94 }
96 // Count bytes of the form 10xxxxxx and deduct this count
97 // from the total byte count. The utf8 string must be in
98 // legal form which has been verified in the format checker.
99 int UTF8::unicode_length(const char* str, int len) {
100 int num_chars = len;
101 for (int i = 0; i < len; i++) {
102 if ((str[i] & 0xC0) == 0x80) {
103 --num_chars;
104 }
105 }
106 return num_chars;
107 }
109 // Count bytes of the utf8 string except those in form
110 // 10xxxxxx which only appear in multibyte characters.
111 // The utf8 string must be in legal form and has been
112 // verified in the format checker.
113 int UTF8::unicode_length(const char* str) {
114 int num_chars = 0;
115 for (const char* p = str; *p; p++) {
116 if (((*p) & 0xC0) != 0x80) {
117 num_chars++;
118 }
119 }
120 return num_chars;
121 }
123 // Writes a jchar a utf8 and returns the end
124 static u_char* utf8_write(u_char* base, jchar ch) {
125 if ((ch != 0) && (ch <=0x7f)) {
126 base[0] = (u_char) ch;
127 return base + 1;
128 }
130 if (ch <= 0x7FF) {
131 /* 11 bits or less. */
132 unsigned char high_five = ch >> 6;
133 unsigned char low_six = ch & 0x3F;
134 base[0] = high_five | 0xC0; /* 110xxxxx */
135 base[1] = low_six | 0x80; /* 10xxxxxx */
136 return base + 2;
137 }
138 /* possibly full 16 bits. */
139 char high_four = ch >> 12;
140 char mid_six = (ch >> 6) & 0x3F;
141 char low_six = ch & 0x3f;
142 base[0] = high_four | 0xE0; /* 1110xxxx */
143 base[1] = mid_six | 0x80; /* 10xxxxxx */
144 base[2] = low_six | 0x80; /* 10xxxxxx */
145 return base + 3;
146 }
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
149 unsigned char ch;
150 const char *ptr = utf8_str;
151 int index = 0;
153 /* ASCII case loop optimization */
154 for (; index < unicode_length; index++) {
155 if((ch = ptr[0]) > 0x7F) { break; }
156 unicode_str[index] = ch;
157 ptr = (const char *)(ptr + 1);
158 }
160 for (; index < unicode_length; index++) {
161 ptr = UTF8::next(ptr, &unicode_str[index]);
162 }
163 }
165 // returns the quoted ascii length of a 0-terminated utf8 string
166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
167 const char *ptr = utf8_str;
168 const char* end = ptr + utf8_length;
169 int result = 0;
170 while (ptr < end) {
171 jchar c;
172 ptr = UTF8::next(ptr, &c);
173 if (c >= 32 && c < 127) {
174 result++;
175 } else {
176 result += 6;
177 }
178 }
179 return result;
180 }
182 // converts a utf8 string to quoted ascii
183 void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
184 const char *ptr = utf8_str;
185 char* p = buf;
186 char* end = buf + buflen;
187 while (*ptr != '\0') {
188 jchar c;
189 ptr = UTF8::next(ptr, &c);
190 if (c >= 32 && c < 127) {
191 if (p + 1 >= end) break; // string is truncated
192 *p++ = (char)c;
193 } else {
194 if (p + 6 >= end) break; // string is truncated
195 sprintf(p, "\\u%04x", c);
196 p += 6;
197 }
198 }
199 *p = '\0';
200 }
203 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
204 const char *ptr = quoted_ascii_str;
205 char* result = NULL;
206 while (*ptr != '\0') {
207 char c = *ptr;
208 if (c < 32 || c >= 127) break;
209 }
210 if (*ptr == '\0') {
211 // nothing to do so return original string
212 return quoted_ascii_str;
213 }
214 // everything up to this point was ok.
215 int length = ptr - quoted_ascii_str;
216 char* buffer = NULL;
217 for (int round = 0; round < 2; round++) {
218 while (*ptr != '\0') {
219 if (*ptr != '\\') {
220 if (buffer != NULL) {
221 buffer[length] = *ptr;
222 }
223 length++;
224 } else {
225 switch (ptr[1]) {
226 case 'u': {
227 ptr += 2;
228 jchar value=0;
229 for (int i=0; i<4; i++) {
230 char c = *ptr++;
231 switch (c) {
232 case '0': case '1': case '2': case '3': case '4':
233 case '5': case '6': case '7': case '8': case '9':
234 value = (value << 4) + c - '0';
235 break;
236 case 'a': case 'b': case 'c':
237 case 'd': case 'e': case 'f':
238 value = (value << 4) + 10 + c - 'a';
239 break;
240 case 'A': case 'B': case 'C':
241 case 'D': case 'E': case 'F':
242 value = (value << 4) + 10 + c - 'A';
243 break;
244 default:
245 ShouldNotReachHere();
246 }
247 }
248 if (buffer == NULL) {
249 char utf8_buffer[4];
250 char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
251 length += next - utf8_buffer;
252 } else {
253 char* next = (char*)utf8_write((u_char*)&buffer[length], value);
254 length += next - &buffer[length];
255 }
256 break;
257 }
258 case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
259 case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
260 case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
261 case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
262 default:
263 ShouldNotReachHere();
264 }
265 }
266 }
267 if (round == 0) {
268 buffer = NEW_RESOURCE_ARRAY(char, length + 1);
269 ptr = quoted_ascii_str;
270 } else {
271 buffer[length] = '\0';
272 }
273 }
274 return buffer;
275 }
278 // Returns NULL if 'c' it not found. This only works as long
279 // as 'c' is an ASCII character
280 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
281 assert(length >= 0, "sanity check");
282 assert(c >= 0, "does not work for non-ASCII characters");
283 // Skip backwards in string until 'c' is found or end is reached
284 while(--length >= 0 && base[length] != c);
285 return (length < 0) ? NULL : &base[length];
286 }
288 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {
289 // Length must be the same
290 if (length1 != length2) return false;
291 for (int i = 0; i < length1; i++) {
292 if (base1[i] != base2[i]) return false;
293 }
294 return true;
295 }
297 bool UTF8::is_supplementary_character(const unsigned char* str) {
298 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)
299 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);
300 }
302 jint UTF8::get_supplementary_character(const unsigned char* str) {
303 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)
304 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
305 }
308 //-------------------------------------------------------------------------------------
311 int UNICODE::utf8_size(jchar c) {
312 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
313 if (c <= 0x07FF) return 2;
314 return 3;
315 }
317 int UNICODE::utf8_length(jchar* base, int length) {
318 int result = 0;
319 for (int index = 0; index < length; index++) {
320 jchar c = base[index];
321 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
322 else if (c <= 0x07FF) result += 2;
323 else result += 3;
324 }
325 return result;
326 }
328 char* UNICODE::as_utf8(jchar* base, int length) {
329 int utf8_len = utf8_length(base, length);
330 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
331 u_char* p = result;
332 for (int index = 0; index < length; index++) {
333 p = utf8_write(p, base[index]);
334 }
335 *p = '\0';
336 assert(p == &result[utf8_len], "length prediction must be correct");
337 return (char*) result;
338 }
340 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
341 u_char* p = (u_char*)buf;
342 u_char* end = (u_char*)buf + buflen;
343 for (int index = 0; index < length; index++) {
344 jchar c = base[index];
345 if (p + utf8_size(c) >= end) break; // string is truncated
346 p = utf8_write(p, base[index]);
347 }
348 *p = '\0';
349 return buf;
350 }
352 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
353 for(int index = 0; index < length; index++) {
354 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
355 }
356 *utf8_buffer = '\0';
357 }
359 // returns the quoted ascii length of a unicode string
360 int UNICODE::quoted_ascii_length(jchar* base, int length) {
361 int result = 0;
362 for (int i = 0; i < length; i++) {
363 jchar c = base[i];
364 if (c >= 32 && c < 127) {
365 result++;
366 } else {
367 result += 6;
368 }
369 }
370 return result;
371 }
373 // converts a utf8 string to quoted ascii
374 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
375 char* p = buf;
376 char* end = buf + buflen;
377 for (int index = 0; index < length; index++) {
378 jchar c = base[index];
379 if (c >= 32 && c < 127) {
380 if (p + 1 >= end) break; // string is truncated
381 *p++ = (char)c;
382 } else {
383 if (p + 6 >= end) break; // string is truncated
384 sprintf(p, "\\u%04x", c);
385 p += 6;
386 }
387 }
388 *p = '\0';
389 }