jdk8-mips64-public/hotspot: src/share/vm/utilities/utf8.cpp@bd7a7ce2e264

6830717: replay of compilations would help with debugging
Summary: When a Java process crashes in a compiler thread, repeating the compilation helps find the root cause. This is done by using the SA to dump application class data and replay data from the core dump, and then using a debug version of the JVM to recompile the problematic Java method.
Reviewed-by: kvn, twisti, sspitsyn
Contributed-by: yumin.qi@oracle.com

     1 /*

     2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

    20  * or visit www.oracle.com if you need additional information or have any

    21  * questions.

    22  *

    23  */

    25 #include "precompiled.hpp"

    26 #include "utilities/utf8.hpp"

    28 // Assume the utf8 string is in legal form and has been

    29 // checked in the class file parser/format checker.

    30 char* UTF8::next(const char* str, jchar* value) {

    31   unsigned const char *ptr = (const unsigned char *)str;

    32   unsigned char ch, ch2, ch3;

    33   int length = -1;              /* bad length */

    34   jchar result;

    35   switch ((ch = ptr[0]) >> 4) {

    36     default:

    37     result = ch;

    38     length = 1;

    39     break;

    41   case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:

    42     /* Shouldn't happen. */

    43     break;

    45   case 0xC: case 0xD:

    46     /* 110xxxxx  10xxxxxx */

    47     if (((ch2 = ptr[1]) & 0xC0) == 0x80) {

    48       unsigned char high_five = ch & 0x1F;

    49       unsigned char low_six = ch2 & 0x3F;

    50       result = (high_five << 6) + low_six;

    51       length = 2;

    52       break;

    53     }

    54     break;

    56   case 0xE:

    57     /* 1110xxxx 10xxxxxx 10xxxxxx */

    58     if (((ch2 = ptr[1]) & 0xC0) == 0x80) {

    59       if (((ch3 = ptr[2]) & 0xC0) == 0x80) {

    60         unsigned char high_four = ch & 0x0f;

    61         unsigned char mid_six = ch2 & 0x3f;

    62         unsigned char low_six = ch3 & 0x3f;

    63         result = (((high_four << 6) + mid_six) << 6) + low_six;

    64         length = 3;

    65       }

    66     }

    67     break;

    68   } /* end of switch */

    70   if (length <= 0) {

    71     *value = ptr[0];    /* default bad result; */

    72     return (char*)(ptr + 1); // make progress somehow

    73   }

    75   *value = result;

    77   // The assert is correct but the .class file is wrong

    78   // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");

    79   return (char *)(ptr + length);

    80 }

    82 char* UTF8::next_character(const char* str, jint* value) {

    83   unsigned const char *ptr = (const unsigned char *)str;

    84   /* See if it's legal supplementary character:

    85      11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */

    86   if (is_supplementary_character(ptr)) {

    87     *value = get_supplementary_character(ptr);

    88     return (char *)(ptr + 6);

    89   }

    90   jchar result;

    91   char* next_ch = next(str, &result);

    92   *value = result;

    93   return next_ch;

    94 }

    96 // Count bytes of the form 10xxxxxx and deduct this count

    97 // from the total byte count.  The utf8 string must be in

    98 // legal form which has been verified in the format checker.

    99 int UTF8::unicode_length(const char* str, int len) {

   100   int num_chars = len;

   101   for (int i = 0; i < len; i++) {

   102     if ((str[i] & 0xC0) == 0x80) {

   103       --num_chars;

   104     }

   105   }

   106   return num_chars;

   107 }

   109 // Count bytes of the utf8 string except those in form

   110 // 10xxxxxx which only appear in multibyte characters.

   111 // The utf8 string must be in legal form and has been

   112 // verified in the format checker.

   113 int UTF8::unicode_length(const char* str) {

   114   int num_chars = 0;

   115   for (const char* p = str; *p; p++) {

   116     if (((*p) & 0xC0) != 0x80) {

   117       num_chars++;

   118     }

   119   }

   120   return num_chars;

   121 }

   123 // Writes a jchar a utf8 and returns the end

   124 static u_char* utf8_write(u_char* base, jchar ch) {

   125   if ((ch != 0) && (ch <=0x7f)) {

   126     base[0] = (u_char) ch;

   127     return base + 1;

   128   }

   130   if (ch <= 0x7FF) {

   131     /* 11 bits or less. */

   132     unsigned char high_five = ch >> 6;

   133     unsigned char low_six = ch & 0x3F;

   134     base[0] = high_five | 0xC0; /* 110xxxxx */

   135     base[1] = low_six | 0x80;   /* 10xxxxxx */

   136     return base + 2;

   137   }

   138   /* possibly full 16 bits. */

   139   char high_four = ch >> 12;

   140   char mid_six = (ch >> 6) & 0x3F;

   141   char low_six = ch & 0x3f;

   142   base[0] = high_four | 0xE0; /* 1110xxxx */

   143   base[1] = mid_six | 0x80;   /* 10xxxxxx */

   144   base[2] = low_six | 0x80;   /* 10xxxxxx */

   145   return base + 3;

   146 }

   148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {

   149   unsigned char ch;

   150   const char *ptr = utf8_str;

   151   int index = 0;

   153   /* ASCII case loop optimization */

   154   for (; index < unicode_length; index++) {

   155     if((ch = ptr[0]) > 0x7F) { break; }

   156     unicode_str[index] = ch;

   157     ptr = (const char *)(ptr + 1);

   158   }

   160   for (; index < unicode_length; index++) {

   161     ptr = UTF8::next(ptr, &unicode_str[index]);

   162   }

   163 }

   165 // returns the quoted ascii length of a 0-terminated utf8 string

   166 int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {

   167   const char *ptr = utf8_str;

   168   const char* end = ptr + utf8_length;

   169   int result = 0;

   170   while (ptr < end) {

   171     jchar c;

   172     ptr = UTF8::next(ptr, &c);

   173     if (c >= 32 && c < 127) {

   174       result++;

   175     } else {

   176       result += 6;

   177     }

   178   }

   179   return result;

   180 }

   182 // converts a utf8 string to quoted ascii

   183 void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {

   184   const char *ptr = utf8_str;

   185   char* p = buf;

   186   char* end = buf + buflen;

   187   while (*ptr != '\0') {

   188     jchar c;

   189     ptr = UTF8::next(ptr, &c);

   190     if (c >= 32 && c < 127) {

   191       if (p + 1 >= end) break;      // string is truncated

   192       *p++ = (char)c;

   193     } else {

   194       if (p + 6 >= end) break;      // string is truncated

   195       sprintf(p, "\\u%04x", c);

   196       p += 6;

   197     }

   198   }

   199   *p = '\0';

   200 }

   203 const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {

   204   const char *ptr = quoted_ascii_str;

   205   char* result = NULL;

   206   while (*ptr != '\0') {

   207     char c = *ptr;

   208     if (c < 32 || c >= 127) break;

   209   }

   210   if (*ptr == '\0') {

   211     // nothing to do so return original string

   212     return quoted_ascii_str;

   213   }

   214   // everything up to this point was ok.

   215   int length = ptr - quoted_ascii_str;

   216   char* buffer = NULL;

   217   for (int round = 0; round < 2; round++) {

   218     while (*ptr != '\0') {

   219       if (*ptr != '\\') {

   220         if (buffer != NULL) {

   221           buffer[length] = *ptr;

   222         }

   223         length++;

   224       } else {

   225         switch (ptr[1]) {

   226           case 'u': {

   227             ptr += 2;

   228             jchar value=0;

   229             for (int i=0; i<4; i++) {

   230               char c = *ptr++;

   231               switch (c) {

   232                 case '0': case '1': case '2': case '3': case '4':

   233                 case '5': case '6': case '7': case '8': case '9':

   234                   value = (value << 4) + c - '0';

   235                   break;

   236                 case 'a': case 'b': case 'c':

   237                 case 'd': case 'e': case 'f':

   238                   value = (value << 4) + 10 + c - 'a';

   239                   break;

   240                 case 'A': case 'B': case 'C':

   241                 case 'D': case 'E': case 'F':

   242                   value = (value << 4) + 10 + c - 'A';

   243                   break;

   244                 default:

   245                   ShouldNotReachHere();

   246               }

   247             }

   248             if (buffer == NULL) {

   249               char utf8_buffer[4];

   250               char* next = (char*)utf8_write((u_char*)utf8_buffer, value);

   251               length += next - utf8_buffer;

   252             } else {

   253               char* next = (char*)utf8_write((u_char*)&buffer[length], value);

   254               length += next - &buffer[length];

   255             }

   256             break;

   257           }

   258           case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;

   259           case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;

   260           case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;

   261           case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;

   262           default:

   263             ShouldNotReachHere();

   264         }

   265       }

   266     }

   267     if (round == 0) {

   268       buffer = NEW_RESOURCE_ARRAY(char, length + 1);

   269       ptr = quoted_ascii_str;

   270     } else {

   271       buffer[length] = '\0';

   272     }

   273   }

   274   return buffer;

   275 }

   278 // Returns NULL if 'c' it not found. This only works as long

   279 // as 'c' is an ASCII character

   280 const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {

   281   assert(length >= 0, "sanity check");

   282   assert(c >= 0, "does not work for non-ASCII characters");

   283   // Skip backwards in string until 'c' is found or end is reached

   284   while(--length >= 0 && base[length] != c);

   285   return (length < 0) ? NULL : &base[length];

   286 }

   288 bool UTF8::equal(const jbyte* base1, int length1, const jbyte* base2, int length2) {

   289   // Length must be the same

   290   if (length1 != length2) return false;

   291   for (int i = 0; i < length1; i++) {

   292     if (base1[i] != base2[i]) return false;

   293   }

   294   return true;

   295 }

   297 bool UTF8::is_supplementary_character(const unsigned char* str) {

   298   return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)

   299       && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);

   300 }

   302 jint UTF8::get_supplementary_character(const unsigned char* str) {

   303   return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)

   304                  + ((str[4] & 0x0f) << 6)  + (str[5] & 0x3f);

   305 }

   308 //-------------------------------------------------------------------------------------

   311 int UNICODE::utf8_size(jchar c) {

   312   if ((0x0001 <= c) && (c <= 0x007F)) return 1;

   313   if (c <= 0x07FF) return 2;

   314   return 3;

   315 }

   317 int UNICODE::utf8_length(jchar* base, int length) {

   318   int result = 0;

   319   for (int index = 0; index < length; index++) {

   320     jchar c = base[index];

   321     if ((0x0001 <= c) && (c <= 0x007F)) result += 1;

   322     else if (c <= 0x07FF) result += 2;

   323     else result += 3;

   324   }

   325   return result;

   326 }

   328 char* UNICODE::as_utf8(jchar* base, int length) {

   329   int utf8_len = utf8_length(base, length);

   330   u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);

   331   u_char* p = result;

   332   for (int index = 0; index < length; index++) {

   333     p = utf8_write(p, base[index]);

   334   }

   335   *p = '\0';

   336   assert(p == &result[utf8_len], "length prediction must be correct");

   337   return (char*) result;

   338 }

   340 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {

   341   u_char* p = (u_char*)buf;

   342   u_char* end = (u_char*)buf + buflen;

   343   for (int index = 0; index < length; index++) {

   344     jchar c = base[index];

   345     if (p + utf8_size(c) >= end) break;      // string is truncated

   346     p = utf8_write(p, base[index]);

   347   }

   348   *p = '\0';

   349   return buf;

   350 }

   352 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {

   353   for(int index = 0; index < length; index++) {

   354     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);

   355   }

   356   *utf8_buffer = '\0';

   357 }

   359 // returns the quoted ascii length of a unicode string

   360 int UNICODE::quoted_ascii_length(jchar* base, int length) {

   361   int result = 0;

   362   for (int i = 0; i < length; i++) {

   363     jchar c = base[i];

   364     if (c >= 32 && c < 127) {

   365       result++;

   366     } else {

   367       result += 6;

   368     }

   369   }

   370   return result;

   371 }

   373 // converts a utf8 string to quoted ascii

   374 void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {

   375   char* p = buf;

   376   char* end = buf + buflen;

   377   for (int index = 0; index < length; index++) {

   378     jchar c = base[index];

   379     if (c >= 32 && c < 127) {

   380       if (p + 1 >= end) break;      // string is truncated

   381       *p++ = (char)c;

   382     } else {

   383       if (p + 6 >= end) break;      // string is truncated

   384       sprintf(p, "\\u%04x", c);

   385       p += 6;

   386     }

   387   }

   388   *p = '\0';

   389 }

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/utilities/utf8.cpp@bd7a7ce2e264

src/share/vm/utilities/utf8.cpp