jdk8-mips64-public/hotspot: src/share/vm/utilities/utf8.cpp@c96030fff130

6684579: SoftReference processing can be made more efficient
Summary: For current soft-ref clearing policies, we can decide at marking time if a soft-reference will definitely not be cleared, postponing the decision of whether it will definitely be cleared to the final reference processing phase. This can be especially beneficial in the case of concurrent collectors where the marking is usually concurrent but reference processing is usually not.
Reviewed-by: jmasa

     1 /*

     2  * Copyright 1997-2004 Sun Microsystems, Inc.  All Rights Reserved.

     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

     4  *

     5  * This code is free software; you can redistribute it and/or modify it

     6  * under the terms of the GNU General Public License version 2 only, as

     7  * published by the Free Software Foundation.

     8  *

     9  * This code is distributed in the hope that it will be useful, but WITHOUT

    10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

    11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

    12  * version 2 for more details (a copy is included in the LICENSE file that

    13  * accompanied this code).

    14  *

    15  * You should have received a copy of the GNU General Public License version

    16  * 2 along with this work; if not, write to the Free Software Foundation,

    17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

    18  *

    19  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,

    20  * CA 95054 USA or visit www.sun.com if you need additional information or

    21  * have any questions.

    22  *

    23  */

    25 # include "incls/_precompiled.incl"

    26 # include "incls/_utf8.cpp.incl"

    28 // Assume the utf8 string is in legal form and has been

    29 // checked in the class file parser/format checker.

    30 char* UTF8::next(const char* str, jchar* value) {

    31   unsigned const char *ptr = (const unsigned char *)str;

    32   unsigned char ch, ch2, ch3;

    33   int length = -1;              /* bad length */

    34   jchar result;

    35   switch ((ch = ptr[0]) >> 4) {

    36     default:

    37     result = ch;

    38     length = 1;

    39     break;

    41   case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:

    42     /* Shouldn't happen. */

    43     break;

    45   case 0xC: case 0xD:

    46     /* 110xxxxx  10xxxxxx */

    47     if (((ch2 = ptr[1]) & 0xC0) == 0x80) {

    48       unsigned char high_five = ch & 0x1F;

    49       unsigned char low_six = ch2 & 0x3F;

    50       result = (high_five << 6) + low_six;

    51       length = 2;

    52       break;

    53     }

    54     break;

    56   case 0xE:

    57     /* 1110xxxx 10xxxxxx 10xxxxxx */

    58     if (((ch2 = ptr[1]) & 0xC0) == 0x80) {

    59       if (((ch3 = ptr[2]) & 0xC0) == 0x80) {

    60         unsigned char high_four = ch & 0x0f;

    61         unsigned char mid_six = ch2 & 0x3f;

    62         unsigned char low_six = ch3 & 0x3f;

    63         result = (((high_four << 6) + mid_six) << 6) + low_six;

    64         length = 3;

    65       }

    66     }

    67     break;

    68   } /* end of switch */

    70   if (length <= 0) {

    71     *value = ptr[0];    /* default bad result; */

    72     return (char*)(ptr + 1); // make progress somehow

    73   }

    75   *value = result;

    77   // The assert is correct but the .class file is wrong

    78   // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");

    79   return (char *)(ptr + length);

    80 }

    82 char* UTF8::next_character(const char* str, jint* value) {

    83   unsigned const char *ptr = (const unsigned char *)str;

    84   /* See if it's legal supplementary character:

    85      11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */

    86   if (is_supplementary_character(ptr)) {

    87     *value = get_supplementary_character(ptr);

    88     return (char *)(ptr + 6);

    89   }

    90   jchar result;

    91   char* next_ch = next(str, &result);

    92   *value = result;

    93   return next_ch;

    94 }

    96 // Count bytes of the form 10xxxxxx and deduct this count

    97 // from the total byte count.  The utf8 string must be in

    98 // legal form which has been verified in the format checker.

    99 int UTF8::unicode_length(const char* str, int len) {

   100   int num_chars = len;

   101   for (int i = 0; i < len; i++) {

   102     if ((str[i] & 0xC0) == 0x80) {

   103       --num_chars;

   104     }

   105   }

   106   return num_chars;

   107 }

   109 // Count bytes of the utf8 string except those in form

   110 // 10xxxxxx which only appear in multibyte characters.

   111 // The utf8 string must be in legal form and has been

   112 // verified in the format checker.

   113 int UTF8::unicode_length(const char* str) {

   114   int num_chars = 0;

   115   for (const char* p = str; *p; p++) {

   116     if (((*p) & 0xC0) != 0x80) {

   117       num_chars++;

   118     }

   119   }

   120   return num_chars;

   121 }

   123 // Writes a jchar a utf8 and returns the end

   124 static u_char* utf8_write(u_char* base, jchar ch) {

   125   if ((ch != 0) && (ch <=0x7f)) {

   126     base[0] = (u_char) ch;

   127     return base + 1;

   128   }

   130   if (ch <= 0x7FF) {

   131     /* 11 bits or less. */

   132     unsigned char high_five = ch >> 6;

   133     unsigned char low_six = ch & 0x3F;

   134     base[0] = high_five | 0xC0; /* 110xxxxx */

   135     base[1] = low_six | 0x80;   /* 10xxxxxx */

   136     return base + 2;

   137   }

   138   /* possibly full 16 bits. */

   139   char high_four = ch >> 12;

   140   char mid_six = (ch >> 6) & 0x3F;

   141   char low_six = ch & 0x3f;

   142   base[0] = high_four | 0xE0; /* 1110xxxx */

   143   base[1] = mid_six | 0x80;   /* 10xxxxxx */

   144   base[2] = low_six | 0x80;   /* 10xxxxxx */

   145   return base + 3;

   146 }

   148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {

   149   unsigned char ch;

   150   const char *ptr = (const char *)utf8_str;

   151   int index = 0;

   153   /* ASCII case loop optimization */

   154   for (; index < unicode_length; index++) {

   155     if((ch = ptr[0]) > 0x7F) { break; }

   156     unicode_str[index] = ch;

   157     ptr = (const char *)(ptr + 1);

   158   }

   160   for (; index < unicode_length; index++) {

   161     ptr = UTF8::next(ptr, &unicode_str[index]);

   162   }

   163 }

   165 // Returns NULL if 'c' it not found. This only works as long

   166 // as 'c' is an ASCII character

   167 jbyte* UTF8::strrchr(jbyte* base, int length, jbyte c) {

   168   assert(length >= 0, "sanity check");

   169   assert(c >= 0, "does not work for non-ASCII characters");

   170   // Skip backwards in string until 'c' is found or end is reached

   171   while(--length >= 0 && base[length] != c);

   172   return (length < 0) ? NULL : &base[length];

   173 }

   175 bool UTF8::equal(jbyte* base1, int length1, jbyte* base2, int length2) {

   176   // Length must be the same

   177   if (length1 != length2) return false;

   178   for (int i = 0; i < length1; i++) {

   179     if (base1[i] != base2[i]) return false;

   180   }

   181   return true;

   182 }

   184 bool UTF8::is_supplementary_character(const unsigned char* str) {

   185   return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)

   186       && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);

   187 }

   189 jint UTF8::get_supplementary_character(const unsigned char* str) {

   190   return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)

   191                  + ((str[4] & 0x0f) << 6)  + (str[5] & 0x3f);

   192 }

   195 //-------------------------------------------------------------------------------------

   198 int UNICODE::utf8_size(jchar c) {

   199   if ((0x0001 <= c) && (c <= 0x007F)) return 1;

   200   if (c <= 0x07FF) return 2;

   201   return 3;

   202 }

   204 int UNICODE::utf8_length(jchar* base, int length) {

   205   int result = 0;

   206   for (int index = 0; index < length; index++) {

   207     jchar c = base[index];

   208     if ((0x0001 <= c) && (c <= 0x007F)) result += 1;

   209     else if (c <= 0x07FF) result += 2;

   210     else result += 3;

   211   }

   212   return result;

   213 }

   215 char* UNICODE::as_utf8(jchar* base, int length) {

   216   int utf8_len = utf8_length(base, length);

   217   u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);

   218   u_char* p = result;

   219   for (int index = 0; index < length; index++) {

   220     p = utf8_write(p, base[index]);

   221   }

   222   *p = '\0';

   223   assert(p == &result[utf8_len], "length prediction must be correct");

   224   return (char*) result;

   225 }

   227 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {

   228   u_char* p = (u_char*)buf;

   229   u_char* end = (u_char*)buf + buflen;

   230   for (int index = 0; index < length; index++) {

   231     jchar c = base[index];

   232     if (p + utf8_size(c) >= end) break;      // string is truncated

   233     p = utf8_write(p, base[index]);

   234   }

   235   *p = '\0';

   236   return buf;

   237 }

   239 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {

   240   for(int index = 0; index < length; index++) {

   241     utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);

   242   }

   243   *utf8_buffer = '\0';

   244 }

Mercurial > jdk8-mips64-public > hotspot / file revision

src/share/vm/utilities/utf8.cpp@c96030fff130

src/share/vm/utilities/utf8.cpp