Thu, 20 Nov 2008 16:56:09 -0800
6684579: SoftReference processing can be made more efficient
Summary: For current soft-ref clearing policies, we can decide at marking time if a soft-reference will definitely not be cleared, postponing the decision of whether it will definitely be cleared to the final reference processing phase. This can be especially beneficial in the case of concurrent collectors where the marking is usually concurrent but reference processing is usually not.
Reviewed-by: jmasa
1 /*
2 * Copyright 1997-2004 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 # include "incls/_precompiled.incl"
26 # include "incls/_utf8.cpp.incl"
28 // Assume the utf8 string is in legal form and has been
29 // checked in the class file parser/format checker.
30 char* UTF8::next(const char* str, jchar* value) {
31 unsigned const char *ptr = (const unsigned char *)str;
32 unsigned char ch, ch2, ch3;
33 int length = -1; /* bad length */
34 jchar result;
35 switch ((ch = ptr[0]) >> 4) {
36 default:
37 result = ch;
38 length = 1;
39 break;
41 case 0x8: case 0x9: case 0xA: case 0xB: case 0xF:
42 /* Shouldn't happen. */
43 break;
45 case 0xC: case 0xD:
46 /* 110xxxxx 10xxxxxx */
47 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
48 unsigned char high_five = ch & 0x1F;
49 unsigned char low_six = ch2 & 0x3F;
50 result = (high_five << 6) + low_six;
51 length = 2;
52 break;
53 }
54 break;
56 case 0xE:
57 /* 1110xxxx 10xxxxxx 10xxxxxx */
58 if (((ch2 = ptr[1]) & 0xC0) == 0x80) {
59 if (((ch3 = ptr[2]) & 0xC0) == 0x80) {
60 unsigned char high_four = ch & 0x0f;
61 unsigned char mid_six = ch2 & 0x3f;
62 unsigned char low_six = ch3 & 0x3f;
63 result = (((high_four << 6) + mid_six) << 6) + low_six;
64 length = 3;
65 }
66 }
67 break;
68 } /* end of switch */
70 if (length <= 0) {
71 *value = ptr[0]; /* default bad result; */
72 return (char*)(ptr + 1); // make progress somehow
73 }
75 *value = result;
77 // The assert is correct but the .class file is wrong
78 // assert(UNICODE::utf8_size(result) == length, "checking reverse computation");
79 return (char *)(ptr + length);
80 }
82 char* UTF8::next_character(const char* str, jint* value) {
83 unsigned const char *ptr = (const unsigned char *)str;
84 /* See if it's legal supplementary character:
85 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx */
86 if (is_supplementary_character(ptr)) {
87 *value = get_supplementary_character(ptr);
88 return (char *)(ptr + 6);
89 }
90 jchar result;
91 char* next_ch = next(str, &result);
92 *value = result;
93 return next_ch;
94 }
96 // Count bytes of the form 10xxxxxx and deduct this count
97 // from the total byte count. The utf8 string must be in
98 // legal form which has been verified in the format checker.
99 int UTF8::unicode_length(const char* str, int len) {
100 int num_chars = len;
101 for (int i = 0; i < len; i++) {
102 if ((str[i] & 0xC0) == 0x80) {
103 --num_chars;
104 }
105 }
106 return num_chars;
107 }
109 // Count bytes of the utf8 string except those in form
110 // 10xxxxxx which only appear in multibyte characters.
111 // The utf8 string must be in legal form and has been
112 // verified in the format checker.
113 int UTF8::unicode_length(const char* str) {
114 int num_chars = 0;
115 for (const char* p = str; *p; p++) {
116 if (((*p) & 0xC0) != 0x80) {
117 num_chars++;
118 }
119 }
120 return num_chars;
121 }
123 // Writes a jchar a utf8 and returns the end
124 static u_char* utf8_write(u_char* base, jchar ch) {
125 if ((ch != 0) && (ch <=0x7f)) {
126 base[0] = (u_char) ch;
127 return base + 1;
128 }
130 if (ch <= 0x7FF) {
131 /* 11 bits or less. */
132 unsigned char high_five = ch >> 6;
133 unsigned char low_six = ch & 0x3F;
134 base[0] = high_five | 0xC0; /* 110xxxxx */
135 base[1] = low_six | 0x80; /* 10xxxxxx */
136 return base + 2;
137 }
138 /* possibly full 16 bits. */
139 char high_four = ch >> 12;
140 char mid_six = (ch >> 6) & 0x3F;
141 char low_six = ch & 0x3f;
142 base[0] = high_four | 0xE0; /* 1110xxxx */
143 base[1] = mid_six | 0x80; /* 10xxxxxx */
144 base[2] = low_six | 0x80; /* 10xxxxxx */
145 return base + 3;
146 }
148 void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
149 unsigned char ch;
150 const char *ptr = (const char *)utf8_str;
151 int index = 0;
153 /* ASCII case loop optimization */
154 for (; index < unicode_length; index++) {
155 if((ch = ptr[0]) > 0x7F) { break; }
156 unicode_str[index] = ch;
157 ptr = (const char *)(ptr + 1);
158 }
160 for (; index < unicode_length; index++) {
161 ptr = UTF8::next(ptr, &unicode_str[index]);
162 }
163 }
165 // Returns NULL if 'c' it not found. This only works as long
166 // as 'c' is an ASCII character
167 jbyte* UTF8::strrchr(jbyte* base, int length, jbyte c) {
168 assert(length >= 0, "sanity check");
169 assert(c >= 0, "does not work for non-ASCII characters");
170 // Skip backwards in string until 'c' is found or end is reached
171 while(--length >= 0 && base[length] != c);
172 return (length < 0) ? NULL : &base[length];
173 }
175 bool UTF8::equal(jbyte* base1, int length1, jbyte* base2, int length2) {
176 // Length must be the same
177 if (length1 != length2) return false;
178 for (int i = 0; i < length1; i++) {
179 if (base1[i] != base2[i]) return false;
180 }
181 return true;
182 }
184 bool UTF8::is_supplementary_character(const unsigned char* str) {
185 return ((str[0] & 0xFF) == 0xED) && ((str[1] & 0xF0) == 0xA0) && ((str[2] & 0xC0) == 0x80)
186 && ((str[3] & 0xFF) == 0xED) && ((str[4] & 0xF0) == 0xB0) && ((str[5] & 0xC0) == 0x80);
187 }
189 jint UTF8::get_supplementary_character(const unsigned char* str) {
190 return 0x10000 + ((str[1] & 0x0f) << 16) + ((str[2] & 0x3f) << 10)
191 + ((str[4] & 0x0f) << 6) + (str[5] & 0x3f);
192 }
195 //-------------------------------------------------------------------------------------
198 int UNICODE::utf8_size(jchar c) {
199 if ((0x0001 <= c) && (c <= 0x007F)) return 1;
200 if (c <= 0x07FF) return 2;
201 return 3;
202 }
204 int UNICODE::utf8_length(jchar* base, int length) {
205 int result = 0;
206 for (int index = 0; index < length; index++) {
207 jchar c = base[index];
208 if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
209 else if (c <= 0x07FF) result += 2;
210 else result += 3;
211 }
212 return result;
213 }
215 char* UNICODE::as_utf8(jchar* base, int length) {
216 int utf8_len = utf8_length(base, length);
217 u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
218 u_char* p = result;
219 for (int index = 0; index < length; index++) {
220 p = utf8_write(p, base[index]);
221 }
222 *p = '\0';
223 assert(p == &result[utf8_len], "length prediction must be correct");
224 return (char*) result;
225 }
227 char* UNICODE::as_utf8(jchar* base, int length, char* buf, int buflen) {
228 u_char* p = (u_char*)buf;
229 u_char* end = (u_char*)buf + buflen;
230 for (int index = 0; index < length; index++) {
231 jchar c = base[index];
232 if (p + utf8_size(c) >= end) break; // string is truncated
233 p = utf8_write(p, base[index]);
234 }
235 *p = '\0';
236 return buf;
237 }
239 void UNICODE::convert_to_utf8(const jchar* base, int length, char* utf8_buffer) {
240 for(int index = 0; index < length; index++) {
241 utf8_buffer = (char*)utf8_write((u_char*)utf8_buffer, base[index]);
242 }
243 *utf8_buffer = '\0';
244 }