src/share/vm/utilities/utf8.cpp

changeset 4267
bd7a7ce2e264
parent 2708
1d1603768966
child 4851
8c03fc47511d
     1.1 --- a/src/share/vm/utilities/utf8.cpp	Fri Nov 09 08:36:17 2012 -0800
     1.2 +++ b/src/share/vm/utilities/utf8.cpp	Mon Nov 12 14:03:53 2012 -0800
     1.3 @@ -1,5 +1,5 @@
     1.4  /*
     1.5 - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
     1.6 + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
     1.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     1.8   *
     1.9   * This code is free software; you can redistribute it and/or modify it
    1.10 @@ -147,7 +147,7 @@
    1.11  
    1.12  void UTF8::convert_to_unicode(const char* utf8_str, jchar* unicode_str, int unicode_length) {
    1.13    unsigned char ch;
    1.14 -  const char *ptr = (const char *)utf8_str;
    1.15 +  const char *ptr = utf8_str;
    1.16    int index = 0;
    1.17  
    1.18    /* ASCII case loop optimization */
    1.19 @@ -162,6 +162,119 @@
    1.20    }
    1.21  }
    1.22  
    1.23 +// returns the quoted ascii length of a 0-terminated utf8 string
    1.24 +int UTF8::quoted_ascii_length(const char* utf8_str, int utf8_length) {
    1.25 +  const char *ptr = utf8_str;
    1.26 +  const char* end = ptr + utf8_length;
    1.27 +  int result = 0;
    1.28 +  while (ptr < end) {
    1.29 +    jchar c;
    1.30 +    ptr = UTF8::next(ptr, &c);
    1.31 +    if (c >= 32 && c < 127) {
    1.32 +      result++;
    1.33 +    } else {
    1.34 +      result += 6;
    1.35 +    }
    1.36 +  }
    1.37 +  return result;
    1.38 +}
    1.39 +
    1.40 +// converts a utf8 string to quoted ascii
    1.41 +void UTF8::as_quoted_ascii(const char* utf8_str, char* buf, int buflen) {
    1.42 +  const char *ptr = utf8_str;
    1.43 +  char* p = buf;
    1.44 +  char* end = buf + buflen;
    1.45 +  while (*ptr != '\0') {
    1.46 +    jchar c;
    1.47 +    ptr = UTF8::next(ptr, &c);
    1.48 +    if (c >= 32 && c < 127) {
    1.49 +      if (p + 1 >= end) break;      // string is truncated
    1.50 +      *p++ = (char)c;
    1.51 +    } else {
    1.52 +      if (p + 6 >= end) break;      // string is truncated
    1.53 +      sprintf(p, "\\u%04x", c);
    1.54 +      p += 6;
    1.55 +    }
    1.56 +  }
    1.57 +  *p = '\0';
    1.58 +}
    1.59 +
    1.60 +
    1.61 +const char* UTF8::from_quoted_ascii(const char* quoted_ascii_str) {
    1.62 +  const char *ptr = quoted_ascii_str;
    1.63 +  char* result = NULL;
    1.64 +  while (*ptr != '\0') {
    1.65 +    char c = *ptr;
    1.66 +    if (c < 32 || c >= 127) break;
    1.67 +  }
    1.68 +  if (*ptr == '\0') {
    1.69 +    // nothing to do so return original string
    1.70 +    return quoted_ascii_str;
    1.71 +  }
    1.72 +  // everything up to this point was ok.
    1.73 +  int length = ptr - quoted_ascii_str;
    1.74 +  char* buffer = NULL;
    1.75 +  for (int round = 0; round < 2; round++) {
    1.76 +    while (*ptr != '\0') {
    1.77 +      if (*ptr != '\\') {
    1.78 +        if (buffer != NULL) {
    1.79 +          buffer[length] = *ptr;
    1.80 +        }
    1.81 +        length++;
    1.82 +      } else {
    1.83 +        switch (ptr[1]) {
    1.84 +          case 'u': {
    1.85 +            ptr += 2;
    1.86 +            jchar value=0;
    1.87 +            for (int i=0; i<4; i++) {
    1.88 +              char c = *ptr++;
    1.89 +              switch (c) {
    1.90 +                case '0': case '1': case '2': case '3': case '4':
    1.91 +                case '5': case '6': case '7': case '8': case '9':
    1.92 +                  value = (value << 4) + c - '0';
    1.93 +                  break;
    1.94 +                case 'a': case 'b': case 'c':
    1.95 +                case 'd': case 'e': case 'f':
    1.96 +                  value = (value << 4) + 10 + c - 'a';
    1.97 +                  break;
    1.98 +                case 'A': case 'B': case 'C':
    1.99 +                case 'D': case 'E': case 'F':
   1.100 +                  value = (value << 4) + 10 + c - 'A';
   1.101 +                  break;
   1.102 +                default:
   1.103 +                  ShouldNotReachHere();
   1.104 +              }
   1.105 +            }
   1.106 +            if (buffer == NULL) {
   1.107 +              char utf8_buffer[4];
   1.108 +              char* next = (char*)utf8_write((u_char*)utf8_buffer, value);
   1.109 +              length += next - utf8_buffer;
   1.110 +            } else {
   1.111 +              char* next = (char*)utf8_write((u_char*)&buffer[length], value);
   1.112 +              length += next - &buffer[length];
   1.113 +            }
   1.114 +            break;
   1.115 +          }
   1.116 +          case 't': if (buffer != NULL) buffer[length] = '\t'; ptr += 2; length++; break;
   1.117 +          case 'n': if (buffer != NULL) buffer[length] = '\n'; ptr += 2; length++; break;
   1.118 +          case 'r': if (buffer != NULL) buffer[length] = '\r'; ptr += 2; length++; break;
   1.119 +          case 'f': if (buffer != NULL) buffer[length] = '\f'; ptr += 2; length++; break;
   1.120 +          default:
   1.121 +            ShouldNotReachHere();
   1.122 +        }
   1.123 +      }
   1.124 +    }
   1.125 +    if (round == 0) {
   1.126 +      buffer = NEW_RESOURCE_ARRAY(char, length + 1);
   1.127 +      ptr = quoted_ascii_str;
   1.128 +    } else {
   1.129 +      buffer[length] = '\0';
   1.130 +    }
   1.131 +  }
   1.132 +  return buffer;
   1.133 +}
   1.134 +
   1.135 +
   1.136  // Returns NULL if 'c' is not found. This only works as long
   1.137  // as 'c' is an ASCII character
   1.138  const jbyte* UTF8::strrchr(const jbyte* base, int length, jbyte c) {
   1.139 @@ -242,3 +355,35 @@
   1.140    }
   1.141    *utf8_buffer = '\0';
   1.142  }
   1.143 +
   1.144 +// returns the quoted ascii length of a unicode string
   1.145 +int UNICODE::quoted_ascii_length(jchar* base, int length) {
   1.146 +  int result = 0;
   1.147 +  for (int i = 0; i < length; i++) {
   1.148 +    jchar c = base[i];
   1.149 +    if (c >= 32 && c < 127) {
   1.150 +      result++;
   1.151 +    } else {
   1.152 +      result += 6;
   1.153 +    }
   1.154 +  }
   1.155 +  return result;
   1.156 +}
   1.157 +
   1.158 +// converts a utf8 string to quoted ascii
   1.159 +void UNICODE::as_quoted_ascii(const jchar* base, int length, char* buf, int buflen) {
   1.160 +  char* p = buf;
   1.161 +  char* end = buf + buflen;
   1.162 +  for (int index = 0; index < length; index++) {
   1.163 +    jchar c = base[index];
   1.164 +    if (c >= 32 && c < 127) {
   1.165 +      if (p + 1 >= end) break;      // string is truncated
   1.166 +      *p++ = (char)c;
   1.167 +    } else {
   1.168 +      if (p + 6 >= end) break;      // string is truncated
   1.169 +      sprintf(p, "\\u%04x", c);
   1.170 +      p += 6;
   1.171 +    }
   1.172 +  }
   1.173 +  *p = '\0';
   1.174 +}

mercurial