8016518: Parsing of octal string escapes is broken

Thu, 13 Jun 2013 12:52:09 +0200

author
hannesw
date
Thu, 13 Jun 2013 12:52:09 +0200
changeset 345
6d6133ef1fd5
parent 344
b0dcc3727fc3
child 346
18362e95e638

8016518: Parsing of octal string escapes is broken
Reviewed-by: sundar, lagergren

src/jdk/nashorn/internal/parser/Lexer.java file | annotate | diff | comparison | revisions
test/script/basic/JDK-8016518.js file | annotate | diff | comparison | revisions
test/script/basic/JDK-8016518.js.EXPECTED file | annotate | diff | comparison | revisions
     1.1 --- a/src/jdk/nashorn/internal/parser/Lexer.java	Thu Jun 13 16:08:35 2013 +0530
     1.2 +++ b/src/jdk/nashorn/internal/parser/Lexer.java	Thu Jun 13 12:52:09 2013 +0200
     1.3 @@ -666,37 +666,24 @@
     1.4  
     1.5  
     1.6      /**
     1.7 -     * Get the value of a numeric sequence.
     1.8 +     * Get the value of a hexadecimal numeric sequence.
     1.9       *
    1.10 -     * @param base  Numeric base.
    1.11 -     * @param max   Maximum number of digits.
    1.12 -     * @param skip  Skip over escape first.
    1.13 -     * @param check Tells whether to throw error if a digit is invalid for the given base.
    1.14 -     * @param type  Type of token to report against.
    1.15 -     *
    1.16 +     * @param length Number of digits.
    1.17 +     * @param type   Type of token to report against.
    1.18       * @return Value of sequence or < 0 if no digits.
    1.19       */
    1.20 -    private int valueOfSequence(final int base, final int max, final boolean skip, final boolean check, final TokenType type) {
    1.21 -        assert base == 16 || base == 8 : "base other than 16 or 8";
    1.22 -        final boolean isHex = base == 16;
    1.23 -        final int shift = isHex ? 4 : 3;
    1.24 +    private int hexSequence(final int length, final TokenType type) {
    1.25          int value = 0;
    1.26  
    1.27 -        if (skip) {
    1.28 -            skip(2);
    1.29 -        }
    1.30 -
    1.31 -        for (int i = 0; i < max; i++) {
    1.32 -            final int digit = convertDigit(ch0, base);
    1.33 +        for (int i = 0; i < length; i++) {
    1.34 +            final int digit = convertDigit(ch0, 16);
    1.35  
    1.36              if (digit == -1) {
    1.37 -                if (check) {
    1.38 -                    error(Lexer.message("invalid." + (isHex ? "hex" : "octal")), type, position, limit);
    1.39 -                }
    1.40 +                error(Lexer.message("invalid.hex"), type, position, limit);
    1.41                  return i == 0 ? -1 : value;
    1.42              }
    1.43  
    1.44 -            value = value << shift | digit;
    1.45 +            value = digit | value << 4;
    1.46              skip(1);
    1.47          }
    1.48  
    1.49 @@ -704,6 +691,30 @@
    1.50      }
    1.51  
    1.52      /**
    1.53 +     * Get the value of an octal numeric sequence. This parses up to 3 digits with a maximum value of 255; a third digit is consumed only when the first two digits have a value below 32.
    1.54 +     *
    1.55 +     * @return Value of sequence, or 0 if the first character is not an octal digit.
    1.56 +     */
    1.57 +    private int octalSequence() {
    1.58 +        int value = 0;
    1.59 +
    1.60 +        for (int i = 0; i < 3; i++) {
    1.61 +            final int digit = convertDigit(ch0, 8);
    1.62 +
    1.63 +            if (digit == -1) {
    1.64 +                break;
    1.65 +            }
    1.66 +            value = digit | value << 3;
    1.67 +            skip(1);
    1.68 +
    1.69 +            if (i == 1 && value >= 32) {
    1.70 +                break;
    1.71 +            }
    1.72 +        }
    1.73 +        return value;
    1.74 +    }
    1.75 +
    1.76 +    /**
    1.77       * Convert a string to a JavaScript identifier.
    1.78       *
    1.79       * @param start  Position in source content.
    1.80 @@ -724,7 +735,8 @@
    1.81          while (!atEOF() && position < end && !isEOL(ch0)) {
    1.82              // If escape character.
    1.83              if (ch0 == '\\' && ch1 == 'u') {
    1.84 -                final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
    1.85 +                skip(2);
    1.86 +                final int ch = hexSequence(4, TokenType.IDENT);
    1.87                  if (isWhitespace((char)ch)) {
    1.88                      return null;
    1.89                  }
    1.90 @@ -815,7 +827,7 @@
    1.91                      }
    1.92                      reset(afterSlash);
    1.93                      // Octal sequence.
    1.94 -                    final int ch = valueOfSequence(8, 3, false, false, STRING);
    1.95 +                    final int ch = octalSequence();
    1.96  
    1.97                      if (ch < 0) {
    1.98                          sb.append('\\');
    1.99 @@ -862,7 +874,7 @@
   1.100                      break;
   1.101                  case 'x': {
   1.102                      // Hex sequence.
   1.103 -                    final int ch = valueOfSequence(16, 2, false, true, STRING);
   1.104 +                    final int ch = hexSequence(2, STRING);
   1.105  
   1.106                      if (ch < 0) {
   1.107                          sb.append('\\');
   1.108 @@ -874,7 +886,7 @@
   1.109                      break;
   1.110                  case 'u': {
   1.111                      // Unicode sequence.
   1.112 -                    final int ch = valueOfSequence(16, 4, false, true, STRING);
   1.113 +                    final int ch = hexSequence(4, STRING);
   1.114  
   1.115                      if (ch < 0) {
   1.116                          sb.append('\\');
   1.117 @@ -1191,7 +1203,8 @@
   1.118  
   1.119          // Make sure first character is valid start character.
   1.120          if (ch0 == '\\' && ch1 == 'u') {
   1.121 -            final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
   1.122 +            skip(2);
   1.123 +            final int ch = hexSequence(4, TokenType.IDENT);
   1.124  
   1.125              if (!Character.isJavaIdentifierStart(ch)) {
   1.126                  error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
   1.127 @@ -1204,7 +1217,8 @@
   1.128          // Make sure remaining characters are valid part characters.
   1.129          while (!atEOF()) {
   1.130              if (ch0 == '\\' && ch1 == 'u') {
   1.131 -                final int ch = valueOfSequence(16, 4, true, true, TokenType.IDENT);
   1.132 +                skip(2);
   1.133 +                final int ch = hexSequence(4, TokenType.IDENT);
   1.134  
   1.135                  if (!Character.isJavaIdentifierPart(ch)) {
   1.136                      error(Lexer.message("illegal.identifier.character"), TokenType.IDENT, start, position);
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/script/basic/JDK-8016518.js	Thu Jun 13 12:52:09 2013 +0200
     2.3 @@ -0,0 +1,38 @@
     2.4 +/*
     2.5 + * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
     2.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     2.7 + * 
     2.8 + * This code is free software; you can redistribute it and/or modify it
     2.9 + * under the terms of the GNU General Public License version 2 only, as
    2.10 + * published by the Free Software Foundation.
    2.11 + * 
    2.12 + * This code is distributed in the hope that it will be useful, but WITHOUT
    2.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    2.14 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    2.15 + * version 2 for more details (a copy is included in the LICENSE file that
    2.16 + * accompanied this code).
    2.17 + * 
    2.18 + * You should have received a copy of the GNU General Public License version
    2.19 + * 2 along with this work; if not, write to the Free Software Foundation,
    2.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
    2.21 + * 
    2.22 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
    2.23 + * or visit www.oracle.com if you need additional information or have any
    2.24 + * questions.
    2.25 + */
    2.26 +
    2.27 +/**
    2.28 + * JDK-8016518: Parsing of octal string escapes is broken
    2.29 + *
    2.30 + * @test
    2.31 + * @run
    2.32 + */
    2.33 +
    2.34 +print("\471".charCodeAt(0));
    2.35 +print("\471".charCodeAt(1));
    2.36 +
    2.37 +print("\377".length);
    2.38 +print("\377".charCodeAt(0));
    2.39 +print("\400".length);
    2.40 +print("\400".charCodeAt(0));
    2.41 +print("\400".charCodeAt(1));
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/script/basic/JDK-8016518.js.EXPECTED	Thu Jun 13 12:52:09 2013 +0200
     3.3 @@ -0,0 +1,7 @@
     3.4 +39
     3.5 +49
     3.6 +1
     3.7 +255
     3.8 +2
     3.9 +32
    3.10 +48

mercurial