Merge mips-jdk8u272-b10

Sat, 24 Oct 2020 16:43:03 +0800

author
aoqi
date
Sat, 24 Oct 2020 16:43:03 +0800
changeset 2116
aaee9ae4799a
parent 2090
3b8ebb957957
parent 2115
ba503169016f
child 2117
a5f920b6d2b5

Merge

.hgtags file | annotate | diff | comparison | revisions
THIRD_PARTY_README file | annotate | diff | comparison | revisions
src/com/sun/org/apache/bcel/internal/util/InstructionFinder.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/CharacterArrayCharacterIterator.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/CharacterIterator.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/RE.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/RECompiler.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/REDebugCompiler.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/REProgram.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/RESyntaxException.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/RETest.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/REUtil.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/ReaderCharacterIterator.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/StreamCharacterIterator.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/StringCharacterIterator.java file | annotate | diff | comparison | revisions
src/com/sun/org/apache/regexp/internal/recompile.java file | annotate | diff | comparison | revisions
     1.1 --- a/.hgtags	Sat Oct 24 16:18:47 2020 +0800
     1.2 +++ b/.hgtags	Sat Oct 24 16:43:03 2020 +0800
     1.3 @@ -1069,8 +1069,23 @@
     1.4  560093f3167970da2935b745493653420fdea008 jdk8u262-b03
     1.5  d054aabd2e3c09de0ff622b4fab09388d30aee02 jdk8u262-b04
     1.6  976e73cfac410997160b1d3d6e14a88a324440c3 jdk8u262-b05
     1.7 +976e73cfac410997160b1d3d6e14a88a324440c3 jdk8u272-b00
     1.8  ddbd856338439f2d5f742040d896e27f0f104cd1 jdk8u262-b06
     1.9  ebb0a284b7e75dfb741af3332eb87b37aca66875 jdk8u262-b07
    1.10  0cccb32a50471fd52ecf2f697d95e7254798ab26 jdk8u262-b08
    1.11  779db06fb02444e294b7c93fe3902afee615df2a jdk8u262-b09
    1.12 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u262-b10
    1.13 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u262-ga
    1.14  3b85d4e65538af51987a00b276cde9c250615f9d mips-jdk8u262-b10
    1.15 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u265-b00
    1.16 +3147b24fc8b092b34599830b56d03da4731577a2 jdk8u265-b01
    1.17 +3147b24fc8b092b34599830b56d03da4731577a2 jdk8u265-ga
    1.18 +1bc3598fbad03fa73168f64cea4d0628e75a292b jdk8u272-b01
    1.19 +7694bb86e0236ba9a89326206af46da8c252aad5 jdk8u272-b02
    1.20 +370157535629da61a0f0ac045d77c384b98211f6 jdk8u272-b03
    1.21 +89445883ffdec61e5b32980633b67d932d602582 jdk8u272-b04
    1.22 +36d18f0fd6eeffc14f311dc5ff5a18ae870fc1d0 jdk8u272-b05
    1.23 +44cbebcc276cddad3ad0aa67f4da313d50af7e4b jdk8u272-b06
    1.24 +bd015816ce490762772ca71c86bd90f58a90fb8c jdk8u272-b07
    1.25 +9d92962b2fe312a045e5814d4604d00e04492515 jdk8u272-b08
    1.26 +a5b79eebcc1f3c9afbe9927d672be64364647049 jdk8u272-b09
     2.1 --- a/THIRD_PARTY_README	Sat Oct 24 16:18:47 2020 +0800
     2.2 +++ b/THIRD_PARTY_README	Sat Oct 24 16:43:03 2020 +0800
     2.3 @@ -2240,7 +2240,7 @@
     2.4  
     2.5  -------------------------------------------------------------------------------
     2.6  
     2.7 -%% This notice is provided with respect to PC/SC Lite v1.8.24,
     2.8 +%% This notice is provided with respect to PC/SC Lite v1.8.26,
     2.9  which may be included with JRE 8, JDK 8, and OpenJDK 8 on Linux and Solaris.
    2.10  
    2.11  --- begin of LICENSE ---
    2.12 @@ -3028,8 +3028,7 @@
    2.13    Apache Commons Math 3.2
    2.14    Apache Derby 10.11.1.2
    2.15    Apache Jakarta BCEL 5.1 
    2.16 -  Apache Jakarta Regexp 1.4 
    2.17 -  Apache Santuario XML Security for Java 1.5.4
    2.18 +  Apache Santuario XML Security for Java 2.1.1
    2.19    Apache Xalan-Java 2.7.2
    2.20    Apache Xerces Java 2.10.0 
    2.21    Apache XML Resolver 1.1 
    2.22 @@ -3243,3 +3242,41 @@
    2.23  
    2.24  -------------------------------------------------------------------------------
    2.25  
    2.26 +%% This notice is provided with respect to OASIS PKCS #11 Cryptographic Token
    2.27 +Interface v2.40,  which may be included with JRE 8, JDK 8, and OpenJDK 8.
    2.28 +
    2.29 +--- begin of LICENSE ---
    2.30 +
    2.31 +Copyright (c) OASIS Open 2016. All Rights Reserved.
    2.32 +
    2.33 +All capitalized terms in the following text have the meanings assigned to them
    2.34 +in the OASIS Intellectual Property Rights Policy (the "OASIS IPR Policy"). The
    2.35 +full Policy may be found at the OASIS website:
    2.36 +[http://www.oasis-open.org/policies-guidelines/ipr]
    2.37 +
    2.38 +This document and translations of it may be copied and furnished to others, and
    2.39 +derivative works that comment on or otherwise explain it or assist in its
    2.40 +implementation may be prepared, copied, published, and distributed, in whole or
    2.41 +in part, without restriction of any kind, provided that the above copyright
    2.42 +notice and this section are included on all such copies and derivative works.
    2.43 +However, this document itself may not be modified in any way, including by
    2.44 +removing the copyright notice or references to OASIS, except as needed for the
    2.45 +purpose of developing any document or deliverable produced by an OASIS
    2.46 +Technical Committee (in which case the rules applicable to copyrights, as set
    2.47 +forth in the OASIS IPR Policy, must be followed) or as required to translate it
    2.48 +into languages other than English.
    2.49 +
    2.50 +The limited permissions granted above are perpetual and will not be revoked by
    2.51 +OASIS or its successors or assigns.
    2.52 +
    2.53 +This document and the information contained herein is provided on an "AS IS"
    2.54 +basis and OASIS DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT
    2.55 +LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT
    2.56 +INFRINGE ANY OWNERSHIP RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR
    2.57 +FITNESS FOR A PARTICULAR PURPOSE. OASIS AND ITS MEMBERS WILL NOT BE LIABLE FOR
    2.58 +ANY DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE
    2.59 +OF THIS DOCUMENT OR ANY PART THEREOF.
    2.60 +
    2.61 +--- end of LICENSE ---
    2.62 +
    2.63 +-------------------------------------------------------------------------------
     3.1 --- a/src/com/sun/org/apache/bcel/internal/util/InstructionFinder.java	Sat Oct 24 16:18:47 2020 +0800
     3.2 +++ b/src/com/sun/org/apache/bcel/internal/util/InstructionFinder.java	Sat Oct 24 16:43:03 2020 +0800
     3.3 @@ -4,64 +4,29 @@
     3.4   */
     3.5  package com.sun.org.apache.bcel.internal.util;
     3.6  
     3.7 -/* ====================================================================
     3.8 - * The Apache Software License, Version 1.1
     3.9 +/*
    3.10 + * Licensed to the Apache Software Foundation (ASF) under one or more
    3.11 + * contributor license agreements.  See the NOTICE file distributed with
    3.12 + * this work for additional information regarding copyright ownership.
    3.13 + * The ASF licenses this file to You under the Apache License, Version 2.0
    3.14 + * (the "License"); you may not use this file except in compliance with
    3.15 + * the License.  You may obtain a copy of the License at
    3.16   *
    3.17 - * Copyright (c) 2001 The Apache Software Foundation.  All rights
    3.18 - * reserved.
    3.19 + *      http://www.apache.org/licenses/LICENSE-2.0
    3.20   *
    3.21 - * Redistribution and use in source and binary forms, with or without
    3.22 - * modification, are permitted provided that the following conditions
    3.23 - * are met:
    3.24 + *  Unless required by applicable law or agreed to in writing, software
    3.25 + *  distributed under the License is distributed on an "AS IS" BASIS,
    3.26 + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    3.27 + *  See the License for the specific language governing permissions and
    3.28 + *  limitations under the License.
    3.29   *
    3.30 - * 1. Redistributions of source code must retain the above copyright
    3.31 - *    notice, this list of conditions and the following disclaimer.
    3.32 - *
    3.33 - * 2. Redistributions in binary form must reproduce the above copyright
    3.34 - *    notice, this list of conditions and the following disclaimer in
    3.35 - *    the documentation and/or other materials provided with the
    3.36 - *    distribution.
    3.37 - *
    3.38 - * 3. The end-user documentation included with the redistribution,
    3.39 - *    if any, must include the following acknowledgment:
    3.40 - *       "This product includes software developed by the
    3.41 - *        Apache Software Foundation (http://www.apache.org/)."
    3.42 - *    Alternately, this acknowledgment may appear in the software itself,
    3.43 - *    if and wherever such third-party acknowledgments normally appear.
    3.44 - *
    3.45 - * 4. The names "Apache" and "Apache Software Foundation" and
    3.46 - *    "Apache BCEL" must not be used to endorse or promote products
    3.47 - *    derived from this software without prior written permission. For
    3.48 - *    written permission, please contact apache@apache.org.
    3.49 - *
    3.50 - * 5. Products derived from this software may not be called "Apache",
    3.51 - *    "Apache BCEL", nor may "Apache" appear in their name, without
    3.52 - *    prior written permission of the Apache Software Foundation.
    3.53 - *
    3.54 - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
    3.55 - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    3.56 - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    3.57 - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
    3.58 - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    3.59 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    3.60 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
    3.61 - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    3.62 - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    3.63 - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
    3.64 - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    3.65 - * SUCH DAMAGE.
    3.66 - * ====================================================================
    3.67 - *
    3.68 - * This software consists of voluntary contributions made by many
    3.69 - * individuals on behalf of the Apache Software Foundation.  For more
    3.70 - * information on the Apache Software Foundation, please see
    3.71 - * <http://www.apache.org/>.
    3.72   */
    3.73  
    3.74 -import java.util.*;
    3.75  import com.sun.org.apache.bcel.internal.Constants;
    3.76  import com.sun.org.apache.bcel.internal.generic.*;
    3.77 -import com.sun.org.apache.regexp.internal.*;
    3.78 +import java.util.*;
    3.79 +import java.util.regex.Matcher;
    3.80 +import java.util.regex.Pattern;
    3.81  
    3.82  /**
    3.83   * InstructionFinder is a tool to search for given instructions patterns,
    3.84 @@ -231,28 +196,22 @@
    3.85      if(start == -1)
    3.86        throw new ClassGenException("Instruction handle " + from +
    3.87                                    " not found in instruction list.");
    3.88 -    try {
    3.89 -      RE regex = new RE(search);
    3.90 -      ArrayList matches = new ArrayList();
    3.91  
    3.92 -      while(start < il_string.length() && regex.match(il_string, start)) {
    3.93 -        int startExpr = regex.getParenStart(0);
    3.94 -        int endExpr   = regex.getParenEnd(0);
    3.95 -        int lenExpr   = regex.getParenLength(0);
    3.96 +    Pattern regex = Pattern.compile(search);
    3.97 +    List<InstructionHandle[]> matches = new ArrayList<>();
    3.98 +    Matcher matcher = regex.matcher(il_string);
    3.99 +    while(start < il_string.length() && matcher.find(start)) {
   3.100 +      int startExpr = matcher.start();
   3.101 +      int endExpr   = matcher.end();
   3.102 +      int lenExpr   = endExpr - startExpr;
   3.103 +      InstructionHandle[] match = getMatch(startExpr, lenExpr);
   3.104  
   3.105 -        InstructionHandle[] match = getMatch(startExpr, lenExpr);
   3.106 -
   3.107 -        if((constraint == null) || constraint.checkCode(match))
   3.108 -          matches.add(match);
   3.109 -        start = endExpr;
   3.110 -      }
   3.111 -
   3.112 -      return matches.iterator();
   3.113 -    } catch(RESyntaxException e) {
   3.114 -      System.err.println(e);
   3.115 +      if((constraint == null) || constraint.checkCode(match))
   3.116 +        matches.add(match);
   3.117 +      start = endExpr;
   3.118      }
   3.119  
   3.120 -    return null;
   3.121 +    return matches.iterator();
   3.122    }
   3.123  
   3.124    /**
     4.1 --- a/src/com/sun/org/apache/regexp/internal/CharacterArrayCharacterIterator.java	Sat Oct 24 16:18:47 2020 +0800
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,76 +0,0 @@
     4.4 -/*
     4.5 - * reserved comment block
     4.6 - * DO NOT REMOVE OR ALTER!
     4.7 - */
     4.8 -/*
     4.9 - * Copyright 1999-2004 The Apache Software Foundation.
    4.10 - *
    4.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    4.12 - * you may not use this file except in compliance with the License.
    4.13 - * You may obtain a copy of the License at
    4.14 - *
    4.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    4.16 - *
    4.17 - * Unless required by applicable law or agreed to in writing, software
    4.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    4.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    4.20 - * See the License for the specific language governing permissions and
    4.21 - * limitations under the License.
    4.22 - */
    4.23 -
    4.24 -package com.sun.org.apache.regexp.internal;
    4.25 -
    4.26 -/**
    4.27 - * Encapsulates char[] as CharacterIterator
    4.28 - *
    4.29 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
    4.30 - */
    4.31 -public final class CharacterArrayCharacterIterator implements CharacterIterator
    4.32 -{
    4.33 -    /** encapsulated */
    4.34 -    private final char[] src;
    4.35 -    /** offset in the char array */
    4.36 -    private final int off;
    4.37 -    /** used portion of the array */
    4.38 -    private final int len;
    4.39 -
    4.40 -    /** @param src - encapsulated String */
    4.41 -    public CharacterArrayCharacterIterator(char[] src, int off, int len)
    4.42 -    {
    4.43 -        this.src = src;
    4.44 -        this.off = off;
    4.45 -        this.len = len;
    4.46 -    }
    4.47 -
    4.48 -    /** @return a substring */
    4.49 -    public String substring(int beginIndex, int endIndex)
    4.50 -    {
    4.51 -        if (endIndex > len) {
    4.52 -            throw new IndexOutOfBoundsException("endIndex=" + endIndex
    4.53 -                                                + "; sequence size=" + len);
    4.54 -        }
    4.55 -        if (beginIndex < 0 || beginIndex > endIndex) {
    4.56 -            throw new IndexOutOfBoundsException("beginIndex=" + beginIndex
    4.57 -                                                + "; endIndex=" + endIndex);
    4.58 -        }
    4.59 -        return new String(src, off + beginIndex, endIndex - beginIndex);
    4.60 -    }
    4.61 -
    4.62 -    /** @return a substring */
    4.63 -    public String substring(int beginIndex)
    4.64 -    {
    4.65 -        return substring(beginIndex, len);
    4.66 -    }
    4.67 -
    4.68 -    /** @return a character at the specified position. */
    4.69 -    public char charAt(int pos)
    4.70 -    {
    4.71 -        return src[off + pos];
    4.72 -    }
    4.73 -
    4.74 -    /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
    4.75 -    public boolean isEnd(int pos)
    4.76 -    {
    4.77 -        return (pos >= len);
    4.78 -    }
    4.79 -}
     5.1 --- a/src/com/sun/org/apache/regexp/internal/CharacterIterator.java	Sat Oct 24 16:18:47 2020 +0800
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,42 +0,0 @@
     5.4 -/*
     5.5 - * reserved comment block
     5.6 - * DO NOT REMOVE OR ALTER!
     5.7 - */
     5.8 -/*
     5.9 - * Copyright 1999-2004 The Apache Software Foundation.
    5.10 - *
    5.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    5.12 - * you may not use this file except in compliance with the License.
    5.13 - * You may obtain a copy of the License at
    5.14 - *
    5.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    5.16 - *
    5.17 - * Unless required by applicable law or agreed to in writing, software
    5.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    5.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    5.20 - * See the License for the specific language governing permissions and
    5.21 - * limitations under the License.
    5.22 - */
    5.23 -
    5.24 -package com.sun.org.apache.regexp.internal;
    5.25 -
    5.26 -/**
    5.27 - * Encapsulates different types of character sources - String, InputStream, ...
    5.28 - * Defines a set of common methods
    5.29 - *
    5.30 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
    5.31 - */
    5.32 -public interface CharacterIterator
    5.33 -{
    5.34 -    /** @return a substring */
    5.35 -    String substring(int beginIndex, int endIndex);
    5.36 -
    5.37 -    /** @return a substring */
    5.38 -    String substring(int beginIndex);
    5.39 -
    5.40 -    /** @return a character at the specified position. */
    5.41 -    char charAt(int pos);
    5.42 -
    5.43 -    /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
    5.44 -    boolean isEnd(int pos);
    5.45 -}
     6.1 --- a/src/com/sun/org/apache/regexp/internal/RE.java	Sat Oct 24 16:18:47 2020 +0800
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,1760 +0,0 @@
     6.4 -/*
     6.5 - * reserved comment block
     6.6 - * DO NOT REMOVE OR ALTER!
     6.7 - */
     6.8 -/*
     6.9 - * Copyright 1999-2004 The Apache Software Foundation.
    6.10 - *
    6.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    6.12 - * you may not use this file except in compliance with the License.
    6.13 - * You may obtain a copy of the License at
    6.14 - *
    6.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    6.16 - *
    6.17 - * Unless required by applicable law or agreed to in writing, software
    6.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    6.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    6.20 - * See the License for the specific language governing permissions and
    6.21 - * limitations under the License.
    6.22 - */
    6.23 -
    6.24 -package com.sun.org.apache.regexp.internal;
    6.25 -
    6.26 -import java.io.Serializable;
    6.27 -import java.util.Vector;
    6.28 -
    6.29 -/**
    6.30 - * RE is an efficient, lightweight regular expression evaluator/matcher
    6.31 - * class. Regular expressions are pattern descriptions which enable
    6.32 - * sophisticated matching of strings.  In addition to being able to
    6.33 - * match a string against a pattern, you can also extract parts of the
    6.34 - * match.  This is especially useful in text parsing! Details on the
    6.35 - * syntax of regular expression patterns are given below.
    6.36 - *
    6.37 - * <p>
    6.38 - * To compile a regular expression (RE), you can simply construct an RE
    6.39 - * matcher object from the string specification of the pattern, like this:
    6.40 - *
    6.41 - * <pre>
    6.42 - *  RE r = new RE("a*b");
    6.43 - * </pre>
    6.44 - *
    6.45 - * <p>
    6.46 - * Once you have done this, you can call either of the RE.match methods to
    6.47 - * perform matching on a String.  For example:
    6.48 - *
    6.49 - * <pre>
    6.50 - *  boolean matched = r.match("aaaab");
    6.51 - * </pre>
    6.52 - *
    6.53 - * will cause the boolean matched to be set to true because the
    6.54 - * pattern "a*b" matches the string "aaaab".
    6.55 - *
    6.56 - * <p>
    6.57 - * If you were interested in the <i>number</i> of a's which matched the
    6.58 - * first part of our example expression, you could change the expression to
    6.59 - * "(a*)b".  Then when you compiled the expression and matched it against
    6.60 - * something like "xaaaab", you would get results like this:
    6.61 - *
    6.62 - * <pre>
    6.63 - *  RE r = new RE("(a*)b");                  // Compile expression
    6.64 - *  boolean matched = r.match("xaaaab");     // Match against "xaaaab"
    6.65 - *
    6.66 - *  String wholeExpr = r.getParen(0);        // wholeExpr will be 'aaaab'
    6.67 - *  String insideParens = r.getParen(1);     // insideParens will be 'aaaa'
    6.68 - *
    6.69 - *  int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
    6.70 - *  int endWholeExpr = r.getParenEnd(0);     // endWholeExpr will be index 6
    6.71 - *  int lenWholeExpr = r.getParenLength(0);  // lenWholeExpr will be 5
    6.72 - *
    6.73 - *  int startInside = r.getParenStart(1);    // startInside will be index 1
    6.74 - *  int endInside = r.getParenEnd(1);        // endInside will be index 5
    6.75 - *  int lenInside = r.getParenLength(1);     // lenInside will be 4
    6.76 - * </pre>
    6.77 - *
    6.78 - * You can also refer to the contents of a parenthesized expression
    6.79 - * within a regular expression itself.  This is called a
    6.80 - * 'backreference'.  The first backreference in a regular expression is
    6.81 - * denoted by \1, the second by \2 and so on.  So the expression:
    6.82 - *
    6.83 - * <pre>
    6.84 - *  ([0-9]+)=\1
    6.85 - * </pre>
    6.86 - *
    6.87 - * will match any string of the form n=n (like 0=0 or 2=2).
    6.88 - *
    6.89 - * <p>
    6.90 - * The full regular expression syntax accepted by RE is described here:
    6.91 - *
    6.92 - * <pre>
    6.93 - *
    6.94 - *  <b><font face=times roman>Characters</font></b>
    6.95 - *
    6.96 - *    <i>unicodeChar</i>   Matches any identical unicode character
    6.97 - *    \                    Used to quote a meta-character (like '*')
    6.98 - *    \\                   Matches a single '\' character
    6.99 - *    \0nnn                Matches a given octal character
   6.100 - *    \xhh                 Matches a given 8-bit hexadecimal character
   6.101 - *    \\uhhhh              Matches a given 16-bit hexadecimal character
   6.102 - *    \t                   Matches an ASCII tab character
   6.103 - *    \n                   Matches an ASCII newline character
   6.104 - *    \r                   Matches an ASCII return character
   6.105 - *    \f                   Matches an ASCII form feed character
   6.106 - *
   6.107 - *
   6.108 - *  <b><font face=times roman>Character Classes</font></b>
   6.109 - *
   6.110 - *    [abc]                Simple character class
   6.111 - *    [a-zA-Z]             Character class with ranges
   6.112 - *    [^abc]               Negated character class
   6.113 - * </pre>
   6.114 - *
   6.115 - * <b>NOTE:</b> Incomplete ranges will be interpreted as &quot;starts
   6.116 - * from zero&quot; or &quot;ends with last character&quot;.
   6.117 - * <br>
   6.118 - * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
   6.119 - * [-] means &quot;all characters&quot;.
   6.120 - *
   6.121 - * <pre>
   6.122 - *
   6.123 - *  <b><font face=times roman>Standard POSIX Character Classes</font></b>
   6.124 - *
   6.125 - *    [:alnum:]            Alphanumeric characters.
   6.126 - *    [:alpha:]            Alphabetic characters.
   6.127 - *    [:blank:]            Space and tab characters.
   6.128 - *    [:cntrl:]            Control characters.
   6.129 - *    [:digit:]            Numeric characters.
   6.130 - *    [:graph:]            Characters that are printable and are also visible.
   6.131 - *                         (A space is printable, but not visible, while an
   6.132 - *                         `a' is both.)
   6.133 - *    [:lower:]            Lower-case alphabetic characters.
   6.134 - *    [:print:]            Printable characters (characters that are not
   6.135 - *                         control characters.)
   6.136 - *    [:punct:]            Punctuation characters (characters that are not letter,
   6.137 - *                         digits, control characters, or space characters).
   6.138 - *    [:space:]            Space characters (such as space, tab, and formfeed,
   6.139 - *                         to name a few).
   6.140 - *    [:upper:]            Upper-case alphabetic characters.
   6.141 - *    [:xdigit:]           Characters that are hexadecimal digits.
   6.142 - *
   6.143 - *
   6.144 - *  <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
   6.145 - *
   6.146 - *    [:javastart:]        Start of a Java identifier
   6.147 - *    [:javapart:]         Part of a Java identifier
   6.148 - *
   6.149 - *
   6.150 - *  <b><font face=times roman>Predefined Classes</font></b>
   6.151 - *
   6.152 - *    .         Matches any character other than newline
   6.153 - *    \w        Matches a "word" character (alphanumeric plus "_")
   6.154 - *    \W        Matches a non-word character
   6.155 - *    \s        Matches a whitespace character
   6.156 - *    \S        Matches a non-whitespace character
   6.157 - *    \d        Matches a digit character
   6.158 - *    \D        Matches a non-digit character
   6.159 - *
   6.160 - *
   6.161 - *  <b><font face=times roman>Boundary Matchers</font></b>
   6.162 - *
   6.163 - *    ^         Matches only at the beginning of a line
   6.164 - *    $         Matches only at the end of a line
   6.165 - *    \b        Matches only at a word boundary
   6.166 - *    \B        Matches only at a non-word boundary
   6.167 - *
   6.168 - *
   6.169 - *  <b><font face=times roman>Greedy Closures</font></b>
   6.170 - *
   6.171 - *    A*        Matches A 0 or more times (greedy)
   6.172 - *    A+        Matches A 1 or more times (greedy)
   6.173 - *    A?        Matches A 1 or 0 times (greedy)
   6.174 - *    A{n}      Matches A exactly n times (greedy)
   6.175 - *    A{n,}     Matches A at least n times (greedy)
   6.176 - *    A{n,m}    Matches A at least n but not more than m times (greedy)
   6.177 - *
   6.178 - *
   6.179 - *  <b><font face=times roman>Reluctant Closures</font></b>
   6.180 - *
   6.181 - *    A*?       Matches A 0 or more times (reluctant)
   6.182 - *    A+?       Matches A 1 or more times (reluctant)
   6.183 - *    A??       Matches A 0 or 1 times (reluctant)
   6.184 - *
   6.185 - *
   6.186 - *  <b><font face=times roman>Logical Operators</font></b>
   6.187 - *
   6.188 - *    AB        Matches A followed by B
   6.189 - *    A|B       Matches either A or B
   6.190 - *    (A)       Used for subexpression grouping
   6.191 - *   (?:A)      Used for subexpression clustering (just like grouping but
   6.192 - *              no backrefs)
   6.193 - *
   6.194 - *
   6.195 - *  <b><font face=times roman>Backreferences</font></b>
   6.196 - *
   6.197 - *    \1    Backreference to 1st parenthesized subexpression
   6.198 - *    \2    Backreference to 2nd parenthesized subexpression
   6.199 - *    \3    Backreference to 3rd parenthesized subexpression
   6.200 - *    \4    Backreference to 4th parenthesized subexpression
   6.201 - *    \5    Backreference to 5th parenthesized subexpression
   6.202 - *    \6    Backreference to 6th parenthesized subexpression
   6.203 - *    \7    Backreference to 7th parenthesized subexpression
   6.204 - *    \8    Backreference to 8th parenthesized subexpression
   6.205 - *    \9    Backreference to 9th parenthesized subexpression
   6.206 - * </pre>
   6.207 - *
   6.208 - * <p>
   6.209 - * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
   6.210 - * that they match as many elements of the string as possible without
   6.211 - * causing the overall match to fail.  If you want a closure to be
   6.212 - * reluctant (non-greedy), you can simply follow it with a '?'.  A
   6.213 - * reluctant closure will match as few elements of the string as
   6.214 - * possible when finding matches.  {m,n} closures don't currently
   6.215 - * support reluctancy.
   6.216 - *
   6.217 - * <p>
   6.218 - * <b><font face="times roman">Line terminators</font></b>
   6.219 - * <br>
   6.220 - * A line terminator is a one- or two-character sequence that marks
   6.221 - * the end of a line of the input character sequence. The following
   6.222 - * are recognized as line terminators:
   6.223 - * <ul>
   6.224 - * <li>A newline (line feed) character ('\n'),</li>
   6.225 - * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
   6.226 - * <li>A standalone carriage-return character ('\r'),</li>
   6.227 - * <li>A next-line character ('\u0085'),</li>
   6.228 - * <li>A line-separator character ('\u2028'), or</li>
   6.229 - * <li>A paragraph-separator character ('\u2029).</li>
   6.230 - * </ul>
   6.231 - *
   6.232 - * <p>
   6.233 - * RE runs programs compiled by the RECompiler class.  But the RE
   6.234 - * matcher class does not include the actual regular expression compiler
   6.235 - * for reasons of efficiency.  In fact, if you want to pre-compile one
   6.236 - * or more regular expressions, the 'recompile' class can be invoked
   6.237 - * from the command line to produce compiled output like this:
   6.238 - *
   6.239 - * <pre>
   6.240 - *    // Pre-compiled regular expression "a*b"
   6.241 - *    char[] re1Instructions =
   6.242 - *    {
   6.243 - *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
   6.244 - *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
   6.245 - *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
   6.246 - *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
   6.247 - *        0x0000,
   6.248 - *    };
   6.249 - *
   6.250 - *
   6.251 - *    REProgram re1 = new REProgram(re1Instructions);
   6.252 - * </pre>
   6.253 - *
   6.254 - * You can then construct a regular expression matcher (RE) object from
   6.255 - * the pre-compiled expression re1 and thus avoid the overhead of
   6.256 - * compiling the expression at runtime. If you require more dynamic
   6.257 - * regular expressions, you can construct a single RECompiler object and
   6.258 - * re-use it to compile each expression. Similarly, you can change the
   6.259 - * program run by a given matcher object at any time. However, RE and
   6.260 - * RECompiler are not threadsafe (for efficiency reasons, and because
   6.261 - * requiring thread safety in this class is deemed to be a rare
   6.262 - * requirement), so you will need to construct a separate compiler or
   6.263 - * matcher object for each thread (unless you do thread synchronization
   6.264 - * yourself). Once an expression is compiled into a REProgram object, the REProgram
   6.265 - * can be safely shared across multiple threads and RE objects.
   6.266 - *
   6.267 - * <br><p><br>
   6.268 - *
   6.269 - * <font color="red">
   6.270 - * <i>ISSUES:</i>
   6.271 - *
   6.272 - * <ul>
   6.273 - *  <li>com.weusours.util.re is not currently compatible with all
   6.274 - *      standard POSIX regcomp flags</li>
   6.275 - *  <li>com.weusours.util.re does not support POSIX equivalence classes
   6.276 - *      ([=foo=] syntax) (I18N/locale issue)</li>
   6.277 - *  <li>com.weusours.util.re does not support nested POSIX character
   6.278 - *      classes (definitely should, but not completely trivial)</li>
   6.279 - *  <li>com.weusours.util.re does not support POSIX character collation
   6.280 - *      concepts ([.foo.] syntax) (I18N/locale issue)</li>
   6.281 - *  <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
   6.282 - *  <li>Should RE support character iterators (for backwards RE matching!)?</li>
   6.283 - *  <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
   6.284 - *  <li>Not *all* possibilities are considered for greediness when backreferences
   6.285 - *      are involved (as POSIX suggests should be the case).  The POSIX RE
   6.286 - *      "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
   6.287 - *      of acdacaa where \1 is "a".  This is not the case in this RE package,
   6.288 - *      and actually Perl doesn't go to this extent either!  Until someone
   6.289 - *      actually complains about this, I'm not sure it's worth "fixing".
   6.290 - *      If it ever is fixed, test #137 in RETest.txt should be updated.</li>
   6.291 - * </ul>
   6.292 - *
   6.293 - * </font>
   6.294 - *
   6.295 - * @see recompile
   6.296 - * @see RECompiler
   6.297 - *
   6.298 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   6.299 - * @author <a href="mailto:ts@sch-fer.de">Tobias Sch&auml;fer</a>
   6.300 - */
   6.301 -public class RE implements Serializable
   6.302 -{
   6.303 -    /**
   6.304 -     * Specifies normal, case-sensitive matching behaviour.
   6.305 -     */
   6.306 -    public static final int MATCH_NORMAL          = 0x0000;
   6.307 -
   6.308 -    /**
   6.309 -     * Flag to indicate that matching should be case-independent (folded)
   6.310 -     */
   6.311 -    public static final int MATCH_CASEINDEPENDENT = 0x0001;
   6.312 -
   6.313 -    /**
   6.314 -     * Newlines should match as BOL/EOL (^ and $)
   6.315 -     */
   6.316 -    public static final int MATCH_MULTILINE       = 0x0002;
   6.317 -
   6.318 -    /**
   6.319 -     * Consider all input a single body of text - newlines are matched by .
   6.320 -     */
   6.321 -    public static final int MATCH_SINGLELINE      = 0x0004;
   6.322 -
   6.323 -    /************************************************
   6.324 -     *                                              *
   6.325 -     * The format of a node in a program is:        *
   6.326 -     *                                              *
   6.327 -     * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
   6.328 -     *                                              *
   6.329 -     * char OPCODE - instruction                    *
   6.330 -     * char OPDATA - modifying data                 *
   6.331 -     * char OPNEXT - next node (relative offset)    *
   6.332 -     *                                              *
   6.333 -     ************************************************/
   6.334 -
   6.335 -                 //   Opcode              Char       Opdata/Operand  Meaning
   6.336 -                 //   ----------          ---------- --------------- --------------------------------------------------
   6.337 -    static final char OP_END              = 'E';  //                 end of program
   6.338 -    static final char OP_BOL              = '^';  //                 match only if at beginning of line
   6.339 -    static final char OP_EOL              = '$';  //                 match only if at end of line
   6.340 -    static final char OP_ANY              = '.';  //                 match any single character except newline
   6.341 -    static final char OP_ANYOF            = '[';  // count/ranges    match any char in the list of ranges
   6.342 -    static final char OP_BRANCH           = '|';  // node            match this alternative or the next one
   6.343 -    static final char OP_ATOM             = 'A';  // length/string   length of string followed by string itself
   6.344 -    static final char OP_STAR             = '*';  // node            kleene closure
   6.345 -    static final char OP_PLUS             = '+';  // node            positive closure
   6.346 -    static final char OP_MAYBE            = '?';  // node            optional closure
   6.347 -    static final char OP_ESCAPE           = '\\'; // escape          special escape code char class (escape is E_* code)
   6.348 -    static final char OP_OPEN             = '(';  // number          nth opening paren
   6.349 -    static final char OP_OPEN_CLUSTER     = '<';  //                 opening cluster
   6.350 -    static final char OP_CLOSE            = ')';  // number          nth closing paren
   6.351 -    static final char OP_CLOSE_CLUSTER    = '>';  //                 closing cluster
   6.352 -    static final char OP_BACKREF          = '#';  // number          reference nth already matched parenthesized string
   6.353 -    static final char OP_GOTO             = 'G';  //                 nothing but a (back-)pointer
   6.354 -    static final char OP_NOTHING          = 'N';  //                 match null string such as in '(a|)'
   6.355 -    static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant '*' (mnemonic for char is unshifted '*')
   6.356 -    static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant '+' (mnemonic for char is unshifted '+')
   6.357 -    static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant '?' (mnemonic for char is unshifted '?')
   6.358 -    static final char OP_POSIXCLASS       = 'P';  // classid         one of the posix character classes
   6.359 -
   6.360 -    // Escape codes
   6.361 -    static final char E_ALNUM             = 'w';  // Alphanumeric
   6.362 -    static final char E_NALNUM            = 'W';  // Non-alphanumeric
   6.363 -    static final char E_BOUND             = 'b';  // Word boundary
   6.364 -    static final char E_NBOUND            = 'B';  // Non-word boundary
   6.365 -    static final char E_SPACE             = 's';  // Whitespace
   6.366 -    static final char E_NSPACE            = 'S';  // Non-whitespace
   6.367 -    static final char E_DIGIT             = 'd';  // Digit
   6.368 -    static final char E_NDIGIT            = 'D';  // Non-digit
   6.369 -
   6.370 -    // Posix character classes
   6.371 -    static final char POSIX_CLASS_ALNUM   = 'w';  // Alphanumerics
   6.372 -    static final char POSIX_CLASS_ALPHA   = 'a';  // Alphabetics
   6.373 -    static final char POSIX_CLASS_BLANK   = 'b';  // Blanks
   6.374 -    static final char POSIX_CLASS_CNTRL   = 'c';  // Control characters
   6.375 -    static final char POSIX_CLASS_DIGIT   = 'd';  // Digits
   6.376 -    static final char POSIX_CLASS_GRAPH   = 'g';  // Graphic characters
   6.377 -    static final char POSIX_CLASS_LOWER   = 'l';  // Lowercase characters
   6.378 -    static final char POSIX_CLASS_PRINT   = 'p';  // Printable characters
   6.379 -    static final char POSIX_CLASS_PUNCT   = '!';  // Punctuation
   6.380 -    static final char POSIX_CLASS_SPACE   = 's';  // Spaces
   6.381 -    static final char POSIX_CLASS_UPPER   = 'u';  // Uppercase characters
   6.382 -    static final char POSIX_CLASS_XDIGIT  = 'x';  // Hexadecimal digits
   6.383 -    static final char POSIX_CLASS_JSTART  = 'j';  // Java identifier start
   6.384 -    static final char POSIX_CLASS_JPART   = 'k';  // Java identifier part
   6.385 -
   6.386 -    // Limits
   6.387 -    static final int maxNode  = 65536;            // Maximum number of nodes in a program
   6.388 -    static final int MAX_PAREN = 16;              // Number of paren pairs (only 9 can be backrefs)
   6.389 -
   6.390 -    // Node layout constants
   6.391 -    static final int offsetOpcode = 0;            // Opcode offset (first character)
   6.392 -    static final int offsetOpdata = 1;            // Opdata offset (second char)
   6.393 -    static final int offsetNext   = 2;            // Next index offset (third char)
   6.394 -    static final int nodeSize     = 3;            // Node size (in chars)
   6.395 -
   6.396 -    // State of current program
   6.397 -    REProgram program;                            // Compiled regular expression 'program'
   6.398 -    transient CharacterIterator search;           // The string being matched against
   6.399 -    int matchFlags;                               // Match behaviour flags
   6.400 -    int maxParen = MAX_PAREN;
   6.401 -
   6.402 -    // Parenthesized subexpressions
   6.403 -    transient int parenCount;                     // Number of subexpressions matched (num open parens + 1)
   6.404 -    transient int start0;                         // Cache of start[0]
   6.405 -    transient int end0;                           // Cache of end[0]
   6.406 -    transient int start1;                         // Cache of start[1]
   6.407 -    transient int end1;                           // Cache of end[1]
   6.408 -    transient int start2;                         // Cache of start[2]
   6.409 -    transient int end2;                           // Cache of end[2]
   6.410 -    transient int[] startn;                       // Lazy-alloced array of sub-expression starts
   6.411 -    transient int[] endn;                         // Lazy-alloced array of sub-expression ends
   6.412 -
   6.413 -    // Backreferences
   6.414 -    transient int[] startBackref;                 // Lazy-alloced array of backref starts
   6.415 -    transient int[] endBackref;                   // Lazy-alloced array of backref ends
   6.416 -
   6.417 -    /**
   6.418 -     * Constructs a regular expression matcher from a String by compiling it
   6.419 -     * using a new instance of RECompiler.  If you will be compiling many
   6.420 -     * expressions, you may prefer to use a single RECompiler object instead.
   6.421 -     *
   6.422 -     * @param pattern The regular expression pattern to compile.
   6.423 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   6.424 -     * @see RECompiler
   6.425 -     * @see recompile
   6.426 -     */
   6.427 -    public RE(String pattern) throws RESyntaxException
   6.428 -    {
   6.429 -        this(pattern, MATCH_NORMAL);
   6.430 -    }
   6.431 -
   6.432 -    /**
   6.433 -     * Constructs a regular expression matcher from a String by compiling it
   6.434 -     * using a new instance of RECompiler.  If you will be compiling many
   6.435 -     * expressions, you may prefer to use a single RECompiler object instead.
   6.436 -     *
   6.437 -     * @param pattern The regular expression pattern to compile.
   6.438 -     * @param matchFlags The matching style
   6.439 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   6.440 -     * @see RECompiler
   6.441 -     * @see recompile
   6.442 -     */
   6.443 -    public RE(String pattern, int matchFlags) throws RESyntaxException
   6.444 -    {
   6.445 -        this(new RECompiler().compile(pattern));
   6.446 -        setMatchFlags(matchFlags);
   6.447 -    }
   6.448 -
   6.449 -    /**
   6.450 -     * Construct a matcher for a pre-compiled regular expression from program
   6.451 -     * (bytecode) data.  Permits special flags to be passed in to modify matching
   6.452 -     * behaviour.
   6.453 -     *
   6.454 -     * @param program Compiled regular expression program (see RECompiler and/or recompile)
   6.455 -     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
   6.456 -     *
   6.457 -     * <pre>
   6.458 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   6.459 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   6.460 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   6.461 -     * </pre>
   6.462 -     *
   6.463 -     * @see RECompiler
   6.464 -     * @see REProgram
   6.465 -     * @see recompile
   6.466 -     */
   6.467 -    public RE(REProgram program, int matchFlags)
   6.468 -    {
   6.469 -        setProgram(program);
   6.470 -        setMatchFlags(matchFlags);
   6.471 -    }
   6.472 -
   6.473 -    /**
   6.474 -     * Construct a matcher for a pre-compiled regular expression from program
   6.475 -     * (bytecode) data.
   6.476 -     *
   6.477 -     * @param program Compiled regular expression program
   6.478 -     * @see RECompiler
   6.479 -     * @see recompile
   6.480 -     */
   6.481 -    public RE(REProgram program)
   6.482 -    {
   6.483 -        this(program, MATCH_NORMAL);
   6.484 -    }
   6.485 -
   6.486 -    /**
   6.487 -     * Constructs a regular expression matcher with no initial program.
   6.488 -     * This is likely to be an uncommon practice, but is still supported.
   6.489 -     */
   6.490 -    public RE()
   6.491 -    {
   6.492 -        this((REProgram)null, MATCH_NORMAL);
   6.493 -    }
   6.494 -
   6.495 -    /**
   6.496 -     * Converts a 'simplified' regular expression to a full regular expression
   6.497 -     *
   6.498 -     * @param pattern The pattern to convert
   6.499 -     * @return The full regular expression
   6.500 -     */
   6.501 -    public static String simplePatternToFullRegularExpression(String pattern)
   6.502 -    {
   6.503 -        StringBuffer buf = new StringBuffer();
   6.504 -        for (int i = 0; i < pattern.length(); i++)
   6.505 -        {
   6.506 -            char c = pattern.charAt(i);
   6.507 -            switch (c)
   6.508 -            {
   6.509 -                case '*':
   6.510 -                    buf.append(".*");
   6.511 -                    break;
   6.512 -
   6.513 -                case '.':
   6.514 -                case '[':
   6.515 -                case ']':
   6.516 -                case '\\':
   6.517 -                case '+':
   6.518 -                case '?':
   6.519 -                case '{':
   6.520 -                case '}':
   6.521 -                case '$':
   6.522 -                case '^':
   6.523 -                case '|':
   6.524 -                case '(':
   6.525 -                case ')':
   6.526 -                    buf.append('\\');
   6.527 -                default:
   6.528 -                    buf.append(c);
   6.529 -                    break;
   6.530 -            }
   6.531 -        }
   6.532 -        return buf.toString();
   6.533 -    }
   6.534 -
   6.535 -    /**
   6.536 -     * Sets match behaviour flags which alter the way RE does matching.
   6.537 -     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
   6.538 -     *
   6.539 -     * <pre>
   6.540 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   6.541 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   6.542 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   6.543 -     * </pre>
   6.544 -     */
   6.545 -    public void setMatchFlags(int matchFlags)
   6.546 -    {
   6.547 -        this.matchFlags = matchFlags;
   6.548 -    }
   6.549 -
   6.550 -    /**
   6.551 -     * Returns the current match behaviour flags.
   6.552 -     * @return Current match behaviour flags (RE.MATCH_*).
   6.553 -     *
   6.554 -     * <pre>
   6.555 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   6.556 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   6.557 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   6.558 -     * </pre>
   6.559 -     *
   6.560 -     * @see #setMatchFlags
   6.561 -     */
   6.562 -    public int getMatchFlags()
   6.563 -    {
   6.564 -        return matchFlags;
   6.565 -    }
   6.566 -
   6.567 -    /**
   6.568 -     * Sets the current regular expression program used by this matcher object.
   6.569 -     *
   6.570 -     * @param program Regular expression program compiled by RECompiler.
   6.571 -     * @see RECompiler
   6.572 -     * @see REProgram
   6.573 -     * @see recompile
   6.574 -     */
   6.575 -    public void setProgram(REProgram program)
   6.576 -    {
   6.577 -        this.program = program;
   6.578 -        if (program != null && program.maxParens != -1) {
   6.579 -            this.maxParen = program.maxParens;
   6.580 -        } else {
   6.581 -            this.maxParen = MAX_PAREN;
   6.582 -        }
   6.583 -    }
   6.584 -
   6.585 -    /**
   6.586 -     * Returns the current regular expression program in use by this matcher object.
   6.587 -     *
   6.588 -     * @return Regular expression program
   6.589 -     * @see #setProgram
   6.590 -     */
   6.591 -    public REProgram getProgram()
   6.592 -    {
   6.593 -        return program;
   6.594 -    }
   6.595 -
   6.596 -    /**
   6.597 -     * Returns the number of parenthesized subexpressions available after a successful match.
   6.598 -     *
   6.599 -     * @return Number of available parenthesized subexpressions
   6.600 -     */
   6.601 -    public int getParenCount()
   6.602 -    {
   6.603 -        return parenCount;
   6.604 -    }
   6.605 -
   6.606 -    /**
   6.607 -     * Gets the contents of a parenthesized subexpression after a successful match.
   6.608 -     *
   6.609 -     * @param which Nesting level of subexpression
   6.610 -     * @return String
   6.611 -     */
   6.612 -    public String getParen(int which)
   6.613 -    {
   6.614 -        int start;
   6.615 -        if (which < parenCount && (start = getParenStart(which)) >= 0)
   6.616 -        {
   6.617 -            return search.substring(start, getParenEnd(which));
   6.618 -        }
   6.619 -        return null;
   6.620 -    }
   6.621 -
   6.622 -    /**
   6.623 -     * Returns the start index of a given paren level.
   6.624 -     *
   6.625 -     * @param which Nesting level of subexpression
   6.626 -     * @return String index
   6.627 -     */
   6.628 -    public final int getParenStart(int which)
   6.629 -    {
   6.630 -        if (which < parenCount)
   6.631 -        {
   6.632 -            switch (which)
   6.633 -            {
   6.634 -                case 0:
   6.635 -                    return start0;
   6.636 -
   6.637 -                case 1:
   6.638 -                    return start1;
   6.639 -
   6.640 -                case 2:
   6.641 -                    return start2;
   6.642 -
   6.643 -                default:
   6.644 -                    if (startn == null)
   6.645 -                    {
   6.646 -                        allocParens();
   6.647 -                    }
   6.648 -                    return startn[which];
   6.649 -            }
   6.650 -        }
   6.651 -        return -1;
   6.652 -    }
   6.653 -
   6.654 -    /**
   6.655 -     * Returns the end index of a given paren level.
   6.656 -     *
   6.657 -     * @param which Nesting level of subexpression
   6.658 -     * @return String index
   6.659 -     */
   6.660 -    public final int getParenEnd(int which)
   6.661 -    {
   6.662 -        if (which < parenCount)
   6.663 -        {
   6.664 -            switch (which)
   6.665 -            {
   6.666 -                case 0:
   6.667 -                    return end0;
   6.668 -
   6.669 -                case 1:
   6.670 -                    return end1;
   6.671 -
   6.672 -                case 2:
   6.673 -                    return end2;
   6.674 -
   6.675 -                default:
   6.676 -                    if (endn == null)
   6.677 -                    {
   6.678 -                        allocParens();
   6.679 -                    }
   6.680 -                    return endn[which];
   6.681 -            }
   6.682 -        }
   6.683 -        return -1;
   6.684 -    }
   6.685 -
   6.686 -    /**
   6.687 -     * Returns the length of a given paren level.
   6.688 -     *
   6.689 -     * @param which Nesting level of subexpression
   6.690 -     * @return Number of characters in the parenthesized subexpression
   6.691 -     */
   6.692 -    public final int getParenLength(int which)
   6.693 -    {
   6.694 -        if (which < parenCount)
   6.695 -        {
   6.696 -            return getParenEnd(which) - getParenStart(which);
   6.697 -        }
   6.698 -        return -1;
   6.699 -    }
   6.700 -
   6.701 -    /**
   6.702 -     * Sets the start of a paren level
   6.703 -     *
   6.704 -     * @param which Which paren level
   6.705 -     * @param i Index in input array
   6.706 -     */
   6.707 -    protected final void setParenStart(int which, int i)
   6.708 -    {
   6.709 -        if (which < parenCount)
   6.710 -        {
   6.711 -            switch (which)
   6.712 -            {
   6.713 -                case 0:
   6.714 -                    start0 = i;
   6.715 -                    break;
   6.716 -
   6.717 -                case 1:
   6.718 -                    start1 = i;
   6.719 -                    break;
   6.720 -
   6.721 -                case 2:
   6.722 -                    start2 = i;
   6.723 -                    break;
   6.724 -
   6.725 -                default:
   6.726 -                    if (startn == null)
   6.727 -                    {
   6.728 -                        allocParens();
   6.729 -                    }
   6.730 -                    startn[which] = i;
   6.731 -                    break;
   6.732 -            }
   6.733 -        }
   6.734 -    }
   6.735 -
   6.736 -    /**
   6.737 -     * Sets the end of a paren level
   6.738 -     *
   6.739 -     * @param which Which paren level
   6.740 -     * @param i Index in input array
   6.741 -     */
   6.742 -    protected final void setParenEnd(int which, int i)
   6.743 -    {
   6.744 -        if (which < parenCount)
   6.745 -        {
   6.746 -            switch (which)
   6.747 -            {
   6.748 -                case 0:
   6.749 -                    end0 = i;
   6.750 -                    break;
   6.751 -
   6.752 -                case 1:
   6.753 -                    end1 = i;
   6.754 -                    break;
   6.755 -
   6.756 -                case 2:
   6.757 -                    end2 = i;
   6.758 -                    break;
   6.759 -
   6.760 -                default:
   6.761 -                    if (endn == null)
   6.762 -                    {
   6.763 -                        allocParens();
   6.764 -                    }
   6.765 -                    endn[which] = i;
   6.766 -                    break;
   6.767 -            }
   6.768 -        }
   6.769 -    }
   6.770 -
   6.771 -    /**
   6.772 -     * Throws an Error representing an internal error condition probably resulting
   6.773 -     * from a bug in the regular expression compiler (or possibly data corruption).
   6.774 -     * In practice, this should be very rare.
   6.775 -     *
   6.776 -     * @param s Error description
   6.777 -     */
   6.778 -    protected void internalError(String s) throws Error
   6.779 -    {
   6.780 -        throw new Error("RE internal error: " + s);
   6.781 -    }
   6.782 -
   6.783 -    /**
   6.784 -     * Performs lazy allocation of subexpression arrays
   6.785 -     */
   6.786 -    private final void allocParens()
   6.787 -    {
   6.788 -        // Allocate arrays for subexpressions
   6.789 -        startn = new int[maxParen];
   6.790 -        endn = new int[maxParen];
   6.791 -
   6.792 -        // Set sub-expression pointers to invalid values
   6.793 -        for (int i = 0; i < maxParen; i++)
   6.794 -        {
   6.795 -            startn[i] = -1;
   6.796 -            endn[i] = -1;
   6.797 -        }
   6.798 -    }
   6.799 -
   6.800 -    /**
   6.801 -     * Try to match a string against a subset of nodes in the program
   6.802 -     *
   6.803 -     * @param firstNode Node to start at in program
   6.804 -     * @param lastNode  Last valid node (used for matching a subexpression without
   6.805 -     *                  matching the rest of the program as well).
   6.806 -     * @param idxStart  Starting position in character array
   6.807 -     * @return Final input array index if match succeeded.  -1 if not.
   6.808 -     */
   6.809 -    protected int matchNodes(int firstNode, int lastNode, int idxStart)
   6.810 -    {
   6.811 -        // Our current place in the string
   6.812 -        int idx = idxStart;
   6.813 -
   6.814 -        // Loop while node is valid
   6.815 -        int next, opcode, opdata;
   6.816 -        int idxNew;
   6.817 -        char[] instruction = program.instruction;
   6.818 -        for (int node = firstNode; node < lastNode; )
   6.819 -        {
   6.820 -            opcode = instruction[node + offsetOpcode];
   6.821 -            next   = node + (short)instruction[node + offsetNext];
   6.822 -            opdata = instruction[node + offsetOpdata];
   6.823 -
   6.824 -            switch (opcode)
   6.825 -            {
   6.826 -                case OP_RELUCTANTMAYBE:
   6.827 -                    {
   6.828 -                        int once = 0;
   6.829 -                        do
   6.830 -                        {
   6.831 -                            // Try to match the rest without using the reluctant subexpr
   6.832 -                            if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   6.833 -                            {
   6.834 -                                return idxNew;
   6.835 -                            }
   6.836 -                        }
   6.837 -                        while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
   6.838 -                        return -1;
   6.839 -                    }
   6.840 -
   6.841 -                case OP_RELUCTANTPLUS:
   6.842 -                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
   6.843 -                    {
   6.844 -                        // Try to match the rest without using the reluctant subexpr
   6.845 -                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   6.846 -                        {
   6.847 -                            return idxNew;
   6.848 -                        }
   6.849 -                    }
   6.850 -                    return -1;
   6.851 -
   6.852 -                case OP_RELUCTANTSTAR:
   6.853 -                    do
   6.854 -                    {
   6.855 -                        // Try to match the rest without using the reluctant subexpr
   6.856 -                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   6.857 -                        {
   6.858 -                            return idxNew;
   6.859 -                        }
   6.860 -                    }
   6.861 -                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
   6.862 -                    return -1;
   6.863 -
   6.864 -                case OP_OPEN:
   6.865 -
   6.866 -                    // Match subexpression
   6.867 -                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
   6.868 -                    {
   6.869 -                        startBackref[opdata] = idx;
   6.870 -                    }
   6.871 -                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   6.872 -                    {
   6.873 -                        // Increase valid paren count
   6.874 -                        if ((opdata + 1) > parenCount)
   6.875 -                        {
   6.876 -                            parenCount = opdata + 1;
   6.877 -                        }
   6.878 -
   6.879 -                        // Don't set paren if already set later on
   6.880 -                        if (getParenStart(opdata) == -1)
   6.881 -                        {
   6.882 -                            setParenStart(opdata, idx);
   6.883 -                        }
   6.884 -                    }
   6.885 -                    return idxNew;
   6.886 -
   6.887 -                case OP_CLOSE:
   6.888 -
   6.889 -                    // Done matching subexpression
   6.890 -                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
   6.891 -                    {
   6.892 -                        endBackref[opdata] = idx;
   6.893 -                    }
   6.894 -                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   6.895 -                    {
   6.896 -                        // Increase valid paren count
   6.897 -                        if ((opdata + 1) > parenCount)
   6.898 -                        {
   6.899 -                            parenCount = opdata + 1;
   6.900 -                        }
   6.901 -
   6.902 -                        // Don't set paren if already set later on
   6.903 -                        if (getParenEnd(opdata) == -1)
   6.904 -                        {
   6.905 -                            setParenEnd(opdata, idx);
   6.906 -                        }
   6.907 -                    }
   6.908 -                    return idxNew;
   6.909 -
   6.910 -                case OP_OPEN_CLUSTER:
   6.911 -                case OP_CLOSE_CLUSTER:
   6.912 -                    // starting or ending the matching of a subexpression which has no backref.
   6.913 -                    return matchNodes( next, maxNode, idx );
   6.914 -
   6.915 -                case OP_BACKREF:
   6.916 -                    {
   6.917 -                        // Get the start and end of the backref
   6.918 -                        int s = startBackref[opdata];
   6.919 -                        int e = endBackref[opdata];
   6.920 -
   6.921 -                        // We don't know the backref yet
   6.922 -                        if (s == -1 || e == -1)
   6.923 -                        {
   6.924 -                            return -1;
   6.925 -                        }
   6.926 -
   6.927 -                        // The backref is empty size
   6.928 -                        if (s == e)
   6.929 -                        {
   6.930 -                            break;
   6.931 -                        }
   6.932 -
   6.933 -                        // Get the length of the backref
   6.934 -                        int l = e - s;
   6.935 -
   6.936 -                        // If there's not enough input left, give up.
   6.937 -                        if (search.isEnd(idx + l - 1))
   6.938 -                        {
   6.939 -                            return -1;
   6.940 -                        }
   6.941 -
   6.942 -                        // Case fold the backref?
   6.943 -                        final boolean caseFold =
   6.944 -                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
   6.945 -                        // Compare backref to input
   6.946 -                        for (int i = 0; i < l; i++)
   6.947 -                        {
   6.948 -                            if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
   6.949 -                            {
   6.950 -                                return -1;
   6.951 -                            }
   6.952 -                        }
   6.953 -                    }
   6.954 -                    break;
   6.955 -
   6.956 -                case OP_BOL:
   6.957 -
   6.958 -                    // Fail if we're not at the start of the string
   6.959 -                    if (idx != 0)
   6.960 -                    {
   6.961 -                        // If we're multiline matching, we could still be at the start of a line
   6.962 -                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
   6.963 -                        {
   6.964 -                            // If not at start of line, give up
   6.965 -                            if (idx <= 0 || !isNewline(idx - 1)) {
   6.966 -                                return -1;
   6.967 -                            } else {
   6.968 -                                break;
   6.969 -                            }
   6.970 -                        }
   6.971 -                        return -1;
   6.972 -                    }
   6.973 -                    break;
   6.974 -
   6.975 -                case OP_EOL:
   6.976 -
   6.977 -                    // If we're not at the end of string
   6.978 -                    if (!search.isEnd(0) && !search.isEnd(idx))
   6.979 -                    {
   6.980 -                        // If we're multi-line matching
   6.981 -                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
   6.982 -                        {
   6.983 -                            // Give up if we're not at the end of a line
   6.984 -                            if (!isNewline(idx)) {
   6.985 -                                return -1;
   6.986 -                            } else {
   6.987 -                                break;
   6.988 -                            }
   6.989 -                        }
   6.990 -                        return -1;
   6.991 -                    }
   6.992 -                    break;
   6.993 -
   6.994 -                case OP_ESCAPE:
   6.995 -
   6.996 -                    // Which escape?
   6.997 -                    switch (opdata)
   6.998 -                    {
   6.999 -                        // Word boundary match
  6.1000 -                        case E_NBOUND:
  6.1001 -                        case E_BOUND:
  6.1002 -                            {
  6.1003 -                                char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
  6.1004 -                                char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
  6.1005 -                                if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
  6.1006 -                                {
  6.1007 -                                    return -1;
  6.1008 -                                }
  6.1009 -                            }
  6.1010 -                            break;
  6.1011 -
  6.1012 -                        // Alpha-numeric, digit, space, javaLetter, javaLetterOrDigit
  6.1013 -                        case E_ALNUM:
  6.1014 -                        case E_NALNUM:
  6.1015 -                        case E_DIGIT:
  6.1016 -                        case E_NDIGIT:
  6.1017 -                        case E_SPACE:
  6.1018 -                        case E_NSPACE:
  6.1019 -
  6.1020 -                            // Give up if out of input
  6.1021 -                            if (search.isEnd(idx))
  6.1022 -                            {
  6.1023 -                                return -1;
  6.1024 -                            }
  6.1025 -
  6.1026 -                            char c = search.charAt(idx);
  6.1027 -
  6.1028 -                            // Switch on escape
  6.1029 -                            switch (opdata)
  6.1030 -                            {
  6.1031 -                                case E_ALNUM:
  6.1032 -                                case E_NALNUM:
  6.1033 -                                    if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
  6.1034 -                                    {
  6.1035 -                                        return -1;
  6.1036 -                                    }
  6.1037 -                                    break;
  6.1038 -
  6.1039 -                                case E_DIGIT:
  6.1040 -                                case E_NDIGIT:
  6.1041 -                                    if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
  6.1042 -                                    {
  6.1043 -                                        return -1;
  6.1044 -                                    }
  6.1045 -                                    break;
  6.1046 -
  6.1047 -                                case E_SPACE:
  6.1048 -                                case E_NSPACE:
  6.1049 -                                    if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
  6.1050 -                                    {
  6.1051 -                                        return -1;
  6.1052 -                                    }
  6.1053 -                                    break;
  6.1054 -                            }
  6.1055 -                            idx++;
  6.1056 -                            break;
  6.1057 -
  6.1058 -                        default:
  6.1059 -                            internalError("Unrecognized escape '" + opdata + "'");
  6.1060 -                    }
  6.1061 -                    break;
  6.1062 -
  6.1063 -                case OP_ANY:
  6.1064 -
  6.1065 -                    if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
  6.1066 -                        // Match anything
  6.1067 -                        if (search.isEnd(idx))
  6.1068 -                        {
  6.1069 -                            return -1;
  6.1070 -                        }
  6.1071 -                    }
  6.1072 -                    else
  6.1073 -                    {
  6.1074 -                        // Match anything but a newline
  6.1075 -                        if (search.isEnd(idx) || isNewline(idx))
  6.1076 -                        {
  6.1077 -                            return -1;
  6.1078 -                        }
  6.1079 -                    }
  6.1080 -                    idx++;
  6.1081 -                    break;
  6.1082 -
  6.1083 -                case OP_ATOM:
  6.1084 -                    {
  6.1085 -                        // Match an atom value
  6.1086 -                        if (search.isEnd(idx))
  6.1087 -                        {
  6.1088 -                            return -1;
  6.1089 -                        }
  6.1090 -
  6.1091 -                        // Get length of atom and starting index
  6.1092 -                        int lenAtom = opdata;
  6.1093 -                        int startAtom = node + nodeSize;
  6.1094 -
  6.1095 -                        // Give up if not enough input remains to have a match
  6.1096 -                        if (search.isEnd(lenAtom + idx - 1))
  6.1097 -                        {
  6.1098 -                            return -1;
  6.1099 -                        }
  6.1100 -
  6.1101 -                        // Match atom differently depending on casefolding flag
  6.1102 -                        final boolean caseFold =
  6.1103 -                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
  6.1104 -
  6.1105 -                        for (int i = 0; i < lenAtom; i++)
  6.1106 -                        {
  6.1107 -                            if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
  6.1108 -                            {
  6.1109 -                                return -1;
  6.1110 -                            }
  6.1111 -                        }
  6.1112 -                    }
  6.1113 -                    break;
  6.1114 -
  6.1115 -                case OP_POSIXCLASS:
  6.1116 -                    {
  6.1117 -                        // Out of input?
  6.1118 -                        if (search.isEnd(idx))
  6.1119 -                        {
  6.1120 -                            return -1;
  6.1121 -                        }
  6.1122 -
  6.1123 -                        switch (opdata)
  6.1124 -                        {
  6.1125 -                            case POSIX_CLASS_ALNUM:
  6.1126 -                                if (!Character.isLetterOrDigit(search.charAt(idx)))
  6.1127 -                                {
  6.1128 -                                    return -1;
  6.1129 -                                }
  6.1130 -                                break;
  6.1131 -
  6.1132 -                            case POSIX_CLASS_ALPHA:
  6.1133 -                                if (!Character.isLetter(search.charAt(idx)))
  6.1134 -                                {
  6.1135 -                                    return -1;
  6.1136 -                                }
  6.1137 -                                break;
  6.1138 -
  6.1139 -                            case POSIX_CLASS_DIGIT:
  6.1140 -                                if (!Character.isDigit(search.charAt(idx)))
  6.1141 -                                {
  6.1142 -                                    return -1;
  6.1143 -                                }
  6.1144 -                                break;
  6.1145 -
  6.1146 -                            case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
  6.1147 -                                if (!Character.isSpaceChar(search.charAt(idx)))
  6.1148 -                                {
  6.1149 -                                    return -1;
  6.1150 -                                }
  6.1151 -                                break;
  6.1152 -
  6.1153 -                            case POSIX_CLASS_SPACE:
  6.1154 -                                if (!Character.isWhitespace(search.charAt(idx)))
  6.1155 -                                {
  6.1156 -                                    return -1;
  6.1157 -                                }
  6.1158 -                                break;
  6.1159 -
  6.1160 -                            case POSIX_CLASS_CNTRL:
  6.1161 -                                if (Character.getType(search.charAt(idx)) != Character.CONTROL)
  6.1162 -                                {
  6.1163 -                                    return -1;
  6.1164 -                                }
  6.1165 -                                break;
  6.1166 -
  6.1167 -                            case POSIX_CLASS_GRAPH: // JWL - bugbug???
  6.1168 -                                switch (Character.getType(search.charAt(idx)))
  6.1169 -                                {
  6.1170 -                                    case Character.MATH_SYMBOL:
  6.1171 -                                    case Character.CURRENCY_SYMBOL:
  6.1172 -                                    case Character.MODIFIER_SYMBOL:
  6.1173 -                                    case Character.OTHER_SYMBOL:
  6.1174 -                                        break;
  6.1175 -
  6.1176 -                                    default:
  6.1177 -                                        return -1;
  6.1178 -                                }
  6.1179 -                                break;
  6.1180 -
  6.1181 -                            case POSIX_CLASS_LOWER:
  6.1182 -                                if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
  6.1183 -                                {
  6.1184 -                                    return -1;
  6.1185 -                                }
  6.1186 -                                break;
  6.1187 -
  6.1188 -                            case POSIX_CLASS_UPPER:
  6.1189 -                                if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
  6.1190 -                                {
  6.1191 -                                    return -1;
  6.1192 -                                }
  6.1193 -                                break;
  6.1194 -
  6.1195 -                            case POSIX_CLASS_PRINT:
  6.1196 -                                if (Character.getType(search.charAt(idx)) == Character.CONTROL)
  6.1197 -                                {
  6.1198 -                                    return -1;
  6.1199 -                                }
  6.1200 -                                break;
  6.1201 -
  6.1202 -                            case POSIX_CLASS_PUNCT:
  6.1203 -                            {
  6.1204 -                                int type = Character.getType(search.charAt(idx));
  6.1205 -                                switch(type)
  6.1206 -                                {
  6.1207 -                                    case Character.DASH_PUNCTUATION:
  6.1208 -                                    case Character.START_PUNCTUATION:
  6.1209 -                                    case Character.END_PUNCTUATION:
  6.1210 -                                    case Character.CONNECTOR_PUNCTUATION:
  6.1211 -                                    case Character.OTHER_PUNCTUATION:
  6.1212 -                                        break;
  6.1213 -
  6.1214 -                                    default:
  6.1215 -                                        return -1;
  6.1216 -                                }
  6.1217 -                            }
  6.1218 -                            break;
  6.1219 -
  6.1220 -                            case POSIX_CLASS_XDIGIT: // JWL - bugbug??
  6.1221 -                            {
  6.1222 -                                boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') ||
  6.1223 -                                                    (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') ||
  6.1224 -                                                    (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
  6.1225 -                                if (!isXDigit)
  6.1226 -                                {
  6.1227 -                                    return -1;
  6.1228 -                                }
  6.1229 -                            }
  6.1230 -                            break;
  6.1231 -
  6.1232 -                            case POSIX_CLASS_JSTART:
  6.1233 -                                if (!Character.isJavaIdentifierStart(search.charAt(idx)))
  6.1234 -                                {
  6.1235 -                                    return -1;
  6.1236 -                                }
  6.1237 -                                break;
  6.1238 -
  6.1239 -                            case POSIX_CLASS_JPART:
  6.1240 -                                if (!Character.isJavaIdentifierPart(search.charAt(idx)))
  6.1241 -                                {
  6.1242 -                                    return -1;
  6.1243 -                                }
  6.1244 -                                break;
  6.1245 -
  6.1246 -                            default:
  6.1247 -                                internalError("Bad posix class");
  6.1248 -                                break;
  6.1249 -                        }
  6.1250 -
  6.1251 -                        // Matched.
  6.1252 -                        idx++;
  6.1253 -                    }
  6.1254 -                    break;
  6.1255 -
  6.1256 -                case OP_ANYOF:
  6.1257 -                    {
  6.1258 -                        // Out of input?
  6.1259 -                        if (search.isEnd(idx))
  6.1260 -                        {
  6.1261 -                            return -1;
  6.1262 -                        }
  6.1263 -
  6.1264 -                        // Get character to match against character class and maybe casefold
  6.1265 -                        char c = search.charAt(idx);
  6.1266 -                        boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
  6.1267 -                        // Loop through character class checking our match character
  6.1268 -                        int idxRange = node + nodeSize;
  6.1269 -                        int idxEnd = idxRange + (opdata * 2);
  6.1270 -                        boolean match = false;
  6.1271 -                        for (int i = idxRange; !match && i < idxEnd; )
  6.1272 -                        {
  6.1273 -                            // Get start, end and match characters
  6.1274 -                            char s = instruction[i++];
  6.1275 -                            char e = instruction[i++];
  6.1276 -
  6.1277 -                            match = ((compareChars(c, s, caseFold) >= 0)
  6.1278 -                                     && (compareChars(c, e, caseFold) <= 0));
  6.1279 -                        }
  6.1280 -
  6.1281 -                        // Fail if we didn't match the character class
  6.1282 -                        if (!match)
  6.1283 -                        {
  6.1284 -                            return -1;
  6.1285 -                        }
  6.1286 -                        idx++;
  6.1287 -                    }
  6.1288 -                    break;
  6.1289 -
  6.1290 -                case OP_BRANCH:
  6.1291 -                {
  6.1292 -                    // Check for choices
  6.1293 -                    if (instruction[next + offsetOpcode] != OP_BRANCH)
  6.1294 -                    {
  6.1295 -                        // If there aren't any other choices, just evaluate this branch.
  6.1296 -                        node += nodeSize;
  6.1297 -                        continue;
  6.1298 -                    }
  6.1299 -
  6.1300 -                    // Try all available branches
  6.1301 -                    short nextBranch;
  6.1302 -                    do
  6.1303 -                    {
  6.1304 -                        // Try matching the branch against the string
  6.1305 -                        if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
  6.1306 -                        {
  6.1307 -                            return idxNew;
  6.1308 -                        }
  6.1309 -
  6.1310 -                        // Go to next branch (if any)
  6.1311 -                        nextBranch = (short)instruction[node + offsetNext];
  6.1312 -                        node += nextBranch;
  6.1313 -                    }
  6.1314 -                    while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));
  6.1315 -
  6.1316 -                    // Failed to match any branch!
  6.1317 -                    return -1;
  6.1318 -                }
  6.1319 -
  6.1320 -                case OP_NOTHING:
  6.1321 -                case OP_GOTO:
  6.1322 -
  6.1323 -                    // Just advance to the next node without doing anything
  6.1324 -                    break;
  6.1325 -
  6.1326 -                case OP_END:
  6.1327 -
  6.1328 -                    // Match has succeeded!
  6.1329 -                    setParenEnd(0, idx);
  6.1330 -                    return idx;
  6.1331 -
  6.1332 -                default:
  6.1333 -
  6.1334 -                    // Corrupt program
  6.1335 -                    internalError("Invalid opcode '" + opcode + "'");
  6.1336 -            }
  6.1337 -
  6.1338 -            // Advance to the next node in the program
  6.1339 -            node = next;
  6.1340 -        }
  6.1341 -
  6.1342 -        // We "should" never end up here
  6.1343 -        internalError("Corrupt program");
  6.1344 -        return -1;
  6.1345 -    }
  6.1346 -
  6.1347 -    /**
  6.1348 -     * Match the current regular expression program against the current
  6.1349 -     * input string, starting at index i of the input string.  This method
  6.1350 -     * is only meant for internal use.
  6.1351 -     *
  6.1352 -     * @param i The input string index to start matching at
  6.1353 -     * @return True if the input matched the expression
  6.1354 -     */
  6.1355 -    protected boolean matchAt(int i)
  6.1356 -    {
  6.1357 -        // Initialize start pointer, paren cache and paren count
  6.1358 -        start0 = -1;
  6.1359 -        end0   = -1;
  6.1360 -        start1 = -1;
  6.1361 -        end1   = -1;
  6.1362 -        start2 = -1;
  6.1363 -        end2   = -1;
  6.1364 -        startn = null;
  6.1365 -        endn   = null;
  6.1366 -        parenCount = 1;
  6.1367 -        setParenStart(0, i);
  6.1368 -
  6.1369 -        // Allocate backref arrays (unless optimizations indicate otherwise)
  6.1370 -        if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
  6.1371 -        {
  6.1372 -            startBackref = new int[maxParen];
  6.1373 -            endBackref = new int[maxParen];
  6.1374 -        }
  6.1375 -
  6.1376 -        // Match against string
  6.1377 -        int idx;
  6.1378 -        if ((idx = matchNodes(0, maxNode, i)) != -1)
  6.1379 -        {
  6.1380 -            setParenEnd(0, idx);
  6.1381 -            return true;
  6.1382 -        }
  6.1383 -
  6.1384 -        // Didn't match
  6.1385 -        parenCount = 0;
  6.1386 -        return false;
  6.1387 -    }
  6.1388 -
  6.1389 -    /**
  6.1390 -     * Matches the current regular expression program against a character array,
  6.1391 -     * starting at a given index.
  6.1392 -     *
  6.1393 -     * @param search String to match against
  6.1394 -     * @param i Index to start searching at
  6.1395 -     * @return True if string matched
  6.1396 -     */
  6.1397 -    public boolean match(String search, int i)
  6.1398 -    {
  6.1399 -        return match(new StringCharacterIterator(search), i);
  6.1400 -    }
  6.1401 -
  6.1402 -    /**
  6.1403 -     * Matches the current regular expression program against a character array,
  6.1404 -     * starting at a given index.
  6.1405 -     *
  6.1406 -     * @param search String to match against
  6.1407 -     * @param i Index to start searching at
  6.1408 -     * @return True if string matched
  6.1409 -     */
  6.1410 -    public boolean match(CharacterIterator search, int i)
  6.1411 -    {
  6.1412 -        // There is no compiled program to search with!
  6.1413 -        if (program == null)
  6.1414 -        {
  6.1415 -            // This should be uncommon enough to be an error case rather
  6.1416 -            // than an exception (which would have to be handled everywhere)
  6.1417 -            internalError("No RE program to run!");
  6.1418 -        }
  6.1419 -
  6.1420 -        // Save string to search
  6.1421 -        this.search = search;
  6.1422 -
  6.1423 -        // Can we optimize the search by looking for a prefix string?
  6.1424 -        if (program.prefix == null)
  6.1425 -        {
  6.1426 -            // Unprefixed matching must try for a match at each character
  6.1427 -            for ( ;! search.isEnd(i - 1); i++)
  6.1428 -            {
  6.1429 -                // Try a match at index i
  6.1430 -                if (matchAt(i))
  6.1431 -                {
  6.1432 -                    return true;
  6.1433 -                }
  6.1434 -            }
  6.1435 -            return false;
  6.1436 -        }
  6.1437 -        else
  6.1438 -        {
  6.1439 -            // Prefix-anchored matching is possible
  6.1440 -            boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
  6.1441 -            char[] prefix = program.prefix;
  6.1442 -            for ( ; !search.isEnd(i + prefix.length - 1); i++)
  6.1443 -            {
  6.1444 -                int j = i;
  6.1445 -                int k = 0;
  6.1446 -
  6.1447 -                boolean match;
  6.1448 -                do {
  6.1449 -                    // If there's a mismatch of any character in the prefix, give up
  6.1450 -                    match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
  6.1451 -                } while (match && k < prefix.length);
  6.1452 -
  6.1453 -                // See if the whole prefix string matched
  6.1454 -                if (k == prefix.length)
  6.1455 -                {
  6.1456 -                    // We matched the full prefix at firstChar, so try it
  6.1457 -                    if (matchAt(i))
  6.1458 -                    {
  6.1459 -                        return true;
  6.1460 -                    }
  6.1461 -                }
  6.1462 -            }
  6.1463 -            return false;
  6.1464 -        }
  6.1465 -    }
  6.1466 -
  6.1467 -    /**
  6.1468 -     * Matches the current regular expression program against a String.
  6.1469 -     *
  6.1470 -     * @param search String to match against
  6.1471 -     * @return True if string matched
  6.1472 -     */
  6.1473 -    public boolean match(String search)
  6.1474 -    {
  6.1475 -        return match(search, 0);
  6.1476 -    }
  6.1477 -
  6.1478 -    /**
  6.1479 -     * Splits a string into an array of strings on regular expression boundaries.
  6.1480 -     * This function works the same way as the Perl function of the same name.
  6.1481 -     * Given a regular expression of "[ab]+" and a string to split of
  6.1482 -     * "xyzzyababbayyzabbbab123", the result would be the array of Strings
  6.1483 -     * "[xyzzy, yyz, 123]".
  6.1484 -     *
  6.1485 -     * <p>Please note that the first string in the resulting array may be an empty
  6.1486 -     * string. This happens when the very first character of input string is
  6.1487 -     * matched by the pattern.
  6.1488 -     *
  6.1489 -     * @param s String to split on this regular exression
  6.1490 -     * @return Array of strings
  6.1491 -     */
  6.1492 -    public String[] split(String s)
  6.1493 -    {
  6.1494 -        // Create new vector
  6.1495 -        Vector v = new Vector();
  6.1496 -
  6.1497 -        // Start at position 0 and search the whole string
  6.1498 -        int pos = 0;
  6.1499 -        int len = s.length();
  6.1500 -
  6.1501 -        // Try a match at each position
  6.1502 -        while (pos < len && match(s, pos))
  6.1503 -        {
  6.1504 -            // Get start of match
  6.1505 -            int start = getParenStart(0);
  6.1506 -
  6.1507 -            // Get end of match
  6.1508 -            int newpos = getParenEnd(0);
  6.1509 -
  6.1510 -            // Check if no progress was made
  6.1511 -            if (newpos == pos)
  6.1512 -            {
  6.1513 -                v.addElement(s.substring(pos, start + 1));
  6.1514 -                newpos++;
  6.1515 -            }
  6.1516 -            else
  6.1517 -            {
  6.1518 -                v.addElement(s.substring(pos, start));
  6.1519 -            }
  6.1520 -
  6.1521 -            // Move to new position
  6.1522 -            pos = newpos;
  6.1523 -        }
  6.1524 -
  6.1525 -        // Push remainder if it's not empty
  6.1526 -        String remainder = s.substring(pos);
  6.1527 -        if (remainder.length() != 0)
  6.1528 -        {
  6.1529 -            v.addElement(remainder);
  6.1530 -        }
  6.1531 -
  6.1532 -        // Return vector as an array of strings
  6.1533 -        String[] ret = new String[v.size()];
  6.1534 -        v.copyInto(ret);
  6.1535 -        return ret;
  6.1536 -    }
  6.1537 -
  6.1538 -    /**
  6.1539 -     * Flag bit that indicates that subst should replace all occurrences of this
  6.1540 -     * regular expression.
  6.1541 -     */
  6.1542 -    public static final int REPLACE_ALL            = 0x0000;
  6.1543 -
  6.1544 -    /**
  6.1545 -     * Flag bit that indicates that subst should only replace the first occurrence
  6.1546 -     * of this regular expression.
  6.1547 -     */
  6.1548 -    public static final int REPLACE_FIRSTONLY      = 0x0001;
  6.1549 -
  6.1550 -    /**
  6.1551 -     * Flag bit that indicates that subst should replace backreferences
  6.1552 -     */
  6.1553 -    public static final int REPLACE_BACKREFERENCES = 0x0002;
  6.1554 -
  6.1555 -    /**
  6.1556 -     * Substitutes a string for this regular expression in another string.
  6.1557 -     * This method works like the Perl function of the same name.
  6.1558 -     * Given a regular expression of "a*b", a String to substituteIn of
  6.1559 -     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
  6.1560 -     * resulting String returned by subst would be "-foo-garply-wacky-".
  6.1561 -     *
  6.1562 -     * @param substituteIn String to substitute within
  6.1563 -     * @param substitution String to substitute for all matches of this regular expression.
  6.1564 -     * @return The string substituteIn with zero or more occurrences of the current
  6.1565 -     * regular expression replaced with the substitution String (if this regular
  6.1566 -     * expression object doesn't match at any position, the original String is returned
  6.1567 -     * unchanged).
  6.1568 -     */
  6.1569 -    public String subst(String substituteIn, String substitution)
  6.1570 -    {
  6.1571 -        return subst(substituteIn, substitution, REPLACE_ALL);
  6.1572 -    }
  6.1573 -
  6.1574 -    /**
  6.1575 -     * Substitutes a string for this regular expression in another string.
  6.1576 -     * This method works like the Perl function of the same name.
  6.1577 -     * Given a regular expression of "a*b", a String to substituteIn of
  6.1578 -     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
  6.1579 -     * resulting String returned by subst would be "-foo-garply-wacky-".
  6.1580 -     * <p>
  6.1581 -     * It is also possible to reference the contents of a parenthesized expression
  6.1582 -     * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
  6.1583 -     * a String to substituteIn of "visit us: http://www.apache.org!" and the
  6.1584 -     * substitution String "&lt;a href=\"$0\"&gt;$0&lt;/a&gt;", the resulting String
  6.1585 -     * returned by subst would be
  6.1586 -     * "visit us: &lt;a href=\"http://www.apache.org\"&gt;http://www.apache.org&lt;/a&gt;!".
  6.1587 -     * <p>
  6.1588 -     * <i>Note:</i> $0 represents the whole match.
  6.1589 -     *
  6.1590 -     * @param substituteIn String to substitute within
  6.1591 -     * @param substitution String to substitute for matches of this regular expression
  6.1592 -     * @param flags One or more bitwise flags from REPLACE_*.  If the REPLACE_FIRSTONLY
  6.1593 -     * flag bit is set, only the first occurrence of this regular expression is replaced.
  6.1594 -     * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
  6.1595 -     * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
  6.1596 -     * be processed.
  6.1597 -     * @return The string substituteIn with zero or more occurrences of the current
  6.1598 -     * regular expression replaced with the substitution String (if this regular
  6.1599 -     * expression object doesn't match at any position, the original String is returned
  6.1600 -     * unchanged).
  6.1601 -     */
  6.1602 -    public String subst(String substituteIn, String substitution, int flags)
  6.1603 -    {
  6.1604 -        // String to return
  6.1605 -        StringBuffer ret = new StringBuffer();
  6.1606 -
  6.1607 -        // Start at position 0 and search the whole string
  6.1608 -        int pos = 0;
  6.1609 -        int len = substituteIn.length();
  6.1610 -
  6.1611 -        // Try a match at each position
  6.1612 -        while (pos < len && match(substituteIn, pos))
  6.1613 -        {
  6.1614 -            // Append string before match
  6.1615 -            ret.append(substituteIn.substring(pos, getParenStart(0)));
  6.1616 -
  6.1617 -            if ((flags & REPLACE_BACKREFERENCES) != 0)
  6.1618 -            {
  6.1619 -                // Process backreferences
  6.1620 -                int lCurrentPosition = 0;
  6.1621 -                int lLastPosition = -2;
  6.1622 -                int lLength = substitution.length();
  6.1623 -                boolean bAddedPrefix = false;
  6.1624 -
  6.1625 -                while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
  6.1626 -                {
  6.1627 -                    if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
  6.1628 -                        && lCurrentPosition+1 < lLength)
  6.1629 -                    {
  6.1630 -                        char c = substitution.charAt(lCurrentPosition + 1);
  6.1631 -                        if (c >= '0' && c <= '9')
  6.1632 -                        {
  6.1633 -                            if (bAddedPrefix == false)
  6.1634 -                            {
  6.1635 -                                // Append everything between the beginning of the
  6.1636 -                                // substitution string and the current $ sign
  6.1637 -                                ret.append(substitution.substring(0, lCurrentPosition));
  6.1638 -                                bAddedPrefix = true;
  6.1639 -                            }
  6.1640 -                            else
  6.1641 -                            {
  6.1642 -                                // Append everything between the last and the current $ sign
  6.1643 -                                ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
  6.1644 -                            }
  6.1645 -
  6.1646 -                            // Append the parenthesized expression
  6.1647 -                            // Note: if a parenthesized expression of the requested
  6.1648 -                            // index is not available "null" is added to the string
  6.1649 -                            ret.append(getParen(c - '0'));
  6.1650 -                            lLastPosition = lCurrentPosition;
  6.1651 -                        }
  6.1652 -                    }
  6.1653 -
  6.1654 -                    // Move forward, skipping past match
  6.1655 -                    lCurrentPosition++;
  6.1656 -                }
  6.1657 -
  6.1658 -                // Append everything after the last $ sign
  6.1659 -                ret.append(substitution.substring(lLastPosition + 2, lLength));
  6.1660 -            }
  6.1661 -            else
  6.1662 -            {
  6.1663 -                // Append substitution without processing backreferences
  6.1664 -                ret.append(substitution);
  6.1665 -            }
  6.1666 -
  6.1667 -            // Move forward, skipping past match
  6.1668 -            int newpos = getParenEnd(0);
  6.1669 -
  6.1670 -            // We always want to make progress!
  6.1671 -            if (newpos == pos)
  6.1672 -            {
  6.1673 -                newpos++;
  6.1674 -            }
  6.1675 -
  6.1676 -            // Try new position
  6.1677 -            pos = newpos;
  6.1678 -
  6.1679 -            // Break out if we're only supposed to replace one occurrence
  6.1680 -            if ((flags & REPLACE_FIRSTONLY) != 0)
  6.1681 -            {
  6.1682 -                break;
  6.1683 -            }
  6.1684 -        }
  6.1685 -
  6.1686 -        // If there's remaining input, append it
  6.1687 -        if (pos < len)
  6.1688 -        {
  6.1689 -            ret.append(substituteIn.substring(pos));
  6.1690 -        }
  6.1691 -
  6.1692 -        // Return string buffer as string
  6.1693 -        return ret.toString();
  6.1694 -    }
  6.1695 -
  6.1696 -    /**
  6.1697 -     * Returns an array of Strings, whose toString representation matches a regular
  6.1698 -     * expression. This method works like the Perl function of the same name.  Given
  6.1699 -     * a regular expression of "a*b" and an array of String objects of [foo, aab, zzz,
  6.1700 -     * aaaab], the array of Strings returned by grep would be [aab, aaaab].
  6.1701 -     *
  6.1702 -     * @param search Array of Objects to search
  6.1703 -     * @return Array of Strings whose toString() value matches this regular expression.
  6.1704 -     */
  6.1705 -    public String[] grep(Object[] search)
  6.1706 -    {
  6.1707 -        // Create new vector to hold return items
  6.1708 -        Vector v = new Vector();
  6.1709 -
  6.1710 -        // Traverse array of objects
  6.1711 -        for (int i = 0; i < search.length; i++)
  6.1712 -        {
  6.1713 -            // Get next object as a string
  6.1714 -            String s = search[i].toString();
  6.1715 -
  6.1716 -            // If it matches this regexp, add it to the list
  6.1717 -            if (match(s))
  6.1718 -            {
  6.1719 -                v.addElement(s);
  6.1720 -            }
  6.1721 -        }
  6.1722 -
  6.1723 -        // Return vector as an array of strings
  6.1724 -        String[] ret = new String[v.size()];
  6.1725 -        v.copyInto(ret);
  6.1726 -        return ret;
  6.1727 -    }
  6.1728 -
  6.1729 -    /**
  6.1730 -     * @return true if character at i-th position in the <code>search</code> string is a newline
  6.1731 -     */
  6.1732 -    private boolean isNewline(int i)
  6.1733 -    {
  6.1734 -        char nextChar = search.charAt(i);
  6.1735 -
  6.1736 -        if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
  6.1737 -            || nextChar == '\u2028' || nextChar == '\u2029')
  6.1738 -        {
  6.1739 -            return true;
  6.1740 -        }
  6.1741 -
  6.1742 -        return false;
  6.1743 -    }
  6.1744 -
  6.1745 -    /**
  6.1746 -     * Compares two characters.
  6.1747 -     *
  6.1748 -     * @param c1 first character to compare.
  6.1749 -     * @param c2 second character to compare.
  6.1750 -     * @param caseIndependent whether comparision is case insensitive or not.
  6.1751 -     * @return negative, 0, or positive integer as the first character
  6.1752 -     *         less than, equal to, or greater then the second.
  6.1753 -     */
  6.1754 -    private int compareChars(char c1, char c2, boolean caseIndependent)
  6.1755 -    {
  6.1756 -        if (caseIndependent)
  6.1757 -        {
  6.1758 -            c1 = Character.toLowerCase(c1);
  6.1759 -            c2 = Character.toLowerCase(c2);
  6.1760 -        }
  6.1761 -        return ((int)c1 - (int)c2);
  6.1762 -    }
  6.1763 -}
     7.1 --- a/src/com/sun/org/apache/regexp/internal/RECompiler.java	Sat Oct 24 16:18:47 2020 +0800
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,1520 +0,0 @@
     7.4 -/*
     7.5 - * reserved comment block
     7.6 - * DO NOT REMOVE OR ALTER!
     7.7 - */
     7.8 -/*
     7.9 - * Copyright 1999-2004 The Apache Software Foundation.
    7.10 - *
    7.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    7.12 - * you may not use this file except in compliance with the License.
    7.13 - * You may obtain a copy of the License at
    7.14 - *
    7.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    7.16 - *
    7.17 - * Unless required by applicable law or agreed to in writing, software
    7.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    7.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    7.20 - * See the License for the specific language governing permissions and
    7.21 - * limitations under the License.
    7.22 - */
    7.23 -
    7.24 -package com.sun.org.apache.regexp.internal;
    7.25 -
    7.26 -import com.sun.org.apache.regexp.internal.RE;
    7.27 -import java.util.Hashtable;
    7.28 -
    7.29 -/**
    7.30 - * A regular expression compiler class.  This class compiles a pattern string into a
    7.31 - * regular expression program interpretable by the RE evaluator class.  The 'recompile'
    7.32 - * command line tool uses this compiler to pre-compile regular expressions for use
    7.33 - * with RE.  For a description of the syntax accepted by RECompiler and what you can
    7.34 - * do with regular expressions, see the documentation for the RE matcher class.
    7.35 - *
    7.36 - * @see RE
    7.37 - * @see recompile
    7.38 - *
    7.39 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
    7.40 - * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
    7.41 - */
    7.42 -public class RECompiler
    7.43 -{
    7.44 -    // The compiled program
    7.45 -    char[] instruction;                                 // The compiled RE 'program' instruction buffer
    7.46 -    int lenInstruction;                                 // The amount of the program buffer currently in use
    7.47 -
    7.48 -    // Input state for compiling regular expression
    7.49 -    String pattern;                                     // Input string
    7.50 -    int len;                                            // Length of the pattern string
    7.51 -    int idx;                                            // Current input index into ac
    7.52 -    int parens;                                         // Total number of paren pairs
    7.53 -
    7.54 -    // Node flags
    7.55 -    static final int NODE_NORMAL   = 0;                 // No flags (nothing special)
    7.56 -    static final int NODE_NULLABLE = 1;                 // True if node is potentially null
    7.57 -    static final int NODE_TOPLEVEL = 2;                 // True if top level expr
    7.58 -
    7.59 -    // Special types of 'escapes'
    7.60 -    static final int ESC_MASK      = 0xffff0;           // Escape complexity mask
    7.61 -    static final int ESC_BACKREF   = 0xfffff;           // Escape is really a backreference
    7.62 -    static final int ESC_COMPLEX   = 0xffffe;           // Escape isn't really a true character
    7.63 -    static final int ESC_CLASS     = 0xffffd;           // Escape represents a whole class of characters
    7.64 -
    7.65 -    // {m,n} stacks
    7.66 -    int maxBrackets = 10;                               // Maximum number of bracket pairs
    7.67 -    static final int bracketUnbounded = -1;             // Unbounded value
    7.68 -    int brackets = 0;                                   // Number of bracket sets
    7.69 -    int[] bracketStart = null;                          // Starting point
    7.70 -    int[] bracketEnd = null;                            // Ending point
    7.71 -    int[] bracketMin = null;                            // Minimum number of matches
    7.72 -    int[] bracketOpt = null;                            // Additional optional matches
    7.73 -
    7.74 -    // Lookup table for POSIX character class names
    7.75 -    static Hashtable hashPOSIX = new Hashtable();
    7.76 -    static
    7.77 -    {
    7.78 -        hashPOSIX.put("alnum",     new Character(RE.POSIX_CLASS_ALNUM));
    7.79 -        hashPOSIX.put("alpha",     new Character(RE.POSIX_CLASS_ALPHA));
    7.80 -        hashPOSIX.put("blank",     new Character(RE.POSIX_CLASS_BLANK));
    7.81 -        hashPOSIX.put("cntrl",     new Character(RE.POSIX_CLASS_CNTRL));
    7.82 -        hashPOSIX.put("digit",     new Character(RE.POSIX_CLASS_DIGIT));
    7.83 -        hashPOSIX.put("graph",     new Character(RE.POSIX_CLASS_GRAPH));
    7.84 -        hashPOSIX.put("lower",     new Character(RE.POSIX_CLASS_LOWER));
    7.85 -        hashPOSIX.put("print",     new Character(RE.POSIX_CLASS_PRINT));
    7.86 -        hashPOSIX.put("punct",     new Character(RE.POSIX_CLASS_PUNCT));
    7.87 -        hashPOSIX.put("space",     new Character(RE.POSIX_CLASS_SPACE));
    7.88 -        hashPOSIX.put("upper",     new Character(RE.POSIX_CLASS_UPPER));
    7.89 -        hashPOSIX.put("xdigit",    new Character(RE.POSIX_CLASS_XDIGIT));
    7.90 -        hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART));
    7.91 -        hashPOSIX.put("javapart",  new Character(RE.POSIX_CLASS_JPART));
    7.92 -    }
    7.93 -
    7.94 -    /**
    7.95 -     * Constructor.  Creates (initially empty) storage for a regular expression program.
    7.96 -     */
    7.97 -    public RECompiler()
    7.98 -    {
    7.99 -        // Start off with a generous, yet reasonable, initial size
   7.100 -        instruction = new char[128];
   7.101 -        lenInstruction = 0;
   7.102 -    }
   7.103 -
   7.104 -    /**
   7.105 -     * Ensures that n more characters can fit in the program buffer.
   7.106 -     * If n more can't fit, then the size is doubled until it can.
   7.107 -     * @param n Number of additional characters to ensure will fit.
   7.108 -     */
   7.109 -    void ensure(int n)
   7.110 -    {
   7.111 -        // Get current program length
   7.112 -        int curlen = instruction.length;
   7.113 -
   7.114 -        // If the current length + n more is too much
   7.115 -        if (lenInstruction + n >= curlen)
   7.116 -        {
   7.117 -            // Double the size of the program array until n more will fit
   7.118 -            while (lenInstruction + n >= curlen)
   7.119 -            {
   7.120 -                curlen *= 2;
   7.121 -            }
   7.122 -
   7.123 -            // Allocate new program array and move data into it
   7.124 -            char[] newInstruction = new char[curlen];
   7.125 -            System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction);
   7.126 -            instruction = newInstruction;
   7.127 -        }
   7.128 -    }
   7.129 -
   7.130 -    /**
   7.131 -     * Emit a single character into the program stream.
   7.132 -     * @param c Character to add
   7.133 -     */
   7.134 -    void emit(char c)
   7.135 -    {
   7.136 -        // Make room for character
   7.137 -        ensure(1);
   7.138 -
   7.139 -        // Add character
   7.140 -        instruction[lenInstruction++] = c;
   7.141 -    }
   7.142 -
   7.143 -    /**
   7.144 -     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
   7.145 -     * pointer is initialized to 0.
   7.146 -     * @param opcode Opcode for new node
   7.147 -     * @param opdata Opdata for new node (only the low 16 bits are currently used)
   7.148 -     * @param insertAt Index at which to insert the new node in the program
   7.149 -     */
   7.150 -    void nodeInsert(char opcode, int opdata, int insertAt)
   7.151 -    {
   7.152 -        // Make room for a new node
   7.153 -        ensure(RE.nodeSize);
   7.154 -
   7.155 -        // Move everything from insertAt to the end down nodeSize elements
   7.156 -        System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt);
   7.157 -        instruction[insertAt + RE.offsetOpcode] = opcode;
   7.158 -        instruction[insertAt + RE.offsetOpdata] = (char)opdata;
   7.159 -        instruction[insertAt + RE.offsetNext] = 0;
   7.160 -        lenInstruction += RE.nodeSize;
   7.161 -    }
   7.162 -
   7.163 -    /**
   7.164 -     * Appends a node to the end of a node chain
   7.165 -     * @param node Start of node chain to traverse
   7.166 -     * @param pointTo Node to have the tail of the chain point to
   7.167 -     */
   7.168 -    void setNextOfEnd(int node, int pointTo)
   7.169 -    {
   7.170 -        // Traverse the chain until the next offset is 0
   7.171 -        int next = instruction[node + RE.offsetNext];
   7.172 -        // while the 'node' is not the last in the chain
   7.173 -        // and the 'node' is not the last in the program.
   7.174 -        while ( next != 0 && node < lenInstruction )
   7.175 -        {
   7.176 -            // if the node we are supposed to point to is in the chain then
   7.177 -            // point to the end of the program instead.
   7.178 -            // Michael McCallum <gholam@xtra.co.nz>
   7.179 -            // FIXME: // This is a _hack_ to stop infinite programs.
   7.180 -            // I believe that the implementation of the reluctant matches is wrong but
   7.181 -            // have not worked out a better way yet.
   7.182 -            if ( node == pointTo ) {
   7.183 -              pointTo = lenInstruction;
   7.184 -            }
   7.185 -            node += next;
   7.186 -            next = instruction[node + RE.offsetNext];
   7.187 -        }
   7.188 -        // if we have reached the end of the program then dont set the pointTo.
   7.189 -        // im not sure if this will break any thing but passes all the tests.
   7.190 -        if ( node < lenInstruction ) {
   7.191 -            // Point the last node in the chain to pointTo.
   7.192 -            instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
   7.193 -        }
   7.194 -    }
   7.195 -
   7.196 -    /**
   7.197 -     * Adds a new node
   7.198 -     * @param opcode Opcode for node
   7.199 -     * @param opdata Opdata for node (only the low 16 bits are currently used)
   7.200 -     * @return Index of new node in program
   7.201 -     */
   7.202 -    int node(char opcode, int opdata)
   7.203 -    {
   7.204 -        // Make room for a new node
   7.205 -        ensure(RE.nodeSize);
   7.206 -
   7.207 -        // Add new node at end
   7.208 -        instruction[lenInstruction + RE.offsetOpcode] = opcode;
   7.209 -        instruction[lenInstruction + RE.offsetOpdata] = (char)opdata;
   7.210 -        instruction[lenInstruction + RE.offsetNext] = 0;
   7.211 -        lenInstruction += RE.nodeSize;
   7.212 -
   7.213 -        // Return index of new node
   7.214 -        return lenInstruction - RE.nodeSize;
   7.215 -    }
   7.216 -
   7.217 -
   7.218 -    /**
   7.219 -     * Throws a new internal error exception
   7.220 -     * @exception Error Thrown in the event of an internal error.
   7.221 -     */
   7.222 -    void internalError() throws Error
   7.223 -    {
   7.224 -        throw new Error("Internal error!");
   7.225 -    }
   7.226 -
   7.227 -    /**
   7.228 -     * Throws a new syntax error exception
   7.229 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.230 -     */
   7.231 -    void syntaxError(String s) throws RESyntaxException
   7.232 -    {
   7.233 -        throw new RESyntaxException(s);
   7.234 -    }
   7.235 -
   7.236 -    /**
   7.237 -     * Allocate storage for brackets only as needed
   7.238 -     */
   7.239 -    void allocBrackets()
   7.240 -    {
   7.241 -        // Allocate bracket stacks if not already done
   7.242 -        if (bracketStart == null)
   7.243 -        {
   7.244 -            // Allocate storage
   7.245 -            bracketStart = new int[maxBrackets];
   7.246 -            bracketEnd   = new int[maxBrackets];
   7.247 -            bracketMin   = new int[maxBrackets];
   7.248 -            bracketOpt   = new int[maxBrackets];
   7.249 -
   7.250 -            // Initialize to invalid values
   7.251 -            for (int i = 0; i < maxBrackets; i++)
   7.252 -            {
   7.253 -                bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1;
   7.254 -            }
   7.255 -        }
   7.256 -    }
   7.257 -
   7.258 -    /** Enlarge storage for brackets only as needed. */
   7.259 -    synchronized void reallocBrackets() {
   7.260 -        // trick the tricky
   7.261 -        if (bracketStart == null) {
   7.262 -            allocBrackets();
   7.263 -        }
   7.264 -
   7.265 -        int new_size = maxBrackets * 2;
   7.266 -        int[] new_bS = new int[new_size];
   7.267 -        int[] new_bE = new int[new_size];
   7.268 -        int[] new_bM = new int[new_size];
   7.269 -        int[] new_bO = new int[new_size];
   7.270 -        // Initialize to invalid values
   7.271 -        for (int i=brackets; i<new_size; i++) {
   7.272 -            new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
   7.273 -        }
   7.274 -        System.arraycopy(bracketStart,0, new_bS,0, brackets);
   7.275 -        System.arraycopy(bracketEnd,0,   new_bE,0, brackets);
   7.276 -        System.arraycopy(bracketMin,0,   new_bM,0, brackets);
   7.277 -        System.arraycopy(bracketOpt,0,   new_bO,0, brackets);
   7.278 -        bracketStart = new_bS;
   7.279 -        bracketEnd   = new_bE;
   7.280 -        bracketMin   = new_bM;
   7.281 -        bracketOpt   = new_bO;
   7.282 -        maxBrackets  = new_size;
   7.283 -    }
   7.284 -
   7.285 -    /**
   7.286 -     * Match bracket {m,n} expression put results in bracket member variables
   7.287 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.288 -     */
   7.289 -    void bracket() throws RESyntaxException
   7.290 -    {
   7.291 -        // Current character must be a '{'
   7.292 -        if (idx >= len || pattern.charAt(idx++) != '{')
   7.293 -        {
   7.294 -            internalError();
   7.295 -        }
   7.296 -
   7.297 -        // Next char must be a digit
   7.298 -        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
   7.299 -        {
   7.300 -            syntaxError("Expected digit");
   7.301 -        }
   7.302 -
   7.303 -        // Get min ('m' of {m,n}) number
   7.304 -        StringBuffer number = new StringBuffer();
   7.305 -        while (idx < len && Character.isDigit(pattern.charAt(idx)))
   7.306 -        {
   7.307 -            number.append(pattern.charAt(idx++));
   7.308 -        }
   7.309 -        try
   7.310 -        {
   7.311 -            bracketMin[brackets] = Integer.parseInt(number.toString());
   7.312 -        }
   7.313 -        catch (NumberFormatException e)
   7.314 -        {
   7.315 -            syntaxError("Expected valid number");
   7.316 -        }
   7.317 -
   7.318 -        // If out of input, fail
   7.319 -        if (idx >= len)
   7.320 -        {
   7.321 -            syntaxError("Expected comma or right bracket");
   7.322 -        }
   7.323 -
   7.324 -        // If end of expr, optional limit is 0
   7.325 -        if (pattern.charAt(idx) == '}')
   7.326 -        {
   7.327 -            idx++;
   7.328 -            bracketOpt[brackets] = 0;
   7.329 -            return;
   7.330 -        }
   7.331 -
   7.332 -        // Must have at least {m,} and maybe {m,n}.
   7.333 -        if (idx >= len || pattern.charAt(idx++) != ',')
   7.334 -        {
   7.335 -            syntaxError("Expected comma");
   7.336 -        }
   7.337 -
   7.338 -        // If out of input, fail
   7.339 -        if (idx >= len)
   7.340 -        {
   7.341 -            syntaxError("Expected comma or right bracket");
   7.342 -        }
   7.343 -
   7.344 -        // If {m,} max is unlimited
   7.345 -        if (pattern.charAt(idx) == '}')
   7.346 -        {
   7.347 -            idx++;
   7.348 -            bracketOpt[brackets] = bracketUnbounded;
   7.349 -            return;
   7.350 -        }
   7.351 -
   7.352 -        // Next char must be a digit
   7.353 -        if (idx >= len || !Character.isDigit(pattern.charAt(idx)))
   7.354 -        {
   7.355 -            syntaxError("Expected digit");
   7.356 -        }
   7.357 -
   7.358 -        // Get max number
   7.359 -        number.setLength(0);
   7.360 -        while (idx < len && Character.isDigit(pattern.charAt(idx)))
   7.361 -        {
   7.362 -            number.append(pattern.charAt(idx++));
   7.363 -        }
   7.364 -        try
   7.365 -        {
   7.366 -            bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets];
   7.367 -        }
   7.368 -        catch (NumberFormatException e)
   7.369 -        {
   7.370 -            syntaxError("Expected valid number");
   7.371 -        }
   7.372 -
   7.373 -        // Optional repetitions must be >= 0
   7.374 -        if (bracketOpt[brackets] < 0)
   7.375 -        {
   7.376 -            syntaxError("Bad range");
   7.377 -        }
   7.378 -
   7.379 -        // Must have close brace
   7.380 -        if (idx >= len || pattern.charAt(idx++) != '}')
   7.381 -        {
   7.382 -            syntaxError("Missing close brace");
   7.383 -        }
   7.384 -    }
   7.385 -
   7.386 -    /**
   7.387 -     * Match an escape sequence.  Handles quoted chars and octal escapes as well
   7.388 -     * as normal escape characters.  Always advances the input stream by the
   7.389 -     * right amount. This code "understands" the subtle difference between an
   7.390 -     * octal escape and a backref.  You can access the type of ESC_CLASS or
   7.391 -     * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1].
   7.392 -     * @return ESC_* code or character if simple escape
   7.393 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.394 -     */
   7.395 -    int escape() throws RESyntaxException
   7.396 -    {
   7.397 -        // "Shouldn't" happen
   7.398 -        if (pattern.charAt(idx) != '\\')
   7.399 -        {
   7.400 -            internalError();
   7.401 -        }
   7.402 -
   7.403 -        // Escape shouldn't occur as last character in string!
   7.404 -        if (idx + 1 == len)
   7.405 -        {
   7.406 -            syntaxError("Escape terminates string");
   7.407 -        }
   7.408 -
   7.409 -        // Switch on character after backslash
   7.410 -        idx += 2;
   7.411 -        char escapeChar = pattern.charAt(idx - 1);
   7.412 -        switch (escapeChar)
   7.413 -        {
   7.414 -            case RE.E_BOUND:
   7.415 -            case RE.E_NBOUND:
   7.416 -                return ESC_COMPLEX;
   7.417 -
   7.418 -            case RE.E_ALNUM:
   7.419 -            case RE.E_NALNUM:
   7.420 -            case RE.E_SPACE:
   7.421 -            case RE.E_NSPACE:
   7.422 -            case RE.E_DIGIT:
   7.423 -            case RE.E_NDIGIT:
   7.424 -                return ESC_CLASS;
   7.425 -
   7.426 -            case 'u':
   7.427 -            case 'x':
   7.428 -                {
   7.429 -                    // Exact required hex digits for escape type
   7.430 -                    int hexDigits = (escapeChar == 'u' ? 4 : 2);
   7.431 -
   7.432 -                    // Parse up to hexDigits characters from input
   7.433 -                    int val = 0;
   7.434 -                    for ( ; idx < len && hexDigits-- > 0; idx++)
   7.435 -                    {
   7.436 -                        // Get char
   7.437 -                        char c = pattern.charAt(idx);
   7.438 -
   7.439 -                        // If it's a hexadecimal digit (0-9)
   7.440 -                        if (c >= '0' && c <= '9')
   7.441 -                        {
   7.442 -                            // Compute new value
   7.443 -                            val = (val << 4) + c - '0';
   7.444 -                        }
   7.445 -                        else
   7.446 -                        {
   7.447 -                            // If it's a hexadecimal letter (a-f)
   7.448 -                            c = Character.toLowerCase(c);
   7.449 -                            if (c >= 'a' && c <= 'f')
   7.450 -                            {
   7.451 -                                // Compute new value
   7.452 -                                val = (val << 4) + (c - 'a') + 10;
   7.453 -                            }
   7.454 -                            else
   7.455 -                            {
   7.456 -                                // If it's not a valid digit or hex letter, the escape must be invalid
   7.457 -                                // because hexDigits of input have not been absorbed yet.
   7.458 -                                syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar);
   7.459 -                            }
   7.460 -                        }
   7.461 -                    }
   7.462 -                    return val;
   7.463 -                }
   7.464 -
   7.465 -            case 't':
   7.466 -                return '\t';
   7.467 -
   7.468 -            case 'n':
   7.469 -                return '\n';
   7.470 -
   7.471 -            case 'r':
   7.472 -                return '\r';
   7.473 -
   7.474 -            case 'f':
   7.475 -                return '\f';
   7.476 -
   7.477 -            case '0':
   7.478 -            case '1':
   7.479 -            case '2':
   7.480 -            case '3':
   7.481 -            case '4':
   7.482 -            case '5':
   7.483 -            case '6':
   7.484 -            case '7':
   7.485 -            case '8':
   7.486 -            case '9':
   7.487 -
   7.488 -                // An octal escape starts with a 0 or has two digits in a row
   7.489 -                if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0')
   7.490 -                {
   7.491 -                    // Handle \nnn octal escapes
   7.492 -                    int val = escapeChar - '0';
   7.493 -                    if (idx < len && Character.isDigit(pattern.charAt(idx)))
   7.494 -                    {
   7.495 -                        val = ((val << 3) + (pattern.charAt(idx++) - '0'));
   7.496 -                        if (idx < len && Character.isDigit(pattern.charAt(idx)))
   7.497 -                        {
   7.498 -                            val = ((val << 3) + (pattern.charAt(idx++) - '0'));
   7.499 -                        }
   7.500 -                    }
   7.501 -                    return val;
   7.502 -                }
   7.503 -
   7.504 -                // It's actually a backreference (\[1-9]), not an escape
   7.505 -                return ESC_BACKREF;
   7.506 -
   7.507 -            default:
   7.508 -
   7.509 -                // Simple quoting of a character
   7.510 -                return escapeChar;
   7.511 -        }
   7.512 -    }
   7.513 -
   7.514 -    /**
   7.515 -     * Compile a character class
   7.516 -     * @return Index of class node
   7.517 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.518 -     */
   7.519 -    int characterClass() throws RESyntaxException
   7.520 -    {
   7.521 -        // Check for bad calling or empty class
   7.522 -        if (pattern.charAt(idx) != '[')
   7.523 -        {
   7.524 -            internalError();
   7.525 -        }
   7.526 -
   7.527 -        // Check for unterminated or empty class
   7.528 -        if ((idx + 1) >= len || pattern.charAt(++idx) == ']')
   7.529 -        {
   7.530 -            syntaxError("Empty or unterminated class");
   7.531 -        }
   7.532 -
   7.533 -        // Check for POSIX character class
   7.534 -        if (idx < len && pattern.charAt(idx) == ':')
   7.535 -        {
   7.536 -            // Skip colon
   7.537 -            idx++;
   7.538 -
   7.539 -            // POSIX character classes are denoted with lowercase ASCII strings
   7.540 -            int idxStart = idx;
   7.541 -            while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z')
   7.542 -            {
   7.543 -                idx++;
   7.544 -            }
   7.545 -
   7.546 -            // Should be a ":]" to terminate the POSIX character class
   7.547 -            if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']')
   7.548 -            {
   7.549 -                // Get character class
   7.550 -                String charClass = pattern.substring(idxStart, idx);
   7.551 -
   7.552 -                // Select the POSIX class id
   7.553 -                Character i = (Character)hashPOSIX.get(charClass);
   7.554 -                if (i != null)
   7.555 -                {
   7.556 -                    // Move past colon and right bracket
   7.557 -                    idx += 2;
   7.558 -
   7.559 -                    // Return new POSIX character class node
   7.560 -                    return node(RE.OP_POSIXCLASS, i.charValue());
   7.561 -                }
   7.562 -                syntaxError("Invalid POSIX character class '" + charClass + "'");
   7.563 -            }
   7.564 -            syntaxError("Invalid POSIX character class syntax");
   7.565 -        }
   7.566 -
   7.567 -        // Try to build a class.  Create OP_ANYOF node
   7.568 -        int ret = node(RE.OP_ANYOF, 0);
   7.569 -
   7.570 -        // Parse class declaration
   7.571 -        char CHAR_INVALID = Character.MAX_VALUE;
   7.572 -        char last = CHAR_INVALID;
   7.573 -        char simpleChar = 0;
   7.574 -        boolean include = true;
   7.575 -        boolean definingRange = false;
   7.576 -        int idxFirst = idx;
   7.577 -        char rangeStart = Character.MIN_VALUE;
   7.578 -        char rangeEnd;
   7.579 -        RERange range = new RERange();
   7.580 -        while (idx < len && pattern.charAt(idx) != ']')
   7.581 -        {
   7.582 -
   7.583 -            switchOnCharacter:
   7.584 -
   7.585 -            // Switch on character
   7.586 -            switch (pattern.charAt(idx))
   7.587 -            {
   7.588 -                case '^':
   7.589 -                    include = !include;
   7.590 -                    if (idx == idxFirst)
   7.591 -                    {
   7.592 -                        range.include(Character.MIN_VALUE, Character.MAX_VALUE, true);
   7.593 -                    }
   7.594 -                    idx++;
   7.595 -                    continue;
   7.596 -
   7.597 -                case '\\':
   7.598 -                {
   7.599 -                    // Escape always advances the stream
   7.600 -                    int c;
   7.601 -                    switch (c = escape ())
   7.602 -                    {
   7.603 -                        case ESC_COMPLEX:
   7.604 -                        case ESC_BACKREF:
   7.605 -
   7.606 -                            // Word boundaries and backrefs not allowed in a character class!
   7.607 -                            syntaxError("Bad character class");
   7.608 -
   7.609 -                        case ESC_CLASS:
   7.610 -
   7.611 -                            // Classes can't be an endpoint of a range
   7.612 -                            if (definingRange)
   7.613 -                            {
   7.614 -                                syntaxError("Bad character class");
   7.615 -                            }
   7.616 -
   7.617 -                            // Handle specific type of class (some are ok)
   7.618 -                            switch (pattern.charAt(idx - 1))
   7.619 -                            {
   7.620 -                                case RE.E_NSPACE:
   7.621 -                                case RE.E_NDIGIT:
   7.622 -                                case RE.E_NALNUM:
   7.623 -                                    syntaxError("Bad character class");
   7.624 -
   7.625 -                                case RE.E_SPACE:
   7.626 -                                    range.include('\t', include);
   7.627 -                                    range.include('\r', include);
   7.628 -                                    range.include('\f', include);
   7.629 -                                    range.include('\n', include);
   7.630 -                                    range.include('\b', include);
   7.631 -                                    range.include(' ', include);
   7.632 -                                    break;
   7.633 -
   7.634 -                                case RE.E_ALNUM:
   7.635 -                                    range.include('a', 'z', include);
   7.636 -                                    range.include('A', 'Z', include);
   7.637 -                                    range.include('_', include);
   7.638 -
   7.639 -                                    // Fall through!
   7.640 -
   7.641 -                                case RE.E_DIGIT:
   7.642 -                                    range.include('0', '9', include);
   7.643 -                                    break;
   7.644 -                            }
   7.645 -
   7.646 -                            // Make last char invalid (can't be a range start)
   7.647 -                            last = CHAR_INVALID;
   7.648 -                            break;
   7.649 -
   7.650 -                        default:
   7.651 -
   7.652 -                            // Escape is simple so treat as a simple char
   7.653 -                            simpleChar = (char) c;
   7.654 -                            break switchOnCharacter;
   7.655 -                    }
   7.656 -                }
   7.657 -                continue;
   7.658 -
   7.659 -                case '-':
   7.660 -
   7.661 -                    // Start a range if one isn't already started
   7.662 -                    if (definingRange)
   7.663 -                    {
   7.664 -                        syntaxError("Bad class range");
   7.665 -                    }
   7.666 -                    definingRange = true;
   7.667 -
   7.668 -                    // If no last character, start of range is 0
   7.669 -                    rangeStart = (last == CHAR_INVALID ? 0 : last);
   7.670 -
   7.671 -                    // Premature end of range. define up to Character.MAX_VALUE
   7.672 -                    if ((idx + 1) < len && pattern.charAt(++idx) == ']')
   7.673 -                    {
   7.674 -                        simpleChar = Character.MAX_VALUE;
   7.675 -                        break;
   7.676 -                    }
   7.677 -                    continue;
   7.678 -
   7.679 -                default:
   7.680 -                    simpleChar = pattern.charAt(idx++);
   7.681 -                    break;
   7.682 -            }
   7.683 -
   7.684 -            // Handle simple character simpleChar
   7.685 -            if (definingRange)
   7.686 -            {
   7.687 -                // if we are defining a range make it now
   7.688 -                rangeEnd = simpleChar;
   7.689 -
   7.690 -                // Actually create a range if the range is ok
   7.691 -                if (rangeStart >= rangeEnd)
   7.692 -                {
   7.693 -                    syntaxError("Bad character class");
   7.694 -                }
   7.695 -                range.include(rangeStart, rangeEnd, include);
   7.696 -
   7.697 -                // We are done defining the range
   7.698 -                last = CHAR_INVALID;
   7.699 -                definingRange = false;
   7.700 -            }
   7.701 -            else
   7.702 -            {
   7.703 -                // If simple character and not start of range, include it
   7.704 -                if (idx >= len || pattern.charAt(idx) != '-')
   7.705 -                {
   7.706 -                    range.include(simpleChar, include);
   7.707 -                }
   7.708 -                last = simpleChar;
   7.709 -            }
   7.710 -        }
   7.711 -
   7.712 -        // Shouldn't be out of input
   7.713 -        if (idx == len)
   7.714 -        {
   7.715 -            syntaxError("Unterminated character class");
   7.716 -        }
   7.717 -
   7.718 -        // Absorb the ']' end of class marker
   7.719 -        idx++;
   7.720 -
   7.721 -        // Emit character class definition
   7.722 -        instruction[ret + RE.offsetOpdata] = (char)range.num;
   7.723 -        for (int i = 0; i < range.num; i++)
   7.724 -        {
   7.725 -            emit((char)range.minRange[i]);
   7.726 -            emit((char)range.maxRange[i]);
   7.727 -        }
   7.728 -        return ret;
   7.729 -    }
   7.730 -
   7.731 -    /**
   7.732 -     * Absorb an atomic character string.  This method is a little tricky because
   7.733 -     * it can un-include the last character of string if a closure operator follows.
   7.734 -     * This is correct because *+? have higher precedence than concatentation (thus
   7.735 -     * ABC* means AB(C*) and NOT (ABC)*).
   7.736 -     * @return Index of new atom node
   7.737 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.738 -     */
   7.739 -    int atom() throws RESyntaxException
   7.740 -    {
   7.741 -        // Create a string node
   7.742 -        int ret = node(RE.OP_ATOM, 0);
   7.743 -
   7.744 -        // Length of atom
   7.745 -        int lenAtom = 0;
   7.746 -
   7.747 -        // Loop while we've got input
   7.748 -
   7.749 -        atomLoop:
   7.750 -
   7.751 -        while (idx < len)
   7.752 -        {
   7.753 -            // Is there a next char?
   7.754 -            if ((idx + 1) < len)
   7.755 -            {
   7.756 -                char c = pattern.charAt(idx + 1);
   7.757 -
   7.758 -                // If the next 'char' is an escape, look past the whole escape
   7.759 -                if (pattern.charAt(idx) == '\\')
   7.760 -                {
   7.761 -                    int idxEscape = idx;
   7.762 -                    escape();
   7.763 -                    if (idx < len)
   7.764 -                    {
   7.765 -                        c = pattern.charAt(idx);
   7.766 -                    }
   7.767 -                    idx = idxEscape;
   7.768 -                }
   7.769 -
   7.770 -                // Switch on next char
   7.771 -                switch (c)
   7.772 -                {
   7.773 -                    case '{':
   7.774 -                    case '?':
   7.775 -                    case '*':
   7.776 -                    case '+':
   7.777 -
   7.778 -                        // If the next character is a closure operator and our atom is non-empty, the
   7.779 -                        // current character should bind to the closure operator rather than the atom
   7.780 -                        if (lenAtom != 0)
   7.781 -                        {
   7.782 -                            break atomLoop;
   7.783 -                        }
   7.784 -                }
   7.785 -            }
   7.786 -
   7.787 -            // Switch on current char
   7.788 -            switch (pattern.charAt(idx))
   7.789 -            {
   7.790 -                case ']':
   7.791 -                case '^':
   7.792 -                case '$':
   7.793 -                case '.':
   7.794 -                case '[':
   7.795 -                case '(':
   7.796 -                case ')':
   7.797 -                case '|':
   7.798 -                    break atomLoop;
   7.799 -
   7.800 -                case '{':
   7.801 -                case '?':
   7.802 -                case '*':
   7.803 -                case '+':
   7.804 -
   7.805 -                    // We should have an atom by now
   7.806 -                    if (lenAtom == 0)
   7.807 -                    {
   7.808 -                        // No atom before closure
   7.809 -                        syntaxError("Missing operand to closure");
   7.810 -                    }
   7.811 -                    break atomLoop;
   7.812 -
   7.813 -                case '\\':
   7.814 -
   7.815 -                    {
   7.816 -                        // Get the escaped character (advances input automatically)
   7.817 -                        int idxBeforeEscape = idx;
   7.818 -                        int c = escape();
   7.819 -
   7.820 -                        // Check if it's a simple escape (as opposed to, say, a backreference)
   7.821 -                        if ((c & ESC_MASK) == ESC_MASK)
   7.822 -                        {
   7.823 -                            // Not a simple escape, so backup to where we were before the escape.
   7.824 -                            idx = idxBeforeEscape;
   7.825 -                            break atomLoop;
   7.826 -                        }
   7.827 -
   7.828 -                        // Add escaped char to atom
   7.829 -                        emit((char) c);
   7.830 -                        lenAtom++;
   7.831 -                    }
   7.832 -                    break;
   7.833 -
   7.834 -                default:
   7.835 -
   7.836 -                    // Add normal character to atom
   7.837 -                    emit(pattern.charAt(idx++));
   7.838 -                    lenAtom++;
   7.839 -                    break;
   7.840 -            }
   7.841 -        }
   7.842 -
   7.843 -        // This "shouldn't" happen
   7.844 -        if (lenAtom == 0)
   7.845 -        {
   7.846 -            internalError();
   7.847 -        }
   7.848 -
   7.849 -        // Emit the atom length into the program
   7.850 -        instruction[ret + RE.offsetOpdata] = (char)lenAtom;
   7.851 -        return ret;
   7.852 -    }
   7.853 -
   7.854 -    /**
   7.855 -     * Match a terminal node.
   7.856 -     * @param flags Flags
   7.857 -     * @return Index of terminal node (closeable)
   7.858 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.859 -     */
   7.860 -    int terminal(int[] flags) throws RESyntaxException
   7.861 -    {
   7.862 -        switch (pattern.charAt(idx))
   7.863 -        {
   7.864 -        case RE.OP_EOL:
   7.865 -        case RE.OP_BOL:
   7.866 -        case RE.OP_ANY:
   7.867 -            return node(pattern.charAt(idx++), 0);
   7.868 -
   7.869 -        case '[':
   7.870 -            return characterClass();
   7.871 -
   7.872 -        case '(':
   7.873 -            return expr(flags);
   7.874 -
   7.875 -        case ')':
   7.876 -            syntaxError("Unexpected close paren");
   7.877 -
   7.878 -        case '|':
   7.879 -            internalError();
   7.880 -
   7.881 -        case ']':
   7.882 -            syntaxError("Mismatched class");
   7.883 -
   7.884 -        case 0:
   7.885 -            syntaxError("Unexpected end of input");
   7.886 -
   7.887 -        case '?':
   7.888 -        case '+':
   7.889 -        case '{':
   7.890 -        case '*':
   7.891 -            syntaxError("Missing operand to closure");
   7.892 -
   7.893 -        case '\\':
   7.894 -            {
   7.895 -                // Don't forget, escape() advances the input stream!
   7.896 -                int idxBeforeEscape = idx;
   7.897 -
   7.898 -                // Switch on escaped character
   7.899 -                switch (escape())
   7.900 -                {
   7.901 -                    case ESC_CLASS:
   7.902 -                    case ESC_COMPLEX:
   7.903 -                        flags[0] &= ~NODE_NULLABLE;
   7.904 -                        return node(RE.OP_ESCAPE, pattern.charAt(idx - 1));
   7.905 -
   7.906 -                    case ESC_BACKREF:
   7.907 -                        {
   7.908 -                            char backreference = (char)(pattern.charAt(idx - 1) - '0');
   7.909 -                            if (parens <= backreference)
   7.910 -                            {
   7.911 -                                syntaxError("Bad backreference");
   7.912 -                            }
   7.913 -                            flags[0] |= NODE_NULLABLE;
   7.914 -                            return node(RE.OP_BACKREF, backreference);
   7.915 -                        }
   7.916 -
   7.917 -                    default:
   7.918 -
   7.919 -                        // We had a simple escape and we want to have it end up in
   7.920 -                        // an atom, so we back up and fall though to the default handling
   7.921 -                        idx = idxBeforeEscape;
   7.922 -                        flags[0] &= ~NODE_NULLABLE;
   7.923 -                        break;
   7.924 -                }
   7.925 -            }
   7.926 -        }
   7.927 -
   7.928 -        // Everything above either fails or returns.
   7.929 -        // If it wasn't one of the above, it must be the start of an atom.
   7.930 -        flags[0] &= ~NODE_NULLABLE;
   7.931 -        return atom();
   7.932 -    }
   7.933 -
   7.934 -    /**
   7.935 -     * Compile a possibly closured terminal
   7.936 -     * @param flags Flags passed by reference
   7.937 -     * @return Index of closured node
   7.938 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   7.939 -     */
   7.940 -    int closure(int[] flags) throws RESyntaxException
   7.941 -    {
   7.942 -        // Before terminal
   7.943 -        int idxBeforeTerminal = idx;
   7.944 -
   7.945 -        // Values to pass by reference to terminal()
   7.946 -        int[] terminalFlags = { NODE_NORMAL };
   7.947 -
   7.948 -        // Get terminal symbol
   7.949 -        int ret = terminal(terminalFlags);
   7.950 -
   7.951 -        // Or in flags from terminal symbol
   7.952 -        flags[0] |= terminalFlags[0];
   7.953 -
   7.954 -        // Advance input, set NODE_NULLABLE flag and do sanity checks
   7.955 -        if (idx >= len)
   7.956 -        {
   7.957 -            return ret;
   7.958 -        }
   7.959 -        boolean greedy = true;
   7.960 -        char closureType = pattern.charAt(idx);
   7.961 -        switch (closureType)
   7.962 -        {
   7.963 -            case '?':
   7.964 -            case '*':
   7.965 -
   7.966 -                // The current node can be null
   7.967 -                flags[0] |= NODE_NULLABLE;
   7.968 -
   7.969 -            case '+':
   7.970 -
   7.971 -                // Eat closure character
   7.972 -                idx++;
   7.973 -
   7.974 -            case '{':
   7.975 -
   7.976 -                // Don't allow blantant stupidity
   7.977 -                int opcode = instruction[ret + RE.offsetOpcode];
   7.978 -                if (opcode == RE.OP_BOL || opcode == RE.OP_EOL)
   7.979 -                {
   7.980 -                    syntaxError("Bad closure operand");
   7.981 -                }
   7.982 -                if ((terminalFlags[0] & NODE_NULLABLE) != 0)
   7.983 -                {
   7.984 -                    syntaxError("Closure operand can't be nullable");
   7.985 -                }
   7.986 -                break;
   7.987 -        }
   7.988 -
   7.989 -        // If the next character is a '?', make the closure non-greedy (reluctant)
   7.990 -        if (idx < len && pattern.charAt(idx) == '?')
   7.991 -        {
   7.992 -            idx++;
   7.993 -            greedy = false;
   7.994 -        }
   7.995 -
   7.996 -        if (greedy)
   7.997 -        {
   7.998 -            // Actually do the closure now
   7.999 -            switch (closureType)
  7.1000 -            {
  7.1001 -                case '{':
  7.1002 -                {
  7.1003 -                    // We look for our bracket in the list
  7.1004 -                    boolean found = false;
  7.1005 -                    int i;
  7.1006 -                    allocBrackets();
  7.1007 -                    for (i = 0; i < brackets; i++)
  7.1008 -                    {
  7.1009 -                        if (bracketStart[i] == idx)
  7.1010 -                        {
  7.1011 -                            found = true;
  7.1012 -                            break;
  7.1013 -                        }
  7.1014 -                    }
  7.1015 -
  7.1016 -                    // If its not in the list we parse the {m,n}
  7.1017 -                    if (!found)
  7.1018 -                    {
  7.1019 -                        if (brackets >= maxBrackets)
  7.1020 -                        {
  7.1021 -                            reallocBrackets();
  7.1022 -                        }
  7.1023 -                        bracketStart[brackets] = idx;
  7.1024 -                        bracket();
  7.1025 -                        bracketEnd[brackets] = idx;
  7.1026 -                        i = brackets++;
  7.1027 -                    }
  7.1028 -
  7.1029 -                    // Process min first
  7.1030 -                    if (bracketMin[i]-- > 0)
  7.1031 -                    {
  7.1032 -                        if (bracketMin[i] > 0 || bracketOpt[i] != 0) {
  7.1033 -                            // Rewind stream and run it through again - more matchers coming
  7.1034 -                            for (int j = 0; j < brackets; j++) {
  7.1035 -                                if (j != i && bracketStart[j] < idx
  7.1036 -                                    && bracketStart[j] >= idxBeforeTerminal)
  7.1037 -                                {
  7.1038 -                                    brackets--;
  7.1039 -                                    bracketStart[j] = bracketStart[brackets];
  7.1040 -                                    bracketEnd[j] = bracketEnd[brackets];
  7.1041 -                                    bracketMin[j] = bracketMin[brackets];
  7.1042 -                                    bracketOpt[j] = bracketOpt[brackets];
  7.1043 -                                }
  7.1044 -                            }
  7.1045 -
  7.1046 -                            idx = idxBeforeTerminal;
  7.1047 -                        } else {
  7.1048 -                            // Bug #1030: No optinal matches - no need to rewind
  7.1049 -                            idx = bracketEnd[i];
  7.1050 -                        }
  7.1051 -                        break;
  7.1052 -                    }
  7.1053 -
  7.1054 -                    // Do the right thing for maximum ({m,})
  7.1055 -                    if (bracketOpt[i] == bracketUnbounded)
  7.1056 -                    {
  7.1057 -                        // Drop through now and closure expression.
  7.1058 -                        // We are done with the {m,} expr, so skip rest
  7.1059 -                        closureType = '*';
  7.1060 -                        bracketOpt[i] = 0;
  7.1061 -                        idx = bracketEnd[i];
  7.1062 -                    }
  7.1063 -                    else
  7.1064 -                        if (bracketOpt[i]-- > 0)
  7.1065 -                        {
  7.1066 -                            if (bracketOpt[i] > 0)
  7.1067 -                            {
  7.1068 -                                // More optional matchers - 'play it again sam!'
  7.1069 -                                idx = idxBeforeTerminal;
  7.1070 -                            } else {
  7.1071 -                                // Bug #1030: We are done - this one is last and optional
  7.1072 -                                idx = bracketEnd[i];
  7.1073 -                            }
  7.1074 -                            // Drop through to optionally close
  7.1075 -                            closureType = '?';
  7.1076 -                        }
  7.1077 -                        else
  7.1078 -                        {
  7.1079 -                            // Rollback terminal - neither min nor opt matchers present
  7.1080 -                            lenInstruction = ret;
  7.1081 -                            node(RE.OP_NOTHING, 0);
  7.1082 -
  7.1083 -                            // We are done. skip the rest of {m,n} expr
  7.1084 -                            idx = bracketEnd[i];
  7.1085 -                            break;
  7.1086 -                        }
  7.1087 -                }
  7.1088 -
  7.1089 -                // Fall through!
  7.1090 -
  7.1091 -                case '?':
  7.1092 -                case '*':
  7.1093 -
  7.1094 -                    if (!greedy)
  7.1095 -                    {
  7.1096 -                        break;
  7.1097 -                    }
  7.1098 -
  7.1099 -                    if (closureType == '?')
  7.1100 -                    {
  7.1101 -                        // X? is compiled as (X|)
  7.1102 -                        nodeInsert(RE.OP_BRANCH, 0, ret);                 // branch before X
  7.1103 -                        setNextOfEnd(ret, node (RE.OP_BRANCH, 0));        // inserted branch to option
  7.1104 -                        int nothing = node (RE.OP_NOTHING, 0);            // which is OP_NOTHING
  7.1105 -                        setNextOfEnd(ret, nothing);                       // point (second) branch to OP_NOTHING
  7.1106 -                        setNextOfEnd(ret + RE.nodeSize, nothing);         // point the end of X to OP_NOTHING node
  7.1107 -                    }
  7.1108 -
  7.1109 -                    if (closureType == '*')
  7.1110 -                    {
  7.1111 -                        // X* is compiled as (X{gotoX}|)
  7.1112 -                        nodeInsert(RE.OP_BRANCH, 0, ret);                         // branch before X
  7.1113 -                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0));   // end of X points to an option
  7.1114 -                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0));     // to goto
  7.1115 -                        setNextOfEnd(ret + RE.nodeSize, ret);                     // the start again
  7.1116 -                        setNextOfEnd(ret, node(RE.OP_BRANCH, 0));                 // the other option is
  7.1117 -                        setNextOfEnd(ret, node(RE.OP_NOTHING, 0));                // OP_NOTHING
  7.1118 -                    }
  7.1119 -                    break;
  7.1120 -
  7.1121 -                case '+':
  7.1122 -                {
  7.1123 -                    // X+ is compiled as X({gotoX}|)
  7.1124 -                    int branch;
  7.1125 -                    branch = node(RE.OP_BRANCH, 0);                   // a new branch
  7.1126 -                    setNextOfEnd(ret, branch);                        // is added to the end of X
  7.1127 -                    setNextOfEnd(node(RE.OP_GOTO, 0), ret);           // one option is to go back to the start
  7.1128 -                    setNextOfEnd(branch, node(RE.OP_BRANCH, 0));      // the other option
  7.1129 -                    setNextOfEnd(ret, node(RE.OP_NOTHING, 0));        // is OP_NOTHING
  7.1130 -                }
  7.1131 -                break;
  7.1132 -            }
  7.1133 -        }
  7.1134 -        else
  7.1135 -        {
  7.1136 -            // Add end after closured subexpr
  7.1137 -            setNextOfEnd(ret, node(RE.OP_END, 0));
  7.1138 -
  7.1139 -            // Actually do the closure now
  7.1140 -            switch (closureType)
  7.1141 -            {
  7.1142 -                case '?':
  7.1143 -                    nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
  7.1144 -                    break;
  7.1145 -
  7.1146 -                case '*':
  7.1147 -                    nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
  7.1148 -                    break;
  7.1149 -
  7.1150 -                case '+':
  7.1151 -                    nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
  7.1152 -                    break;
  7.1153 -            }
  7.1154 -
  7.1155 -            // Point to the expr after the closure
  7.1156 -            setNextOfEnd(ret, lenInstruction);
  7.1157 -        }
  7.1158 -        return ret;
  7.1159 -    }
  7.1160 -
  7.1161 -    /**
  7.1162 -     * Compile one branch of an or operator (implements concatenation)
  7.1163 -     * @param flags Flags passed by reference
  7.1164 -     * @return Pointer to branch node
  7.1165 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
  7.1166 -     */
  7.1167 -    int branch(int[] flags) throws RESyntaxException
  7.1168 -    {
  7.1169 -        // Get each possibly closured piece and concat
  7.1170 -        int node;
  7.1171 -        int ret = node(RE.OP_BRANCH, 0);
  7.1172 -        int chain = -1;
  7.1173 -        int[] closureFlags = new int[1];
  7.1174 -        boolean nullable = true;
  7.1175 -        while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')')
  7.1176 -        {
  7.1177 -            // Get new node
  7.1178 -            closureFlags[0] = NODE_NORMAL;
  7.1179 -            node = closure(closureFlags);
  7.1180 -            if (closureFlags[0] == NODE_NORMAL)
  7.1181 -            {
  7.1182 -                nullable = false;
  7.1183 -            }
  7.1184 -
  7.1185 -            // If there's a chain, append to the end
  7.1186 -            if (chain != -1)
  7.1187 -            {
  7.1188 -                setNextOfEnd(chain, node);
  7.1189 -            }
  7.1190 -
  7.1191 -            // Chain starts at current
  7.1192 -            chain = node;
  7.1193 -        }
  7.1194 -
  7.1195 -        // If we don't run loop, make a nothing node
  7.1196 -        if (chain == -1)
  7.1197 -        {
  7.1198 -            node(RE.OP_NOTHING, 0);
  7.1199 -        }
  7.1200 -
  7.1201 -        // Set nullable flag for this branch
  7.1202 -        if (nullable)
  7.1203 -        {
  7.1204 -            flags[0] |= NODE_NULLABLE;
  7.1205 -        }
  7.1206 -        return ret;
  7.1207 -    }
  7.1208 -
  7.1209 -    /**
  7.1210 -     * Compile an expression with possible parens around it.  Paren matching
  7.1211 -     * is done at this level so we can tie the branch tails together.
  7.1212 -     * @param flags Flag value passed by reference
  7.1213 -     * @return Node index of expression in instruction array
  7.1214 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
  7.1215 -     */
  7.1216 -    int expr(int[] flags) throws RESyntaxException
  7.1217 -    {
  7.1218 -        // Create open paren node unless we were called from the top level (which has no parens)
  7.1219 -        int paren = -1;
  7.1220 -        int ret = -1;
  7.1221 -        int closeParens = parens;
  7.1222 -        if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
  7.1223 -        {
  7.1224 -            // if its a cluster ( rather than a proper subexpression ie with backrefs )
  7.1225 -            if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' )
  7.1226 -            {
  7.1227 -                paren = 2;
  7.1228 -                idx += 3;
  7.1229 -                ret = node( RE.OP_OPEN_CLUSTER, 0 );
  7.1230 -            }
  7.1231 -            else
  7.1232 -            {
  7.1233 -                paren = 1;
  7.1234 -                idx++;
  7.1235 -                ret = node(RE.OP_OPEN, parens++);
  7.1236 -            }
  7.1237 -        }
  7.1238 -        flags[0] &= ~NODE_TOPLEVEL;
  7.1239 -
  7.1240 -        // Create a branch node
  7.1241 -        int branch = branch(flags);
  7.1242 -        if (ret == -1)
  7.1243 -        {
  7.1244 -            ret = branch;
  7.1245 -        }
  7.1246 -        else
  7.1247 -        {
  7.1248 -            setNextOfEnd(ret, branch);
  7.1249 -        }
  7.1250 -
  7.1251 -        // Loop through branches
  7.1252 -        while (idx < len && pattern.charAt(idx) == '|')
  7.1253 -        {
  7.1254 -            idx++;
  7.1255 -            branch = branch(flags);
  7.1256 -            setNextOfEnd(ret, branch);
  7.1257 -        }
  7.1258 -
  7.1259 -        // Create an ending node (either a close paren or an OP_END)
  7.1260 -        int end;
  7.1261 -        if ( paren > 0 )
  7.1262 -        {
  7.1263 -            if (idx < len && pattern.charAt(idx) == ')')
  7.1264 -            {
  7.1265 -                idx++;
  7.1266 -            }
  7.1267 -            else
  7.1268 -            {
  7.1269 -                syntaxError("Missing close paren");
  7.1270 -            }
  7.1271 -            if ( paren == 1 )
  7.1272 -            {
  7.1273 -                end = node(RE.OP_CLOSE, closeParens);
  7.1274 -            }
  7.1275 -            else
  7.1276 -            {
  7.1277 -                end = node( RE.OP_CLOSE_CLUSTER, 0 );
  7.1278 -            }
  7.1279 -        }
  7.1280 -        else
  7.1281 -        {
  7.1282 -            end = node(RE.OP_END, 0);
  7.1283 -        }
  7.1284 -
  7.1285 -        // Append the ending node to the ret nodelist
  7.1286 -        setNextOfEnd(ret, end);
  7.1287 -
  7.1288 -        // Hook the ends of each branch to the end node
  7.1289 -        int currentNode = ret;
  7.1290 -        int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
  7.1291 -        // while the next node o
  7.1292 -        while ( nextNodeOffset != 0 && currentNode < lenInstruction )
  7.1293 -        {
  7.1294 -            // If branch, make the end of the branch's operand chain point to the end node.
  7.1295 -            if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
  7.1296 -            {
  7.1297 -                setNextOfEnd( currentNode + RE.nodeSize, end );
  7.1298 -            }
  7.1299 -            nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
  7.1300 -            currentNode += nextNodeOffset;
  7.1301 -        }
  7.1302 -
  7.1303 -        // Return the node list
  7.1304 -        return ret;
  7.1305 -    }
  7.1306 -
  7.1307 -    /**
  7.1308 -     * Compiles a regular expression pattern into a program runnable by the pattern
  7.1309 -     * matcher class 'RE'.
  7.1310 -     * @param pattern Regular expression pattern to compile (see RECompiler class
  7.1311 -     * for details).
  7.1312 -     * @return A compiled regular expression program.
  7.1313 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
  7.1314 -     * @see RECompiler
  7.1315 -     * @see RE
  7.1316 -     */
  7.1317 -    public REProgram compile(String pattern) throws RESyntaxException
  7.1318 -    {
  7.1319 -        // Initialize variables for compilation
  7.1320 -        this.pattern = pattern;                         // Save pattern in instance variable
  7.1321 -        len = pattern.length();                         // Precompute pattern length for speed
  7.1322 -        idx = 0;                                        // Set parsing index to the first character
  7.1323 -        lenInstruction = 0;                             // Set emitted instruction count to zero
  7.1324 -        parens = 1;                                     // Set paren level to 1 (the implicit outer parens)
  7.1325 -        brackets = 0;                                   // No bracketed closures yet
  7.1326 -
  7.1327 -        // Initialize pass by reference flags value
  7.1328 -        int[] flags = { NODE_TOPLEVEL };
  7.1329 -
  7.1330 -        // Parse expression
  7.1331 -        expr(flags);
  7.1332 -
  7.1333 -        // Should be at end of input
  7.1334 -        if (idx != len)
  7.1335 -        {
  7.1336 -            if (pattern.charAt(idx) == ')')
  7.1337 -            {
  7.1338 -                syntaxError("Unmatched close paren");
  7.1339 -            }
  7.1340 -            syntaxError("Unexpected input remains");
  7.1341 -        }
  7.1342 -
  7.1343 -        // Return the result
  7.1344 -        char[] ins = new char[lenInstruction];
  7.1345 -        System.arraycopy(instruction, 0, ins, 0, lenInstruction);
  7.1346 -        return new REProgram(parens, ins);
  7.1347 -    }
  7.1348 -
  7.1349 -    /**
  7.1350 -     * Local, nested class for maintaining character ranges for character classes.
  7.1351 -     */
  7.1352 -    class RERange
  7.1353 -    {
  7.1354 -        int size = 16;                      // Capacity of current range arrays
  7.1355 -        int[] minRange = new int[size];     // Range minima
  7.1356 -        int[] maxRange = new int[size];     // Range maxima
  7.1357 -        int num = 0;                        // Number of range array elements in use
  7.1358 -
  7.1359 -        /**
  7.1360 -         * Deletes the range at a given index from the range lists
  7.1361 -         * @param index Index of range to delete from minRange and maxRange arrays.
  7.1362 -         */
  7.1363 -        void delete(int index)
  7.1364 -        {
  7.1365 -            // Return if no elements left or index is out of range
  7.1366 -            if (num == 0 || index >= num)
  7.1367 -            {
  7.1368 -                return;
  7.1369 -            }
  7.1370 -
  7.1371 -            // Move elements down
  7.1372 -            while (++index < num)
  7.1373 -            {
  7.1374 -                if (index - 1 >= 0)
  7.1375 -                {
  7.1376 -                    minRange[index-1] = minRange[index];
  7.1377 -                    maxRange[index-1] = maxRange[index];
  7.1378 -                }
  7.1379 -            }
  7.1380 -
  7.1381 -            // One less element now
  7.1382 -            num--;
  7.1383 -        }
  7.1384 -
  7.1385 -        /**
  7.1386 -         * Merges a range into the range list, coalescing ranges if possible.
  7.1387 -         * @param min Minimum end of range
  7.1388 -         * @param max Maximum end of range
  7.1389 -         */
  7.1390 -        void merge(int min, int max)
  7.1391 -        {
  7.1392 -            // Loop through ranges
  7.1393 -            for (int i = 0; i < num; i++)
  7.1394 -            {
  7.1395 -                // Min-max is subsumed by minRange[i]-maxRange[i]
  7.1396 -                if (min >= minRange[i] && max <= maxRange[i])
  7.1397 -                {
  7.1398 -                    return;
  7.1399 -                }
  7.1400 -
  7.1401 -                // Min-max subsumes minRange[i]-maxRange[i]
  7.1402 -                else if (min <= minRange[i] && max >= maxRange[i])
  7.1403 -                {
  7.1404 -                    delete(i);
  7.1405 -                    merge(min, max);
  7.1406 -                    return;
  7.1407 -                }
  7.1408 -
  7.1409 -                // Min is in the range, but max is outside
  7.1410 -                else if (min >= minRange[i] && min <= maxRange[i])
  7.1411 -                {
  7.1412 -                    delete(i);
  7.1413 -                    min = minRange[i];
  7.1414 -                    merge(min, max);
  7.1415 -                    return;
  7.1416 -                }
  7.1417 -
  7.1418 -                // Max is in the range, but min is outside
  7.1419 -                else if (max >= minRange[i] && max <= maxRange[i])
  7.1420 -                {
  7.1421 -                    delete(i);
  7.1422 -                    max = maxRange[i];
  7.1423 -                    merge(min, max);
  7.1424 -                    return;
  7.1425 -                }
  7.1426 -            }
  7.1427 -
  7.1428 -            // Must not overlap any other ranges
  7.1429 -            if (num >= size)
  7.1430 -            {
  7.1431 -                size *= 2;
  7.1432 -                int[] newMin = new int[size];
  7.1433 -                int[] newMax = new int[size];
  7.1434 -                System.arraycopy(minRange, 0, newMin, 0, num);
  7.1435 -                System.arraycopy(maxRange, 0, newMax, 0, num);
  7.1436 -                minRange = newMin;
  7.1437 -                maxRange = newMax;
  7.1438 -            }
  7.1439 -            minRange[num] = min;
  7.1440 -            maxRange[num] = max;
  7.1441 -            num++;
  7.1442 -        }
  7.1443 -
  7.1444 -        /**
  7.1445 -         * Removes a range by deleting or shrinking all other ranges
  7.1446 -         * @param min Minimum end of range
  7.1447 -         * @param max Maximum end of range
  7.1448 -         */
  7.1449 -        void remove(int min, int max)
  7.1450 -        {
  7.1451 -            // Loop through ranges
  7.1452 -            for (int i = 0; i < num; i++)
  7.1453 -            {
  7.1454 -                // minRange[i]-maxRange[i] is subsumed by min-max
  7.1455 -                if (minRange[i] >= min && maxRange[i] <= max)
  7.1456 -                {
  7.1457 -                    delete(i);
  7.1458 -                    i--;
  7.1459 -                    return;
  7.1460 -                }
  7.1461 -
  7.1462 -                // min-max is subsumed by minRange[i]-maxRange[i]
  7.1463 -                else if (min >= minRange[i] && max <= maxRange[i])
  7.1464 -                {
  7.1465 -                    int minr = minRange[i];
  7.1466 -                    int maxr = maxRange[i];
  7.1467 -                    delete(i);
  7.1468 -                    if (minr < min)
  7.1469 -                    {
  7.1470 -                        merge(minr, min - 1);
  7.1471 -                    }
  7.1472 -                    if (max < maxr)
  7.1473 -                    {
  7.1474 -                        merge(max + 1, maxr);
  7.1475 -                    }
  7.1476 -                    return;
  7.1477 -                }
  7.1478 -
  7.1479 -                // minRange is in the range, but maxRange is outside
  7.1480 -                else if (minRange[i] >= min && minRange[i] <= max)
  7.1481 -                {
  7.1482 -                    minRange[i] = max + 1;
  7.1483 -                    return;
  7.1484 -                }
  7.1485 -
  7.1486 -                // maxRange is in the range, but minRange is outside
  7.1487 -                else if (maxRange[i] >= min && maxRange[i] <= max)
  7.1488 -                {
  7.1489 -                    maxRange[i] = min - 1;
  7.1490 -                    return;
  7.1491 -                }
  7.1492 -            }
  7.1493 -        }
  7.1494 -
  7.1495 -        /**
  7.1496 -         * Includes (or excludes) the range from min to max, inclusive.
  7.1497 -         * @param min Minimum end of range
  7.1498 -         * @param max Maximum end of range
  7.1499 -         * @param include True if range should be included.  False otherwise.
  7.1500 -         */
  7.1501 -        void include(int min, int max, boolean include)
  7.1502 -        {
  7.1503 -            if (include)
  7.1504 -            {
  7.1505 -                merge(min, max);
  7.1506 -            }
  7.1507 -            else
  7.1508 -            {
  7.1509 -                remove(min, max);
  7.1510 -            }
  7.1511 -        }
  7.1512 -
  7.1513 -        /**
  7.1514 -         * Includes a range with the same min and max
  7.1515 -         * @param minmax Minimum and maximum end of range (inclusive)
  7.1516 -         * @param include True if range should be included.  False otherwise.
  7.1517 -         */
  7.1518 -        void include(char minmax, boolean include)
  7.1519 -        {
  7.1520 -            include(minmax, minmax, include);
  7.1521 -        }
  7.1522 -    }
  7.1523 -}
     8.1 --- a/src/com/sun/org/apache/regexp/internal/REDebugCompiler.java	Sat Oct 24 16:18:47 2020 +0800
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,225 +0,0 @@
     8.4 -/*
     8.5 - * reserved comment block
     8.6 - * DO NOT REMOVE OR ALTER!
     8.7 - */
     8.8 -/*
     8.9 - * Copyright 1999-2004 The Apache Software Foundation.
    8.10 - *
    8.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    8.12 - * you may not use this file except in compliance with the License.
    8.13 - * You may obtain a copy of the License at
    8.14 - *
    8.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    8.16 - *
    8.17 - * Unless required by applicable law or agreed to in writing, software
    8.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    8.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    8.20 - * See the License for the specific language governing permissions and
    8.21 - * limitations under the License.
    8.22 - */
    8.23 -
    8.24 -package com.sun.org.apache.regexp.internal;
    8.25 -
    8.26 -import java.io.PrintWriter;
    8.27 -import java.util.Hashtable;
    8.28 -
    8.29 -/**
    8.30 - * A subclass of RECompiler which can dump a regular expression program
    8.31 - * for debugging purposes.
    8.32 - *
    8.33 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
    8.34 - */
    8.35 -public class REDebugCompiler extends RECompiler
    8.36 -{
    8.37 -    /**
    8.38 -     * Mapping from opcodes to descriptive strings
    8.39 -     */
    8.40 -    static Hashtable hashOpcode = new Hashtable();
    8.41 -    static
    8.42 -    {
    8.43 -        hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR),    "OP_RELUCTANTSTAR");
    8.44 -        hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS),    "OP_RELUCTANTPLUS");
    8.45 -        hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE),   "OP_RELUCTANTMAYBE");
    8.46 -        hashOpcode.put(new Integer(RE.OP_END),              "OP_END");
    8.47 -        hashOpcode.put(new Integer(RE.OP_BOL),              "OP_BOL");
    8.48 -        hashOpcode.put(new Integer(RE.OP_EOL),              "OP_EOL");
    8.49 -        hashOpcode.put(new Integer(RE.OP_ANY),              "OP_ANY");
    8.50 -        hashOpcode.put(new Integer(RE.OP_ANYOF),            "OP_ANYOF");
    8.51 -        hashOpcode.put(new Integer(RE.OP_BRANCH),           "OP_BRANCH");
    8.52 -        hashOpcode.put(new Integer(RE.OP_ATOM),             "OP_ATOM");
    8.53 -        hashOpcode.put(new Integer(RE.OP_STAR),             "OP_STAR");
    8.54 -        hashOpcode.put(new Integer(RE.OP_PLUS),             "OP_PLUS");
    8.55 -        hashOpcode.put(new Integer(RE.OP_MAYBE),            "OP_MAYBE");
    8.56 -        hashOpcode.put(new Integer(RE.OP_NOTHING),          "OP_NOTHING");
    8.57 -        hashOpcode.put(new Integer(RE.OP_GOTO),             "OP_GOTO");
    8.58 -        hashOpcode.put(new Integer(RE.OP_ESCAPE),           "OP_ESCAPE");
    8.59 -        hashOpcode.put(new Integer(RE.OP_OPEN),             "OP_OPEN");
    8.60 -        hashOpcode.put(new Integer(RE.OP_CLOSE),            "OP_CLOSE");
    8.61 -        hashOpcode.put(new Integer(RE.OP_BACKREF),          "OP_BACKREF");
    8.62 -        hashOpcode.put(new Integer(RE.OP_POSIXCLASS),       "OP_POSIXCLASS");
    8.63 -        hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER),      "OP_OPEN_CLUSTER");
    8.64 -        hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER),      "OP_CLOSE_CLUSTER");
    8.65 -    }
    8.66 -
    8.67 -    /**
    8.68 -     * Returns a descriptive string for an opcode.
    8.69 -     * @param opcode Opcode to convert to a string
    8.70 -     * @return Description of opcode
    8.71 -     */
    8.72 -    String opcodeToString(char opcode)
    8.73 -    {
    8.74 -        // Get string for opcode
    8.75 -        String ret =(String)hashOpcode.get(new Integer(opcode));
    8.76 -
    8.77 -        // Just in case we have a corrupt program
    8.78 -        if (ret == null)
    8.79 -        {
    8.80 -            ret = "OP_????";
    8.81 -        }
    8.82 -        return ret;
    8.83 -    }
    8.84 -
    8.85 -    /**
    8.86 -     * Return a string describing a (possibly unprintable) character.
    8.87 -     * @param c Character to convert to a printable representation
    8.88 -     * @return String representation of character
    8.89 -     */
    8.90 -    String charToString(char c)
    8.91 -    {
    8.92 -        // If it's unprintable, convert to '\###'
    8.93 -        if (c < ' ' || c > 127)
    8.94 -        {
    8.95 -            return "\\" + (int)c;
    8.96 -        }
    8.97 -
    8.98 -        // Return the character as a string
    8.99 -        return String.valueOf(c);
   8.100 -    }
   8.101 -
   8.102 -    /**
   8.103 -     * Returns a descriptive string for a node in a regular expression program.
   8.104 -     * @param node Node to describe
   8.105 -     * @return Description of node
   8.106 -     */
   8.107 -    String nodeToString(int node)
   8.108 -    {
   8.109 -        // Get opcode and opdata for node
   8.110 -        char opcode =      instruction[node + RE.offsetOpcode];
   8.111 -        int opdata  = (int)instruction[node + RE.offsetOpdata];
   8.112 -
   8.113 -        // Return opcode as a string and opdata value
   8.114 -        return opcodeToString(opcode) + ", opdata = " + opdata;
   8.115 -    }
   8.116 -
   8.117 -    /**
   8.118 -     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
   8.119 -     * pointer is initialized to 0.
   8.120 -     * @param opcode Opcode for new node
   8.121 -     * @param opdata Opdata for new node (only the low 16 bits are currently used)
   8.122 -     * @param insertAt Index at which to insert the new node in the program * /
   8.123 -    void nodeInsert(char opcode, int opdata, int insertAt) {
   8.124 -        System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
   8.125 -        PrintWriter writer = new PrintWriter( System.out );
   8.126 -        dumpProgram( writer );
   8.127 -        super.nodeInsert( opcode, opdata, insertAt );
   8.128 -        System.out.println( "====< " );
   8.129 -        dumpProgram( writer );
   8.130 -        writer.flush();
   8.131 -    }/**/
   8.132 -
   8.133 -
   8.134 -    /**
   8.135 -    * Appends a node to the end of a node chain
   8.136 -    * @param node Start of node chain to traverse
   8.137 -    * @param pointTo Node to have the tail of the chain point to * /
   8.138 -    void setNextOfEnd(int node, int pointTo) {
   8.139 -        System.out.println( "====> " + node + " " + pointTo );
   8.140 -        PrintWriter writer = new PrintWriter( System.out );
   8.141 -        dumpProgram( writer );
   8.142 -        super.setNextOfEnd( node, pointTo );
   8.143 -        System.out.println( "====< " );
   8.144 -        dumpProgram( writer );
   8.145 -        writer.flush();
   8.146 -    }/**/
   8.147 -
   8.148 -
   8.149 -    /**
   8.150 -     * Dumps the current program to a PrintWriter
   8.151 -     * @param p PrintWriter for program dump output
   8.152 -     */
   8.153 -    public void dumpProgram(PrintWriter p)
   8.154 -    {
   8.155 -        // Loop through the whole program
   8.156 -        for (int i = 0; i < lenInstruction; )
   8.157 -        {
   8.158 -            // Get opcode, opdata and next fields of current program node
   8.159 -            char opcode =        instruction[i + RE.offsetOpcode];
   8.160 -            char opdata =        instruction[i + RE.offsetOpdata];
   8.161 -            short next  = (short)instruction[i + RE.offsetNext];
   8.162 -
   8.163 -            // Display the current program node
   8.164 -            p.print(i + ". " + nodeToString(i) + ", next = ");
   8.165 -
   8.166 -            // If there's no next, say 'none', otherwise give absolute index of next node
   8.167 -            if (next == 0)
   8.168 -            {
   8.169 -                p.print("none");
   8.170 -            }
   8.171 -            else
   8.172 -            {
   8.173 -                p.print(i + next);
   8.174 -            }
   8.175 -
   8.176 -            // Move past node
   8.177 -            i += RE.nodeSize;
   8.178 -
   8.179 -            // If character class
   8.180 -            if (opcode == RE.OP_ANYOF)
   8.181 -            {
   8.182 -                // Opening bracket for start of char class
   8.183 -                p.print(", [");
   8.184 -
   8.185 -                // Show each range in the char class
   8.186 -                int rangeCount = opdata;
   8.187 -                for (int r = 0; r < rangeCount; r++)
   8.188 -                {
   8.189 -                    // Get first and last chars in range
   8.190 -                    char charFirst = instruction[i++];
   8.191 -                    char charLast  = instruction[i++];
   8.192 -
   8.193 -                    // Print range as X-Y, unless range encompasses only one char
   8.194 -                    if (charFirst == charLast)
   8.195 -                    {
   8.196 -                        p.print(charToString(charFirst));
   8.197 -                    }
   8.198 -                    else
   8.199 -                    {
   8.200 -                        p.print(charToString(charFirst) + "-" + charToString(charLast));
   8.201 -                    }
   8.202 -                }
   8.203 -
   8.204 -                // Annotate the end of the char class
   8.205 -                p.print("]");
   8.206 -            }
   8.207 -
   8.208 -            // If atom
   8.209 -            if (opcode == RE.OP_ATOM)
   8.210 -            {
   8.211 -                // Open quote
   8.212 -                p.print(", \"");
   8.213 -
   8.214 -                // Print each character in the atom
   8.215 -                for (int len = opdata; len-- != 0; )
   8.216 -                {
   8.217 -                    p.print(charToString(instruction[i++]));
   8.218 -                }
   8.219 -
   8.220 -                // Close quote
   8.221 -                p.print("\"");
   8.222 -            }
   8.223 -
   8.224 -            // Print a newline
   8.225 -            p.println("");
   8.226 -        }
   8.227 -    }
   8.228 -}
     9.1 --- a/src/com/sun/org/apache/regexp/internal/REProgram.java	Sat Oct 24 16:18:47 2020 +0800
     9.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.3 @@ -1,158 +0,0 @@
     9.4 -/*
     9.5 - * reserved comment block
     9.6 - * DO NOT REMOVE OR ALTER!
     9.7 - */
     9.8 -/*
     9.9 - * Copyright 1999-2004 The Apache Software Foundation.
    9.10 - *
    9.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    9.12 - * you may not use this file except in compliance with the License.
    9.13 - * You may obtain a copy of the License at
    9.14 - *
    9.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    9.16 - *
    9.17 - * Unless required by applicable law or agreed to in writing, software
    9.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    9.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    9.20 - * See the License for the specific language governing permissions and
    9.21 - * limitations under the License.
    9.22 - */
    9.23 -
    9.24 -package com.sun.org.apache.regexp.internal;
    9.25 -
    9.26 -import java.io.Serializable;
    9.27 -
    9.28 -/**
    9.29 - * A class that holds compiled regular expressions.  This is exposed mainly
    9.30 - * for use by the recompile utility (which helps you produce precompiled
    9.31 - * REProgram objects). You should not otherwise need to work directly with
    9.32 - * this class.
    9.33 -*
    9.34 - * @see RE
    9.35 - * @see RECompiler
    9.36 - *
    9.37 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
    9.38 - */
    9.39 -public class REProgram implements Serializable
    9.40 -{
    9.41 -    static final int OPT_HASBACKREFS = 1;
    9.42 -
    9.43 -    char[] instruction;         // The compiled regular expression 'program'
    9.44 -    int lenInstruction;         // The amount of the instruction buffer in use
    9.45 -    char[] prefix;              // Prefix string optimization
    9.46 -    int flags;                  // Optimization flags (REProgram.OPT_*)
    9.47 -    int maxParens = -1;
    9.48 -
    9.49 -    /**
    9.50 -     * Constructs a program object from a character array
    9.51 -     * @param instruction Character array with RE opcode instructions in it
    9.52 -     */
    9.53 -    public REProgram(char[] instruction)
    9.54 -    {
    9.55 -        this(instruction, instruction.length);
    9.56 -    }
    9.57 -
    9.58 -    /**
    9.59 -     * Constructs a program object from a character array
    9.60 -     * @param parens Count of parens in the program
    9.61 -     * @param instruction Character array with RE opcode instructions in it
    9.62 -     */
    9.63 -    public REProgram(int parens, char[] instruction)
    9.64 -    {
    9.65 -        this(instruction, instruction.length);
    9.66 -        this.maxParens = parens;
    9.67 -    }
    9.68 -
    9.69 -    /**
    9.70 -     * Constructs a program object from a character array
    9.71 -     * @param instruction Character array with RE opcode instructions in it
    9.72 -     * @param lenInstruction Amount of instruction array in use
    9.73 -     */
    9.74 -    public REProgram(char[] instruction, int lenInstruction)
    9.75 -    {
    9.76 -        setInstructions(instruction, lenInstruction);
    9.77 -    }
    9.78 -
    9.79 -    /**
    9.80 -     * Returns a copy of the current regular expression program in a character
    9.81 -     * array that is exactly the right length to hold the program.  If there is
    9.82 -     * no program compiled yet, getInstructions() will return null.
    9.83 -     * @return A copy of the current compiled RE program
    9.84 -     */
    9.85 -    public char[] getInstructions()
    9.86 -    {
    9.87 -        // Ensure program has been compiled!
    9.88 -        if (lenInstruction != 0)
    9.89 -        {
    9.90 -            // Return copy of program
    9.91 -            char[] ret = new char[lenInstruction];
    9.92 -            System.arraycopy(instruction, 0, ret, 0, lenInstruction);
    9.93 -            return ret;
    9.94 -        }
    9.95 -        return null;
    9.96 -    }
    9.97 -
    9.98 -    /**
    9.99 -     * Sets a new regular expression program to run.  It is this method which
   9.100 -     * performs any special compile-time search optimizations.  Currently only
   9.101 -     * two optimizations are in place - one which checks for backreferences
   9.102 -     * (so that they can be lazily allocated) and another which attempts to
   9.103 -     * find an prefix anchor string so that substantial amounts of input can
   9.104 -     * potentially be skipped without running the actual program.
   9.105 -     * @param instruction Program instruction buffer
   9.106 -     * @param lenInstruction Length of instruction buffer in use
   9.107 -     */
   9.108 -    public void setInstructions(char[] instruction, int lenInstruction)
   9.109 -    {
   9.110 -        // Save reference to instruction array
   9.111 -        this.instruction = instruction;
   9.112 -        this.lenInstruction = lenInstruction;
   9.113 -
   9.114 -        // Initialize other program-related variables
   9.115 -        flags = 0;
   9.116 -        prefix = null;
   9.117 -
   9.118 -        // Try various compile-time optimizations if there's a program
   9.119 -        if (instruction != null && lenInstruction != 0)
   9.120 -        {
   9.121 -            // If the first node is a branch
   9.122 -            if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
   9.123 -            {
   9.124 -                // to the end node
   9.125 -                int next = instruction[0 + RE.offsetNext];
   9.126 -                if (instruction[next + RE.offsetOpcode] == RE.OP_END)
   9.127 -                {
   9.128 -                    // and the branch starts with an atom
   9.129 -                    if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
   9.130 -                    {
   9.131 -                        // then get that atom as an prefix because there's no other choice
   9.132 -                        int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
   9.133 -                        prefix = new char[lenAtom];
   9.134 -                        System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
   9.135 -                    }
   9.136 -                }
   9.137 -            }
   9.138 -
   9.139 -            BackrefScanLoop:
   9.140 -
   9.141 -            // Check for backreferences
   9.142 -            for (int i = 0; i < lenInstruction; i += RE.nodeSize)
   9.143 -            {
   9.144 -                switch (instruction[i + RE.offsetOpcode])
   9.145 -                {
   9.146 -                    case RE.OP_ANYOF:
   9.147 -                        i += (instruction[i + RE.offsetOpdata] * 2);
   9.148 -                        break;
   9.149 -
   9.150 -                    case RE.OP_ATOM:
   9.151 -                        i += instruction[i + RE.offsetOpdata];
   9.152 -                        break;
   9.153 -
   9.154 -                    case RE.OP_BACKREF:
   9.155 -                        flags |= OPT_HASBACKREFS;
   9.156 -                        break BackrefScanLoop;
   9.157 -                }
   9.158 -            }
   9.159 -        }
   9.160 -    }
   9.161 -}
    10.1 --- a/src/com/sun/org/apache/regexp/internal/RESyntaxException.java	Sat Oct 24 16:18:47 2020 +0800
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,43 +0,0 @@
    10.4 -/*
    10.5 - * reserved comment block
    10.6 - * DO NOT REMOVE OR ALTER!
    10.7 - */
    10.8 -/*
    10.9 - * Copyright 1999-2004 The Apache Software Foundation.
   10.10 - *
   10.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   10.12 - * you may not use this file except in compliance with the License.
   10.13 - * You may obtain a copy of the License at
   10.14 - *
   10.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   10.16 - *
   10.17 - * Unless required by applicable law or agreed to in writing, software
   10.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   10.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   10.20 - * See the License for the specific language governing permissions and
   10.21 - * limitations under the License.
   10.22 - */
   10.23 -
   10.24 -package com.sun.org.apache.regexp.internal;
   10.25 -
   10.26 -/**
   10.27 - * Exception thrown to indicate a syntax error in a regular expression.
   10.28 - * This is a non-checked exception because you should only have problems compiling
   10.29 - * a regular expression during development.
   10.30 - * If you are making regular expresion programs dynamically then you can catch it
   10.31 - * if you wish. But should not be forced to.
   10.32 - *
   10.33 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   10.34 - * @author <a href="mailto:gholam@xtra.co.nz>Michael McCallum</a>
   10.35 - */
   10.36 -public class RESyntaxException extends RuntimeException
   10.37 -{
   10.38 -    /**
   10.39 -     * Constructor.
   10.40 -     * @param s Further description of the syntax error
   10.41 -     */
   10.42 -    public RESyntaxException(String s)
   10.43 -    {
   10.44 -        super("Syntax error: " + s);
   10.45 -    }
   10.46 -}
    11.1 --- a/src/com/sun/org/apache/regexp/internal/RETest.java	Sat Oct 24 16:18:47 2020 +0800
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,883 +0,0 @@
    11.4 -/*
    11.5 - * reserved comment block
    11.6 - * DO NOT REMOVE OR ALTER!
    11.7 - */
    11.8 -/*
    11.9 - * Copyright 1999-2004 The Apache Software Foundation.
   11.10 - *
   11.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   11.12 - * you may not use this file except in compliance with the License.
   11.13 - * You may obtain a copy of the License at
   11.14 - *
   11.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   11.16 - *
   11.17 - * Unless required by applicable law or agreed to in writing, software
   11.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   11.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   11.20 - * See the License for the specific language governing permissions and
   11.21 - * limitations under the License.
   11.22 - */
   11.23 -
   11.24 -package com.sun.org.apache.regexp.internal;
   11.25 -
   11.26 -import java.io.BufferedReader;
   11.27 -import java.io.FileReader;
   11.28 -import java.io.InputStreamReader;
   11.29 -import java.io.PrintWriter;
   11.30 -import java.io.File;
   11.31 -import java.io.ByteArrayOutputStream;
   11.32 -import java.io.ObjectOutputStream;
   11.33 -import java.io.ByteArrayInputStream;
   11.34 -import java.io.ObjectInputStream;
   11.35 -import java.io.StringBufferInputStream;
   11.36 -import java.io.StringReader;
   11.37 -import java.io.IOException;
   11.38 -
   11.39 -/**
   11.40 - * Data driven (and optionally interactive) testing harness to exercise regular
   11.41 - * expression compiler and matching engine.
   11.42 - *
   11.43 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   11.44 - * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
   11.45 - * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
   11.46 - */
   11.47 -public class RETest
   11.48 -{
   11.49 -    // True if we want to see output from success cases
   11.50 -    static final boolean showSuccesses = false;
   11.51 -
   11.52 -    // A new line character.
   11.53 -    static final String NEW_LINE = System.getProperty( "line.separator" );
   11.54 -
   11.55 -    // Construct a debug compiler
   11.56 -    REDebugCompiler compiler = new REDebugCompiler();
   11.57 -
   11.58 -    /**
   11.59 -     * Main program entrypoint.  If an argument is given, it will be compiled
   11.60 -     * and interactive matching will ensue.  If no argument is given, the
   11.61 -     * file RETest.txt will be used as automated testing input.
   11.62 -     * @param args Command line arguments (optional regular expression)
   11.63 -     */
   11.64 -    public static void main(String[] args)
   11.65 -    {
   11.66 -        try
   11.67 -        {
   11.68 -            if (!test( args )) {
   11.69 -                System.exit(1);
   11.70 -            }
   11.71 -        }
   11.72 -        catch (Exception e)
   11.73 -        {
   11.74 -            e.printStackTrace();
   11.75 -            System.exit(1);
   11.76 -        }
   11.77 -    }
   11.78 -
   11.79 -    /**
   11.80 -     * Testing entrypoint.
   11.81 -     * @param args Command line arguments
   11.82 -     * @exception Exception thrown in case of error
   11.83 -     */
   11.84 -    public static boolean test( String[] args ) throws Exception
   11.85 -    {
   11.86 -        RETest test = new RETest();
   11.87 -        // Run interactive tests against a single regexp
   11.88 -        if (args.length == 2)
   11.89 -        {
   11.90 -            test.runInteractiveTests(args[1]);
   11.91 -        }
   11.92 -        else if (args.length == 1)
   11.93 -        {
   11.94 -            // Run automated tests
   11.95 -            test.runAutomatedTests(args[0]);
   11.96 -        }
   11.97 -        else
   11.98 -        {
   11.99 -            System.out.println( "Usage: RETest ([-i] [regex]) ([/path/to/testfile.txt])" );
  11.100 -            System.out.println( "By Default will run automated tests from file 'docs/RETest.txt' ..." );
  11.101 -            System.out.println();
  11.102 -            test.runAutomatedTests("docs/RETest.txt");
  11.103 -        }
  11.104 -        return test.failures == 0;
  11.105 -    }
  11.106 -
  11.107 -    /**
  11.108 -     * Constructor
  11.109 -     */
  11.110 -    public RETest()
  11.111 -    {
  11.112 -    }
  11.113 -
  11.114 -    /**
  11.115 -     * Compile and test matching against a single expression
  11.116 -     * @param expr Expression to compile and test
  11.117 -     */
  11.118 -    void runInteractiveTests(String expr)
  11.119 -    {
  11.120 -        RE r = new RE();
  11.121 -        try
  11.122 -        {
  11.123 -            // Compile expression
  11.124 -            r.setProgram(compiler.compile(expr));
  11.125 -
  11.126 -            // Show expression
  11.127 -            say("" + NEW_LINE + "" + expr + "" + NEW_LINE + "");
  11.128 -
  11.129 -            // Show program for compiled expression
  11.130 -            PrintWriter writer = new PrintWriter( System.out );
  11.131 -            compiler.dumpProgram( writer );
  11.132 -            writer.flush();
  11.133 -
  11.134 -            boolean running = true;
  11.135 -            // Test matching against compiled expression
  11.136 -            while ( running )
  11.137 -            {
  11.138 -                // Read from keyboard
  11.139 -                BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
  11.140 -                System.out.print("> ");
  11.141 -                System.out.flush();
  11.142 -                String match = br.readLine();
  11.143 -
  11.144 -                if ( match != null )
  11.145 -                {
  11.146 -                    // Try a match against the keyboard input
  11.147 -                    if (r.match(match))
  11.148 -                    {
  11.149 -                        say("Match successful.");
  11.150 -                    }
  11.151 -                    else
  11.152 -                    {
  11.153 -                        say("Match failed.");
  11.154 -                    }
  11.155 -
  11.156 -                    // Show subparen registers
  11.157 -                    showParens(r);
  11.158 -                }
  11.159 -                else
  11.160 -                {
  11.161 -                    running = false;
  11.162 -                    System.out.println();
  11.163 -                }
  11.164 -            }
  11.165 -        }
  11.166 -        catch (Exception e)
  11.167 -        {
  11.168 -            say("Error: " + e.toString());
  11.169 -            e.printStackTrace();
  11.170 -        }
  11.171 -    }
  11.172 -
  11.173 -    /**
  11.174 -     * Exit with a fatal error.
  11.175 -     * @param s Last famous words before exiting
  11.176 -     */
  11.177 -    void die(String s)
  11.178 -    {
  11.179 -        say("FATAL ERROR: " + s);
  11.180 -        System.exit(-1);
  11.181 -    }
  11.182 -
  11.183 -    /**
  11.184 -     * Fail with an error. Will print a big failure message to System.out.
  11.185 -     *
  11.186 -     * @param log Output before failure
  11.187 -     * @param s Failure description
  11.188 -     */
  11.189 -    void fail(StringBuffer log, String s)
  11.190 -    {
  11.191 -        System.out.print(log.toString());
  11.192 -        fail(s);
  11.193 -    }
  11.194 -
  11.195 -    /**
  11.196 -     * Fail with an error. Will print a big failure message to System.out.
  11.197 -     *
  11.198 -     * @param s Failure description
  11.199 -     */
  11.200 -    void fail(String s)
  11.201 -    {
  11.202 -        failures++;
  11.203 -        say("" + NEW_LINE + "");
  11.204 -        say("*******************************************************");
  11.205 -        say("*********************  FAILURE!  **********************");
  11.206 -        say("*******************************************************");
  11.207 -        say("" + NEW_LINE + "");
  11.208 -        say(s);
  11.209 -        say("");
  11.210 -        // make sure the writer gets flushed.
  11.211 -        if (compiler != null) {
  11.212 -            PrintWriter writer = new PrintWriter( System.out );
  11.213 -            compiler.dumpProgram( writer );
  11.214 -            writer.flush();
  11.215 -            say("" + NEW_LINE + "");
  11.216 -        }
  11.217 -    }
  11.218 -
  11.219 -    /**
  11.220 -     * Say something to standard out
  11.221 -     * @param s What to say
  11.222 -     */
  11.223 -    void say(String s)
  11.224 -    {
  11.225 -        System.out.println(s);
  11.226 -    }
  11.227 -
  11.228 -    /**
  11.229 -     * Dump parenthesized subexpressions found by a regular expression matcher object
  11.230 -     * @param r Matcher object with results to show
  11.231 -     */
  11.232 -    void showParens(RE r)
  11.233 -    {
  11.234 -        // Loop through each paren
  11.235 -        for (int i = 0; i < r.getParenCount(); i++)
  11.236 -        {
  11.237 -            // Show paren register
  11.238 -            say("$" + i + " = " + r.getParen(i));
  11.239 -        }
  11.240 -    }
  11.241 -
  11.242 -    /*
  11.243 -     * number in automated test
  11.244 -     */
  11.245 -    int testCount = 0;
  11.246 -
  11.247 -    /*
  11.248 -     * Count of failures in automated test
  11.249 -     */
  11.250 -    int failures = 0;
  11.251 -
  11.252 -    /**
  11.253 -     * Run automated tests in RETest.txt file (from Perl 4.0 test battery)
  11.254 -     * @exception Exception thrown in case of error
  11.255 -     */
  11.256 -    void runAutomatedTests(String testDocument) throws Exception
  11.257 -    {
  11.258 -        long ms = System.currentTimeMillis();
  11.259 -
  11.260 -        // Some unit tests
  11.261 -        testPrecompiledRE();
  11.262 -        testSplitAndGrep();
  11.263 -        testSubst();
  11.264 -        testOther();
  11.265 -
  11.266 -        // Test from script file
  11.267 -        File testInput = new File(testDocument);
  11.268 -        if (! testInput.exists()) {
  11.269 -            throw new Exception ("Could not find: " + testDocument);
  11.270 -        }
  11.271 -
  11.272 -        BufferedReader br = new BufferedReader(new FileReader(testInput));
  11.273 -        try
  11.274 -        {
  11.275 -            // While input is available, parse lines
  11.276 -            while (br.ready())
  11.277 -            {
  11.278 -                RETestCase testcase = getNextTestCase(br);
  11.279 -                if (testcase != null) {
  11.280 -                    testcase.runTest();
  11.281 -                }
  11.282 -            }
  11.283 -        }
  11.284 -        finally
  11.285 -        {
  11.286 -            br.close();
  11.287 -        }
  11.288 -
  11.289 -        // Show match time
  11.290 -        say(NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms.");
  11.291 -
  11.292 -        // Print final results
  11.293 -        if (failures > 0) {
  11.294 -            say("*************** THERE ARE FAILURES! *******************");
  11.295 -        }
  11.296 -        say("Tests complete.  " + testCount + " tests, " + failures + " failure(s).");
  11.297 -    }
  11.298 -
  11.299 -    /**
  11.300 -     * Run automated unit test
  11.301 -     * @exception Exception thrown in case of error
  11.302 -     */
  11.303 -    void testOther() throws Exception
  11.304 -    {
  11.305 -        // Serialization test 1: Compile regexp and serialize/deserialize it
  11.306 -        RE r = new RE("(a*)b");
  11.307 -        say("Serialized/deserialized (a*)b");
  11.308 -        ByteArrayOutputStream out = new ByteArrayOutputStream(128);
  11.309 -        new ObjectOutputStream(out).writeObject(r);
  11.310 -        ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
  11.311 -        r = (RE)new ObjectInputStream(in).readObject();
  11.312 -        if (!r.match("aaab"))
  11.313 -        {
  11.314 -            fail("Did not match 'aaab' with deserialized RE.");
  11.315 -        } else {
  11.316 -            say("aaaab = true");
  11.317 -            showParens(r);
  11.318 -        }
  11.319 -
  11.320 -        // Serialization test 2: serialize/deserialize used regexp
  11.321 -        out.reset();
  11.322 -        say("Deserialized (a*)b");
  11.323 -        new ObjectOutputStream(out).writeObject(r);
  11.324 -        in = new ByteArrayInputStream(out.toByteArray());
  11.325 -        r = (RE)new ObjectInputStream(in).readObject();
  11.326 -        if (r.getParenCount() != 0)
  11.327 -        {
  11.328 -            fail("Has parens after deserialization.");
  11.329 -        }
  11.330 -        if (!r.match("aaab"))
  11.331 -        {
  11.332 -            fail("Did not match 'aaab' with deserialized RE.");
  11.333 -        } else {
  11.334 -            say("aaaab = true");
  11.335 -            showParens(r);
  11.336 -        }
  11.337 -
  11.338 -        // Test MATCH_CASEINDEPENDENT
  11.339 -        r = new RE("abc(\\w*)");
  11.340 -        say("MATCH_CASEINDEPENDENT abc(\\w*)");
  11.341 -        r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
  11.342 -        say("abc(d*)");
  11.343 -        if (!r.match("abcddd"))
  11.344 -        {
  11.345 -            fail("Did not match 'abcddd'.");
  11.346 -        } else {
  11.347 -            say("abcddd = true");
  11.348 -            showParens(r);
  11.349 -        }
  11.350 -
  11.351 -        if (!r.match("aBcDDdd"))
  11.352 -        {
  11.353 -            fail("Did not match 'aBcDDdd'.");
  11.354 -        } else {
  11.355 -            say("aBcDDdd = true");
  11.356 -            showParens(r);
  11.357 -        }
  11.358 -
  11.359 -        if (!r.match("ABCDDDDD"))
  11.360 -        {
  11.361 -            fail("Did not match 'ABCDDDDD'.");
  11.362 -        } else {
  11.363 -            say("ABCDDDDD = true");
  11.364 -            showParens(r);
  11.365 -        }
  11.366 -
  11.367 -        r = new RE("(A*)b\\1");
  11.368 -        r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
  11.369 -        if (!r.match("AaAaaaBAAAAAA"))
  11.370 -        {
  11.371 -            fail("Did not match 'AaAaaaBAAAAAA'.");
  11.372 -        } else {
  11.373 -            say("AaAaaaBAAAAAA = true");
  11.374 -            showParens(r);
  11.375 -        }
  11.376 -
  11.377 -        r = new RE("[A-Z]*");
  11.378 -        r.setMatchFlags(RE.MATCH_CASEINDEPENDENT);
  11.379 -        if (!r.match("CaBgDe12"))
  11.380 -        {
  11.381 -            fail("Did not match 'CaBgDe12'.");
  11.382 -        } else {
  11.383 -            say("CaBgDe12 = true");
  11.384 -            showParens(r);
  11.385 -        }
  11.386 -
  11.387 -        // Test MATCH_MULTILINE. Test for eol/bol symbols.
  11.388 -        r = new RE("^abc$", RE.MATCH_MULTILINE);
  11.389 -        if (!r.match("\nabc")) {
  11.390 -            fail("\"\\nabc\" doesn't match \"^abc$\"");
  11.391 -        }
  11.392 -        if (!r.match("\rabc")) {
  11.393 -            fail("\"\\rabc\" doesn't match \"^abc$\"");
  11.394 -        }
  11.395 -        if (!r.match("\r\nabc")) {
  11.396 -            fail("\"\\r\\nabc\" doesn't match \"^abc$\"");
  11.397 -        }
  11.398 -        if (!r.match("\u0085abc")) {
  11.399 -            fail("\"\\u0085abc\" doesn't match \"^abc$\"");
  11.400 -        }
  11.401 -        if (!r.match("\u2028abc")) {
  11.402 -            fail("\"\\u2028abc\" doesn't match \"^abc$\"");
  11.403 -        }
  11.404 -        if (!r.match("\u2029abc")) {
  11.405 -            fail("\"\\u2029abc\" doesn't match \"^abc$\"");
  11.406 -        }
  11.407 -
  11.408 -        // Test MATCH_MULTILINE. Test that '.' does not matches new line.
  11.409 -        r = new RE("^a.*b$", RE.MATCH_MULTILINE);
  11.410 -        if (r.match("a\nb")) {
  11.411 -            fail("\"a\\nb\" matches \"^a.*b$\"");
  11.412 -        }
  11.413 -        if (r.match("a\rb")) {
  11.414 -            fail("\"a\\rb\" matches \"^a.*b$\"");
  11.415 -        }
  11.416 -        if (r.match("a\r\nb")) {
  11.417 -            fail("\"a\\r\\nb\" matches \"^a.*b$\"");
  11.418 -        }
  11.419 -        if (r.match("a\u0085b")) {
  11.420 -            fail("\"a\\u0085b\" matches \"^a.*b$\"");
  11.421 -        }
  11.422 -        if (r.match("a\u2028b")) {
  11.423 -            fail("\"a\\u2028b\" matches \"^a.*b$\"");
  11.424 -        }
  11.425 -        if (r.match("a\u2029b")) {
  11.426 -            fail("\"a\\u2029b\" matches \"^a.*b$\"");
  11.427 -        }
  11.428 -    }
  11.429 -
  11.430 -    private void testPrecompiledRE()
  11.431 -    {
  11.432 -        // Pre-compiled regular expression "a*b"
  11.433 -        char[] re1Instructions =
  11.434 -        {
  11.435 -            0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
  11.436 -            0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
  11.437 -            0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
  11.438 -            0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
  11.439 -            0x0000,
  11.440 -        };
  11.441 -
  11.442 -        REProgram re1 = new REProgram(re1Instructions);
  11.443 -
  11.444 -        // Simple test of pre-compiled regular expressions
  11.445 -        RE r = new RE(re1);
  11.446 -        say("a*b");
  11.447 -        boolean result = r.match("aaab");
  11.448 -        say("aaab = " + result);
  11.449 -        showParens(r);
  11.450 -        if (!result) {
  11.451 -            fail("\"aaab\" doesn't match to precompiled \"a*b\"");
  11.452 -        }
  11.453 -
  11.454 -        result = r.match("b");
  11.455 -        say("b = " + result);
  11.456 -        showParens(r);
  11.457 -        if (!result) {
  11.458 -            fail("\"b\" doesn't match to precompiled \"a*b\"");
  11.459 -        }
  11.460 -
  11.461 -        result = r.match("c");
  11.462 -        say("c = " + result);
  11.463 -        showParens(r);
  11.464 -        if (result) {
  11.465 -            fail("\"c\" matches to precompiled \"a*b\"");
  11.466 -        }
  11.467 -
  11.468 -        result = r.match("ccccaaaaab");
  11.469 -        say("ccccaaaaab = " + result);
  11.470 -        showParens(r);
  11.471 -        if (!result) {
  11.472 -            fail("\"ccccaaaaab\" doesn't match to precompiled \"a*b\"");
  11.473 -        }
  11.474 -    }
  11.475 -
  11.476 -    private void testSplitAndGrep()
  11.477 -    {
  11.478 -        String[] expected = {"xxxx", "xxxx", "yyyy", "zzz"};
  11.479 -        RE r = new RE("a*b");
  11.480 -        String[] s = r.split("xxxxaabxxxxbyyyyaaabzzz");
  11.481 -        for (int i = 0; i < expected.length && i < s.length; i++) {
  11.482 -            assertEquals("Wrong splitted part", expected[i], s[i]);
  11.483 -        }
  11.484 -        assertEquals("Wrong number of splitted parts", expected.length,
  11.485 -                     s.length);
  11.486 -
  11.487 -        r = new RE("x+");
  11.488 -        expected = new String[] {"xxxx", "xxxx"};
  11.489 -        s = r.grep(s);
  11.490 -        for (int i = 0; i < s.length; i++)
  11.491 -        {
  11.492 -            say("s[" + i + "] = " + s[i]);
  11.493 -            assertEquals("Grep fails", expected[i], s[i]);
  11.494 -        }
  11.495 -        assertEquals("Wrong number of string found by grep", expected.length,
  11.496 -                     s.length);
  11.497 -    }
  11.498 -
  11.499 -    private void testSubst()
  11.500 -    {
  11.501 -        RE r = new RE("a*b");
  11.502 -        String expected = "-foo-garply-wacky-";
  11.503 -        String actual = r.subst("aaaabfooaaabgarplyaaabwackyb", "-");
  11.504 -        assertEquals("Wrong result of substitution in \"a*b\"", expected, actual);
  11.505 -
  11.506 -        // Test subst() with backreferences
  11.507 -        r = new RE("http://[\\.\\w\\-\\?/~_@&=%]+");
  11.508 -        actual = r.subst("visit us: http://www.apache.org!",
  11.509 -                         "1234<a href=\"$0\">$0</a>", RE.REPLACE_BACKREFERENCES);
  11.510 -        assertEquals("Wrong subst() result", "visit us: 1234<a href=\"http://www.apache.org\">http://www.apache.org</a>!", actual);
  11.511 -
  11.512 -        // Test subst() with backreferences without leading characters
  11.513 -        // before first backreference
  11.514 -        r = new RE("(.*?)=(.*)");
  11.515 -        actual = r.subst("variable=value",
  11.516 -                         "$1_test_$212", RE.REPLACE_BACKREFERENCES);
  11.517 -        assertEquals("Wrong subst() result", "variable_test_value12", actual);
  11.518 -
  11.519 -        // Test subst() with NO backreferences
  11.520 -        r = new RE("^a$");
  11.521 -        actual = r.subst("a",
  11.522 -                         "b", RE.REPLACE_BACKREFERENCES);
  11.523 -        assertEquals("Wrong subst() result", "b", actual);
  11.524 -
  11.525 -        // Test subst() with NO backreferences
  11.526 -        r = new RE("^a$", RE.MATCH_MULTILINE);
  11.527 -        actual = r.subst("\r\na\r\n",
  11.528 -                         "b", RE.REPLACE_BACKREFERENCES);
  11.529 -        assertEquals("Wrong subst() result", "\r\nb\r\n", actual);
  11.530 -    }
  11.531 -
  11.532 -    public void assertEquals(String message, String expected, String actual)
  11.533 -    {
  11.534 -        if (expected != null && !expected.equals(actual)
  11.535 -            || actual != null && !actual.equals(expected))
  11.536 -        {
  11.537 -            fail(message + " (expected \"" + expected
  11.538 -                 + "\", actual \"" + actual + "\")");
  11.539 -        }
  11.540 -    }
  11.541 -
  11.542 -    public void assertEquals(String message, int expected, int actual)
  11.543 -    {
  11.544 -        if (expected != actual) {
  11.545 -            fail(message + " (expected \"" + expected
  11.546 -                 + "\", actual \"" + actual + "\")");
  11.547 -        }
  11.548 -    }
  11.549 -
  11.550 -    /**
  11.551 -     * Converts yesno string to boolean.
  11.552 -     * @param yesno string representation of expected result
  11.553 -     * @return true if yesno is "YES", false if yesno is "NO"
  11.554 -     *         stops program otherwise.
  11.555 -     */
  11.556 -    private boolean getExpectedResult(String yesno)
  11.557 -    {
  11.558 -        if ("NO".equals(yesno))
  11.559 -        {
  11.560 -            return false;
  11.561 -        }
  11.562 -        else if ("YES".equals(yesno))
  11.563 -        {
  11.564 -            return true;
  11.565 -        }
  11.566 -        else
  11.567 -        {
  11.568 -            // Bad test script
  11.569 -            die("Test script error!");
  11.570 -            return false; //to please javac
  11.571 -        }
  11.572 -    }
  11.573 -
  11.574 -    /**
  11.575 -     * Finds next test description in a given script.
  11.576 -     * @param br <code>BufferedReader</code> for a script file
  11.577 -     * @return strign tag for next test description
  11.578 -     * @exception IOException if some io problems occured
  11.579 -     */
  11.580 -    private String findNextTest(BufferedReader br) throws IOException
  11.581 -    {
  11.582 -        String number = "";
  11.583 -
  11.584 -        while (br.ready())
  11.585 -        {
  11.586 -            number = br.readLine();
  11.587 -            if (number == null)
  11.588 -            {
  11.589 -                break;
  11.590 -            }
  11.591 -            number = number.trim();
  11.592 -            if (number.startsWith("#"))
  11.593 -            {
  11.594 -                break;
  11.595 -            }
  11.596 -            if (!number.equals(""))
  11.597 -            {
  11.598 -                say("Script error.  Line = " + number);
  11.599 -                System.exit(-1);
  11.600 -            }
  11.601 -        }
  11.602 -        return number;
  11.603 -    }
  11.604 -
  11.605 -    /**
  11.606 -     * Creates testcase for the next test description in the script file.
  11.607 -     * @param br <code>BufferedReader</code> for script file.
  11.608 -     * @return a new tescase or null.
  11.609 -     * @exception IOException if some io problems occured
  11.610 -     */
  11.611 -    private RETestCase getNextTestCase(BufferedReader br) throws IOException
  11.612 -    {
  11.613 -        // Find next re test case
  11.614 -        final String tag = findNextTest(br);
  11.615 -
  11.616 -        // Are we done?
  11.617 -        if (!br.ready())
  11.618 -        {
  11.619 -            return null;
  11.620 -        }
  11.621 -
  11.622 -        // Get expression
  11.623 -        final String expr = br.readLine();
  11.624 -
  11.625 -        // Get test information
  11.626 -        final String matchAgainst = br.readLine();
  11.627 -        final boolean badPattern = "ERR".equals(matchAgainst);
  11.628 -        boolean shouldMatch = false;
  11.629 -        int expectedParenCount = 0;
  11.630 -        String[] expectedParens = null;
  11.631 -
  11.632 -        if (!badPattern) {
  11.633 -            shouldMatch = getExpectedResult(br.readLine().trim());
  11.634 -            if (shouldMatch) {
  11.635 -                expectedParenCount = Integer.parseInt(br.readLine().trim());
  11.636 -                expectedParens = new String[expectedParenCount];
  11.637 -                for (int i = 0; i < expectedParenCount; i++) {
  11.638 -                    expectedParens[i] = br.readLine();
  11.639 -                }
  11.640 -            }
  11.641 -        }
  11.642 -
  11.643 -        return new RETestCase(this, tag, expr, matchAgainst, badPattern,
  11.644 -                              shouldMatch, expectedParens);
  11.645 -    }
  11.646 -}
  11.647 -
  11.648 -final class RETestCase
  11.649 -{
  11.650 -    final private StringBuffer log = new StringBuffer();
  11.651 -    final private int number;
  11.652 -    final private String tag; // number from script file
  11.653 -    final private String pattern;
  11.654 -    final private String toMatch;
  11.655 -    final private boolean badPattern;
  11.656 -    final private boolean shouldMatch;
  11.657 -    final private String[] parens;
  11.658 -    final private RETest test;
  11.659 -    private RE regexp;
  11.660 -
  11.661 -    public RETestCase(RETest test, String tag, String pattern,
  11.662 -                      String toMatch, boolean badPattern,
  11.663 -                      boolean shouldMatch, String[] parens)
  11.664 -    {
  11.665 -        this.number = ++test.testCount;
  11.666 -        this.test = test;
  11.667 -        this.tag = tag;
  11.668 -        this.pattern = pattern;
  11.669 -        this.toMatch = toMatch;
  11.670 -        this.badPattern = badPattern;
  11.671 -        this.shouldMatch = shouldMatch;
  11.672 -        if (parens != null) {
  11.673 -            this.parens = new String[parens.length];
  11.674 -            for (int i = 0; i < parens.length; i++) {
  11.675 -                this.parens[i] = parens[i];
  11.676 -            }
  11.677 -        } else {
  11.678 -            this.parens = null;
  11.679 -        }
  11.680 -    }
  11.681 -
  11.682 -    public void runTest()
  11.683 -    {
  11.684 -        test.say(tag + "(" + number + "): " + pattern);
  11.685 -        if (testCreation()) {
  11.686 -            testMatch();
  11.687 -        }
  11.688 -    }
  11.689 -
  11.690 -    boolean testCreation()
  11.691 -    {
  11.692 -        try
  11.693 -        {
  11.694 -            // Compile it
  11.695 -            regexp = new RE();
  11.696 -            regexp.setProgram(test.compiler.compile(pattern));
  11.697 -            // Expression didn't cause an expected error
  11.698 -            if (badPattern)
  11.699 -            {
  11.700 -                test.fail(log, "Was expected to be an error, but wasn't.");
  11.701 -                return false;
  11.702 -            }
  11.703 -
  11.704 -            return true;
  11.705 -        }
  11.706 -        // Some expressions *should* cause exceptions to be thrown
  11.707 -        catch (Exception e)
  11.708 -        {
  11.709 -            // If it was supposed to be an error, report success and continue
  11.710 -            if (badPattern)
  11.711 -            {
  11.712 -                log.append("   Match: ERR\n");
  11.713 -                success("Produces an error (" + e.toString() + "), as expected.");
  11.714 -                return false;
  11.715 -            }
  11.716 -
  11.717 -            // Wasn't supposed to be an error
  11.718 -            String message = (e.getMessage() == null) ? e.toString() : e.getMessage();
  11.719 -            test.fail(log, "Produces an unexpected exception \"" + message + "\"");
  11.720 -            e.printStackTrace();
  11.721 -        }
  11.722 -        catch (Error e)
  11.723 -        {
  11.724 -            // Internal error happened
  11.725 -            test.fail(log, "Compiler threw fatal error \"" + e.getMessage() + "\"");
  11.726 -            e.printStackTrace();
  11.727 -        }
  11.728 -
  11.729 -        return false;
  11.730 -    }
  11.731 -
  11.732 -    private void testMatch()
  11.733 -    {
  11.734 -        log.append("   Match against: '" + toMatch + "'\n");
  11.735 -        // Try regular matching
  11.736 -        try
  11.737 -        {
  11.738 -            // Match against the string
  11.739 -            boolean result = regexp.match(toMatch);
  11.740 -            log.append("   Matched: " + (result ? "YES" : "NO") + "\n");
  11.741 -
  11.742 -            // Check result, parens, and iterators
  11.743 -            if (checkResult(result) && (!shouldMatch || checkParens()))
  11.744 -            {
  11.745 -                // test match(CharacterIterator, int)
  11.746 -                // for every CharacterIterator implementation.
  11.747 -                log.append("   Match using StringCharacterIterator\n");
  11.748 -                if (!tryMatchUsingCI(new StringCharacterIterator(toMatch)))
  11.749 -                    return;
  11.750 -
  11.751 -                log.append("   Match using CharacterArrayCharacterIterator\n");
  11.752 -                if (!tryMatchUsingCI(new CharacterArrayCharacterIterator(toMatch.toCharArray(), 0, toMatch.length())))
  11.753 -                    return;
  11.754 -
  11.755 -                log.append("   Match using StreamCharacterIterator\n");
  11.756 -                if (!tryMatchUsingCI(new StreamCharacterIterator(new StringBufferInputStream(toMatch))))
  11.757 -                    return;
  11.758 -
  11.759 -                log.append("   Match using ReaderCharacterIterator\n");
  11.760 -                if (!tryMatchUsingCI(new ReaderCharacterIterator(new StringReader(toMatch))))
  11.761 -                    return;
  11.762 -            }
  11.763 -        }
  11.764 -        // Matcher blew it
  11.765 -        catch(Exception e)
  11.766 -        {
  11.767 -            test.fail(log, "Matcher threw exception: " + e.toString());
  11.768 -            e.printStackTrace();
  11.769 -        }
  11.770 -        // Internal error
  11.771 -        catch(Error e)
  11.772 -        {
  11.773 -            test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\"");
  11.774 -            e.printStackTrace();
  11.775 -        }
  11.776 -    }
  11.777 -
  11.778 -    private boolean checkResult(boolean result)
  11.779 -    {
  11.780 -        // Write status
  11.781 -        if (result == shouldMatch) {
  11.782 -            success((shouldMatch ? "Matched" : "Did not match")
  11.783 -                    + " \"" + toMatch + "\", as expected:");
  11.784 -            return true;
  11.785 -        } else {
  11.786 -            if (shouldMatch) {
  11.787 -                test.fail(log, "Did not match \"" + toMatch + "\", when expected to.");
  11.788 -            } else {
  11.789 -                test.fail(log, "Matched \"" + toMatch + "\", when not expected to.");
  11.790 -            }
  11.791 -            return false;
  11.792 -        }
  11.793 -    }
  11.794 -
  11.795 -    private boolean checkParens()
  11.796 -    {
  11.797 -        // Show subexpression registers
  11.798 -        if (RETest.showSuccesses)
  11.799 -        {
  11.800 -            test.showParens(regexp);
  11.801 -        }
  11.802 -
  11.803 -        log.append("   Paren count: " + regexp.getParenCount() + "\n");
  11.804 -        if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
  11.805 -        {
  11.806 -            return false;
  11.807 -        }
  11.808 -
  11.809 -        // Check registers against expected contents
  11.810 -        for (int p = 0; p < regexp.getParenCount(); p++)
  11.811 -        {
  11.812 -            log.append("   Paren " + p + ": " + regexp.getParen(p) + "\n");
  11.813 -
  11.814 -            // Compare expected result with actual
  11.815 -            if ("null".equals(parens[p]) && regexp.getParen(p) == null)
  11.816 -            {
  11.817 -                // Consider "null" in test file equal to null
  11.818 -                continue;
  11.819 -            }
  11.820 -            if (!assertEquals(log, "Wrong register " + p, parens[p], regexp.getParen(p)))
  11.821 -            {
  11.822 -                return false;
  11.823 -            }
  11.824 -        }
  11.825 -
  11.826 -        return true;
  11.827 -    }
  11.828 -
  11.829 -    boolean tryMatchUsingCI(CharacterIterator matchAgainst)
  11.830 -    {
  11.831 -        try {
  11.832 -            boolean result = regexp.match(matchAgainst, 0);
  11.833 -            log.append("   Match: " + (result ? "YES" : "NO") + "\n");
  11.834 -            return checkResult(result) && (!shouldMatch || checkParens());
  11.835 -        }
  11.836 -        // Matcher blew it
  11.837 -        catch(Exception e)
  11.838 -        {
  11.839 -            test.fail(log, "Matcher threw exception: " + e.toString());
  11.840 -            e.printStackTrace();
  11.841 -        }
  11.842 -        // Internal error
  11.843 -        catch(Error e)
  11.844 -        {
  11.845 -            test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\"");
  11.846 -            e.printStackTrace();
  11.847 -        }
  11.848 -        return false;
  11.849 -    }
  11.850 -
  11.851 -    public boolean assertEquals(StringBuffer log, String message, String expected, String actual)
  11.852 -    {
  11.853 -        if (expected != null && !expected.equals(actual)
  11.854 -            || actual != null && !actual.equals(expected))
  11.855 -        {
  11.856 -            test.fail(log, message + " (expected \"" + expected
  11.857 -                      + "\", actual \"" + actual + "\")");
  11.858 -            return false;
  11.859 -        }
  11.860 -        return true;
  11.861 -    }
  11.862 -
  11.863 -    public boolean assertEquals(StringBuffer log, String message, int expected, int actual)
  11.864 -    {
  11.865 -        if (expected != actual) {
  11.866 -            test.fail(log, message + " (expected \"" + expected
  11.867 -                      + "\", actual \"" + actual + "\")");
  11.868 -            return false;
  11.869 -        }
  11.870 -        return true;
  11.871 -    }
  11.872 -
  11.873 -    /**
  11.874 -     * Show a success
  11.875 -     * @param s Success story
  11.876 -     */
  11.877 -    void success(String s)
  11.878 -    {
  11.879 -        if (RETest.showSuccesses)
  11.880 -        {
  11.881 -            test.say("" + RETest.NEW_LINE + "-----------------------" + RETest.NEW_LINE + "");
  11.882 -            test.say("Expression #" + (number) + " \"" + pattern + "\" ");
  11.883 -            test.say("Success: " + s);
  11.884 -        }
  11.885 -    }
  11.886 -}
    12.1 --- a/src/com/sun/org/apache/regexp/internal/REUtil.java	Sat Oct 24 16:18:47 2020 +0800
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,61 +0,0 @@
    12.4 -/*
    12.5 - * reserved comment block
    12.6 - * DO NOT REMOVE OR ALTER!
    12.7 - */
    12.8 -/*
    12.9 - * Copyright 1999-2004 The Apache Software Foundation.
   12.10 - *
   12.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   12.12 - * you may not use this file except in compliance with the License.
   12.13 - * You may obtain a copy of the License at
   12.14 - *
   12.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   12.16 - *
   12.17 - * Unless required by applicable law or agreed to in writing, software
   12.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   12.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   12.20 - * See the License for the specific language governing permissions and
   12.21 - * limitations under the License.
   12.22 - */
   12.23 -
   12.24 -package com.sun.org.apache.regexp.internal;
   12.25 -
   12.26 -/**
   12.27 - * This is a class that contains utility helper methods for this package.
   12.28 - *
   12.29 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   12.30 - */
   12.31 -public class REUtil
   12.32 -{
   12.33 -    /** complex: */
   12.34 -    private static final String complexPrefix = "complex:";
   12.35 -
   12.36 -    /**
   12.37 -     * Creates a regular expression, permitting simple or complex syntax
   12.38 -     * @param expression The expression, beginning with a prefix if it's complex or
   12.39 -     * having no prefix if it's simple
   12.40 -     * @param matchFlags Matching style flags
   12.41 -     * @return The regular expression object
   12.42 -     * @exception RESyntaxException thrown in case of error
   12.43 -     */
   12.44 -    public static RE createRE(String expression, int matchFlags) throws RESyntaxException
   12.45 -    {
   12.46 -        if (expression.startsWith(complexPrefix))
   12.47 -        {
   12.48 -            return new RE(expression.substring(complexPrefix.length()), matchFlags);
   12.49 -        }
   12.50 -        return new RE(RE.simplePatternToFullRegularExpression(expression), matchFlags);
   12.51 -    }
   12.52 -
   12.53 -    /**
   12.54 -     * Creates a regular expression, permitting simple or complex syntax
   12.55 -     * @param expression The expression, beginning with a prefix if it's complex or
   12.56 -     * having no prefix if it's simple
   12.57 -     * @return The regular expression object
   12.58 -     * @exception RESyntaxException thrown in case of error
   12.59 -     */
   12.60 -    public static RE createRE(String expression) throws RESyntaxException
   12.61 -    {
   12.62 -        return createRE(expression, RE.MATCH_NORMAL);
   12.63 -    }
   12.64 -}
    13.1 --- a/src/com/sun/org/apache/regexp/internal/ReaderCharacterIterator.java	Sat Oct 24 16:18:47 2020 +0800
    13.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.3 @@ -1,164 +0,0 @@
    13.4 -/*
    13.5 - * reserved comment block
    13.6 - * DO NOT REMOVE OR ALTER!
    13.7 - */
    13.8 -/*
    13.9 - * Copyright 1999-2004 The Apache Software Foundation.
   13.10 - *
   13.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   13.12 - * you may not use this file except in compliance with the License.
   13.13 - * You may obtain a copy of the License at
   13.14 - *
   13.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   13.16 - *
   13.17 - * Unless required by applicable law or agreed to in writing, software
   13.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   13.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   13.20 - * See the License for the specific language governing permissions and
   13.21 - * limitations under the License.
   13.22 - */
   13.23 -
   13.24 -package com.sun.org.apache.regexp.internal;
   13.25 -
   13.26 -import java.io.Reader;
   13.27 -import java.io.IOException;
   13.28 -
   13.29 -/**
   13.30 - * Encapsulates java.io.Reader as CharacterIterator
   13.31 - *
   13.32 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   13.33 - */
   13.34 -public final class ReaderCharacterIterator implements CharacterIterator
   13.35 -{
   13.36 -    /** Underlying reader */
   13.37 -    private final Reader reader;
   13.38 -
   13.39 -    /** Buffer of read chars */
   13.40 -    private final StringBuffer buff;
   13.41 -
   13.42 -    /** read end? */
   13.43 -    private boolean closed;
   13.44 -
   13.45 -    /** @param reader a Reader, which is parsed */
   13.46 -    public ReaderCharacterIterator(Reader reader)
   13.47 -    {
   13.48 -        this.reader = reader;
   13.49 -        this.buff = new StringBuffer(512);
   13.50 -        this.closed = false;
   13.51 -    }
   13.52 -
   13.53 -    /** @return a substring */
   13.54 -    public String substring(int beginIndex, int endIndex)
   13.55 -    {
   13.56 -        try
   13.57 -        {
   13.58 -            ensure(endIndex);
   13.59 -            return buff.toString().substring(beginIndex, endIndex);
   13.60 -        }
   13.61 -        catch (IOException e)
   13.62 -        {
   13.63 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   13.64 -        }
   13.65 -    }
   13.66 -
   13.67 -    /** @return a substring */
   13.68 -    public String substring(int beginIndex)
   13.69 -    {
   13.70 -        try
   13.71 -        {
   13.72 -            readAll();
   13.73 -            return buff.toString().substring(beginIndex);
   13.74 -        }
   13.75 -        catch (IOException e)
   13.76 -        {
   13.77 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   13.78 -        }
   13.79 -    }
   13.80 -
   13.81 -    /** @return a character at the specified position. */
   13.82 -    public char charAt(int pos)
   13.83 -    {
   13.84 -        try
   13.85 -        {
   13.86 -            ensure(pos);
   13.87 -            return buff.charAt(pos);
   13.88 -        }
   13.89 -        catch (IOException e)
   13.90 -        {
   13.91 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   13.92 -        }
   13.93 -    }
   13.94 -
   13.95 -    /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
   13.96 -    public boolean isEnd(int pos)
   13.97 -    {
   13.98 -        if (buff.length() > pos)
   13.99 -        {
  13.100 -            return false;
  13.101 -        }
  13.102 -        else
  13.103 -        {
  13.104 -            try
  13.105 -            {
  13.106 -                ensure(pos);
  13.107 -                return (buff.length() <= pos);
  13.108 -            }
  13.109 -            catch (IOException e)
  13.110 -            {
  13.111 -                throw new StringIndexOutOfBoundsException(e.getMessage());
  13.112 -            }
  13.113 -        }
  13.114 -    }
  13.115 -
  13.116 -    /** Reads n characters from the stream and appends them to the buffer */
  13.117 -    private int read(int n) throws IOException
  13.118 -    {
  13.119 -        if (closed)
  13.120 -        {
  13.121 -            return 0;
  13.122 -        }
  13.123 -
  13.124 -        char[] c = new char[n];
  13.125 -        int count = 0;
  13.126 -        int read = 0;
  13.127 -
  13.128 -        do
  13.129 -        {
  13.130 -            read = reader.read(c);
  13.131 -            if (read < 0) // EOF
  13.132 -            {
  13.133 -                closed = true;
  13.134 -                break;
  13.135 -            }
  13.136 -            count += read;
  13.137 -            buff.append(c, 0, read);
  13.138 -        }
  13.139 -        while (count < n);
  13.140 -
  13.141 -        return count;
  13.142 -    }
  13.143 -
  13.144 -    /** Reads rest of the stream. */
  13.145 -    private void readAll() throws IOException
  13.146 -    {
  13.147 -        while(! closed)
  13.148 -        {
  13.149 -            read(1000);
  13.150 -        }
  13.151 -    }
  13.152 -
  13.153 -    /** Reads chars up to the idx */
  13.154 -    private void ensure(int idx) throws IOException
  13.155 -    {
  13.156 -        if (closed)
  13.157 -        {
  13.158 -            return;
  13.159 -        }
  13.160 -
  13.161 -        if (idx < buff.length())
  13.162 -        {
  13.163 -            return;
  13.164 -        }
  13.165 -        read(idx + 1 - buff.length());
  13.166 -    }
  13.167 -}
    14.1 --- a/src/com/sun/org/apache/regexp/internal/StreamCharacterIterator.java	Sat Oct 24 16:18:47 2020 +0800
    14.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.3 @@ -1,161 +0,0 @@
    14.4 -/*
    14.5 - * reserved comment block
    14.6 - * DO NOT REMOVE OR ALTER!
    14.7 - */
    14.8 -/*
    14.9 - * Copyright 1999-2004 The Apache Software Foundation.
   14.10 - *
   14.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   14.12 - * you may not use this file except in compliance with the License.
   14.13 - * You may obtain a copy of the License at
   14.14 - *
   14.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   14.16 - *
   14.17 - * Unless required by applicable law or agreed to in writing, software
   14.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   14.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14.20 - * See the License for the specific language governing permissions and
   14.21 - * limitations under the License.
   14.22 - */
   14.23 -
   14.24 -package com.sun.org.apache.regexp.internal;
   14.25 -
   14.26 -import java.io.InputStream;
   14.27 -import java.io.IOException;
   14.28 -
   14.29 -/**
   14.30 - * Encapsulates java.io.InputStream as CharacterIterator.
   14.31 - *
   14.32 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   14.33 - */
   14.34 -public final class StreamCharacterIterator implements CharacterIterator
   14.35 -{
   14.36 -    /** Underlying is */
   14.37 -    private final InputStream is;
   14.38 -
   14.39 -    /** Buffer of read chars */
   14.40 -    private final StringBuffer buff;
   14.41 -
   14.42 -    /** read end? */
   14.43 -    private boolean closed;
   14.44 -
   14.45 -    /** @param is an InputStream, which is parsed */
   14.46 -    public StreamCharacterIterator(InputStream is)
   14.47 -    {
   14.48 -        this.is = is;
   14.49 -        this.buff = new StringBuffer(512);
   14.50 -        this.closed = false;
   14.51 -    }
   14.52 -
   14.53 -    /** @return a substring */
   14.54 -    public String substring(int beginIndex, int endIndex)
   14.55 -    {
   14.56 -        try
   14.57 -        {
   14.58 -            ensure(endIndex);
   14.59 -            return buff.toString().substring(beginIndex, endIndex);
   14.60 -        }
   14.61 -        catch (IOException e)
   14.62 -        {
   14.63 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   14.64 -        }
   14.65 -    }
   14.66 -
   14.67 -    /** @return a substring */
   14.68 -    public String substring(int beginIndex)
   14.69 -    {
   14.70 -        try
   14.71 -        {
   14.72 -            readAll();
   14.73 -            return buff.toString().substring(beginIndex);
   14.74 -        }
   14.75 -        catch (IOException e)
   14.76 -        {
   14.77 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   14.78 -        }
   14.79 -    }
   14.80 -
   14.81 -
   14.82 -    /** @return a character at the specified position. */
   14.83 -    public char charAt(int pos)
   14.84 -    {
   14.85 -        try
   14.86 -        {
   14.87 -            ensure(pos);
   14.88 -            return buff.charAt(pos);
   14.89 -        }
   14.90 -        catch (IOException e)
   14.91 -        {
   14.92 -            throw new StringIndexOutOfBoundsException(e.getMessage());
   14.93 -        }
   14.94 -    }
   14.95 -
   14.96 -    /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
   14.97 -    public boolean isEnd(int pos)
   14.98 -    {
   14.99 -        if (buff.length() > pos)
  14.100 -        {
  14.101 -            return false;
  14.102 -        }
  14.103 -        else
  14.104 -        {
  14.105 -            try
  14.106 -            {
  14.107 -                ensure(pos);
  14.108 -                return (buff.length() <= pos);
  14.109 -            }
  14.110 -            catch (IOException e)
  14.111 -            {
  14.112 -                throw new StringIndexOutOfBoundsException(e.getMessage());
  14.113 -            }
  14.114 -        }
  14.115 -    }
  14.116 -
  14.117 -    /** Reads n characters from the stream and appends them to the buffer */
  14.118 -    private int read(int n) throws IOException
  14.119 -    {
  14.120 -        if (closed)
  14.121 -        {
  14.122 -            return 0;
  14.123 -        }
  14.124 -
  14.125 -        int c;
  14.126 -        int i = n;
  14.127 -        while (--i >= 0)
  14.128 -        {
  14.129 -            c = is.read();
  14.130 -            if (c < 0) // EOF
  14.131 -            {
  14.132 -                closed = true;
  14.133 -                break;
  14.134 -            }
  14.135 -            buff.append((char) c);
  14.136 -        }
  14.137 -        return n - i;
  14.138 -    }
  14.139 -
  14.140 -    /** Reads rest of the stream. */
  14.141 -    private void readAll() throws IOException
  14.142 -    {
  14.143 -        while(! closed)
  14.144 -        {
  14.145 -            read(1000);
  14.146 -        }
  14.147 -    }
  14.148 -
  14.149 -    /** Reads chars up to the idx */
  14.150 -    private void ensure(int idx) throws IOException
  14.151 -    {
  14.152 -        if (closed)
  14.153 -        {
  14.154 -            return;
  14.155 -        }
  14.156 -
  14.157 -        if (idx < buff.length())
  14.158 -        {
  14.159 -            return;
  14.160 -        }
  14.161 -
  14.162 -        read(idx + 1 - buff.length());
  14.163 -    }
  14.164 -}
    15.1 --- a/src/com/sun/org/apache/regexp/internal/StringCharacterIterator.java	Sat Oct 24 16:18:47 2020 +0800
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,62 +0,0 @@
    15.4 -/*
    15.5 - * reserved comment block
    15.6 - * DO NOT REMOVE OR ALTER!
    15.7 - */
    15.8 -/*
    15.9 - * Copyright 1999-2004 The Apache Software Foundation.
   15.10 - *
   15.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   15.12 - * you may not use this file except in compliance with the License.
   15.13 - * You may obtain a copy of the License at
   15.14 - *
   15.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   15.16 - *
   15.17 - * Unless required by applicable law or agreed to in writing, software
   15.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   15.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   15.20 - * See the License for the specific language governing permissions and
   15.21 - * limitations under the License.
   15.22 - */
   15.23 -
   15.24 -package com.sun.org.apache.regexp.internal;
   15.25 -
   15.26 -/**
   15.27 - * Encapsulates String as CharacterIterator.
   15.28 - *
   15.29 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a>
   15.30 - */
   15.31 -public final class StringCharacterIterator implements CharacterIterator
   15.32 -{
   15.33 -    /** encapsulated */
   15.34 -    private final String src;
   15.35 -
   15.36 -    /** @param src - encapsulated String */
   15.37 -    public StringCharacterIterator(String src)
   15.38 -    {
   15.39 -        this.src = src;
   15.40 -    }
   15.41 -
   15.42 -    /** @return a substring */
   15.43 -    public String substring(int beginIndex, int endIndex)
   15.44 -    {
   15.45 -        return src.substring(beginIndex, endIndex);
   15.46 -    }
   15.47 -
   15.48 -    /** @return a substring */
   15.49 -    public String substring(int beginIndex)
   15.50 -    {
   15.51 -        return src.substring(beginIndex);
   15.52 -    }
   15.53 -
   15.54 -    /** @return a character at the specified position. */
   15.55 -    public char charAt(int pos)
   15.56 -    {
   15.57 -        return src.charAt(pos);
   15.58 -    }
   15.59 -
   15.60 -    /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */
   15.61 -    public boolean isEnd(int pos)
   15.62 -    {
   15.63 -        return (pos >= src.length());
   15.64 -    }
   15.65 -}
    16.1 --- a/src/com/sun/org/apache/regexp/internal/recompile.java	Sat Oct 24 16:18:47 2020 +0800
    16.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.3 @@ -1,137 +0,0 @@
    16.4 -/*
    16.5 - * reserved comment block
    16.6 - * DO NOT REMOVE OR ALTER!
    16.7 - */
    16.8 -/*
    16.9 - * Copyright 1999-2004 The Apache Software Foundation.
   16.10 - *
   16.11 - * Licensed under the Apache License, Version 2.0 (the "License");
   16.12 - * you may not use this file except in compliance with the License.
   16.13 - * You may obtain a copy of the License at
   16.14 - *
   16.15 - *     http://www.apache.org/licenses/LICENSE-2.0
   16.16 - *
   16.17 - * Unless required by applicable law or agreed to in writing, software
   16.18 - * distributed under the License is distributed on an "AS IS" BASIS,
   16.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16.20 - * See the License for the specific language governing permissions and
   16.21 - * limitations under the License.
   16.22 - */
   16.23 -
   16.24 -package com.sun.org.apache.regexp.internal;
   16.25 -
   16.26 -import com.sun.org.apache.regexp.internal.RECompiler;
   16.27 -import com.sun.org.apache.regexp.internal.RESyntaxException;
   16.28 -
   16.29 -/**
   16.30 - * 'recompile' is a command line tool that pre-compiles one or more regular expressions
   16.31 - * for use with the regular expression matcher class 'RE'.  For example, the command
   16.32 - * "java recompile a*b" produces output like this:
   16.33 - *
   16.34 - * <pre>
   16.35 - *
   16.36 - *    // Pre-compiled regular expression "a*b"
   16.37 - *    char[] re1Instructions =
   16.38 - *    {
   16.39 - *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
   16.40 - *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
   16.41 - *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
   16.42 - *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
   16.43 - *        0x0000,
   16.44 - *    };
   16.45 - *
   16.46 - *    REProgram re1 = new REProgram(re1Instructions);
   16.47 - *
   16.48 - * </pre>
   16.49 - *
   16.50 - * By pasting this output into your code, you can construct a regular expression matcher
   16.51 - * (RE) object directly from the pre-compiled data (the character array re1), thus avoiding
   16.52 - * the overhead of compiling the expression at runtime.  For example:
   16.53 - *
   16.54 - * <pre>
   16.55 - *
   16.56 - *    RE r = new RE(re1);
   16.57 - *
   16.58 - * </pre>
   16.59 - *
   16.60 - * @see RE
   16.61 - * @see RECompiler
   16.62 - *
   16.63 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   16.64 - */
   16.65 -public class recompile
   16.66 -{
   16.67 -    /**
   16.68 -     * Main application entrypoint.
   16.69 -     * @param arg Command line arguments
   16.70 -     */
   16.71 -    static public void main(String[] arg)
   16.72 -    {
   16.73 -        // Create a compiler object
   16.74 -        RECompiler r = new RECompiler();
   16.75 -
   16.76 -        // Print usage if arguments are incorrect
   16.77 -        if (arg.length <= 0 || arg.length % 2 != 0)
   16.78 -        {
   16.79 -            System.out.println("Usage: recompile <patternname> <pattern>");
   16.80 -            System.exit(0);
   16.81 -        }
   16.82 -
   16.83 -        // Loop through arguments, compiling each
   16.84 -        for (int i = 0; i < arg.length; i += 2)
   16.85 -        {
   16.86 -            try
   16.87 -            {
   16.88 -                // Compile regular expression
   16.89 -                String name         = arg[i];
   16.90 -                String pattern      = arg[i+1];
   16.91 -                String instructions = name + "PatternInstructions";
   16.92 -
   16.93 -                // Output program as a nice, formatted character array
   16.94 -                System.out.print("\n    // Pre-compiled regular expression '" + pattern + "'\n"
   16.95 -                                 + "    private static char[] " + instructions + " = \n    {");
   16.96 -
   16.97 -                // Compile program for pattern
   16.98 -                REProgram program = r.compile(pattern);
   16.99 -
  16.100 -                // Number of columns in output
  16.101 -                int numColumns = 7;
  16.102 -
  16.103 -                // Loop through program
  16.104 -                char[] p = program.getInstructions();
  16.105 -                for (int j = 0; j < p.length; j++)
  16.106 -                {
  16.107 -                    // End of column?
  16.108 -                    if ((j % numColumns) == 0)
  16.109 -                    {
  16.110 -                        System.out.print("\n        ");
  16.111 -                    }
  16.112 -
  16.113 -                    // Print character as padded hex number
  16.114 -                    String hex = Integer.toHexString(p[j]);
  16.115 -                    while (hex.length() < 4)
  16.116 -                    {
  16.117 -                        hex = "0" + hex;
  16.118 -                    }
  16.119 -                    System.out.print("0x" + hex + ", ");
  16.120 -                }
  16.121 -
  16.122 -                // End of program block
  16.123 -                System.out.println("\n    };");
  16.124 -                System.out.println("\n    private static RE " + name + "Pattern = new RE(new REProgram(" + instructions + "));");
  16.125 -            }
  16.126 -            catch (RESyntaxException e)
  16.127 -            {
  16.128 -                System.out.println("Syntax error in expression \"" + arg[i] + "\": " + e.toString());
  16.129 -            }
  16.130 -            catch (Exception e)
  16.131 -            {
  16.132 -                System.out.println("Unexpected exception: " + e.toString());
  16.133 -            }
  16.134 -            catch (Error e)
  16.135 -            {
  16.136 -                System.out.println("Internal error: " + e.toString());
  16.137 -            }
  16.138 -        }
  16.139 -    }
  16.140 -}

mercurial