src/com/sun/org/apache/regexp/internal/RE.java

changeset 2116
aaee9ae4799a
parent 2090
3b8ebb957957
parent 2115
ba503169016f
child 2117
a5f920b6d2b5
     1.1 --- a/src/com/sun/org/apache/regexp/internal/RE.java	Sat Oct 24 16:18:47 2020 +0800
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,1760 +0,0 @@
     1.4 -/*
     1.5 - * reserved comment block
     1.6 - * DO NOT REMOVE OR ALTER!
     1.7 - */
     1.8 -/*
     1.9 - * Copyright 1999-2004 The Apache Software Foundation.
    1.10 - *
    1.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    1.12 - * you may not use this file except in compliance with the License.
    1.13 - * You may obtain a copy of the License at
    1.14 - *
    1.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    1.16 - *
    1.17 - * Unless required by applicable law or agreed to in writing, software
    1.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    1.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.20 - * See the License for the specific language governing permissions and
    1.21 - * limitations under the License.
    1.22 - */
    1.23 -
    1.24 -package com.sun.org.apache.regexp.internal;
    1.25 -
    1.26 -import java.io.Serializable;
    1.27 -import java.util.Vector;
    1.28 -
    1.29 -/**
    1.30 - * RE is an efficient, lightweight regular expression evaluator/matcher
    1.31 - * class. Regular expressions are pattern descriptions which enable
    1.32 - * sophisticated matching of strings.  In addition to being able to
    1.33 - * match a string against a pattern, you can also extract parts of the
    1.34 - * match.  This is especially useful in text parsing! Details on the
    1.35 - * syntax of regular expression patterns are given below.
    1.36 - *
    1.37 - * <p>
    1.38 - * To compile a regular expression (RE), you can simply construct an RE
    1.39 - * matcher object from the string specification of the pattern, like this:
    1.40 - *
    1.41 - * <pre>
    1.42 - *  RE r = new RE("a*b");
    1.43 - * </pre>
    1.44 - *
    1.45 - * <p>
    1.46 - * Once you have done this, you can call either of the RE.match methods to
    1.47 - * perform matching on a String.  For example:
    1.48 - *
    1.49 - * <pre>
    1.50 - *  boolean matched = r.match("aaaab");
    1.51 - * </pre>
    1.52 - *
    1.53 - * will cause the boolean matched to be set to true because the
    1.54 - * pattern "a*b" matches the string "aaaab".
    1.55 - *
    1.56 - * <p>
    1.57 - * If you were interested in the <i>number</i> of a's which matched the
    1.58 - * first part of our example expression, you could change the expression to
    1.59 - * "(a*)b".  Then when you compiled the expression and matched it against
    1.60 - * something like "xaaaab", you would get results like this:
    1.61 - *
    1.62 - * <pre>
    1.63 - *  RE r = new RE("(a*)b");                  // Compile expression
    1.64 - *  boolean matched = r.match("xaaaab");     // Match against "xaaaab"
    1.65 - *
    1.66 - *  String wholeExpr = r.getParen(0);        // wholeExpr will be 'aaaab'
    1.67 - *  String insideParens = r.getParen(1);     // insideParens will be 'aaaa'
    1.68 - *
    1.69 - *  int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
    1.70 - *  int endWholeExpr = r.getParenEnd(0);     // endWholeExpr will be index 6
    1.71 - *  int lenWholeExpr = r.getParenLength(0);  // lenWholeExpr will be 5
    1.72 - *
    1.73 - *  int startInside = r.getParenStart(1);    // startInside will be index 1
    1.74 - *  int endInside = r.getParenEnd(1);        // endInside will be index 5
    1.75 - *  int lenInside = r.getParenLength(1);     // lenInside will be 4
    1.76 - * </pre>
    1.77 - *
    1.78 - * You can also refer to the contents of a parenthesized expression
    1.79 - * within a regular expression itself.  This is called a
    1.80 - * 'backreference'.  The first backreference in a regular expression is
    1.81 - * denoted by \1, the second by \2 and so on.  So the expression:
    1.82 - *
    1.83 - * <pre>
    1.84 - *  ([0-9]+)=\1
    1.85 - * </pre>
    1.86 - *
    1.87 - * will match any string of the form n=n (like 0=0 or 2=2).
    1.88 - *
    1.89 - * <p>
    1.90 - * The full regular expression syntax accepted by RE is described here:
    1.91 - *
    1.92 - * <pre>
    1.93 - *
    1.94 - *  <b><font face=times roman>Characters</font></b>
    1.95 - *
    1.96 - *    <i>unicodeChar</i>   Matches any identical unicode character
    1.97 - *    \                    Used to quote a meta-character (like '*')
    1.98 - *    \\                   Matches a single '\' character
    1.99 - *    \0nnn                Matches a given octal character
   1.100 - *    \xhh                 Matches a given 8-bit hexadecimal character
   1.101 - *    \\uhhhh              Matches a given 16-bit hexadecimal character
   1.102 - *    \t                   Matches an ASCII tab character
   1.103 - *    \n                   Matches an ASCII newline character
   1.104 - *    \r                   Matches an ASCII return character
   1.105 - *    \f                   Matches an ASCII form feed character
   1.106 - *
   1.107 - *
   1.108 - *  <b><font face=times roman>Character Classes</font></b>
   1.109 - *
   1.110 - *    [abc]                Simple character class
   1.111 - *    [a-zA-Z]             Character class with ranges
   1.112 - *    [^abc]               Negated character class
   1.113 - * </pre>
   1.114 - *
   1.115 - * <b>NOTE:</b> Incomplete ranges will be interpreted as &quot;starts
   1.116 - * from zero&quot; or &quot;ends with last character&quot;.
   1.117 - * <br>
   1.118 - * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF],
   1.119 - * [-] means &quot;all characters&quot;.
   1.120 - *
   1.121 - * <pre>
   1.122 - *
   1.123 - *  <b><font face=times roman>Standard POSIX Character Classes</font></b>
   1.124 - *
   1.125 - *    [:alnum:]            Alphanumeric characters.
   1.126 - *    [:alpha:]            Alphabetic characters.
   1.127 - *    [:blank:]            Space and tab characters.
   1.128 - *    [:cntrl:]            Control characters.
   1.129 - *    [:digit:]            Numeric characters.
   1.130 - *    [:graph:]            Characters that are printable and are also visible.
   1.131 - *                         (A space is printable, but not visible, while an
   1.132 - *                         `a' is both.)
   1.133 - *    [:lower:]            Lower-case alphabetic characters.
   1.134 - *    [:print:]            Printable characters (characters that are not
   1.135 - *                         control characters.)
   1.136 - *    [:punct:]            Punctuation characters (characters that are not letter,
   1.137 - *                         digits, control characters, or space characters).
   1.138 - *    [:space:]            Space characters (such as space, tab, and formfeed,
   1.139 - *                         to name a few).
   1.140 - *    [:upper:]            Upper-case alphabetic characters.
   1.141 - *    [:xdigit:]           Characters that are hexadecimal digits.
   1.142 - *
   1.143 - *
   1.144 - *  <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b>
   1.145 - *
   1.146 - *    [:javastart:]        Start of a Java identifier
   1.147 - *    [:javapart:]         Part of a Java identifier
   1.148 - *
   1.149 - *
   1.150 - *  <b><font face=times roman>Predefined Classes</font></b>
   1.151 - *
   1.152 - *    .         Matches any character other than newline
   1.153 - *    \w        Matches a "word" character (alphanumeric plus "_")
   1.154 - *    \W        Matches a non-word character
   1.155 - *    \s        Matches a whitespace character
   1.156 - *    \S        Matches a non-whitespace character
   1.157 - *    \d        Matches a digit character
   1.158 - *    \D        Matches a non-digit character
   1.159 - *
   1.160 - *
   1.161 - *  <b><font face=times roman>Boundary Matchers</font></b>
   1.162 - *
   1.163 - *    ^         Matches only at the beginning of a line
   1.164 - *    $         Matches only at the end of a line
   1.165 - *    \b        Matches only at a word boundary
   1.166 - *    \B        Matches only at a non-word boundary
   1.167 - *
   1.168 - *
   1.169 - *  <b><font face=times roman>Greedy Closures</font></b>
   1.170 - *
   1.171 - *    A*        Matches A 0 or more times (greedy)
   1.172 - *    A+        Matches A 1 or more times (greedy)
   1.173 - *    A?        Matches A 1 or 0 times (greedy)
   1.174 - *    A{n}      Matches A exactly n times (greedy)
   1.175 - *    A{n,}     Matches A at least n times (greedy)
   1.176 - *    A{n,m}    Matches A at least n but not more than m times (greedy)
   1.177 - *
   1.178 - *
   1.179 - *  <b><font face=times roman>Reluctant Closures</font></b>
   1.180 - *
   1.181 - *    A*?       Matches A 0 or more times (reluctant)
   1.182 - *    A+?       Matches A 1 or more times (reluctant)
   1.183 - *    A??       Matches A 0 or 1 times (reluctant)
   1.184 - *
   1.185 - *
   1.186 - *  <b><font face=times roman>Logical Operators</font></b>
   1.187 - *
   1.188 - *    AB        Matches A followed by B
   1.189 - *    A|B       Matches either A or B
   1.190 - *    (A)       Used for subexpression grouping
   1.191 - *   (?:A)      Used for subexpression clustering (just like grouping but
   1.192 - *              no backrefs)
   1.193 - *
   1.194 - *
   1.195 - *  <b><font face=times roman>Backreferences</font></b>
   1.196 - *
   1.197 - *    \1    Backreference to 1st parenthesized subexpression
   1.198 - *    \2    Backreference to 2nd parenthesized subexpression
   1.199 - *    \3    Backreference to 3rd parenthesized subexpression
   1.200 - *    \4    Backreference to 4th parenthesized subexpression
   1.201 - *    \5    Backreference to 5th parenthesized subexpression
   1.202 - *    \6    Backreference to 6th parenthesized subexpression
   1.203 - *    \7    Backreference to 7th parenthesized subexpression
   1.204 - *    \8    Backreference to 8th parenthesized subexpression
   1.205 - *    \9    Backreference to 9th parenthesized subexpression
   1.206 - * </pre>
   1.207 - *
   1.208 - * <p>
   1.209 - * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning
   1.210 - * that they match as many elements of the string as possible without
   1.211 - * causing the overall match to fail.  If you want a closure to be
   1.212 - * reluctant (non-greedy), you can simply follow it with a '?'.  A
   1.213 - * reluctant closure will match as few elements of the string as
   1.214 - * possible when finding matches.  {m,n} closures don't currently
   1.215 - * support reluctancy.
   1.216 - *
   1.217 - * <p>
   1.218 - * <b><font face="times roman">Line terminators</font></b>
   1.219 - * <br>
   1.220 - * A line terminator is a one- or two-character sequence that marks
   1.221 - * the end of a line of the input character sequence. The following
   1.222 - * are recognized as line terminators:
   1.223 - * <ul>
   1.224 - * <li>A newline (line feed) character ('\n'),</li>
   1.225 - * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
   1.226 - * <li>A standalone carriage-return character ('\r'),</li>
   1.227 - * <li>A next-line character ('\u0085'),</li>
   1.228 - * <li>A line-separator character ('\u2028'), or</li>
   1.229 - * <li>A paragraph-separator character ('\u2029).</li>
   1.230 - * </ul>
   1.231 - *
   1.232 - * <p>
   1.233 - * RE runs programs compiled by the RECompiler class.  But the RE
   1.234 - * matcher class does not include the actual regular expression compiler
   1.235 - * for reasons of efficiency.  In fact, if you want to pre-compile one
   1.236 - * or more regular expressions, the 'recompile' class can be invoked
   1.237 - * from the command line to produce compiled output like this:
   1.238 - *
   1.239 - * <pre>
   1.240 - *    // Pre-compiled regular expression "a*b"
   1.241 - *    char[] re1Instructions =
   1.242 - *    {
   1.243 - *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
   1.244 - *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
   1.245 - *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
   1.246 - *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
   1.247 - *        0x0000,
   1.248 - *    };
   1.249 - *
   1.250 - *
   1.251 - *    REProgram re1 = new REProgram(re1Instructions);
   1.252 - * </pre>
   1.253 - *
   1.254 - * You can then construct a regular expression matcher (RE) object from
   1.255 - * the pre-compiled expression re1 and thus avoid the overhead of
   1.256 - * compiling the expression at runtime. If you require more dynamic
   1.257 - * regular expressions, you can construct a single RECompiler object and
   1.258 - * re-use it to compile each expression. Similarly, you can change the
   1.259 - * program run by a given matcher object at any time. However, RE and
   1.260 - * RECompiler are not threadsafe (for efficiency reasons, and because
   1.261 - * requiring thread safety in this class is deemed to be a rare
   1.262 - * requirement), so you will need to construct a separate compiler or
   1.263 - * matcher object for each thread (unless you do thread synchronization
   1.264 - * yourself). Once expression compiled into the REProgram object, REProgram
   1.265 - * can be safely shared across multiple threads and RE objects.
   1.266 - *
   1.267 - * <br><p><br>
   1.268 - *
   1.269 - * <font color="red">
   1.270 - * <i>ISSUES:</i>
   1.271 - *
   1.272 - * <ul>
   1.273 - *  <li>com.weusours.util.re is not currently compatible with all
   1.274 - *      standard POSIX regcomp flags</li>
   1.275 - *  <li>com.weusours.util.re does not support POSIX equivalence classes
   1.276 - *      ([=foo=] syntax) (I18N/locale issue)</li>
   1.277 - *  <li>com.weusours.util.re does not support nested POSIX character
   1.278 - *      classes (definitely should, but not completely trivial)</li>
   1.279 - *  <li>com.weusours.util.re Does not support POSIX character collation
   1.280 - *      concepts ([.foo.] syntax) (I18N/locale issue)</li>
   1.281 - *  <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li>
   1.282 - *  <li>Should RE support character iterators (for backwards RE matching!)?</li>
   1.283 - *  <li>Should RE support reluctant {m,n} closures (does anyone care)?</li>
   1.284 - *  <li>Not *all* possibilities are considered for greediness when backreferences
   1.285 - *      are involved (as POSIX suggests should be the case).  The POSIX RE
   1.286 - *      "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
   1.287 - *      of acdacaa where \1 is "a".  This is not the case in this RE package,
   1.288 - *      and actually Perl doesn't go to this extent either!  Until someone
   1.289 - *      actually complains about this, I'm not sure it's worth "fixing".
   1.290 - *      If it ever is fixed, test #137 in RETest.txt should be updated.</li>
   1.291 - * </ul>
   1.292 - *
   1.293 - * </font>
   1.294 - *
   1.295 - * @see recompile
   1.296 - * @see RECompiler
   1.297 - *
   1.298 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
   1.299 - * @author <a href="mailto:ts@sch-fer.de">Tobias Sch&auml;fer</a>
   1.300 - */
   1.301 -public class RE implements Serializable
   1.302 -{
   1.303 -    /**
   1.304 -     * Specifies normal, case-sensitive matching behaviour.
   1.305 -     */
   1.306 -    public static final int MATCH_NORMAL          = 0x0000;
   1.307 -
   1.308 -    /**
   1.309 -     * Flag to indicate that matching should be case-independent (folded)
   1.310 -     */
   1.311 -    public static final int MATCH_CASEINDEPENDENT = 0x0001;
   1.312 -
   1.313 -    /**
   1.314 -     * Newlines should match as BOL/EOL (^ and $)
   1.315 -     */
   1.316 -    public static final int MATCH_MULTILINE       = 0x0002;
   1.317 -
   1.318 -    /**
   1.319 -     * Consider all input a single body of text - newlines are matched by .
   1.320 -     */
   1.321 -    public static final int MATCH_SINGLELINE      = 0x0004;
   1.322 -
   1.323 -    /************************************************
   1.324 -     *                                              *
   1.325 -     * The format of a node in a program is:        *
   1.326 -     *                                              *
   1.327 -     * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] *
   1.328 -     *                                              *
   1.329 -     * char OPCODE - instruction                    *
   1.330 -     * char OPDATA - modifying data                 *
   1.331 -     * char OPNEXT - next node (relative offset)    *
   1.332 -     *                                              *
   1.333 -     ************************************************/
   1.334 -
   1.335 -                 //   Opcode              Char       Opdata/Operand  Meaning
   1.336 -                 //   ----------          ---------- --------------- --------------------------------------------------
   1.337 -    static final char OP_END              = 'E';  //                 end of program
   1.338 -    static final char OP_BOL              = '^';  //                 match only if at beginning of line
   1.339 -    static final char OP_EOL              = '$';  //                 match only if at end of line
   1.340 -    static final char OP_ANY              = '.';  //                 match any single character except newline
   1.341 -    static final char OP_ANYOF            = '[';  // count/ranges    match any char in the list of ranges
   1.342 -    static final char OP_BRANCH           = '|';  // node            match this alternative or the next one
   1.343 -    static final char OP_ATOM             = 'A';  // length/string   length of string followed by string itself
   1.344 -    static final char OP_STAR             = '*';  // node            kleene closure
   1.345 -    static final char OP_PLUS             = '+';  // node            positive closure
   1.346 -    static final char OP_MAYBE            = '?';  // node            optional closure
   1.347 -    static final char OP_ESCAPE           = '\\'; // escape          special escape code char class (escape is E_* code)
   1.348 -    static final char OP_OPEN             = '(';  // number          nth opening paren
   1.349 -    static final char OP_OPEN_CLUSTER     = '<';  //                 opening cluster
   1.350 -    static final char OP_CLOSE            = ')';  // number          nth closing paren
   1.351 -    static final char OP_CLOSE_CLUSTER    = '>';  //                 closing cluster
   1.352 -    static final char OP_BACKREF          = '#';  // number          reference nth already matched parenthesized string
   1.353 -    static final char OP_GOTO             = 'G';  //                 nothing but a (back-)pointer
   1.354 -    static final char OP_NOTHING          = 'N';  //                 match null string such as in '(a|)'
   1.355 -    static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant '*' (mnemonic for char is unshifted '*')
   1.356 -    static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant '+' (mnemonic for char is unshifted '+')
   1.357 -    static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant '?' (mnemonic for char is unshifted '?')
   1.358 -    static final char OP_POSIXCLASS       = 'P';  // classid         one of the posix character classes
   1.359 -
   1.360 -    // Escape codes
   1.361 -    static final char E_ALNUM             = 'w';  // Alphanumeric
   1.362 -    static final char E_NALNUM            = 'W';  // Non-alphanumeric
   1.363 -    static final char E_BOUND             = 'b';  // Word boundary
   1.364 -    static final char E_NBOUND            = 'B';  // Non-word boundary
   1.365 -    static final char E_SPACE             = 's';  // Whitespace
   1.366 -    static final char E_NSPACE            = 'S';  // Non-whitespace
   1.367 -    static final char E_DIGIT             = 'd';  // Digit
   1.368 -    static final char E_NDIGIT            = 'D';  // Non-digit
   1.369 -
   1.370 -    // Posix character classes
   1.371 -    static final char POSIX_CLASS_ALNUM   = 'w';  // Alphanumerics
   1.372 -    static final char POSIX_CLASS_ALPHA   = 'a';  // Alphabetics
   1.373 -    static final char POSIX_CLASS_BLANK   = 'b';  // Blanks
   1.374 -    static final char POSIX_CLASS_CNTRL   = 'c';  // Control characters
   1.375 -    static final char POSIX_CLASS_DIGIT   = 'd';  // Digits
   1.376 -    static final char POSIX_CLASS_GRAPH   = 'g';  // Graphic characters
   1.377 -    static final char POSIX_CLASS_LOWER   = 'l';  // Lowercase characters
   1.378 -    static final char POSIX_CLASS_PRINT   = 'p';  // Printable characters
   1.379 -    static final char POSIX_CLASS_PUNCT   = '!';  // Punctuation
   1.380 -    static final char POSIX_CLASS_SPACE   = 's';  // Spaces
   1.381 -    static final char POSIX_CLASS_UPPER   = 'u';  // Uppercase characters
   1.382 -    static final char POSIX_CLASS_XDIGIT  = 'x';  // Hexadecimal digits
   1.383 -    static final char POSIX_CLASS_JSTART  = 'j';  // Java identifier start
   1.384 -    static final char POSIX_CLASS_JPART   = 'k';  // Java identifier part
   1.385 -
   1.386 -    // Limits
   1.387 -    static final int maxNode  = 65536;            // Maximum number of nodes in a program
   1.388 -    static final int MAX_PAREN = 16;              // Number of paren pairs (only 9 can be backrefs)
   1.389 -
   1.390 -    // Node layout constants
   1.391 -    static final int offsetOpcode = 0;            // Opcode offset (first character)
   1.392 -    static final int offsetOpdata = 1;            // Opdata offset (second char)
   1.393 -    static final int offsetNext   = 2;            // Next index offset (third char)
   1.394 -    static final int nodeSize     = 3;            // Node size (in chars)
   1.395 -
   1.396 -    // State of current program
   1.397 -    REProgram program;                            // Compiled regular expression 'program'
   1.398 -    transient CharacterIterator search;           // The string being matched against
   1.399 -    int matchFlags;                               // Match behaviour flags
   1.400 -    int maxParen = MAX_PAREN;
   1.401 -
   1.402 -    // Parenthesized subexpressions
   1.403 -    transient int parenCount;                     // Number of subexpressions matched (num open parens + 1)
   1.404 -    transient int start0;                         // Cache of start[0]
   1.405 -    transient int end0;                           // Cache of start[0]
   1.406 -    transient int start1;                         // Cache of start[1]
   1.407 -    transient int end1;                           // Cache of start[1]
   1.408 -    transient int start2;                         // Cache of start[2]
   1.409 -    transient int end2;                           // Cache of start[2]
   1.410 -    transient int[] startn;                       // Lazy-alloced array of sub-expression starts
   1.411 -    transient int[] endn;                         // Lazy-alloced array of sub-expression ends
   1.412 -
   1.413 -    // Backreferences
   1.414 -    transient int[] startBackref;                 // Lazy-alloced array of backref starts
   1.415 -    transient int[] endBackref;                   // Lazy-alloced array of backref ends
   1.416 -
   1.417 -    /**
   1.418 -     * Constructs a regular expression matcher from a String by compiling it
   1.419 -     * using a new instance of RECompiler.  If you will be compiling many
   1.420 -     * expressions, you may prefer to use a single RECompiler object instead.
   1.421 -     *
   1.422 -     * @param pattern The regular expression pattern to compile.
   1.423 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   1.424 -     * @see RECompiler
   1.425 -     * @see recompile
   1.426 -     */
   1.427 -    public RE(String pattern) throws RESyntaxException
   1.428 -    {
   1.429 -        this(pattern, MATCH_NORMAL);
   1.430 -    }
   1.431 -
   1.432 -    /**
   1.433 -     * Constructs a regular expression matcher from a String by compiling it
   1.434 -     * using a new instance of RECompiler.  If you will be compiling many
   1.435 -     * expressions, you may prefer to use a single RECompiler object instead.
   1.436 -     *
   1.437 -     * @param pattern The regular expression pattern to compile.
   1.438 -     * @param matchFlags The matching style
   1.439 -     * @exception RESyntaxException Thrown if the regular expression has invalid syntax.
   1.440 -     * @see RECompiler
   1.441 -     * @see recompile
   1.442 -     */
   1.443 -    public RE(String pattern, int matchFlags) throws RESyntaxException
   1.444 -    {
   1.445 -        this(new RECompiler().compile(pattern));
   1.446 -        setMatchFlags(matchFlags);
   1.447 -    }
   1.448 -
   1.449 -    /**
   1.450 -     * Construct a matcher for a pre-compiled regular expression from program
   1.451 -     * (bytecode) data.  Permits special flags to be passed in to modify matching
   1.452 -     * behaviour.
   1.453 -     *
   1.454 -     * @param program Compiled regular expression program (see RECompiler and/or recompile)
   1.455 -     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
   1.456 -     *
   1.457 -     * <pre>
   1.458 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   1.459 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   1.460 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   1.461 -     * </pre>
   1.462 -     *
   1.463 -     * @see RECompiler
   1.464 -     * @see REProgram
   1.465 -     * @see recompile
   1.466 -     */
   1.467 -    public RE(REProgram program, int matchFlags)
   1.468 -    {
   1.469 -        setProgram(program);
   1.470 -        setMatchFlags(matchFlags);
   1.471 -    }
   1.472 -
   1.473 -    /**
   1.474 -     * Construct a matcher for a pre-compiled regular expression from program
   1.475 -     * (bytecode) data.
   1.476 -     *
   1.477 -     * @param program Compiled regular expression program
   1.478 -     * @see RECompiler
   1.479 -     * @see recompile
   1.480 -     */
   1.481 -    public RE(REProgram program)
   1.482 -    {
   1.483 -        this(program, MATCH_NORMAL);
   1.484 -    }
   1.485 -
   1.486 -    /**
   1.487 -     * Constructs a regular expression matcher with no initial program.
   1.488 -     * This is likely to be an uncommon practice, but is still supported.
   1.489 -     */
   1.490 -    public RE()
   1.491 -    {
   1.492 -        this((REProgram)null, MATCH_NORMAL);
   1.493 -    }
   1.494 -
   1.495 -    /**
   1.496 -     * Converts a 'simplified' regular expression to a full regular expression
   1.497 -     *
   1.498 -     * @param pattern The pattern to convert
   1.499 -     * @return The full regular expression
   1.500 -     */
   1.501 -    public static String simplePatternToFullRegularExpression(String pattern)
   1.502 -    {
   1.503 -        StringBuffer buf = new StringBuffer();
   1.504 -        for (int i = 0; i < pattern.length(); i++)
   1.505 -        {
   1.506 -            char c = pattern.charAt(i);
   1.507 -            switch (c)
   1.508 -            {
   1.509 -                case '*':
   1.510 -                    buf.append(".*");
   1.511 -                    break;
   1.512 -
   1.513 -                case '.':
   1.514 -                case '[':
   1.515 -                case ']':
   1.516 -                case '\\':
   1.517 -                case '+':
   1.518 -                case '?':
   1.519 -                case '{':
   1.520 -                case '}':
   1.521 -                case '$':
   1.522 -                case '^':
   1.523 -                case '|':
   1.524 -                case '(':
   1.525 -                case ')':
   1.526 -                    buf.append('\\');
   1.527 -                default:
   1.528 -                    buf.append(c);
   1.529 -                    break;
   1.530 -            }
   1.531 -        }
   1.532 -        return buf.toString();
   1.533 -    }
   1.534 -
   1.535 -    /**
   1.536 -     * Sets match behaviour flags which alter the way RE does matching.
   1.537 -     * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*):
   1.538 -     *
   1.539 -     * <pre>
   1.540 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   1.541 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   1.542 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   1.543 -     * </pre>
   1.544 -     */
   1.545 -    public void setMatchFlags(int matchFlags)
   1.546 -    {
   1.547 -        this.matchFlags = matchFlags;
   1.548 -    }
   1.549 -
   1.550 -    /**
   1.551 -     * Returns the current match behaviour flags.
   1.552 -     * @return Current match behaviour flags (RE.MATCH_*).
   1.553 -     *
   1.554 -     * <pre>
   1.555 -     *   MATCH_NORMAL              // Normal (case-sensitive) matching
   1.556 -     *   MATCH_CASEINDEPENDENT     // Case folded comparisons
   1.557 -     *   MATCH_MULTILINE           // Newline matches as BOL/EOL
   1.558 -     * </pre>
   1.559 -     *
   1.560 -     * @see #setMatchFlags
   1.561 -     */
   1.562 -    public int getMatchFlags()
   1.563 -    {
   1.564 -        return matchFlags;
   1.565 -    }
   1.566 -
   1.567 -    /**
   1.568 -     * Sets the current regular expression program used by this matcher object.
   1.569 -     *
   1.570 -     * @param program Regular expression program compiled by RECompiler.
   1.571 -     * @see RECompiler
   1.572 -     * @see REProgram
   1.573 -     * @see recompile
   1.574 -     */
   1.575 -    public void setProgram(REProgram program)
   1.576 -    {
   1.577 -        this.program = program;
   1.578 -        if (program != null && program.maxParens != -1) {
   1.579 -            this.maxParen = program.maxParens;
   1.580 -        } else {
   1.581 -            this.maxParen = MAX_PAREN;
   1.582 -        }
   1.583 -    }
   1.584 -
   1.585 -    /**
   1.586 -     * Returns the current regular expression program in use by this matcher object.
   1.587 -     *
   1.588 -     * @return Regular expression program
   1.589 -     * @see #setProgram
   1.590 -     */
   1.591 -    public REProgram getProgram()
   1.592 -    {
   1.593 -        return program;
   1.594 -    }
   1.595 -
   1.596 -    /**
   1.597 -     * Returns the number of parenthesized subexpressions available after a successful match.
   1.598 -     *
   1.599 -     * @return Number of available parenthesized subexpressions
   1.600 -     */
   1.601 -    public int getParenCount()
   1.602 -    {
   1.603 -        return parenCount;
   1.604 -    }
   1.605 -
   1.606 -    /**
   1.607 -     * Gets the contents of a parenthesized subexpression after a successful match.
   1.608 -     *
   1.609 -     * @param which Nesting level of subexpression
   1.610 -     * @return String
   1.611 -     */
   1.612 -    public String getParen(int which)
   1.613 -    {
   1.614 -        int start;
   1.615 -        if (which < parenCount && (start = getParenStart(which)) >= 0)
   1.616 -        {
   1.617 -            return search.substring(start, getParenEnd(which));
   1.618 -        }
   1.619 -        return null;
   1.620 -    }
   1.621 -
   1.622 -    /**
   1.623 -     * Returns the start index of a given paren level.
   1.624 -     *
   1.625 -     * @param which Nesting level of subexpression
   1.626 -     * @return String index
   1.627 -     */
   1.628 -    public final int getParenStart(int which)
   1.629 -    {
   1.630 -        if (which < parenCount)
   1.631 -        {
   1.632 -            switch (which)
   1.633 -            {
   1.634 -                case 0:
   1.635 -                    return start0;
   1.636 -
   1.637 -                case 1:
   1.638 -                    return start1;
   1.639 -
   1.640 -                case 2:
   1.641 -                    return start2;
   1.642 -
   1.643 -                default:
   1.644 -                    if (startn == null)
   1.645 -                    {
   1.646 -                        allocParens();
   1.647 -                    }
   1.648 -                    return startn[which];
   1.649 -            }
   1.650 -        }
   1.651 -        return -1;
   1.652 -    }
   1.653 -
   1.654 -    /**
   1.655 -     * Returns the end index of a given paren level.
   1.656 -     *
   1.657 -     * @param which Nesting level of subexpression
   1.658 -     * @return String index
   1.659 -     */
   1.660 -    public final int getParenEnd(int which)
   1.661 -    {
   1.662 -        if (which < parenCount)
   1.663 -        {
   1.664 -            switch (which)
   1.665 -            {
   1.666 -                case 0:
   1.667 -                    return end0;
   1.668 -
   1.669 -                case 1:
   1.670 -                    return end1;
   1.671 -
   1.672 -                case 2:
   1.673 -                    return end2;
   1.674 -
   1.675 -                default:
   1.676 -                    if (endn == null)
   1.677 -                    {
   1.678 -                        allocParens();
   1.679 -                    }
   1.680 -                    return endn[which];
   1.681 -            }
   1.682 -        }
   1.683 -        return -1;
   1.684 -    }
   1.685 -
   1.686 -    /**
   1.687 -     * Returns the length of a given paren level.
   1.688 -     *
   1.689 -     * @param which Nesting level of subexpression
   1.690 -     * @return Number of characters in the parenthesized subexpression
   1.691 -     */
   1.692 -    public final int getParenLength(int which)
   1.693 -    {
   1.694 -        if (which < parenCount)
   1.695 -        {
   1.696 -            return getParenEnd(which) - getParenStart(which);
   1.697 -        }
   1.698 -        return -1;
   1.699 -    }
   1.700 -
   1.701 -    /**
   1.702 -     * Sets the start of a paren level
   1.703 -     *
   1.704 -     * @param which Which paren level
   1.705 -     * @param i Index in input array
   1.706 -     */
   1.707 -    protected final void setParenStart(int which, int i)
   1.708 -    {
   1.709 -        if (which < parenCount)
   1.710 -        {
   1.711 -            switch (which)
   1.712 -            {
   1.713 -                case 0:
   1.714 -                    start0 = i;
   1.715 -                    break;
   1.716 -
   1.717 -                case 1:
   1.718 -                    start1 = i;
   1.719 -                    break;
   1.720 -
   1.721 -                case 2:
   1.722 -                    start2 = i;
   1.723 -                    break;
   1.724 -
   1.725 -                default:
   1.726 -                    if (startn == null)
   1.727 -                    {
   1.728 -                        allocParens();
   1.729 -                    }
   1.730 -                    startn[which] = i;
   1.731 -                    break;
   1.732 -            }
   1.733 -        }
   1.734 -    }
   1.735 -
   1.736 -    /**
   1.737 -     * Sets the end of a paren level
   1.738 -     *
   1.739 -     * @param which Which paren level
   1.740 -     * @param i Index in input array
   1.741 -     */
   1.742 -    protected final void setParenEnd(int which, int i)
   1.743 -    {
   1.744 -        if (which < parenCount)
   1.745 -        {
   1.746 -            switch (which)
   1.747 -            {
   1.748 -                case 0:
   1.749 -                    end0 = i;
   1.750 -                    break;
   1.751 -
   1.752 -                case 1:
   1.753 -                    end1 = i;
   1.754 -                    break;
   1.755 -
   1.756 -                case 2:
   1.757 -                    end2 = i;
   1.758 -                    break;
   1.759 -
   1.760 -                default:
   1.761 -                    if (endn == null)
   1.762 -                    {
   1.763 -                        allocParens();
   1.764 -                    }
   1.765 -                    endn[which] = i;
   1.766 -                    break;
   1.767 -            }
   1.768 -        }
   1.769 -    }
   1.770 -
   1.771 -    /**
   1.772 -     * Throws an Error representing an internal error condition probably resulting
   1.773 -     * from a bug in the regular expression compiler (or possibly data corruption).
   1.774 -     * In practice, this should be very rare.
   1.775 -     *
   1.776 -     * @param s Error description
   1.777 -     */
   1.778 -    protected void internalError(String s) throws Error
   1.779 -    {
   1.780 -        throw new Error("RE internal error: " + s);
   1.781 -    }
   1.782 -
   1.783 -    /**
   1.784 -     * Performs lazy allocation of subexpression arrays
   1.785 -     */
   1.786 -    private final void allocParens()
   1.787 -    {
   1.788 -        // Allocate arrays for subexpressions
   1.789 -        startn = new int[maxParen];
   1.790 -        endn = new int[maxParen];
   1.791 -
   1.792 -        // Set sub-expression pointers to invalid values
   1.793 -        for (int i = 0; i < maxParen; i++)
   1.794 -        {
   1.795 -            startn[i] = -1;
   1.796 -            endn[i] = -1;
   1.797 -        }
   1.798 -    }
   1.799 -
   1.800 -    /**
   1.801 -     * Try to match a string against a subset of nodes in the program
   1.802 -     *
   1.803 -     * @param firstNode Node to start at in program
   1.804 -     * @param lastNode  Last valid node (used for matching a subexpression without
   1.805 -     *                  matching the rest of the program as well).
   1.806 -     * @param idxStart  Starting position in character array
   1.807 -     * @return Final input array index if match succeeded.  -1 if not.
   1.808 -     */
   1.809 -    protected int matchNodes(int firstNode, int lastNode, int idxStart)
   1.810 -    {
   1.811 -        // Our current place in the string
   1.812 -        int idx = idxStart;
   1.813 -
   1.814 -        // Loop while node is valid
   1.815 -        int next, opcode, opdata;
   1.816 -        int idxNew;
   1.817 -        char[] instruction = program.instruction;
   1.818 -        for (int node = firstNode; node < lastNode; )
   1.819 -        {
   1.820 -            opcode = instruction[node + offsetOpcode];
   1.821 -            next   = node + (short)instruction[node + offsetNext];
   1.822 -            opdata = instruction[node + offsetOpdata];
   1.823 -
   1.824 -            switch (opcode)
   1.825 -            {
   1.826 -                case OP_RELUCTANTMAYBE:
   1.827 -                    {
   1.828 -                        int once = 0;
   1.829 -                        do
   1.830 -                        {
   1.831 -                            // Try to match the rest without using the reluctant subexpr
   1.832 -                            if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   1.833 -                            {
   1.834 -                                return idxNew;
   1.835 -                            }
   1.836 -                        }
   1.837 -                        while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1);
   1.838 -                        return -1;
   1.839 -                    }
   1.840 -
   1.841 -                case OP_RELUCTANTPLUS:
   1.842 -                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1)
   1.843 -                    {
   1.844 -                        // Try to match the rest without using the reluctant subexpr
   1.845 -                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   1.846 -                        {
   1.847 -                            return idxNew;
   1.848 -                        }
   1.849 -                    }
   1.850 -                    return -1;
   1.851 -
   1.852 -                case OP_RELUCTANTSTAR:
   1.853 -                    do
   1.854 -                    {
   1.855 -                        // Try to match the rest without using the reluctant subexpr
   1.856 -                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   1.857 -                        {
   1.858 -                            return idxNew;
   1.859 -                        }
   1.860 -                    }
   1.861 -                    while ((idx = matchNodes(node + nodeSize, next, idx)) != -1);
   1.862 -                    return -1;
   1.863 -
   1.864 -                case OP_OPEN:
   1.865 -
   1.866 -                    // Match subexpression
   1.867 -                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
   1.868 -                    {
   1.869 -                        startBackref[opdata] = idx;
   1.870 -                    }
   1.871 -                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   1.872 -                    {
   1.873 -                        // Increase valid paren count
   1.874 -                        if ((opdata + 1) > parenCount)
   1.875 -                        {
   1.876 -                            parenCount = opdata + 1;
   1.877 -                        }
   1.878 -
   1.879 -                        // Don't set paren if already set later on
   1.880 -                        if (getParenStart(opdata) == -1)
   1.881 -                        {
   1.882 -                            setParenStart(opdata, idx);
   1.883 -                        }
   1.884 -                    }
   1.885 -                    return idxNew;
   1.886 -
   1.887 -                case OP_CLOSE:
   1.888 -
   1.889 -                    // Done matching subexpression
   1.890 -                    if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
   1.891 -                    {
   1.892 -                        endBackref[opdata] = idx;
   1.893 -                    }
   1.894 -                    if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
   1.895 -                    {
   1.896 -                        // Increase valid paren count
   1.897 -                        if ((opdata + 1) > parenCount)
   1.898 -                        {
   1.899 -                            parenCount = opdata + 1;
   1.900 -                        }
   1.901 -
   1.902 -                        // Don't set paren if already set later on
   1.903 -                        if (getParenEnd(opdata) == -1)
   1.904 -                        {
   1.905 -                            setParenEnd(opdata, idx);
   1.906 -                        }
   1.907 -                    }
   1.908 -                    return idxNew;
   1.909 -
   1.910 -                case OP_OPEN_CLUSTER:
   1.911 -                case OP_CLOSE_CLUSTER:
   1.912 -                    // starting or ending the matching of a subexpression which has no backref.
   1.913 -                    return matchNodes( next, maxNode, idx );
   1.914 -
   1.915 -                case OP_BACKREF:
   1.916 -                    {
   1.917 -                        // Get the start and end of the backref
   1.918 -                        int s = startBackref[opdata];
   1.919 -                        int e = endBackref[opdata];
   1.920 -
   1.921 -                        // We don't know the backref yet
   1.922 -                        if (s == -1 || e == -1)
   1.923 -                        {
   1.924 -                            return -1;
   1.925 -                        }
   1.926 -
   1.927 -                        // The backref is empty size
   1.928 -                        if (s == e)
   1.929 -                        {
   1.930 -                            break;
   1.931 -                        }
   1.932 -
   1.933 -                        // Get the length of the backref
   1.934 -                        int l = e - s;
   1.935 -
   1.936 -                        // If there's not enough input left, give up.
   1.937 -                        if (search.isEnd(idx + l - 1))
   1.938 -                        {
   1.939 -                            return -1;
   1.940 -                        }
   1.941 -
   1.942 -                        // Case fold the backref?
   1.943 -                        final boolean caseFold =
   1.944 -                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
   1.945 -                        // Compare backref to input
   1.946 -                        for (int i = 0; i < l; i++)
   1.947 -                        {
   1.948 -                            if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0)
   1.949 -                            {
   1.950 -                                return -1;
   1.951 -                            }
   1.952 -                        }
   1.953 -                    }
   1.954 -                    break;
   1.955 -
   1.956 -                case OP_BOL:
   1.957 -
   1.958 -                    // Fail if we're not at the start of the string
   1.959 -                    if (idx != 0)
   1.960 -                    {
   1.961 -                        // If we're multiline matching, we could still be at the start of a line
   1.962 -                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
   1.963 -                        {
   1.964 -                            // If not at start of line, give up
   1.965 -                            if (idx <= 0 || !isNewline(idx - 1)) {
   1.966 -                                return -1;
   1.967 -                            } else {
   1.968 -                                break;
   1.969 -                            }
   1.970 -                        }
   1.971 -                        return -1;
   1.972 -                    }
   1.973 -                    break;
   1.974 -
   1.975 -                case OP_EOL:
   1.976 -
   1.977 -                    // If we're not at the end of string
   1.978 -                    if (!search.isEnd(0) && !search.isEnd(idx))
   1.979 -                    {
   1.980 -                        // If we're multi-line matching
   1.981 -                        if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
   1.982 -                        {
   1.983 -                            // Give up if we're not at the end of a line
   1.984 -                            if (!isNewline(idx)) {
   1.985 -                                return -1;
   1.986 -                            } else {
   1.987 -                                break;
   1.988 -                            }
   1.989 -                        }
   1.990 -                        return -1;
   1.991 -                    }
   1.992 -                    break;
   1.993 -
   1.994 -                case OP_ESCAPE:
   1.995 -
   1.996 -                    // Which escape?
   1.997 -                    switch (opdata)
   1.998 -                    {
   1.999 -                        // Word boundary match
  1.1000 -                        case E_NBOUND:
  1.1001 -                        case E_BOUND:
  1.1002 -                            {
  1.1003 -                                char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1));
  1.1004 -                                char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx));
  1.1005 -                                if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND))
  1.1006 -                                {
  1.1007 -                                    return -1;
  1.1008 -                                }
  1.1009 -                            }
  1.1010 -                            break;
  1.1011 -
  1.1012 -                        // Alpha-numeric, digit, space, javaLetter, javaLetterOrDigit
  1.1013 -                        case E_ALNUM:
  1.1014 -                        case E_NALNUM:
  1.1015 -                        case E_DIGIT:
  1.1016 -                        case E_NDIGIT:
  1.1017 -                        case E_SPACE:
  1.1018 -                        case E_NSPACE:
  1.1019 -
  1.1020 -                            // Give up if out of input
  1.1021 -                            if (search.isEnd(idx))
  1.1022 -                            {
  1.1023 -                                return -1;
  1.1024 -                            }
  1.1025 -
  1.1026 -                            char c = search.charAt(idx);
  1.1027 -
  1.1028 -                            // Switch on escape
  1.1029 -                            switch (opdata)
  1.1030 -                            {
  1.1031 -                                case E_ALNUM:
  1.1032 -                                case E_NALNUM:
  1.1033 -                                    if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM)))
  1.1034 -                                    {
  1.1035 -                                        return -1;
  1.1036 -                                    }
  1.1037 -                                    break;
  1.1038 -
  1.1039 -                                case E_DIGIT:
  1.1040 -                                case E_NDIGIT:
  1.1041 -                                    if (!(Character.isDigit(c) == (opdata == E_DIGIT)))
  1.1042 -                                    {
  1.1043 -                                        return -1;
  1.1044 -                                    }
  1.1045 -                                    break;
  1.1046 -
  1.1047 -                                case E_SPACE:
  1.1048 -                                case E_NSPACE:
  1.1049 -                                    if (!(Character.isWhitespace(c) == (opdata == E_SPACE)))
  1.1050 -                                    {
  1.1051 -                                        return -1;
  1.1052 -                                    }
  1.1053 -                                    break;
  1.1054 -                            }
  1.1055 -                            idx++;
  1.1056 -                            break;
  1.1057 -
  1.1058 -                        default:
  1.1059 -                            internalError("Unrecognized escape '" + opdata + "'");
  1.1060 -                    }
  1.1061 -                    break;
  1.1062 -
  1.1063 -                case OP_ANY:
  1.1064 -
  1.1065 -                    if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
  1.1066 -                        // Match anything
  1.1067 -                        if (search.isEnd(idx))
  1.1068 -                        {
  1.1069 -                            return -1;
  1.1070 -                        }
  1.1071 -                    }
  1.1072 -                    else
  1.1073 -                    {
  1.1074 -                        // Match anything but a newline
  1.1075 -                        if (search.isEnd(idx) || isNewline(idx))
  1.1076 -                        {
  1.1077 -                            return -1;
  1.1078 -                        }
  1.1079 -                    }
  1.1080 -                    idx++;
  1.1081 -                    break;
  1.1082 -
  1.1083 -                case OP_ATOM:
  1.1084 -                    {
  1.1085 -                        // Match an atom value
  1.1086 -                        if (search.isEnd(idx))
  1.1087 -                        {
  1.1088 -                            return -1;
  1.1089 -                        }
  1.1090 -
  1.1091 -                        // Get length of atom and starting index
  1.1092 -                        int lenAtom = opdata;
  1.1093 -                        int startAtom = node + nodeSize;
  1.1094 -
  1.1095 -                        // Give up if not enough input remains to have a match
  1.1096 -                        if (search.isEnd(lenAtom + idx - 1))
  1.1097 -                        {
  1.1098 -                            return -1;
  1.1099 -                        }
  1.1100 -
  1.1101 -                        // Match atom differently depending on casefolding flag
  1.1102 -                        final boolean caseFold =
  1.1103 -                            ((matchFlags & MATCH_CASEINDEPENDENT) != 0);
  1.1104 -
  1.1105 -                        for (int i = 0; i < lenAtom; i++)
  1.1106 -                        {
  1.1107 -                            if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0)
  1.1108 -                            {
  1.1109 -                                return -1;
  1.1110 -                            }
  1.1111 -                        }
  1.1112 -                    }
  1.1113 -                    break;
  1.1114 -
  1.1115 -                case OP_POSIXCLASS:
  1.1116 -                    {
  1.1117 -                        // Out of input?
  1.1118 -                        if (search.isEnd(idx))
  1.1119 -                        {
  1.1120 -                            return -1;
  1.1121 -                        }
  1.1122 -
  1.1123 -                        switch (opdata)
  1.1124 -                        {
  1.1125 -                            case POSIX_CLASS_ALNUM:
  1.1126 -                                if (!Character.isLetterOrDigit(search.charAt(idx)))
  1.1127 -                                {
  1.1128 -                                    return -1;
  1.1129 -                                }
  1.1130 -                                break;
  1.1131 -
  1.1132 -                            case POSIX_CLASS_ALPHA:
  1.1133 -                                if (!Character.isLetter(search.charAt(idx)))
  1.1134 -                                {
  1.1135 -                                    return -1;
  1.1136 -                                }
  1.1137 -                                break;
  1.1138 -
  1.1139 -                            case POSIX_CLASS_DIGIT:
  1.1140 -                                if (!Character.isDigit(search.charAt(idx)))
  1.1141 -                                {
  1.1142 -                                    return -1;
  1.1143 -                                }
  1.1144 -                                break;
  1.1145 -
  1.1146 -                            case POSIX_CLASS_BLANK: // JWL - bugbug: is this right??
  1.1147 -                                if (!Character.isSpaceChar(search.charAt(idx)))
  1.1148 -                                {
  1.1149 -                                    return -1;
  1.1150 -                                }
  1.1151 -                                break;
  1.1152 -
  1.1153 -                            case POSIX_CLASS_SPACE:
  1.1154 -                                if (!Character.isWhitespace(search.charAt(idx)))
  1.1155 -                                {
  1.1156 -                                    return -1;
  1.1157 -                                }
  1.1158 -                                break;
  1.1159 -
  1.1160 -                            case POSIX_CLASS_CNTRL:
  1.1161 -                                if (Character.getType(search.charAt(idx)) != Character.CONTROL)
  1.1162 -                                {
  1.1163 -                                    return -1;
  1.1164 -                                }
  1.1165 -                                break;
  1.1166 -
  1.1167 -                            case POSIX_CLASS_GRAPH: // JWL - bugbug???
  1.1168 -                                switch (Character.getType(search.charAt(idx)))
  1.1169 -                                {
  1.1170 -                                    case Character.MATH_SYMBOL:
  1.1171 -                                    case Character.CURRENCY_SYMBOL:
  1.1172 -                                    case Character.MODIFIER_SYMBOL:
  1.1173 -                                    case Character.OTHER_SYMBOL:
  1.1174 -                                        break;
  1.1175 -
  1.1176 -                                    default:
  1.1177 -                                        return -1;
  1.1178 -                                }
  1.1179 -                                break;
  1.1180 -
  1.1181 -                            case POSIX_CLASS_LOWER:
  1.1182 -                                if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER)
  1.1183 -                                {
  1.1184 -                                    return -1;
  1.1185 -                                }
  1.1186 -                                break;
  1.1187 -
  1.1188 -                            case POSIX_CLASS_UPPER:
  1.1189 -                                if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER)
  1.1190 -                                {
  1.1191 -                                    return -1;
  1.1192 -                                }
  1.1193 -                                break;
  1.1194 -
  1.1195 -                            case POSIX_CLASS_PRINT:
  1.1196 -                                if (Character.getType(search.charAt(idx)) == Character.CONTROL)
  1.1197 -                                {
  1.1198 -                                    return -1;
  1.1199 -                                }
  1.1200 -                                break;
  1.1201 -
  1.1202 -                            case POSIX_CLASS_PUNCT:
  1.1203 -                            {
  1.1204 -                                int type = Character.getType(search.charAt(idx));
  1.1205 -                                switch(type)
  1.1206 -                                {
  1.1207 -                                    case Character.DASH_PUNCTUATION:
  1.1208 -                                    case Character.START_PUNCTUATION:
  1.1209 -                                    case Character.END_PUNCTUATION:
  1.1210 -                                    case Character.CONNECTOR_PUNCTUATION:
  1.1211 -                                    case Character.OTHER_PUNCTUATION:
  1.1212 -                                        break;
  1.1213 -
  1.1214 -                                    default:
  1.1215 -                                        return -1;
  1.1216 -                                }
  1.1217 -                            }
  1.1218 -                            break;
  1.1219 -
  1.1220 -                            case POSIX_CLASS_XDIGIT: // JWL - bugbug??
  1.1221 -                            {
  1.1222 -                                boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') ||
  1.1223 -                                                    (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') ||
  1.1224 -                                                    (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F'));
  1.1225 -                                if (!isXDigit)
  1.1226 -                                {
  1.1227 -                                    return -1;
  1.1228 -                                }
  1.1229 -                            }
  1.1230 -                            break;
  1.1231 -
  1.1232 -                            case POSIX_CLASS_JSTART:
  1.1233 -                                if (!Character.isJavaIdentifierStart(search.charAt(idx)))
  1.1234 -                                {
  1.1235 -                                    return -1;
  1.1236 -                                }
  1.1237 -                                break;
  1.1238 -
  1.1239 -                            case POSIX_CLASS_JPART:
  1.1240 -                                if (!Character.isJavaIdentifierPart(search.charAt(idx)))
  1.1241 -                                {
  1.1242 -                                    return -1;
  1.1243 -                                }
  1.1244 -                                break;
  1.1245 -
  1.1246 -                            default:
  1.1247 -                                internalError("Bad posix class");
  1.1248 -                                break;
  1.1249 -                        }
  1.1250 -
  1.1251 -                        // Matched.
  1.1252 -                        idx++;
  1.1253 -                    }
  1.1254 -                    break;
  1.1255 -
  1.1256 -                case OP_ANYOF:
  1.1257 -                    {
  1.1258 -                        // Out of input?
  1.1259 -                        if (search.isEnd(idx))
  1.1260 -                        {
  1.1261 -                            return -1;
  1.1262 -                        }
  1.1263 -
  1.1264 -                        // Get character to match against character class and maybe casefold
  1.1265 -                        char c = search.charAt(idx);
  1.1266 -                        boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
  1.1267 -                        // Loop through character class checking our match character
  1.1268 -                        int idxRange = node + nodeSize;
  1.1269 -                        int idxEnd = idxRange + (opdata * 2);
  1.1270 -                        boolean match = false;
  1.1271 -                        for (int i = idxRange; !match && i < idxEnd; )
  1.1272 -                        {
  1.1273 -                            // Get start, end and match characters
  1.1274 -                            char s = instruction[i++];
  1.1275 -                            char e = instruction[i++];
  1.1276 -
  1.1277 -                            match = ((compareChars(c, s, caseFold) >= 0)
  1.1278 -                                     && (compareChars(c, e, caseFold) <= 0));
  1.1279 -                        }
  1.1280 -
  1.1281 -                        // Fail if we didn't match the character class
  1.1282 -                        if (!match)
  1.1283 -                        {
  1.1284 -                            return -1;
  1.1285 -                        }
  1.1286 -                        idx++;
  1.1287 -                    }
  1.1288 -                    break;
  1.1289 -
  1.1290 -                case OP_BRANCH:
  1.1291 -                {
  1.1292 -                    // Check for choices
  1.1293 -                    if (instruction[next + offsetOpcode] != OP_BRANCH)
  1.1294 -                    {
  1.1295 -                        // If there aren't any other choices, just evaluate this branch.
  1.1296 -                        node += nodeSize;
  1.1297 -                        continue;
  1.1298 -                    }
  1.1299 -
  1.1300 -                    // Try all available branches
  1.1301 -                    short nextBranch;
  1.1302 -                    do
  1.1303 -                    {
  1.1304 -                        // Try matching the branch against the string
  1.1305 -                        if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
  1.1306 -                        {
  1.1307 -                            return idxNew;
  1.1308 -                        }
  1.1309 -
  1.1310 -                        // Go to next branch (if any)
  1.1311 -                        nextBranch = (short)instruction[node + offsetNext];
  1.1312 -                        node += nextBranch;
  1.1313 -                    }
  1.1314 -                    while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH));
  1.1315 -
  1.1316 -                    // Failed to match any branch!
  1.1317 -                    return -1;
  1.1318 -                }
  1.1319 -
  1.1320 -                case OP_NOTHING:
  1.1321 -                case OP_GOTO:
  1.1322 -
  1.1323 -                    // Just advance to the next node without doing anything
  1.1324 -                    break;
  1.1325 -
  1.1326 -                case OP_END:
  1.1327 -
  1.1328 -                    // Match has succeeded!
  1.1329 -                    setParenEnd(0, idx);
  1.1330 -                    return idx;
  1.1331 -
  1.1332 -                default:
  1.1333 -
  1.1334 -                    // Corrupt program
  1.1335 -                    internalError("Invalid opcode '" + opcode + "'");
  1.1336 -            }
  1.1337 -
  1.1338 -            // Advance to the next node in the program
  1.1339 -            node = next;
  1.1340 -        }
  1.1341 -
  1.1342 -        // We "should" never end up here
  1.1343 -        internalError("Corrupt program");
  1.1344 -        return -1;
  1.1345 -    }
  1.1346 -
  1.1347 -    /**
  1.1348 -     * Match the current regular expression program against the current
  1.1349 -     * input string, starting at index i of the input string.  This method
  1.1350 -     * is only meant for internal use.
  1.1351 -     *
  1.1352 -     * @param i The input string index to start matching at
  1.1353 -     * @return True if the input matched the expression
  1.1354 -     */
  1.1355 -    protected boolean matchAt(int i)
  1.1356 -    {
  1.1357 -        // Initialize start pointer, paren cache and paren count
  1.1358 -        start0 = -1;
  1.1359 -        end0   = -1;
  1.1360 -        start1 = -1;
  1.1361 -        end1   = -1;
  1.1362 -        start2 = -1;
  1.1363 -        end2   = -1;
  1.1364 -        startn = null;
  1.1365 -        endn   = null;
  1.1366 -        parenCount = 1;
  1.1367 -        setParenStart(0, i);
  1.1368 -
  1.1369 -        // Allocate backref arrays (unless optimizations indicate otherwise)
  1.1370 -        if ((program.flags & REProgram.OPT_HASBACKREFS) != 0)
  1.1371 -        {
  1.1372 -            startBackref = new int[maxParen];
  1.1373 -            endBackref = new int[maxParen];
  1.1374 -        }
  1.1375 -
  1.1376 -        // Match against string
  1.1377 -        int idx;
  1.1378 -        if ((idx = matchNodes(0, maxNode, i)) != -1)
  1.1379 -        {
  1.1380 -            setParenEnd(0, idx);
  1.1381 -            return true;
  1.1382 -        }
  1.1383 -
  1.1384 -        // Didn't match
  1.1385 -        parenCount = 0;
  1.1386 -        return false;
  1.1387 -    }
  1.1388 -
  1.1389 -    /**
  1.1390 -     * Matches the current regular expression program against a character array,
  1.1391 -     * starting at a given index.
  1.1392 -     *
  1.1393 -     * @param search String to match against
  1.1394 -     * @param i Index to start searching at
  1.1395 -     * @return True if string matched
  1.1396 -     */
  1.1397 -    public boolean match(String search, int i)
  1.1398 -    {
  1.1399 -        return match(new StringCharacterIterator(search), i);
  1.1400 -    }
  1.1401 -
  1.1402 -    /**
  1.1403 -     * Matches the current regular expression program against a character array,
  1.1404 -     * starting at a given index.
  1.1405 -     *
  1.1406 -     * @param search String to match against
  1.1407 -     * @param i Index to start searching at
  1.1408 -     * @return True if string matched
  1.1409 -     */
  1.1410 -    public boolean match(CharacterIterator search, int i)
  1.1411 -    {
  1.1412 -        // There is no compiled program to search with!
  1.1413 -        if (program == null)
  1.1414 -        {
  1.1415 -            // This should be uncommon enough to be an error case rather
  1.1416 -            // than an exception (which would have to be handled everywhere)
  1.1417 -            internalError("No RE program to run!");
  1.1418 -        }
  1.1419 -
  1.1420 -        // Save string to search
  1.1421 -        this.search = search;
  1.1422 -
  1.1423 -        // Can we optimize the search by looking for a prefix string?
  1.1424 -        if (program.prefix == null)
  1.1425 -        {
  1.1426 -            // Unprefixed matching must try for a match at each character
  1.1427 -            for ( ;! search.isEnd(i - 1); i++)
  1.1428 -            {
  1.1429 -                // Try a match at index i
  1.1430 -                if (matchAt(i))
  1.1431 -                {
  1.1432 -                    return true;
  1.1433 -                }
  1.1434 -            }
  1.1435 -            return false;
  1.1436 -        }
  1.1437 -        else
  1.1438 -        {
  1.1439 -            // Prefix-anchored matching is possible
  1.1440 -            boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0;
  1.1441 -            char[] prefix = program.prefix;
  1.1442 -            for ( ; !search.isEnd(i + prefix.length - 1); i++)
  1.1443 -            {
  1.1444 -                int j = i;
  1.1445 -                int k = 0;
  1.1446 -
  1.1447 -                boolean match;
  1.1448 -                do {
  1.1449 -                    // If there's a mismatch of any character in the prefix, give up
  1.1450 -                    match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0);
  1.1451 -                } while (match && k < prefix.length);
  1.1452 -
  1.1453 -                // See if the whole prefix string matched
  1.1454 -                if (k == prefix.length)
  1.1455 -                {
  1.1456 -                    // We matched the full prefix at firstChar, so try it
  1.1457 -                    if (matchAt(i))
  1.1458 -                    {
  1.1459 -                        return true;
  1.1460 -                    }
  1.1461 -                }
  1.1462 -            }
  1.1463 -            return false;
  1.1464 -        }
  1.1465 -    }
  1.1466 -
  1.1467 -    /**
  1.1468 -     * Matches the current regular expression program against a String.
  1.1469 -     *
  1.1470 -     * @param search String to match against
  1.1471 -     * @return True if string matched
  1.1472 -     */
  1.1473 -    public boolean match(String search)
  1.1474 -    {
  1.1475 -        return match(search, 0);
  1.1476 -    }
  1.1477 -
  1.1478 -    /**
  1.1479 -     * Splits a string into an array of strings on regular expression boundaries.
  1.1480 -     * This function works the same way as the Perl function of the same name.
  1.1481 -     * Given a regular expression of "[ab]+" and a string to split of
  1.1482 -     * "xyzzyababbayyzabbbab123", the result would be the array of Strings
  1.1483 -     * "[xyzzy, yyz, 123]".
  1.1484 -     *
  1.1485 -     * <p>Please note that the first string in the resulting array may be an empty
  1.1486 -     * string. This happens when the very first character of input string is
  1.1487 -     * matched by the pattern.
  1.1488 -     *
  1.1489 -     * @param s String to split on this regular exression
  1.1490 -     * @return Array of strings
  1.1491 -     */
  1.1492 -    public String[] split(String s)
  1.1493 -    {
  1.1494 -        // Create new vector
  1.1495 -        Vector v = new Vector();
  1.1496 -
  1.1497 -        // Start at position 0 and search the whole string
  1.1498 -        int pos = 0;
  1.1499 -        int len = s.length();
  1.1500 -
  1.1501 -        // Try a match at each position
  1.1502 -        while (pos < len && match(s, pos))
  1.1503 -        {
  1.1504 -            // Get start of match
  1.1505 -            int start = getParenStart(0);
  1.1506 -
  1.1507 -            // Get end of match
  1.1508 -            int newpos = getParenEnd(0);
  1.1509 -
  1.1510 -            // Check if no progress was made
  1.1511 -            if (newpos == pos)
  1.1512 -            {
  1.1513 -                v.addElement(s.substring(pos, start + 1));
  1.1514 -                newpos++;
  1.1515 -            }
  1.1516 -            else
  1.1517 -            {
  1.1518 -                v.addElement(s.substring(pos, start));
  1.1519 -            }
  1.1520 -
  1.1521 -            // Move to new position
  1.1522 -            pos = newpos;
  1.1523 -        }
  1.1524 -
  1.1525 -        // Push remainder if it's not empty
  1.1526 -        String remainder = s.substring(pos);
  1.1527 -        if (remainder.length() != 0)
  1.1528 -        {
  1.1529 -            v.addElement(remainder);
  1.1530 -        }
  1.1531 -
  1.1532 -        // Return vector as an array of strings
  1.1533 -        String[] ret = new String[v.size()];
  1.1534 -        v.copyInto(ret);
  1.1535 -        return ret;
  1.1536 -    }
  1.1537 -
  1.1538 -    /**
  1.1539 -     * Flag bit that indicates that subst should replace all occurrences of this
  1.1540 -     * regular expression.
  1.1541 -     */
  1.1542 -    public static final int REPLACE_ALL            = 0x0000;
  1.1543 -
  1.1544 -    /**
  1.1545 -     * Flag bit that indicates that subst should only replace the first occurrence
  1.1546 -     * of this regular expression.
  1.1547 -     */
  1.1548 -    public static final int REPLACE_FIRSTONLY      = 0x0001;
  1.1549 -
  1.1550 -    /**
  1.1551 -     * Flag bit that indicates that subst should replace backreferences
  1.1552 -     */
  1.1553 -    public static final int REPLACE_BACKREFERENCES = 0x0002;
  1.1554 -
  1.1555 -    /**
  1.1556 -     * Substitutes a string for this regular expression in another string.
  1.1557 -     * This method works like the Perl function of the same name.
  1.1558 -     * Given a regular expression of "a*b", a String to substituteIn of
  1.1559 -     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
  1.1560 -     * resulting String returned by subst would be "-foo-garply-wacky-".
  1.1561 -     *
  1.1562 -     * @param substituteIn String to substitute within
  1.1563 -     * @param substitution String to substitute for all matches of this regular expression.
  1.1564 -     * @return The string substituteIn with zero or more occurrences of the current
  1.1565 -     * regular expression replaced with the substitution String (if this regular
  1.1566 -     * expression object doesn't match at any position, the original String is returned
  1.1567 -     * unchanged).
  1.1568 -     */
  1.1569 -    public String subst(String substituteIn, String substitution)
  1.1570 -    {
  1.1571 -        return subst(substituteIn, substitution, REPLACE_ALL);
  1.1572 -    }
  1.1573 -
  1.1574 -    /**
  1.1575 -     * Substitutes a string for this regular expression in another string.
  1.1576 -     * This method works like the Perl function of the same name.
  1.1577 -     * Given a regular expression of "a*b", a String to substituteIn of
  1.1578 -     * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the
  1.1579 -     * resulting String returned by subst would be "-foo-garply-wacky-".
  1.1580 -     * <p>
  1.1581 -     * It is also possible to reference the contents of a parenthesized expression
  1.1582 -     * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+",
  1.1583 -     * a String to substituteIn of "visit us: http://www.apache.org!" and the
  1.1584 -     * substitution String "&lt;a href=\"$0\"&gt;$0&lt;/a&gt;", the resulting String
  1.1585 -     * returned by subst would be
  1.1586 -     * "visit us: &lt;a href=\"http://www.apache.org\"&gt;http://www.apache.org&lt;/a&gt;!".
  1.1587 -     * <p>
  1.1588 -     * <i>Note:</i> $0 represents the whole match.
  1.1589 -     *
  1.1590 -     * @param substituteIn String to substitute within
  1.1591 -     * @param substitution String to substitute for matches of this regular expression
  1.1592 -     * @param flags One or more bitwise flags from REPLACE_*.  If the REPLACE_FIRSTONLY
  1.1593 -     * flag bit is set, only the first occurrence of this regular expression is replaced.
  1.1594 -     * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be
  1.1595 -     * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will
  1.1596 -     * be processed.
  1.1597 -     * @return The string substituteIn with zero or more occurrences of the current
  1.1598 -     * regular expression replaced with the substitution String (if this regular
  1.1599 -     * expression object doesn't match at any position, the original String is returned
  1.1600 -     * unchanged).
  1.1601 -     */
  1.1602 -    public String subst(String substituteIn, String substitution, int flags)
  1.1603 -    {
  1.1604 -        // String to return
  1.1605 -        StringBuffer ret = new StringBuffer();
  1.1606 -
  1.1607 -        // Start at position 0 and search the whole string
  1.1608 -        int pos = 0;
  1.1609 -        int len = substituteIn.length();
  1.1610 -
  1.1611 -        // Try a match at each position
  1.1612 -        while (pos < len && match(substituteIn, pos))
  1.1613 -        {
  1.1614 -            // Append string before match
  1.1615 -            ret.append(substituteIn.substring(pos, getParenStart(0)));
  1.1616 -
  1.1617 -            if ((flags & REPLACE_BACKREFERENCES) != 0)
  1.1618 -            {
  1.1619 -                // Process backreferences
  1.1620 -                int lCurrentPosition = 0;
  1.1621 -                int lLastPosition = -2;
  1.1622 -                int lLength = substitution.length();
  1.1623 -                boolean bAddedPrefix = false;
  1.1624 -
  1.1625 -                while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0)
  1.1626 -                {
  1.1627 -                    if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\')
  1.1628 -                        && lCurrentPosition+1 < lLength)
  1.1629 -                    {
  1.1630 -                        char c = substitution.charAt(lCurrentPosition + 1);
  1.1631 -                        if (c >= '0' && c <= '9')
  1.1632 -                        {
  1.1633 -                            if (bAddedPrefix == false)
  1.1634 -                            {
  1.1635 -                                // Append everything between the beginning of the
  1.1636 -                                // substitution string and the current $ sign
  1.1637 -                                ret.append(substitution.substring(0, lCurrentPosition));
  1.1638 -                                bAddedPrefix = true;
  1.1639 -                            }
  1.1640 -                            else
  1.1641 -                            {
  1.1642 -                                // Append everything between the last and the current $ sign
  1.1643 -                                ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition));
  1.1644 -                            }
  1.1645 -
  1.1646 -                            // Append the parenthesized expression
  1.1647 -                            // Note: if a parenthesized expression of the requested
  1.1648 -                            // index is not available "null" is added to the string
  1.1649 -                            ret.append(getParen(c - '0'));
  1.1650 -                            lLastPosition = lCurrentPosition;
  1.1651 -                        }
  1.1652 -                    }
  1.1653 -
  1.1654 -                    // Move forward, skipping past match
  1.1655 -                    lCurrentPosition++;
  1.1656 -                }
  1.1657 -
  1.1658 -                // Append everything after the last $ sign
  1.1659 -                ret.append(substitution.substring(lLastPosition + 2, lLength));
  1.1660 -            }
  1.1661 -            else
  1.1662 -            {
  1.1663 -                // Append substitution without processing backreferences
  1.1664 -                ret.append(substitution);
  1.1665 -            }
  1.1666 -
  1.1667 -            // Move forward, skipping past match
  1.1668 -            int newpos = getParenEnd(0);
  1.1669 -
  1.1670 -            // We always want to make progress!
  1.1671 -            if (newpos == pos)
  1.1672 -            {
  1.1673 -                newpos++;
  1.1674 -            }
  1.1675 -
  1.1676 -            // Try new position
  1.1677 -            pos = newpos;
  1.1678 -
  1.1679 -            // Break out if we're only supposed to replace one occurrence
  1.1680 -            if ((flags & REPLACE_FIRSTONLY) != 0)
  1.1681 -            {
  1.1682 -                break;
  1.1683 -            }
  1.1684 -        }
  1.1685 -
  1.1686 -        // If there's remaining input, append it
  1.1687 -        if (pos < len)
  1.1688 -        {
  1.1689 -            ret.append(substituteIn.substring(pos));
  1.1690 -        }
  1.1691 -
  1.1692 -        // Return string buffer as string
  1.1693 -        return ret.toString();
  1.1694 -    }
  1.1695 -
  1.1696 -    /**
  1.1697 -     * Returns an array of Strings, whose toString representation matches a regular
  1.1698 -     * expression. This method works like the Perl function of the same name.  Given
  1.1699 -     * a regular expression of "a*b" and an array of String objects of [foo, aab, zzz,
  1.1700 -     * aaaab], the array of Strings returned by grep would be [aab, aaaab].
  1.1701 -     *
  1.1702 -     * @param search Array of Objects to search
  1.1703 -     * @return Array of Strings whose toString() value matches this regular expression.
  1.1704 -     */
  1.1705 -    public String[] grep(Object[] search)
  1.1706 -    {
  1.1707 -        // Create new vector to hold return items
  1.1708 -        Vector v = new Vector();
  1.1709 -
  1.1710 -        // Traverse array of objects
  1.1711 -        for (int i = 0; i < search.length; i++)
  1.1712 -        {
  1.1713 -            // Get next object as a string
  1.1714 -            String s = search[i].toString();
  1.1715 -
  1.1716 -            // If it matches this regexp, add it to the list
  1.1717 -            if (match(s))
  1.1718 -            {
  1.1719 -                v.addElement(s);
  1.1720 -            }
  1.1721 -        }
  1.1722 -
  1.1723 -        // Return vector as an array of strings
  1.1724 -        String[] ret = new String[v.size()];
  1.1725 -        v.copyInto(ret);
  1.1726 -        return ret;
  1.1727 -    }
  1.1728 -
  1.1729 -    /**
  1.1730 -     * @return true if character at i-th position in the <code>search</code> string is a newline
  1.1731 -     */
  1.1732 -    private boolean isNewline(int i)
  1.1733 -    {
  1.1734 -        char nextChar = search.charAt(i);
  1.1735 -
  1.1736 -        if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
  1.1737 -            || nextChar == '\u2028' || nextChar == '\u2029')
  1.1738 -        {
  1.1739 -            return true;
  1.1740 -        }
  1.1741 -
  1.1742 -        return false;
  1.1743 -    }
  1.1744 -
  1.1745 -    /**
  1.1746 -     * Compares two characters.
  1.1747 -     *
  1.1748 -     * @param c1 first character to compare.
  1.1749 -     * @param c2 second character to compare.
  1.1750 -     * @param caseIndependent whether comparision is case insensitive or not.
  1.1751 -     * @return negative, 0, or positive integer as the first character
  1.1752 -     *         less than, equal to, or greater then the second.
  1.1753 -     */
  1.1754 -    private int compareChars(char c1, char c2, boolean caseIndependent)
  1.1755 -    {
  1.1756 -        if (caseIndependent)
  1.1757 -        {
  1.1758 -            c1 = Character.toLowerCase(c1);
  1.1759 -            c2 = Character.toLowerCase(c2);
  1.1760 -        }
  1.1761 -        return ((int)c1 - (int)c2);
  1.1762 -    }
  1.1763 -}

mercurial