Sat, 24 Oct 2020 16:43:03 +0800
Merge
1.1 --- a/.hgtags Sat Oct 24 16:18:47 2020 +0800 1.2 +++ b/.hgtags Sat Oct 24 16:43:03 2020 +0800 1.3 @@ -1069,8 +1069,23 @@ 1.4 560093f3167970da2935b745493653420fdea008 jdk8u262-b03 1.5 d054aabd2e3c09de0ff622b4fab09388d30aee02 jdk8u262-b04 1.6 976e73cfac410997160b1d3d6e14a88a324440c3 jdk8u262-b05 1.7 +976e73cfac410997160b1d3d6e14a88a324440c3 jdk8u272-b00 1.8 ddbd856338439f2d5f742040d896e27f0f104cd1 jdk8u262-b06 1.9 ebb0a284b7e75dfb741af3332eb87b37aca66875 jdk8u262-b07 1.10 0cccb32a50471fd52ecf2f697d95e7254798ab26 jdk8u262-b08 1.11 779db06fb02444e294b7c93fe3902afee615df2a jdk8u262-b09 1.12 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u262-b10 1.13 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u262-ga 1.14 3b85d4e65538af51987a00b276cde9c250615f9d mips-jdk8u262-b10 1.15 +63884b34cac1b652cf49289199a00cb363cb93dd jdk8u265-b00 1.16 +3147b24fc8b092b34599830b56d03da4731577a2 jdk8u265-b01 1.17 +3147b24fc8b092b34599830b56d03da4731577a2 jdk8u265-ga 1.18 +1bc3598fbad03fa73168f64cea4d0628e75a292b jdk8u272-b01 1.19 +7694bb86e0236ba9a89326206af46da8c252aad5 jdk8u272-b02 1.20 +370157535629da61a0f0ac045d77c384b98211f6 jdk8u272-b03 1.21 +89445883ffdec61e5b32980633b67d932d602582 jdk8u272-b04 1.22 +36d18f0fd6eeffc14f311dc5ff5a18ae870fc1d0 jdk8u272-b05 1.23 +44cbebcc276cddad3ad0aa67f4da313d50af7e4b jdk8u272-b06 1.24 +bd015816ce490762772ca71c86bd90f58a90fb8c jdk8u272-b07 1.25 +9d92962b2fe312a045e5814d4604d00e04492515 jdk8u272-b08 1.26 +a5b79eebcc1f3c9afbe9927d672be64364647049 jdk8u272-b09
2.1 --- a/THIRD_PARTY_README Sat Oct 24 16:18:47 2020 +0800 2.2 +++ b/THIRD_PARTY_README Sat Oct 24 16:43:03 2020 +0800 2.3 @@ -2240,7 +2240,7 @@ 2.4 2.5 ------------------------------------------------------------------------------- 2.6 2.7 -%% This notice is provided with respect to PC/SC Lite v1.8.24, 2.8 +%% This notice is provided with respect to PC/SC Lite v1.8.26, 2.9 which may be included with JRE 8, JDK 8, and OpenJDK 8 on Linux and Solaris. 2.10 2.11 --- begin of LICENSE --- 2.12 @@ -3028,8 +3028,7 @@ 2.13 Apache Commons Math 3.2 2.14 Apache Derby 10.11.1.2 2.15 Apache Jakarta BCEL 5.1 2.16 - Apache Jakarta Regexp 1.4 2.17 - Apache Santuario XML Security for Java 1.5.4 2.18 + Apache Santuario XML Security for Java 2.1.1 2.19 Apache Xalan-Java 2.7.2 2.20 Apache Xerces Java 2.10.0 2.21 Apache XML Resolver 1.1 2.22 @@ -3243,3 +3242,41 @@ 2.23 2.24 ------------------------------------------------------------------------------- 2.25 2.26 +%% This notice is provided with respect to OASIS PKCS #11 Cryptographic Token 2.27 +Interface v2.40, which may be included with JRE 8, JDK 8, and OpenJDK 8. 2.28 + 2.29 +--- begin of LICENSE --- 2.30 + 2.31 +Copyright (c) OASIS Open 2016. All Rights Reserved. 2.32 + 2.33 +All capitalized terms in the following text have the meanings assigned to them 2.34 +in the OASIS Intellectual Property Rights Policy (the "OASIS IPR Policy"). The 2.35 +full Policy may be found at the OASIS website: 2.36 +[http://www.oasis-open.org/policies-guidelines/ipr] 2.37 + 2.38 +This document and translations of it may be copied and furnished to others, and 2.39 +derivative works that comment on or otherwise explain it or assist in its 2.40 +implementation may be prepared, copied, published, and distributed, in whole or 2.41 +in part, without restriction of any kind, provided that the above copyright 2.42 +notice and this section are included on all such copies and derivative works. 2.43 +However, this document itself may not be modified in any way, including by 2.44 +removing the copyright notice or references to OASIS, except as needed for the 2.45 +purpose of developing any document or deliverable produced by an OASIS 2.46 +Technical Committee (in which case the rules applicable to copyrights, as set 2.47 +forth in the OASIS IPR Policy, must be followed) or as required to translate it 2.48 +into languages other than English. 2.49 + 2.50 +The limited permissions granted above are perpetual and will not be revoked by 2.51 +OASIS or its successors or assigns. 2.52 + 2.53 +This document and the information contained herein is provided on an "AS IS" 2.54 +basis and OASIS DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT 2.55 +LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION HEREIN WILL NOT 2.56 +INFRINGE ANY OWNERSHIP RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR 2.57 +FITNESS FOR A PARTICULAR PURPOSE. OASIS AND ITS MEMBERS WILL NOT BE LIABLE FOR 2.58 +ANY DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE 2.59 +OF THIS DOCUMENT OR ANY PART THEREOF. 2.60 + 2.61 +--- end of LICENSE --- 2.62 + 2.63 +-------------------------------------------------------------------------------
3.1 --- a/src/com/sun/org/apache/bcel/internal/util/InstructionFinder.java Sat Oct 24 16:18:47 2020 +0800 3.2 +++ b/src/com/sun/org/apache/bcel/internal/util/InstructionFinder.java Sat Oct 24 16:43:03 2020 +0800 3.3 @@ -4,64 +4,29 @@ 3.4 */ 3.5 package com.sun.org.apache.bcel.internal.util; 3.6 3.7 -/* ==================================================================== 3.8 - * The Apache Software License, Version 1.1 3.9 +/* 3.10 + * Licensed to the Apache Software Foundation (ASF) under one or more 3.11 + * contributor license agreements. See the NOTICE file distributed with 3.12 + * this work for additional information regarding copyright ownership. 3.13 + * The ASF licenses this file to You under the Apache License, Version 2.0 3.14 + * (the "License"); you may not use this file except in compliance with 3.15 + * the License. You may obtain a copy of the License at 3.16 * 3.17 - * Copyright (c) 2001 The Apache Software Foundation. All rights 3.18 - * reserved. 3.19 + * http://www.apache.org/licenses/LICENSE-2.0 3.20 * 3.21 - * Redistribution and use in source and binary forms, with or without 3.22 - * modification, are permitted provided that the following conditions 3.23 - * are met: 3.24 + * Unless required by applicable law or agreed to in writing, software 3.25 + * distributed under the License is distributed on an "AS IS" BASIS, 3.26 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 3.27 + * See the License for the specific language governing permissions and 3.28 + * limitations under the License. 3.29 * 3.30 - * 1. Redistributions of source code must retain the above copyright 3.31 - * notice, this list of conditions and the following disclaimer. 3.32 - * 3.33 - * 2. Redistributions in binary form must reproduce the above copyright 3.34 - * notice, this list of conditions and the following disclaimer in 3.35 - * the documentation and/or other materials provided with the 3.36 - * distribution. 3.37 - * 3.38 - * 3. The end-user documentation included with the redistribution, 3.39 - * if any, must include the following acknowledgment: 3.40 - * "This product includes software developed by the 3.41 - * Apache Software Foundation (http://www.apache.org/)." 3.42 - * Alternately, this acknowledgment may appear in the software itself, 3.43 - * if and wherever such third-party acknowledgments normally appear. 3.44 - * 3.45 - * 4. The names "Apache" and "Apache Software Foundation" and 3.46 - * "Apache BCEL" must not be used to endorse or promote products 3.47 - * derived from this software without prior written permission. For 3.48 - * written permission, please contact apache@apache.org. 3.49 - * 3.50 - * 5. Products derived from this software may not be called "Apache", 3.51 - * "Apache BCEL", nor may "Apache" appear in their name, without 3.52 - * prior written permission of the Apache Software Foundation. 3.53 - * 3.54 - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 3.55 - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 3.56 - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 3.57 - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 3.58 - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3.59 - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 3.60 - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 3.61 - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 3.62 - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3.63 - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3.64 - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3.65 - * SUCH DAMAGE. 3.66 - * ==================================================================== 3.67 - * 3.68 - * This software consists of voluntary contributions made by many 3.69 - * individuals on behalf of the Apache Software Foundation. For more 3.70 - * information on the Apache Software Foundation, please see 3.71 - * <http://www.apache.org/>. 3.72 */ 3.73 3.74 -import java.util.*; 3.75 import com.sun.org.apache.bcel.internal.Constants; 3.76 import com.sun.org.apache.bcel.internal.generic.*; 3.77 -import com.sun.org.apache.regexp.internal.*; 3.78 +import java.util.*; 3.79 +import java.util.regex.Matcher; 3.80 +import java.util.regex.Pattern; 3.81 3.82 /** 3.83 * InstructionFinder is a tool to search for given instructions patterns, 3.84 @@ -231,28 +196,22 @@ 3.85 if(start == -1) 3.86 throw new ClassGenException("Instruction handle " + from + 3.87 " not found in instruction list."); 3.88 - try { 3.89 - RE regex = new RE(search); 3.90 - ArrayList matches = new ArrayList(); 3.91 3.92 - while(start < il_string.length() && regex.match(il_string, start)) { 3.93 - int startExpr = regex.getParenStart(0); 3.94 - int endExpr = regex.getParenEnd(0); 3.95 - int lenExpr = regex.getParenLength(0); 3.96 + Pattern regex = Pattern.compile(search); 3.97 + List<InstructionHandle[]> matches = new ArrayList<>(); 3.98 + Matcher matcher = regex.matcher(il_string); 3.99 + while(start < il_string.length() && matcher.find(start)) { 3.100 + int startExpr = matcher.start(); 3.101 + int endExpr = matcher.end(); 3.102 + int lenExpr = endExpr - startExpr; 3.103 + InstructionHandle[] match = getMatch(startExpr, lenExpr); 3.104 3.105 - InstructionHandle[] match = getMatch(startExpr, lenExpr); 3.106 - 3.107 - if((constraint == null) || constraint.checkCode(match)) 3.108 - matches.add(match); 3.109 - start = endExpr; 3.110 - } 3.111 - 3.112 - return matches.iterator(); 3.113 - } catch(RESyntaxException e) { 3.114 - System.err.println(e); 3.115 + if((constraint == null) || constraint.checkCode(match)) 3.116 + matches.add(match); 3.117 + start = endExpr; 3.118 } 3.119 3.120 - return null; 3.121 + return matches.iterator(); 3.122 } 3.123 3.124 /**
4.1 --- a/src/com/sun/org/apache/regexp/internal/CharacterArrayCharacterIterator.java Sat Oct 24 16:18:47 2020 +0800 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,76 +0,0 @@ 4.4 -/* 4.5 - * reserved comment block 4.6 - * DO NOT REMOVE OR ALTER! 4.7 - */ 4.8 -/* 4.9 - * Copyright 1999-2004 The Apache Software Foundation. 4.10 - * 4.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 4.12 - * you may not use this file except in compliance with the License. 4.13 - * You may obtain a copy of the License at 4.14 - * 4.15 - * http://www.apache.org/licenses/LICENSE-2.0 4.16 - * 4.17 - * Unless required by applicable law or agreed to in writing, software 4.18 - * distributed under the License is distributed on an "AS IS" BASIS, 4.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4.20 - * See the License for the specific language governing permissions and 4.21 - * limitations under the License. 4.22 - */ 4.23 - 4.24 -package com.sun.org.apache.regexp.internal; 4.25 - 4.26 -/** 4.27 - * Encapsulates char[] as CharacterIterator 4.28 - * 4.29 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a> 4.30 - */ 4.31 -public final class CharacterArrayCharacterIterator implements CharacterIterator 4.32 -{ 4.33 - /** encapsulated */ 4.34 - private final char[] src; 4.35 - /** offset in the char array */ 4.36 - private final int off; 4.37 - /** used portion of the array */ 4.38 - private final int len; 4.39 - 4.40 - /** @param src - encapsulated String */ 4.41 - public CharacterArrayCharacterIterator(char[] src, int off, int len) 4.42 - { 4.43 - this.src = src; 4.44 - this.off = off; 4.45 - this.len = len; 4.46 - } 4.47 - 4.48 - /** @return a substring */ 4.49 - public String substring(int beginIndex, int endIndex) 4.50 - { 4.51 - if (endIndex > len) { 4.52 - throw new IndexOutOfBoundsException("endIndex=" + endIndex 4.53 - + "; sequence size=" + len); 4.54 - } 4.55 - if (beginIndex < 0 || beginIndex > endIndex) { 4.56 - throw new IndexOutOfBoundsException("beginIndex=" + beginIndex 4.57 - + "; endIndex=" + endIndex); 4.58 - } 4.59 - return new String(src, off + beginIndex, endIndex - beginIndex); 4.60 - } 4.61 - 4.62 - /** @return a substring */ 4.63 - public String substring(int beginIndex) 4.64 - { 4.65 - return substring(beginIndex, len); 4.66 - } 4.67 - 4.68 - /** @return a character at the specified position. */ 4.69 - public char charAt(int pos) 4.70 - { 4.71 - return src[off + pos]; 4.72 - } 4.73 - 4.74 - /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */ 4.75 - public boolean isEnd(int pos) 4.76 - { 4.77 - return (pos >= len); 4.78 - } 4.79 -}
5.1 --- a/src/com/sun/org/apache/regexp/internal/CharacterIterator.java Sat Oct 24 16:18:47 2020 +0800 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,42 +0,0 @@ 5.4 -/* 5.5 - * reserved comment block 5.6 - * DO NOT REMOVE OR ALTER! 5.7 - */ 5.8 -/* 5.9 - * Copyright 1999-2004 The Apache Software Foundation. 5.10 - * 5.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 5.12 - * you may not use this file except in compliance with the License. 5.13 - * You may obtain a copy of the License at 5.14 - * 5.15 - * http://www.apache.org/licenses/LICENSE-2.0 5.16 - * 5.17 - * Unless required by applicable law or agreed to in writing, software 5.18 - * distributed under the License is distributed on an "AS IS" BASIS, 5.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 5.20 - * See the License for the specific language governing permissions and 5.21 - * limitations under the License. 5.22 - */ 5.23 - 5.24 -package com.sun.org.apache.regexp.internal; 5.25 - 5.26 -/** 5.27 - * Encapsulates different types of character sources - String, InputStream, ... 5.28 - * Defines a set of common methods 5.29 - * 5.30 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a> 5.31 - */ 5.32 -public interface CharacterIterator 5.33 -{ 5.34 - /** @return a substring */ 5.35 - String substring(int beginIndex, int endIndex); 5.36 - 5.37 - /** @return a substring */ 5.38 - String substring(int beginIndex); 5.39 - 5.40 - /** @return a character at the specified position. */ 5.41 - char charAt(int pos); 5.42 - 5.43 - /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */ 5.44 - boolean isEnd(int pos); 5.45 -}
6.1 --- a/src/com/sun/org/apache/regexp/internal/RE.java Sat Oct 24 16:18:47 2020 +0800 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,1760 +0,0 @@ 6.4 -/* 6.5 - * reserved comment block 6.6 - * DO NOT REMOVE OR ALTER! 6.7 - */ 6.8 -/* 6.9 - * Copyright 1999-2004 The Apache Software Foundation. 6.10 - * 6.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 6.12 - * you may not use this file except in compliance with the License. 6.13 - * You may obtain a copy of the License at 6.14 - * 6.15 - * http://www.apache.org/licenses/LICENSE-2.0 6.16 - * 6.17 - * Unless required by applicable law or agreed to in writing, software 6.18 - * distributed under the License is distributed on an "AS IS" BASIS, 6.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6.20 - * See the License for the specific language governing permissions and 6.21 - * limitations under the License. 6.22 - */ 6.23 - 6.24 -package com.sun.org.apache.regexp.internal; 6.25 - 6.26 -import java.io.Serializable; 6.27 -import java.util.Vector; 6.28 - 6.29 -/** 6.30 - * RE is an efficient, lightweight regular expression evaluator/matcher 6.31 - * class. Regular expressions are pattern descriptions which enable 6.32 - * sophisticated matching of strings. In addition to being able to 6.33 - * match a string against a pattern, you can also extract parts of the 6.34 - * match. This is especially useful in text parsing! Details on the 6.35 - * syntax of regular expression patterns are given below. 6.36 - * 6.37 - * <p> 6.38 - * To compile a regular expression (RE), you can simply construct an RE 6.39 - * matcher object from the string specification of the pattern, like this: 6.40 - * 6.41 - * <pre> 6.42 - * RE r = new RE("a*b"); 6.43 - * </pre> 6.44 - * 6.45 - * <p> 6.46 - * Once you have done this, you can call either of the RE.match methods to 6.47 - * perform matching on a String. For example: 6.48 - * 6.49 - * <pre> 6.50 - * boolean matched = r.match("aaaab"); 6.51 - * </pre> 6.52 - * 6.53 - * will cause the boolean matched to be set to true because the 6.54 - * pattern "a*b" matches the string "aaaab". 6.55 - * 6.56 - * <p> 6.57 - * If you were interested in the <i>number</i> of a's which matched the 6.58 - * first part of our example expression, you could change the expression to 6.59 - * "(a*)b". Then when you compiled the expression and matched it against 6.60 - * something like "xaaaab", you would get results like this: 6.61 - * 6.62 - * <pre> 6.63 - * RE r = new RE("(a*)b"); // Compile expression 6.64 - * boolean matched = r.match("xaaaab"); // Match against "xaaaab" 6.65 - * 6.66 - * String wholeExpr = r.getParen(0); // wholeExpr will be 'aaaab' 6.67 - * String insideParens = r.getParen(1); // insideParens will be 'aaaa' 6.68 - * 6.69 - * int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1 6.70 - * int endWholeExpr = r.getParenEnd(0); // endWholeExpr will be index 6 6.71 - * int lenWholeExpr = r.getParenLength(0); // lenWholeExpr will be 5 6.72 - * 6.73 - * int startInside = r.getParenStart(1); // startInside will be index 1 6.74 - * int endInside = r.getParenEnd(1); // endInside will be index 5 6.75 - * int lenInside = r.getParenLength(1); // lenInside will be 4 6.76 - * </pre> 6.77 - * 6.78 - * You can also refer to the contents of a parenthesized expression 6.79 - * within a regular expression itself. This is called a 6.80 - * 'backreference'. The first backreference in a regular expression is 6.81 - * denoted by \1, the second by \2 and so on. So the expression: 6.82 - * 6.83 - * <pre> 6.84 - * ([0-9]+)=\1 6.85 - * </pre> 6.86 - * 6.87 - * will match any string of the form n=n (like 0=0 or 2=2). 6.88 - * 6.89 - * <p> 6.90 - * The full regular expression syntax accepted by RE is described here: 6.91 - * 6.92 - * <pre> 6.93 - * 6.94 - * <b><font face=times roman>Characters</font></b> 6.95 - * 6.96 - * <i>unicodeChar</i> Matches any identical unicode character 6.97 - * \ Used to quote a meta-character (like '*') 6.98 - * \\ Matches a single '\' character 6.99 - * \0nnn Matches a given octal character 6.100 - * \xhh Matches a given 8-bit hexadecimal character 6.101 - * \\uhhhh Matches a given 16-bit hexadecimal character 6.102 - * \t Matches an ASCII tab character 6.103 - * \n Matches an ASCII newline character 6.104 - * \r Matches an ASCII return character 6.105 - * \f Matches an ASCII form feed character 6.106 - * 6.107 - * 6.108 - * <b><font face=times roman>Character Classes</font></b> 6.109 - * 6.110 - * [abc] Simple character class 6.111 - * [a-zA-Z] Character class with ranges 6.112 - * [^abc] Negated character class 6.113 - * </pre> 6.114 - * 6.115 - * <b>NOTE:</b> Incomplete ranges will be interpreted as "starts 6.116 - * from zero" or "ends with last character". 6.117 - * <br> 6.118 - * I.e. [-a] is the same as [\\u0000-a], and [a-] is the same as [a-\\uFFFF], 6.119 - * [-] means "all characters". 6.120 - * 6.121 - * <pre> 6.122 - * 6.123 - * <b><font face=times roman>Standard POSIX Character Classes</font></b> 6.124 - * 6.125 - * [:alnum:] Alphanumeric characters. 6.126 - * [:alpha:] Alphabetic characters. 6.127 - * [:blank:] Space and tab characters. 6.128 - * [:cntrl:] Control characters. 6.129 - * [:digit:] Numeric characters. 6.130 - * [:graph:] Characters that are printable and are also visible. 6.131 - * (A space is printable, but not visible, while an 6.132 - * `a' is both.) 6.133 - * [:lower:] Lower-case alphabetic characters. 6.134 - * [:print:] Printable characters (characters that are not 6.135 - * control characters.) 6.136 - * [:punct:] Punctuation characters (characters that are not letter, 6.137 - * digits, control characters, or space characters). 6.138 - * [:space:] Space characters (such as space, tab, and formfeed, 6.139 - * to name a few). 6.140 - * [:upper:] Upper-case alphabetic characters. 6.141 - * [:xdigit:] Characters that are hexadecimal digits. 6.142 - * 6.143 - * 6.144 - * <b><font face=times roman>Non-standard POSIX-style Character Classes</font></b> 6.145 - * 6.146 - * [:javastart:] Start of a Java identifier 6.147 - * [:javapart:] Part of a Java identifier 6.148 - * 6.149 - * 6.150 - * <b><font face=times roman>Predefined Classes</font></b> 6.151 - * 6.152 - * . Matches any character other than newline 6.153 - * \w Matches a "word" character (alphanumeric plus "_") 6.154 - * \W Matches a non-word character 6.155 - * \s Matches a whitespace character 6.156 - * \S Matches a non-whitespace character 6.157 - * \d Matches a digit character 6.158 - * \D Matches a non-digit character 6.159 - * 6.160 - * 6.161 - * <b><font face=times roman>Boundary Matchers</font></b> 6.162 - * 6.163 - * ^ Matches only at the beginning of a line 6.164 - * $ Matches only at the end of a line 6.165 - * \b Matches only at a word boundary 6.166 - * \B Matches only at a non-word boundary 6.167 - * 6.168 - * 6.169 - * <b><font face=times roman>Greedy Closures</font></b> 6.170 - * 6.171 - * A* Matches A 0 or more times (greedy) 6.172 - * A+ Matches A 1 or more times (greedy) 6.173 - * A? Matches A 1 or 0 times (greedy) 6.174 - * A{n} Matches A exactly n times (greedy) 6.175 - * A{n,} Matches A at least n times (greedy) 6.176 - * A{n,m} Matches A at least n but not more than m times (greedy) 6.177 - * 6.178 - * 6.179 - * <b><font face=times roman>Reluctant Closures</font></b> 6.180 - * 6.181 - * A*? Matches A 0 or more times (reluctant) 6.182 - * A+? Matches A 1 or more times (reluctant) 6.183 - * A?? Matches A 0 or 1 times (reluctant) 6.184 - * 6.185 - * 6.186 - * <b><font face=times roman>Logical Operators</font></b> 6.187 - * 6.188 - * AB Matches A followed by B 6.189 - * A|B Matches either A or B 6.190 - * (A) Used for subexpression grouping 6.191 - * (?:A) Used for subexpression clustering (just like grouping but 6.192 - * no backrefs) 6.193 - * 6.194 - * 6.195 - * <b><font face=times roman>Backreferences</font></b> 6.196 - * 6.197 - * \1 Backreference to 1st parenthesized subexpression 6.198 - * \2 Backreference to 2nd parenthesized subexpression 6.199 - * \3 Backreference to 3rd parenthesized subexpression 6.200 - * \4 Backreference to 4th parenthesized subexpression 6.201 - * \5 Backreference to 5th parenthesized subexpression 6.202 - * \6 Backreference to 6th parenthesized subexpression 6.203 - * \7 Backreference to 7th parenthesized subexpression 6.204 - * \8 Backreference to 8th parenthesized subexpression 6.205 - * \9 Backreference to 9th parenthesized subexpression 6.206 - * </pre> 6.207 - * 6.208 - * <p> 6.209 - * All closure operators (+, *, ?, {m,n}) are greedy by default, meaning 6.210 - * that they match as many elements of the string as possible without 6.211 - * causing the overall match to fail. If you want a closure to be 6.212 - * reluctant (non-greedy), you can simply follow it with a '?'. A 6.213 - * reluctant closure will match as few elements of the string as 6.214 - * possible when finding matches. {m,n} closures don't currently 6.215 - * support reluctancy. 6.216 - * 6.217 - * <p> 6.218 - * <b><font face="times roman">Line terminators</font></b> 6.219 - * <br> 6.220 - * A line terminator is a one- or two-character sequence that marks 6.221 - * the end of a line of the input character sequence. The following 6.222 - * are recognized as line terminators: 6.223 - * <ul> 6.224 - * <li>A newline (line feed) character ('\n'),</li> 6.225 - * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li> 6.226 - * <li>A standalone carriage-return character ('\r'),</li> 6.227 - * <li>A next-line character ('\u0085'),</li> 6.228 - * <li>A line-separator character ('\u2028'), or</li> 6.229 - * <li>A paragraph-separator character ('\u2029).</li> 6.230 - * </ul> 6.231 - * 6.232 - * <p> 6.233 - * RE runs programs compiled by the RECompiler class. But the RE 6.234 - * matcher class does not include the actual regular expression compiler 6.235 - * for reasons of efficiency. In fact, if you want to pre-compile one 6.236 - * or more regular expressions, the 'recompile' class can be invoked 6.237 - * from the command line to produce compiled output like this: 6.238 - * 6.239 - * <pre> 6.240 - * // Pre-compiled regular expression "a*b" 6.241 - * char[] re1Instructions = 6.242 - * { 6.243 - * 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041, 6.244 - * 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047, 6.245 - * 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000, 6.246 - * 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000, 6.247 - * 0x0000, 6.248 - * }; 6.249 - * 6.250 - * 6.251 - * REProgram re1 = new REProgram(re1Instructions); 6.252 - * </pre> 6.253 - * 6.254 - * You can then construct a regular expression matcher (RE) object from 6.255 - * the pre-compiled expression re1 and thus avoid the overhead of 6.256 - * compiling the expression at runtime. If you require more dynamic 6.257 - * regular expressions, you can construct a single RECompiler object and 6.258 - * re-use it to compile each expression. Similarly, you can change the 6.259 - * program run by a given matcher object at any time. However, RE and 6.260 - * RECompiler are not threadsafe (for efficiency reasons, and because 6.261 - * requiring thread safety in this class is deemed to be a rare 6.262 - * requirement), so you will need to construct a separate compiler or 6.263 - * matcher object for each thread (unless you do thread synchronization 6.264 - * yourself). Once expression compiled into the REProgram object, REProgram 6.265 - * can be safely shared across multiple threads and RE objects. 6.266 - * 6.267 - * <br><p><br> 6.268 - * 6.269 - * <font color="red"> 6.270 - * <i>ISSUES:</i> 6.271 - * 6.272 - * <ul> 6.273 - * <li>com.weusours.util.re is not currently compatible with all 6.274 - * standard POSIX regcomp flags</li> 6.275 - * <li>com.weusours.util.re does not support POSIX equivalence classes 6.276 - * ([=foo=] syntax) (I18N/locale issue)</li> 6.277 - * <li>com.weusours.util.re does not support nested POSIX character 6.278 - * classes (definitely should, but not completely trivial)</li> 6.279 - * <li>com.weusours.util.re Does not support POSIX character collation 6.280 - * concepts ([.foo.] syntax) (I18N/locale issue)</li> 6.281 - * <li>Should there be different matching styles (simple, POSIX, Perl etc?)</li> 6.282 - * <li>Should RE support character iterators (for backwards RE matching!)?</li> 6.283 - * <li>Should RE support reluctant {m,n} closures (does anyone care)?</li> 6.284 - * <li>Not *all* possibilities are considered for greediness when backreferences 6.285 - * are involved (as POSIX suggests should be the case). The POSIX RE 6.286 - * "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match 6.287 - * of acdacaa where \1 is "a". This is not the case in this RE package, 6.288 - * and actually Perl doesn't go to this extent either! Until someone 6.289 - * actually complains about this, I'm not sure it's worth "fixing". 6.290 - * If it ever is fixed, test #137 in RETest.txt should be updated.</li> 6.291 - * </ul> 6.292 - * 6.293 - * </font> 6.294 - * 6.295 - * @see recompile 6.296 - * @see RECompiler 6.297 - * 6.298 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 6.299 - * @author <a href="mailto:ts@sch-fer.de">Tobias Schäfer</a> 6.300 - */ 6.301 -public class RE implements Serializable 6.302 -{ 6.303 - /** 6.304 - * Specifies normal, case-sensitive matching behaviour. 6.305 - */ 6.306 - public static final int MATCH_NORMAL = 0x0000; 6.307 - 6.308 - /** 6.309 - * Flag to indicate that matching should be case-independent (folded) 6.310 - */ 6.311 - public static final int MATCH_CASEINDEPENDENT = 0x0001; 6.312 - 6.313 - /** 6.314 - * Newlines should match as BOL/EOL (^ and $) 6.315 - */ 6.316 - public static final int MATCH_MULTILINE = 0x0002; 6.317 - 6.318 - /** 6.319 - * Consider all input a single body of text - newlines are matched by . 6.320 - */ 6.321 - public static final int MATCH_SINGLELINE = 0x0004; 6.322 - 6.323 - /************************************************ 6.324 - * * 6.325 - * The format of a node in a program is: * 6.326 - * * 6.327 - * [ OPCODE ] [ OPDATA ] [ OPNEXT ] [ OPERAND ] * 6.328 - * * 6.329 - * char OPCODE - instruction * 6.330 - * char OPDATA - modifying data * 6.331 - * char OPNEXT - next node (relative offset) * 6.332 - * * 6.333 - ************************************************/ 6.334 - 6.335 - // Opcode Char Opdata/Operand Meaning 6.336 - // ---------- ---------- --------------- -------------------------------------------------- 6.337 - static final char OP_END = 'E'; // end of program 6.338 - static final char OP_BOL = '^'; // match only if at beginning of line 6.339 - static final char OP_EOL = '$'; // match only if at end of line 6.340 - static final char OP_ANY = '.'; // match any single character except newline 6.341 - static final char OP_ANYOF = '['; // count/ranges match any char in the list of ranges 6.342 - static final char OP_BRANCH = '|'; // node match this alternative or the next one 6.343 - static final char OP_ATOM = 'A'; // length/string length of string followed by string itself 6.344 - static final char OP_STAR = '*'; // node kleene closure 6.345 - static final char OP_PLUS = '+'; // node positive closure 6.346 - static final char OP_MAYBE = '?'; // node optional closure 6.347 - static final char OP_ESCAPE = '\\'; // escape special escape code char class (escape is E_* code) 6.348 - static final char OP_OPEN = '('; // number nth opening paren 6.349 - static final char OP_OPEN_CLUSTER = '<'; // opening cluster 6.350 - static final char OP_CLOSE = ')'; // number nth closing paren 6.351 - static final char OP_CLOSE_CLUSTER = '>'; // closing cluster 6.352 - static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string 6.353 - static final char OP_GOTO = 'G'; // nothing but a (back-)pointer 6.354 - static final char OP_NOTHING = 'N'; // match null string such as in '(a|)' 6.355 - static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '*' (mnemonic for char is unshifted '*') 6.356 - static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+') 6.357 - static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?') 6.358 - static final char OP_POSIXCLASS = 'P'; // classid one of the posix character classes 6.359 - 6.360 - // Escape codes 6.361 - static final char E_ALNUM = 'w'; // Alphanumeric 6.362 - static final char E_NALNUM = 'W'; // Non-alphanumeric 6.363 - static final char E_BOUND = 'b'; // Word boundary 6.364 - static final char E_NBOUND = 'B'; // Non-word boundary 6.365 - static final char E_SPACE = 's'; // Whitespace 6.366 - static final char E_NSPACE = 'S'; // Non-whitespace 6.367 - static final char E_DIGIT = 'd'; // Digit 6.368 - static final char E_NDIGIT = 'D'; // Non-digit 6.369 - 6.370 - // Posix character classes 6.371 - static final char POSIX_CLASS_ALNUM = 'w'; // Alphanumerics 6.372 - static final char POSIX_CLASS_ALPHA = 'a'; // Alphabetics 6.373 - static final char POSIX_CLASS_BLANK = 'b'; // Blanks 6.374 - static final char POSIX_CLASS_CNTRL = 'c'; // Control characters 6.375 - static final char POSIX_CLASS_DIGIT = 'd'; // Digits 6.376 - static final char POSIX_CLASS_GRAPH = 'g'; // Graphic characters 6.377 - static final char POSIX_CLASS_LOWER = 'l'; // Lowercase characters 6.378 - static final char POSIX_CLASS_PRINT = 'p'; // Printable characters 6.379 - static final char POSIX_CLASS_PUNCT = '!'; // Punctuation 6.380 - static final char POSIX_CLASS_SPACE = 's'; // Spaces 6.381 - static final char POSIX_CLASS_UPPER = 'u'; // Uppercase characters 6.382 - static final char POSIX_CLASS_XDIGIT = 'x'; // Hexadecimal digits 6.383 - static final char POSIX_CLASS_JSTART = 'j'; // Java identifier start 6.384 - static final char POSIX_CLASS_JPART = 'k'; // Java identifier part 6.385 - 6.386 - // Limits 6.387 - static final int maxNode = 65536; // Maximum number of nodes in a program 6.388 - static final int MAX_PAREN = 16; // Number of paren pairs (only 9 can be backrefs) 6.389 - 6.390 - // Node layout constants 6.391 - static final int offsetOpcode = 0; // Opcode offset (first character) 6.392 - static final int offsetOpdata = 1; // Opdata offset (second char) 6.393 - static final int offsetNext = 2; // Next index offset (third char) 6.394 - static final int nodeSize = 3; // Node size (in chars) 6.395 - 6.396 - // State of current program 6.397 - REProgram program; // Compiled regular expression 'program' 6.398 - transient CharacterIterator search; // The string being matched against 6.399 - int matchFlags; // Match behaviour flags 6.400 - int maxParen = MAX_PAREN; 6.401 - 6.402 - // Parenthesized subexpressions 6.403 - transient int parenCount; // Number of subexpressions matched (num open parens + 1) 6.404 - transient int start0; // Cache of start[0] 6.405 - transient int end0; // Cache of start[0] 6.406 - transient int start1; // Cache of start[1] 6.407 - transient int end1; // Cache of start[1] 6.408 - transient int start2; // Cache of start[2] 6.409 - transient int end2; // Cache of start[2] 6.410 - transient int[] startn; // Lazy-alloced array of sub-expression starts 6.411 - transient int[] endn; // Lazy-alloced array of sub-expression ends 6.412 - 6.413 - // Backreferences 6.414 - transient int[] startBackref; // Lazy-alloced array of backref starts 6.415 - transient int[] endBackref; // Lazy-alloced array of backref ends 6.416 - 6.417 - /** 6.418 - * Constructs a regular expression matcher from a String by compiling it 6.419 - * using a new instance of RECompiler. If you will be compiling many 6.420 - * expressions, you may prefer to use a single RECompiler object instead. 6.421 - * 6.422 - * @param pattern The regular expression pattern to compile. 6.423 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 6.424 - * @see RECompiler 6.425 - * @see recompile 6.426 - */ 6.427 - public RE(String pattern) throws RESyntaxException 6.428 - { 6.429 - this(pattern, MATCH_NORMAL); 6.430 - } 6.431 - 6.432 - /** 6.433 - * Constructs a regular expression matcher from a String by compiling it 6.434 - * using a new instance of RECompiler. If you will be compiling many 6.435 - * expressions, you may prefer to use a single RECompiler object instead. 6.436 - * 6.437 - * @param pattern The regular expression pattern to compile. 6.438 - * @param matchFlags The matching style 6.439 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 6.440 - * @see RECompiler 6.441 - * @see recompile 6.442 - */ 6.443 - public RE(String pattern, int matchFlags) throws RESyntaxException 6.444 - { 6.445 - this(new RECompiler().compile(pattern)); 6.446 - setMatchFlags(matchFlags); 6.447 - } 6.448 - 6.449 - /** 6.450 - * Construct a matcher for a pre-compiled regular expression from program 6.451 - * (bytecode) data. Permits special flags to be passed in to modify matching 6.452 - * behaviour. 6.453 - * 6.454 - * @param program Compiled regular expression program (see RECompiler and/or recompile) 6.455 - * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*): 6.456 - * 6.457 - * <pre> 6.458 - * MATCH_NORMAL // Normal (case-sensitive) matching 6.459 - * MATCH_CASEINDEPENDENT // Case folded comparisons 6.460 - * MATCH_MULTILINE // Newline matches as BOL/EOL 6.461 - * </pre> 6.462 - * 6.463 - * @see RECompiler 6.464 - * @see REProgram 6.465 - * @see recompile 6.466 - */ 6.467 - public RE(REProgram program, int matchFlags) 6.468 - { 6.469 - setProgram(program); 6.470 - setMatchFlags(matchFlags); 6.471 - } 6.472 - 6.473 - /** 6.474 - * Construct a matcher for a pre-compiled regular expression from program 6.475 - * (bytecode) data. 6.476 - * 6.477 - * @param program Compiled regular expression program 6.478 - * @see RECompiler 6.479 - * @see recompile 6.480 - */ 6.481 - public RE(REProgram program) 6.482 - { 6.483 - this(program, MATCH_NORMAL); 6.484 - } 6.485 - 6.486 - /** 6.487 - * Constructs a regular expression matcher with no initial program. 6.488 - * This is likely to be an uncommon practice, but is still supported. 6.489 - */ 6.490 - public RE() 6.491 - { 6.492 - this((REProgram)null, MATCH_NORMAL); 6.493 - } 6.494 - 6.495 - /** 6.496 - * Converts a 'simplified' regular expression to a full regular expression 6.497 - * 6.498 - * @param pattern The pattern to convert 6.499 - * @return The full regular expression 6.500 - */ 6.501 - public static String simplePatternToFullRegularExpression(String pattern) 6.502 - { 6.503 - StringBuffer buf = new StringBuffer(); 6.504 - for (int i = 0; i < pattern.length(); i++) 6.505 - { 6.506 - char c = pattern.charAt(i); 6.507 - switch (c) 6.508 - { 6.509 - case '*': 6.510 - buf.append(".*"); 6.511 - break; 6.512 - 6.513 - case '.': 6.514 - case '[': 6.515 - case ']': 6.516 - case '\\': 6.517 - case '+': 6.518 - case '?': 6.519 - case '{': 6.520 - case '}': 6.521 - case '$': 6.522 - case '^': 6.523 - case '|': 6.524 - case '(': 6.525 - case ')': 6.526 - buf.append('\\'); 6.527 - default: 6.528 - buf.append(c); 6.529 - break; 6.530 - } 6.531 - } 6.532 - return buf.toString(); 6.533 - } 6.534 - 6.535 - /** 6.536 - * Sets match behaviour flags which alter the way RE does matching. 6.537 - * @param matchFlags One or more of the RE match behaviour flags (RE.MATCH_*): 6.538 - * 6.539 - * <pre> 6.540 - * MATCH_NORMAL // Normal (case-sensitive) matching 6.541 - * MATCH_CASEINDEPENDENT // Case folded comparisons 6.542 - * MATCH_MULTILINE // Newline matches as BOL/EOL 6.543 - * </pre> 6.544 - */ 6.545 - public void setMatchFlags(int matchFlags) 6.546 - { 6.547 - this.matchFlags = matchFlags; 6.548 - } 6.549 - 6.550 - /** 6.551 - * Returns the current match behaviour flags. 6.552 - * @return Current match behaviour flags (RE.MATCH_*). 6.553 - * 6.554 - * <pre> 6.555 - * MATCH_NORMAL // Normal (case-sensitive) matching 6.556 - * MATCH_CASEINDEPENDENT // Case folded comparisons 6.557 - * MATCH_MULTILINE // Newline matches as BOL/EOL 6.558 - * </pre> 6.559 - * 6.560 - * @see #setMatchFlags 6.561 - */ 6.562 - public int getMatchFlags() 6.563 - { 6.564 - return matchFlags; 6.565 - } 6.566 - 6.567 - /** 6.568 - * Sets the current regular expression program used by this matcher object. 6.569 - * 6.570 - * @param program Regular expression program compiled by RECompiler. 6.571 - * @see RECompiler 6.572 - * @see REProgram 6.573 - * @see recompile 6.574 - */ 6.575 - public void setProgram(REProgram program) 6.576 - { 6.577 - this.program = program; 6.578 - if (program != null && program.maxParens != -1) { 6.579 - this.maxParen = program.maxParens; 6.580 - } else { 6.581 - this.maxParen = MAX_PAREN; 6.582 - } 6.583 - } 6.584 - 6.585 - /** 6.586 - * Returns the current regular expression program in use by this matcher object. 6.587 - * 6.588 - * @return Regular expression program 6.589 - * @see #setProgram 6.590 - */ 6.591 - public REProgram getProgram() 6.592 - { 6.593 - return program; 6.594 - } 6.595 - 6.596 - /** 6.597 - * Returns the number of parenthesized subexpressions available after a successful match. 6.598 - * 6.599 - * @return Number of available parenthesized subexpressions 6.600 - */ 6.601 - public int getParenCount() 6.602 - { 6.603 - return parenCount; 6.604 - } 6.605 - 6.606 - /** 6.607 - * Gets the contents of a parenthesized subexpression after a successful match. 6.608 - * 6.609 - * @param which Nesting level of subexpression 6.610 - * @return String 6.611 - */ 6.612 - public String getParen(int which) 6.613 - { 6.614 - int start; 6.615 - if (which < parenCount && (start = getParenStart(which)) >= 0) 6.616 - { 6.617 - return search.substring(start, getParenEnd(which)); 6.618 - } 6.619 - return null; 6.620 - } 6.621 - 6.622 - /** 6.623 - * Returns the start index of a given paren level. 6.624 - * 6.625 - * @param which Nesting level of subexpression 6.626 - * @return String index 6.627 - */ 6.628 - public final int getParenStart(int which) 6.629 - { 6.630 - if (which < parenCount) 6.631 - { 6.632 - switch (which) 6.633 - { 6.634 - case 0: 6.635 - return start0; 6.636 - 6.637 - case 1: 6.638 - return start1; 6.639 - 6.640 - case 2: 6.641 - return start2; 6.642 - 6.643 - default: 6.644 - if (startn == null) 6.645 - { 6.646 - allocParens(); 6.647 - } 6.648 - return startn[which]; 6.649 - } 6.650 - } 6.651 - return -1; 6.652 - } 6.653 - 6.654 - /** 6.655 - * Returns the end index of a given paren level. 6.656 - * 6.657 - * @param which Nesting level of subexpression 6.658 - * @return String index 6.659 - */ 6.660 - public final int getParenEnd(int which) 6.661 - { 6.662 - if (which < parenCount) 6.663 - { 6.664 - switch (which) 6.665 - { 6.666 - case 0: 6.667 - return end0; 6.668 - 6.669 - case 1: 6.670 - return end1; 6.671 - 6.672 - case 2: 6.673 - return end2; 6.674 - 6.675 - default: 6.676 - if (endn == null) 6.677 - { 6.678 - allocParens(); 6.679 - } 6.680 - return endn[which]; 6.681 - } 6.682 - } 6.683 - return -1; 6.684 - } 6.685 - 6.686 - /** 6.687 - * Returns the length of a given paren level. 6.688 - * 6.689 - * @param which Nesting level of subexpression 6.690 - * @return Number of characters in the parenthesized subexpression 6.691 - */ 6.692 - public final int getParenLength(int which) 6.693 - { 6.694 - if (which < parenCount) 6.695 - { 6.696 - return getParenEnd(which) - getParenStart(which); 6.697 - } 6.698 - return -1; 6.699 - } 6.700 - 6.701 - /** 6.702 - * Sets the start of a paren level 6.703 - * 6.704 - * @param which Which paren level 6.705 - * @param i Index in input array 6.706 - */ 6.707 - protected final void setParenStart(int which, int i) 6.708 - { 6.709 - if (which < parenCount) 6.710 - { 6.711 - switch (which) 6.712 - { 6.713 - case 0: 6.714 - start0 = i; 6.715 - break; 6.716 - 6.717 - case 1: 6.718 - start1 = i; 6.719 - break; 6.720 - 6.721 - case 2: 6.722 - start2 = i; 6.723 - break; 6.724 - 6.725 - default: 6.726 - if (startn == null) 6.727 - { 6.728 - allocParens(); 6.729 - } 6.730 - startn[which] = i; 6.731 - break; 6.732 - } 6.733 - } 6.734 - } 6.735 - 6.736 - /** 6.737 - * Sets the end of a paren level 6.738 - * 6.739 - * @param which Which paren level 6.740 - * @param i Index in input array 6.741 - */ 6.742 - protected final void setParenEnd(int which, int i) 6.743 - { 6.744 - if (which < parenCount) 6.745 - { 6.746 - switch (which) 6.747 - { 6.748 - case 0: 6.749 - end0 = i; 6.750 - break; 6.751 - 6.752 - case 1: 6.753 - end1 = i; 6.754 - break; 6.755 - 6.756 - case 2: 6.757 - end2 = i; 6.758 - break; 6.759 - 6.760 - default: 6.761 - if (endn == null) 6.762 - { 6.763 - allocParens(); 6.764 - } 6.765 - endn[which] = i; 6.766 - break; 6.767 - } 6.768 - } 6.769 - } 6.770 - 6.771 - /** 6.772 - * Throws an Error representing an internal error condition probably resulting 6.773 - * from a bug in the regular expression compiler (or possibly data corruption). 6.774 - * In practice, this should be very rare. 6.775 - * 6.776 - * @param s Error description 6.777 - */ 6.778 - protected void internalError(String s) throws Error 6.779 - { 6.780 - throw new Error("RE internal error: " + s); 6.781 - } 6.782 - 6.783 - /** 6.784 - * Performs lazy allocation of subexpression arrays 6.785 - */ 6.786 - private final void allocParens() 6.787 - { 6.788 - // Allocate arrays for subexpressions 6.789 - startn = new int[maxParen]; 6.790 - endn = new int[maxParen]; 6.791 - 6.792 - // Set sub-expression pointers to invalid values 6.793 - for (int i = 0; i < maxParen; i++) 6.794 - { 6.795 - startn[i] = -1; 6.796 - endn[i] = -1; 6.797 - } 6.798 - } 6.799 - 6.800 - /** 6.801 - * Try to match a string against a subset of nodes in the program 6.802 - * 6.803 - * @param firstNode Node to start at in program 6.804 - * @param lastNode Last valid node (used for matching a subexpression without 6.805 - * matching the rest of the program as well). 6.806 - * @param idxStart Starting position in character array 6.807 - * @return Final input array index if match succeeded. -1 if not. 6.808 - */ 6.809 - protected int matchNodes(int firstNode, int lastNode, int idxStart) 6.810 - { 6.811 - // Our current place in the string 6.812 - int idx = idxStart; 6.813 - 6.814 - // Loop while node is valid 6.815 - int next, opcode, opdata; 6.816 - int idxNew; 6.817 - char[] instruction = program.instruction; 6.818 - for (int node = firstNode; node < lastNode; ) 6.819 - { 6.820 - opcode = instruction[node + offsetOpcode]; 6.821 - next = node + (short)instruction[node + offsetNext]; 6.822 - opdata = instruction[node + offsetOpdata]; 6.823 - 6.824 - switch (opcode) 6.825 - { 6.826 - case OP_RELUCTANTMAYBE: 6.827 - { 6.828 - int once = 0; 6.829 - do 6.830 - { 6.831 - // Try to match the rest without using the reluctant subexpr 6.832 - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 6.833 - { 6.834 - return idxNew; 6.835 - } 6.836 - } 6.837 - while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1); 6.838 - return -1; 6.839 - } 6.840 - 6.841 - case OP_RELUCTANTPLUS: 6.842 - while ((idx = matchNodes(node + nodeSize, next, idx)) != -1) 6.843 - { 6.844 - // Try to match the rest without using the reluctant subexpr 6.845 - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 6.846 - { 6.847 - return idxNew; 6.848 - } 6.849 - } 6.850 - return -1; 6.851 - 6.852 - case OP_RELUCTANTSTAR: 6.853 - do 6.854 - { 6.855 - // Try to match the rest without using the reluctant subexpr 6.856 - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 6.857 - { 6.858 - return idxNew; 6.859 - } 6.860 - } 6.861 - while ((idx = matchNodes(node + nodeSize, next, idx)) != -1); 6.862 - return -1; 6.863 - 6.864 - case OP_OPEN: 6.865 - 6.866 - // Match subexpression 6.867 - if ((program.flags & REProgram.OPT_HASBACKREFS) != 0) 6.868 - { 6.869 - startBackref[opdata] = idx; 6.870 - } 6.871 - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 6.872 - { 6.873 - // Increase valid paren count 6.874 - if ((opdata + 1) > parenCount) 6.875 - { 6.876 - parenCount = opdata + 1; 6.877 - } 6.878 - 6.879 - // Don't set paren if already set later on 6.880 - if (getParenStart(opdata) == -1) 6.881 - { 6.882 - setParenStart(opdata, idx); 6.883 - } 6.884 - } 6.885 - return idxNew; 6.886 - 6.887 - case OP_CLOSE: 6.888 - 6.889 - // Done matching subexpression 6.890 - if ((program.flags & REProgram.OPT_HASBACKREFS) != 0) 6.891 - { 6.892 - endBackref[opdata] = idx; 6.893 - } 6.894 - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) 6.895 - { 6.896 - // Increase valid paren count 6.897 - if ((opdata + 1) > parenCount) 6.898 - { 6.899 - parenCount = opdata + 1; 6.900 - } 6.901 - 6.902 - // Don't set paren if already set later on 6.903 - if (getParenEnd(opdata) == -1) 6.904 - { 6.905 - setParenEnd(opdata, idx); 6.906 - } 6.907 - } 6.908 - return idxNew; 6.909 - 6.910 - case OP_OPEN_CLUSTER: 6.911 - case OP_CLOSE_CLUSTER: 6.912 - // starting or ending the matching of a subexpression which has no backref. 6.913 - return matchNodes( next, maxNode, idx ); 6.914 - 6.915 - case OP_BACKREF: 6.916 - { 6.917 - // Get the start and end of the backref 6.918 - int s = startBackref[opdata]; 6.919 - int e = endBackref[opdata]; 6.920 - 6.921 - // We don't know the backref yet 6.922 - if (s == -1 || e == -1) 6.923 - { 6.924 - return -1; 6.925 - } 6.926 - 6.927 - // The backref is empty size 6.928 - if (s == e) 6.929 - { 6.930 - break; 6.931 - } 6.932 - 6.933 - // Get the length of the backref 6.934 - int l = e - s; 6.935 - 6.936 - // If there's not enough input left, give up. 6.937 - if (search.isEnd(idx + l - 1)) 6.938 - { 6.939 - return -1; 6.940 - } 6.941 - 6.942 - // Case fold the backref? 6.943 - final boolean caseFold = 6.944 - ((matchFlags & MATCH_CASEINDEPENDENT) != 0); 6.945 - // Compare backref to input 6.946 - for (int i = 0; i < l; i++) 6.947 - { 6.948 - if (compareChars(search.charAt(idx++), search.charAt(s + i), caseFold) != 0) 6.949 - { 6.950 - return -1; 6.951 - } 6.952 - } 6.953 - } 6.954 - break; 6.955 - 6.956 - case OP_BOL: 6.957 - 6.958 - // Fail if we're not at the start of the string 6.959 - if (idx != 0) 6.960 - { 6.961 - // If we're multiline matching, we could still be at the start of a line 6.962 - if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE) 6.963 - { 6.964 - // If not at start of line, give up 6.965 - if (idx <= 0 || !isNewline(idx - 1)) { 6.966 - return -1; 6.967 - } else { 6.968 - break; 6.969 - } 6.970 - } 6.971 - return -1; 6.972 - } 6.973 - break; 6.974 - 6.975 - case OP_EOL: 6.976 - 6.977 - // If we're not at the end of string 6.978 - if (!search.isEnd(0) && !search.isEnd(idx)) 6.979 - { 6.980 - // If we're multi-line matching 6.981 - if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE) 6.982 - { 6.983 - // Give up if we're not at the end of a line 6.984 - if (!isNewline(idx)) { 6.985 - return -1; 6.986 - } else { 6.987 - break; 6.988 - } 6.989 - } 6.990 - return -1; 6.991 - } 6.992 - break; 6.993 - 6.994 - case OP_ESCAPE: 6.995 - 6.996 - // Which escape? 6.997 - switch (opdata) 6.998 - { 6.999 - // Word boundary match 6.1000 - case E_NBOUND: 6.1001 - case E_BOUND: 6.1002 - { 6.1003 - char cLast = ((idx == 0) ? '\n' : search.charAt(idx - 1)); 6.1004 - char cNext = ((search.isEnd(idx)) ? '\n' : search.charAt(idx)); 6.1005 - if ((Character.isLetterOrDigit(cLast) == Character.isLetterOrDigit(cNext)) == (opdata == E_BOUND)) 6.1006 - { 6.1007 - return -1; 6.1008 - } 6.1009 - } 6.1010 - break; 6.1011 - 6.1012 - // Alpha-numeric, digit, space, javaLetter, javaLetterOrDigit 6.1013 - case E_ALNUM: 6.1014 - case E_NALNUM: 6.1015 - case E_DIGIT: 6.1016 - case E_NDIGIT: 6.1017 - case E_SPACE: 6.1018 - case E_NSPACE: 6.1019 - 6.1020 - // Give up if out of input 6.1021 - if (search.isEnd(idx)) 6.1022 - { 6.1023 - return -1; 6.1024 - } 6.1025 - 6.1026 - char c = search.charAt(idx); 6.1027 - 6.1028 - // Switch on escape 6.1029 - switch (opdata) 6.1030 - { 6.1031 - case E_ALNUM: 6.1032 - case E_NALNUM: 6.1033 - if (!((Character.isLetterOrDigit(c) || c == '_') == (opdata == E_ALNUM))) 6.1034 - { 6.1035 - return -1; 6.1036 - } 6.1037 - break; 6.1038 - 6.1039 - case E_DIGIT: 6.1040 - case E_NDIGIT: 6.1041 - if (!(Character.isDigit(c) == (opdata == E_DIGIT))) 6.1042 - { 6.1043 - return -1; 6.1044 - } 6.1045 - break; 6.1046 - 6.1047 - case E_SPACE: 6.1048 - case E_NSPACE: 6.1049 - if (!(Character.isWhitespace(c) == (opdata == E_SPACE))) 6.1050 - { 6.1051 - return -1; 6.1052 - } 6.1053 - break; 6.1054 - } 6.1055 - idx++; 6.1056 - break; 6.1057 - 6.1058 - default: 6.1059 - internalError("Unrecognized escape '" + opdata + "'"); 6.1060 - } 6.1061 - break; 6.1062 - 6.1063 - case OP_ANY: 6.1064 - 6.1065 - if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) { 6.1066 - // Match anything 6.1067 - if (search.isEnd(idx)) 6.1068 - { 6.1069 - return -1; 6.1070 - } 6.1071 - } 6.1072 - else 6.1073 - { 6.1074 - // Match anything but a newline 6.1075 - if (search.isEnd(idx) || isNewline(idx)) 6.1076 - { 6.1077 - return -1; 6.1078 - } 6.1079 - } 6.1080 - idx++; 6.1081 - break; 6.1082 - 6.1083 - case OP_ATOM: 6.1084 - { 6.1085 - // Match an atom value 6.1086 - if (search.isEnd(idx)) 6.1087 - { 6.1088 - return -1; 6.1089 - } 6.1090 - 6.1091 - // Get length of atom and starting index 6.1092 - int lenAtom = opdata; 6.1093 - int startAtom = node + nodeSize; 6.1094 - 6.1095 - // Give up if not enough input remains to have a match 6.1096 - if (search.isEnd(lenAtom + idx - 1)) 6.1097 - { 6.1098 - return -1; 6.1099 - } 6.1100 - 6.1101 - // Match atom differently depending on casefolding flag 6.1102 - final boolean caseFold = 6.1103 - ((matchFlags & MATCH_CASEINDEPENDENT) != 0); 6.1104 - 6.1105 - for (int i = 0; i < lenAtom; i++) 6.1106 - { 6.1107 - if (compareChars(search.charAt(idx++), instruction[startAtom + i], caseFold) != 0) 6.1108 - { 6.1109 - return -1; 6.1110 - } 6.1111 - } 6.1112 - } 6.1113 - break; 6.1114 - 6.1115 - case OP_POSIXCLASS: 6.1116 - { 6.1117 - // Out of input? 6.1118 - if (search.isEnd(idx)) 6.1119 - { 6.1120 - return -1; 6.1121 - } 6.1122 - 6.1123 - switch (opdata) 6.1124 - { 6.1125 - case POSIX_CLASS_ALNUM: 6.1126 - if (!Character.isLetterOrDigit(search.charAt(idx))) 6.1127 - { 6.1128 - return -1; 6.1129 - } 6.1130 - break; 6.1131 - 6.1132 - case POSIX_CLASS_ALPHA: 6.1133 - if (!Character.isLetter(search.charAt(idx))) 6.1134 - { 6.1135 - return -1; 6.1136 - } 6.1137 - break; 6.1138 - 6.1139 - case POSIX_CLASS_DIGIT: 6.1140 - if (!Character.isDigit(search.charAt(idx))) 6.1141 - { 6.1142 - return -1; 6.1143 - } 6.1144 - break; 6.1145 - 6.1146 - case POSIX_CLASS_BLANK: // JWL - bugbug: is this right?? 6.1147 - if (!Character.isSpaceChar(search.charAt(idx))) 6.1148 - { 6.1149 - return -1; 6.1150 - } 6.1151 - break; 6.1152 - 6.1153 - case POSIX_CLASS_SPACE: 6.1154 - if (!Character.isWhitespace(search.charAt(idx))) 6.1155 - { 6.1156 - return -1; 6.1157 - } 6.1158 - break; 6.1159 - 6.1160 - case POSIX_CLASS_CNTRL: 6.1161 - if (Character.getType(search.charAt(idx)) != Character.CONTROL) 6.1162 - { 6.1163 - return -1; 6.1164 - } 6.1165 - break; 6.1166 - 6.1167 - case POSIX_CLASS_GRAPH: // JWL - bugbug??? 6.1168 - switch (Character.getType(search.charAt(idx))) 6.1169 - { 6.1170 - case Character.MATH_SYMBOL: 6.1171 - case Character.CURRENCY_SYMBOL: 6.1172 - case Character.MODIFIER_SYMBOL: 6.1173 - case Character.OTHER_SYMBOL: 6.1174 - break; 6.1175 - 6.1176 - default: 6.1177 - return -1; 6.1178 - } 6.1179 - break; 6.1180 - 6.1181 - case POSIX_CLASS_LOWER: 6.1182 - if (Character.getType(search.charAt(idx)) != Character.LOWERCASE_LETTER) 6.1183 - { 6.1184 - return -1; 6.1185 - } 6.1186 - break; 6.1187 - 6.1188 - case POSIX_CLASS_UPPER: 6.1189 - if (Character.getType(search.charAt(idx)) != Character.UPPERCASE_LETTER) 6.1190 - { 6.1191 - return -1; 6.1192 - } 6.1193 - break; 6.1194 - 6.1195 - case POSIX_CLASS_PRINT: 6.1196 - if (Character.getType(search.charAt(idx)) == Character.CONTROL) 6.1197 - { 6.1198 - return -1; 6.1199 - } 6.1200 - break; 6.1201 - 6.1202 - case POSIX_CLASS_PUNCT: 6.1203 - { 6.1204 - int type = Character.getType(search.charAt(idx)); 6.1205 - switch(type) 6.1206 - { 6.1207 - case Character.DASH_PUNCTUATION: 6.1208 - case Character.START_PUNCTUATION: 6.1209 - case Character.END_PUNCTUATION: 6.1210 - case Character.CONNECTOR_PUNCTUATION: 6.1211 - case Character.OTHER_PUNCTUATION: 6.1212 - break; 6.1213 - 6.1214 - default: 6.1215 - return -1; 6.1216 - } 6.1217 - } 6.1218 - break; 6.1219 - 6.1220 - case POSIX_CLASS_XDIGIT: // JWL - bugbug?? 6.1221 - { 6.1222 - boolean isXDigit = ((search.charAt(idx) >= '0' && search.charAt(idx) <= '9') || 6.1223 - (search.charAt(idx) >= 'a' && search.charAt(idx) <= 'f') || 6.1224 - (search.charAt(idx) >= 'A' && search.charAt(idx) <= 'F')); 6.1225 - if (!isXDigit) 6.1226 - { 6.1227 - return -1; 6.1228 - } 6.1229 - } 6.1230 - break; 6.1231 - 6.1232 - case POSIX_CLASS_JSTART: 6.1233 - if (!Character.isJavaIdentifierStart(search.charAt(idx))) 6.1234 - { 6.1235 - return -1; 6.1236 - } 6.1237 - break; 6.1238 - 6.1239 - case POSIX_CLASS_JPART: 6.1240 - if (!Character.isJavaIdentifierPart(search.charAt(idx))) 6.1241 - { 6.1242 - return -1; 6.1243 - } 6.1244 - break; 6.1245 - 6.1246 - default: 6.1247 - internalError("Bad posix class"); 6.1248 - break; 6.1249 - } 6.1250 - 6.1251 - // Matched. 6.1252 - idx++; 6.1253 - } 6.1254 - break; 6.1255 - 6.1256 - case OP_ANYOF: 6.1257 - { 6.1258 - // Out of input? 6.1259 - if (search.isEnd(idx)) 6.1260 - { 6.1261 - return -1; 6.1262 - } 6.1263 - 6.1264 - // Get character to match against character class and maybe casefold 6.1265 - char c = search.charAt(idx); 6.1266 - boolean caseFold = (matchFlags & MATCH_CASEINDEPENDENT) != 0; 6.1267 - // Loop through character class checking our match character 6.1268 - int idxRange = node + nodeSize; 6.1269 - int idxEnd = idxRange + (opdata * 2); 6.1270 - boolean match = false; 6.1271 - for (int i = idxRange; !match && i < idxEnd; ) 6.1272 - { 6.1273 - // Get start, end and match characters 6.1274 - char s = instruction[i++]; 6.1275 - char e = instruction[i++]; 6.1276 - 6.1277 - match = ((compareChars(c, s, caseFold) >= 0) 6.1278 - && (compareChars(c, e, caseFold) <= 0)); 6.1279 - } 6.1280 - 6.1281 - // Fail if we didn't match the character class 6.1282 - if (!match) 6.1283 - { 6.1284 - return -1; 6.1285 - } 6.1286 - idx++; 6.1287 - } 6.1288 - break; 6.1289 - 6.1290 - case OP_BRANCH: 6.1291 - { 6.1292 - // Check for choices 6.1293 - if (instruction[next + offsetOpcode] != OP_BRANCH) 6.1294 - { 6.1295 - // If there aren't any other choices, just evaluate this branch. 6.1296 - node += nodeSize; 6.1297 - continue; 6.1298 - } 6.1299 - 6.1300 - // Try all available branches 6.1301 - short nextBranch; 6.1302 - do 6.1303 - { 6.1304 - // Try matching the branch against the string 6.1305 - if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1) 6.1306 - { 6.1307 - return idxNew; 6.1308 - } 6.1309 - 6.1310 - // Go to next branch (if any) 6.1311 - nextBranch = (short)instruction[node + offsetNext]; 6.1312 - node += nextBranch; 6.1313 - } 6.1314 - while (nextBranch != 0 && (instruction[node + offsetOpcode] == OP_BRANCH)); 6.1315 - 6.1316 - // Failed to match any branch! 6.1317 - return -1; 6.1318 - } 6.1319 - 6.1320 - case OP_NOTHING: 6.1321 - case OP_GOTO: 6.1322 - 6.1323 - // Just advance to the next node without doing anything 6.1324 - break; 6.1325 - 6.1326 - case OP_END: 6.1327 - 6.1328 - // Match has succeeded! 6.1329 - setParenEnd(0, idx); 6.1330 - return idx; 6.1331 - 6.1332 - default: 6.1333 - 6.1334 - // Corrupt program 6.1335 - internalError("Invalid opcode '" + opcode + "'"); 6.1336 - } 6.1337 - 6.1338 - // Advance to the next node in the program 6.1339 - node = next; 6.1340 - } 6.1341 - 6.1342 - // We "should" never end up here 6.1343 - internalError("Corrupt program"); 6.1344 - return -1; 6.1345 - } 6.1346 - 6.1347 - /** 6.1348 - * Match the current regular expression program against the current 6.1349 - * input string, starting at index i of the input string. This method 6.1350 - * is only meant for internal use. 6.1351 - * 6.1352 - * @param i The input string index to start matching at 6.1353 - * @return True if the input matched the expression 6.1354 - */ 6.1355 - protected boolean matchAt(int i) 6.1356 - { 6.1357 - // Initialize start pointer, paren cache and paren count 6.1358 - start0 = -1; 6.1359 - end0 = -1; 6.1360 - start1 = -1; 6.1361 - end1 = -1; 6.1362 - start2 = -1; 6.1363 - end2 = -1; 6.1364 - startn = null; 6.1365 - endn = null; 6.1366 - parenCount = 1; 6.1367 - setParenStart(0, i); 6.1368 - 6.1369 - // Allocate backref arrays (unless optimizations indicate otherwise) 6.1370 - if ((program.flags & REProgram.OPT_HASBACKREFS) != 0) 6.1371 - { 6.1372 - startBackref = new int[maxParen]; 6.1373 - endBackref = new int[maxParen]; 6.1374 - } 6.1375 - 6.1376 - // Match against string 6.1377 - int idx; 6.1378 - if ((idx = matchNodes(0, maxNode, i)) != -1) 6.1379 - { 6.1380 - setParenEnd(0, idx); 6.1381 - return true; 6.1382 - } 6.1383 - 6.1384 - // Didn't match 6.1385 - parenCount = 0; 6.1386 - return false; 6.1387 - } 6.1388 - 6.1389 - /** 6.1390 - * Matches the current regular expression program against a character array, 6.1391 - * starting at a given index. 6.1392 - * 6.1393 - * @param search String to match against 6.1394 - * @param i Index to start searching at 6.1395 - * @return True if string matched 6.1396 - */ 6.1397 - public boolean match(String search, int i) 6.1398 - { 6.1399 - return match(new StringCharacterIterator(search), i); 6.1400 - } 6.1401 - 6.1402 - /** 6.1403 - * Matches the current regular expression program against a character array, 6.1404 - * starting at a given index. 6.1405 - * 6.1406 - * @param search String to match against 6.1407 - * @param i Index to start searching at 6.1408 - * @return True if string matched 6.1409 - */ 6.1410 - public boolean match(CharacterIterator search, int i) 6.1411 - { 6.1412 - // There is no compiled program to search with! 6.1413 - if (program == null) 6.1414 - { 6.1415 - // This should be uncommon enough to be an error case rather 6.1416 - // than an exception (which would have to be handled everywhere) 6.1417 - internalError("No RE program to run!"); 6.1418 - } 6.1419 - 6.1420 - // Save string to search 6.1421 - this.search = search; 6.1422 - 6.1423 - // Can we optimize the search by looking for a prefix string? 6.1424 - if (program.prefix == null) 6.1425 - { 6.1426 - // Unprefixed matching must try for a match at each character 6.1427 - for ( ;! search.isEnd(i - 1); i++) 6.1428 - { 6.1429 - // Try a match at index i 6.1430 - if (matchAt(i)) 6.1431 - { 6.1432 - return true; 6.1433 - } 6.1434 - } 6.1435 - return false; 6.1436 - } 6.1437 - else 6.1438 - { 6.1439 - // Prefix-anchored matching is possible 6.1440 - boolean caseIndependent = (matchFlags & MATCH_CASEINDEPENDENT) != 0; 6.1441 - char[] prefix = program.prefix; 6.1442 - for ( ; !search.isEnd(i + prefix.length - 1); i++) 6.1443 - { 6.1444 - int j = i; 6.1445 - int k = 0; 6.1446 - 6.1447 - boolean match; 6.1448 - do { 6.1449 - // If there's a mismatch of any character in the prefix, give up 6.1450 - match = (compareChars(search.charAt(j++), prefix[k++], caseIndependent) == 0); 6.1451 - } while (match && k < prefix.length); 6.1452 - 6.1453 - // See if the whole prefix string matched 6.1454 - if (k == prefix.length) 6.1455 - { 6.1456 - // We matched the full prefix at firstChar, so try it 6.1457 - if (matchAt(i)) 6.1458 - { 6.1459 - return true; 6.1460 - } 6.1461 - } 6.1462 - } 6.1463 - return false; 6.1464 - } 6.1465 - } 6.1466 - 6.1467 - /** 6.1468 - * Matches the current regular expression program against a String. 6.1469 - * 6.1470 - * @param search String to match against 6.1471 - * @return True if string matched 6.1472 - */ 6.1473 - public boolean match(String search) 6.1474 - { 6.1475 - return match(search, 0); 6.1476 - } 6.1477 - 6.1478 - /** 6.1479 - * Splits a string into an array of strings on regular expression boundaries. 6.1480 - * This function works the same way as the Perl function of the same name. 6.1481 - * Given a regular expression of "[ab]+" and a string to split of 6.1482 - * "xyzzyababbayyzabbbab123", the result would be the array of Strings 6.1483 - * "[xyzzy, yyz, 123]". 6.1484 - * 6.1485 - * <p>Please note that the first string in the resulting array may be an empty 6.1486 - * string. This happens when the very first character of input string is 6.1487 - * matched by the pattern. 6.1488 - * 6.1489 - * @param s String to split on this regular exression 6.1490 - * @return Array of strings 6.1491 - */ 6.1492 - public String[] split(String s) 6.1493 - { 6.1494 - // Create new vector 6.1495 - Vector v = new Vector(); 6.1496 - 6.1497 - // Start at position 0 and search the whole string 6.1498 - int pos = 0; 6.1499 - int len = s.length(); 6.1500 - 6.1501 - // Try a match at each position 6.1502 - while (pos < len && match(s, pos)) 6.1503 - { 6.1504 - // Get start of match 6.1505 - int start = getParenStart(0); 6.1506 - 6.1507 - // Get end of match 6.1508 - int newpos = getParenEnd(0); 6.1509 - 6.1510 - // Check if no progress was made 6.1511 - if (newpos == pos) 6.1512 - { 6.1513 - v.addElement(s.substring(pos, start + 1)); 6.1514 - newpos++; 6.1515 - } 6.1516 - else 6.1517 - { 6.1518 - v.addElement(s.substring(pos, start)); 6.1519 - } 6.1520 - 6.1521 - // Move to new position 6.1522 - pos = newpos; 6.1523 - } 6.1524 - 6.1525 - // Push remainder if it's not empty 6.1526 - String remainder = s.substring(pos); 6.1527 - if (remainder.length() != 0) 6.1528 - { 6.1529 - v.addElement(remainder); 6.1530 - } 6.1531 - 6.1532 - // Return vector as an array of strings 6.1533 - String[] ret = new String[v.size()]; 6.1534 - v.copyInto(ret); 6.1535 - return ret; 6.1536 - } 6.1537 - 6.1538 - /** 6.1539 - * Flag bit that indicates that subst should replace all occurrences of this 6.1540 - * regular expression. 6.1541 - */ 6.1542 - public static final int REPLACE_ALL = 0x0000; 6.1543 - 6.1544 - /** 6.1545 - * Flag bit that indicates that subst should only replace the first occurrence 6.1546 - * of this regular expression. 6.1547 - */ 6.1548 - public static final int REPLACE_FIRSTONLY = 0x0001; 6.1549 - 6.1550 - /** 6.1551 - * Flag bit that indicates that subst should replace backreferences 6.1552 - */ 6.1553 - public static final int REPLACE_BACKREFERENCES = 0x0002; 6.1554 - 6.1555 - /** 6.1556 - * Substitutes a string for this regular expression in another string. 6.1557 - * This method works like the Perl function of the same name. 6.1558 - * Given a regular expression of "a*b", a String to substituteIn of 6.1559 - * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the 6.1560 - * resulting String returned by subst would be "-foo-garply-wacky-". 6.1561 - * 6.1562 - * @param substituteIn String to substitute within 6.1563 - * @param substitution String to substitute for all matches of this regular expression. 6.1564 - * @return The string substituteIn with zero or more occurrences of the current 6.1565 - * regular expression replaced with the substitution String (if this regular 6.1566 - * expression object doesn't match at any position, the original String is returned 6.1567 - * unchanged). 6.1568 - */ 6.1569 - public String subst(String substituteIn, String substitution) 6.1570 - { 6.1571 - return subst(substituteIn, substitution, REPLACE_ALL); 6.1572 - } 6.1573 - 6.1574 - /** 6.1575 - * Substitutes a string for this regular expression in another string. 6.1576 - * This method works like the Perl function of the same name. 6.1577 - * Given a regular expression of "a*b", a String to substituteIn of 6.1578 - * "aaaabfooaaabgarplyaaabwackyb" and the substitution String "-", the 6.1579 - * resulting String returned by subst would be "-foo-garply-wacky-". 6.1580 - * <p> 6.1581 - * It is also possible to reference the contents of a parenthesized expression 6.1582 - * with $0, $1, ... $9. A regular expression of "http://[\\.\\w\\-\\?/~_@&=%]+", 6.1583 - * a String to substituteIn of "visit us: http://www.apache.org!" and the 6.1584 - * substitution String "<a href=\"$0\">$0</a>", the resulting String 6.1585 - * returned by subst would be 6.1586 - * "visit us: <a href=\"http://www.apache.org\">http://www.apache.org</a>!". 6.1587 - * <p> 6.1588 - * <i>Note:</i> $0 represents the whole match. 6.1589 - * 6.1590 - * @param substituteIn String to substitute within 6.1591 - * @param substitution String to substitute for matches of this regular expression 6.1592 - * @param flags One or more bitwise flags from REPLACE_*. If the REPLACE_FIRSTONLY 6.1593 - * flag bit is set, only the first occurrence of this regular expression is replaced. 6.1594 - * If the bit is not set (REPLACE_ALL), all occurrences of this pattern will be 6.1595 - * replaced. If the flag REPLACE_BACKREFERENCES is set, all backreferences will 6.1596 - * be processed. 6.1597 - * @return The string substituteIn with zero or more occurrences of the current 6.1598 - * regular expression replaced with the substitution String (if this regular 6.1599 - * expression object doesn't match at any position, the original String is returned 6.1600 - * unchanged). 6.1601 - */ 6.1602 - public String subst(String substituteIn, String substitution, int flags) 6.1603 - { 6.1604 - // String to return 6.1605 - StringBuffer ret = new StringBuffer(); 6.1606 - 6.1607 - // Start at position 0 and search the whole string 6.1608 - int pos = 0; 6.1609 - int len = substituteIn.length(); 6.1610 - 6.1611 - // Try a match at each position 6.1612 - while (pos < len && match(substituteIn, pos)) 6.1613 - { 6.1614 - // Append string before match 6.1615 - ret.append(substituteIn.substring(pos, getParenStart(0))); 6.1616 - 6.1617 - if ((flags & REPLACE_BACKREFERENCES) != 0) 6.1618 - { 6.1619 - // Process backreferences 6.1620 - int lCurrentPosition = 0; 6.1621 - int lLastPosition = -2; 6.1622 - int lLength = substitution.length(); 6.1623 - boolean bAddedPrefix = false; 6.1624 - 6.1625 - while ((lCurrentPosition = substitution.indexOf("$", lCurrentPosition)) >= 0) 6.1626 - { 6.1627 - if ((lCurrentPosition == 0 || substitution.charAt(lCurrentPosition - 1) != '\\') 6.1628 - && lCurrentPosition+1 < lLength) 6.1629 - { 6.1630 - char c = substitution.charAt(lCurrentPosition + 1); 6.1631 - if (c >= '0' && c <= '9') 6.1632 - { 6.1633 - if (bAddedPrefix == false) 6.1634 - { 6.1635 - // Append everything between the beginning of the 6.1636 - // substitution string and the current $ sign 6.1637 - ret.append(substitution.substring(0, lCurrentPosition)); 6.1638 - bAddedPrefix = true; 6.1639 - } 6.1640 - else 6.1641 - { 6.1642 - // Append everything between the last and the current $ sign 6.1643 - ret.append(substitution.substring(lLastPosition + 2, lCurrentPosition)); 6.1644 - } 6.1645 - 6.1646 - // Append the parenthesized expression 6.1647 - // Note: if a parenthesized expression of the requested 6.1648 - // index is not available "null" is added to the string 6.1649 - ret.append(getParen(c - '0')); 6.1650 - lLastPosition = lCurrentPosition; 6.1651 - } 6.1652 - } 6.1653 - 6.1654 - // Move forward, skipping past match 6.1655 - lCurrentPosition++; 6.1656 - } 6.1657 - 6.1658 - // Append everything after the last $ sign 6.1659 - ret.append(substitution.substring(lLastPosition + 2, lLength)); 6.1660 - } 6.1661 - else 6.1662 - { 6.1663 - // Append substitution without processing backreferences 6.1664 - ret.append(substitution); 6.1665 - } 6.1666 - 6.1667 - // Move forward, skipping past match 6.1668 - int newpos = getParenEnd(0); 6.1669 - 6.1670 - // We always want to make progress! 6.1671 - if (newpos == pos) 6.1672 - { 6.1673 - newpos++; 6.1674 - } 6.1675 - 6.1676 - // Try new position 6.1677 - pos = newpos; 6.1678 - 6.1679 - // Break out if we're only supposed to replace one occurrence 6.1680 - if ((flags & REPLACE_FIRSTONLY) != 0) 6.1681 - { 6.1682 - break; 6.1683 - } 6.1684 - } 6.1685 - 6.1686 - // If there's remaining input, append it 6.1687 - if (pos < len) 6.1688 - { 6.1689 - ret.append(substituteIn.substring(pos)); 6.1690 - } 6.1691 - 6.1692 - // Return string buffer as string 6.1693 - return ret.toString(); 6.1694 - } 6.1695 - 6.1696 - /** 6.1697 - * Returns an array of Strings, whose toString representation matches a regular 6.1698 - * expression. This method works like the Perl function of the same name. Given 6.1699 - * a regular expression of "a*b" and an array of String objects of [foo, aab, zzz, 6.1700 - * aaaab], the array of Strings returned by grep would be [aab, aaaab]. 6.1701 - * 6.1702 - * @param search Array of Objects to search 6.1703 - * @return Array of Strings whose toString() value matches this regular expression. 6.1704 - */ 6.1705 - public String[] grep(Object[] search) 6.1706 - { 6.1707 - // Create new vector to hold return items 6.1708 - Vector v = new Vector(); 6.1709 - 6.1710 - // Traverse array of objects 6.1711 - for (int i = 0; i < search.length; i++) 6.1712 - { 6.1713 - // Get next object as a string 6.1714 - String s = search[i].toString(); 6.1715 - 6.1716 - // If it matches this regexp, add it to the list 6.1717 - if (match(s)) 6.1718 - { 6.1719 - v.addElement(s); 6.1720 - } 6.1721 - } 6.1722 - 6.1723 - // Return vector as an array of strings 6.1724 - String[] ret = new String[v.size()]; 6.1725 - v.copyInto(ret); 6.1726 - return ret; 6.1727 - } 6.1728 - 6.1729 - /** 6.1730 - * @return true if character at i-th position in the <code>search</code> string is a newline 6.1731 - */ 6.1732 - private boolean isNewline(int i) 6.1733 - { 6.1734 - char nextChar = search.charAt(i); 6.1735 - 6.1736 - if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085' 6.1737 - || nextChar == '\u2028' || nextChar == '\u2029') 6.1738 - { 6.1739 - return true; 6.1740 - } 6.1741 - 6.1742 - return false; 6.1743 - } 6.1744 - 6.1745 - /** 6.1746 - * Compares two characters. 6.1747 - * 6.1748 - * @param c1 first character to compare. 6.1749 - * @param c2 second character to compare. 6.1750 - * @param caseIndependent whether comparision is case insensitive or not. 6.1751 - * @return negative, 0, or positive integer as the first character 6.1752 - * less than, equal to, or greater then the second. 6.1753 - */ 6.1754 - private int compareChars(char c1, char c2, boolean caseIndependent) 6.1755 - { 6.1756 - if (caseIndependent) 6.1757 - { 6.1758 - c1 = Character.toLowerCase(c1); 6.1759 - c2 = Character.toLowerCase(c2); 6.1760 - } 6.1761 - return ((int)c1 - (int)c2); 6.1762 - } 6.1763 -}
7.1 --- a/src/com/sun/org/apache/regexp/internal/RECompiler.java Sat Oct 24 16:18:47 2020 +0800 7.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 7.3 @@ -1,1520 +0,0 @@ 7.4 -/* 7.5 - * reserved comment block 7.6 - * DO NOT REMOVE OR ALTER! 7.7 - */ 7.8 -/* 7.9 - * Copyright 1999-2004 The Apache Software Foundation. 7.10 - * 7.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 7.12 - * you may not use this file except in compliance with the License. 7.13 - * You may obtain a copy of the License at 7.14 - * 7.15 - * http://www.apache.org/licenses/LICENSE-2.0 7.16 - * 7.17 - * Unless required by applicable law or agreed to in writing, software 7.18 - * distributed under the License is distributed on an "AS IS" BASIS, 7.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 7.20 - * See the License for the specific language governing permissions and 7.21 - * limitations under the License. 7.22 - */ 7.23 - 7.24 -package com.sun.org.apache.regexp.internal; 7.25 - 7.26 -import com.sun.org.apache.regexp.internal.RE; 7.27 -import java.util.Hashtable; 7.28 - 7.29 -/** 7.30 - * A regular expression compiler class. This class compiles a pattern string into a 7.31 - * regular expression program interpretable by the RE evaluator class. The 'recompile' 7.32 - * command line tool uses this compiler to pre-compile regular expressions for use 7.33 - * with RE. For a description of the syntax accepted by RECompiler and what you can 7.34 - * do with regular expressions, see the documentation for the RE matcher class. 7.35 - * 7.36 - * @see RE 7.37 - * @see recompile 7.38 - * 7.39 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 7.40 - * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> 7.41 - */ 7.42 -public class RECompiler 7.43 -{ 7.44 - // The compiled program 7.45 - char[] instruction; // The compiled RE 'program' instruction buffer 7.46 - int lenInstruction; // The amount of the program buffer currently in use 7.47 - 7.48 - // Input state for compiling regular expression 7.49 - String pattern; // Input string 7.50 - int len; // Length of the pattern string 7.51 - int idx; // Current input index into ac 7.52 - int parens; // Total number of paren pairs 7.53 - 7.54 - // Node flags 7.55 - static final int NODE_NORMAL = 0; // No flags (nothing special) 7.56 - static final int NODE_NULLABLE = 1; // True if node is potentially null 7.57 - static final int NODE_TOPLEVEL = 2; // True if top level expr 7.58 - 7.59 - // Special types of 'escapes' 7.60 - static final int ESC_MASK = 0xffff0; // Escape complexity mask 7.61 - static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference 7.62 - static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character 7.63 - static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters 7.64 - 7.65 - // {m,n} stacks 7.66 - int maxBrackets = 10; // Maximum number of bracket pairs 7.67 - static final int bracketUnbounded = -1; // Unbounded value 7.68 - int brackets = 0; // Number of bracket sets 7.69 - int[] bracketStart = null; // Starting point 7.70 - int[] bracketEnd = null; // Ending point 7.71 - int[] bracketMin = null; // Minimum number of matches 7.72 - int[] bracketOpt = null; // Additional optional matches 7.73 - 7.74 - // Lookup table for POSIX character class names 7.75 - static Hashtable hashPOSIX = new Hashtable(); 7.76 - static 7.77 - { 7.78 - hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM)); 7.79 - hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA)); 7.80 - hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK)); 7.81 - hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL)); 7.82 - hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT)); 7.83 - hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH)); 7.84 - hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER)); 7.85 - hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT)); 7.86 - hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT)); 7.87 - hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE)); 7.88 - hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER)); 7.89 - hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT)); 7.90 - hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART)); 7.91 - hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART)); 7.92 - } 7.93 - 7.94 - /** 7.95 - * Constructor. Creates (initially empty) storage for a regular expression program. 7.96 - */ 7.97 - public RECompiler() 7.98 - { 7.99 - // Start off with a generous, yet reasonable, initial size 7.100 - instruction = new char[128]; 7.101 - lenInstruction = 0; 7.102 - } 7.103 - 7.104 - /** 7.105 - * Ensures that n more characters can fit in the program buffer. 7.106 - * If n more can't fit, then the size is doubled until it can. 7.107 - * @param n Number of additional characters to ensure will fit. 7.108 - */ 7.109 - void ensure(int n) 7.110 - { 7.111 - // Get current program length 7.112 - int curlen = instruction.length; 7.113 - 7.114 - // If the current length + n more is too much 7.115 - if (lenInstruction + n >= curlen) 7.116 - { 7.117 - // Double the size of the program array until n more will fit 7.118 - while (lenInstruction + n >= curlen) 7.119 - { 7.120 - curlen *= 2; 7.121 - } 7.122 - 7.123 - // Allocate new program array and move data into it 7.124 - char[] newInstruction = new char[curlen]; 7.125 - System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction); 7.126 - instruction = newInstruction; 7.127 - } 7.128 - } 7.129 - 7.130 - /** 7.131 - * Emit a single character into the program stream. 7.132 - * @param c Character to add 7.133 - */ 7.134 - void emit(char c) 7.135 - { 7.136 - // Make room for character 7.137 - ensure(1); 7.138 - 7.139 - // Add character 7.140 - instruction[lenInstruction++] = c; 7.141 - } 7.142 - 7.143 - /** 7.144 - * Inserts a node with a given opcode and opdata at insertAt. The node relative next 7.145 - * pointer is initialized to 0. 7.146 - * @param opcode Opcode for new node 7.147 - * @param opdata Opdata for new node (only the low 16 bits are currently used) 7.148 - * @param insertAt Index at which to insert the new node in the program 7.149 - */ 7.150 - void nodeInsert(char opcode, int opdata, int insertAt) 7.151 - { 7.152 - // Make room for a new node 7.153 - ensure(RE.nodeSize); 7.154 - 7.155 - // Move everything from insertAt to the end down nodeSize elements 7.156 - System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt); 7.157 - instruction[insertAt + RE.offsetOpcode] = opcode; 7.158 - instruction[insertAt + RE.offsetOpdata] = (char)opdata; 7.159 - instruction[insertAt + RE.offsetNext] = 0; 7.160 - lenInstruction += RE.nodeSize; 7.161 - } 7.162 - 7.163 - /** 7.164 - * Appends a node to the end of a node chain 7.165 - * @param node Start of node chain to traverse 7.166 - * @param pointTo Node to have the tail of the chain point to 7.167 - */ 7.168 - void setNextOfEnd(int node, int pointTo) 7.169 - { 7.170 - // Traverse the chain until the next offset is 0 7.171 - int next = instruction[node + RE.offsetNext]; 7.172 - // while the 'node' is not the last in the chain 7.173 - // and the 'node' is not the last in the program. 7.174 - while ( next != 0 && node < lenInstruction ) 7.175 - { 7.176 - // if the node we are supposed to point to is in the chain then 7.177 - // point to the end of the program instead. 7.178 - // Michael McCallum <gholam@xtra.co.nz> 7.179 - // FIXME: // This is a _hack_ to stop infinite programs. 7.180 - // I believe that the implementation of the reluctant matches is wrong but 7.181 - // have not worked out a better way yet. 7.182 - if ( node == pointTo ) { 7.183 - pointTo = lenInstruction; 7.184 - } 7.185 - node += next; 7.186 - next = instruction[node + RE.offsetNext]; 7.187 - } 7.188 - // if we have reached the end of the program then dont set the pointTo. 7.189 - // im not sure if this will break any thing but passes all the tests. 7.190 - if ( node < lenInstruction ) { 7.191 - // Point the last node in the chain to pointTo. 7.192 - instruction[node + RE.offsetNext] = (char)(short)(pointTo - node); 7.193 - } 7.194 - } 7.195 - 7.196 - /** 7.197 - * Adds a new node 7.198 - * @param opcode Opcode for node 7.199 - * @param opdata Opdata for node (only the low 16 bits are currently used) 7.200 - * @return Index of new node in program 7.201 - */ 7.202 - int node(char opcode, int opdata) 7.203 - { 7.204 - // Make room for a new node 7.205 - ensure(RE.nodeSize); 7.206 - 7.207 - // Add new node at end 7.208 - instruction[lenInstruction + RE.offsetOpcode] = opcode; 7.209 - instruction[lenInstruction + RE.offsetOpdata] = (char)opdata; 7.210 - instruction[lenInstruction + RE.offsetNext] = 0; 7.211 - lenInstruction += RE.nodeSize; 7.212 - 7.213 - // Return index of new node 7.214 - return lenInstruction - RE.nodeSize; 7.215 - } 7.216 - 7.217 - 7.218 - /** 7.219 - * Throws a new internal error exception 7.220 - * @exception Error Thrown in the event of an internal error. 7.221 - */ 7.222 - void internalError() throws Error 7.223 - { 7.224 - throw new Error("Internal error!"); 7.225 - } 7.226 - 7.227 - /** 7.228 - * Throws a new syntax error exception 7.229 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.230 - */ 7.231 - void syntaxError(String s) throws RESyntaxException 7.232 - { 7.233 - throw new RESyntaxException(s); 7.234 - } 7.235 - 7.236 - /** 7.237 - * Allocate storage for brackets only as needed 7.238 - */ 7.239 - void allocBrackets() 7.240 - { 7.241 - // Allocate bracket stacks if not already done 7.242 - if (bracketStart == null) 7.243 - { 7.244 - // Allocate storage 7.245 - bracketStart = new int[maxBrackets]; 7.246 - bracketEnd = new int[maxBrackets]; 7.247 - bracketMin = new int[maxBrackets]; 7.248 - bracketOpt = new int[maxBrackets]; 7.249 - 7.250 - // Initialize to invalid values 7.251 - for (int i = 0; i < maxBrackets; i++) 7.252 - { 7.253 - bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1; 7.254 - } 7.255 - } 7.256 - } 7.257 - 7.258 - /** Enlarge storage for brackets only as needed. */ 7.259 - synchronized void reallocBrackets() { 7.260 - // trick the tricky 7.261 - if (bracketStart == null) { 7.262 - allocBrackets(); 7.263 - } 7.264 - 7.265 - int new_size = maxBrackets * 2; 7.266 - int[] new_bS = new int[new_size]; 7.267 - int[] new_bE = new int[new_size]; 7.268 - int[] new_bM = new int[new_size]; 7.269 - int[] new_bO = new int[new_size]; 7.270 - // Initialize to invalid values 7.271 - for (int i=brackets; i<new_size; i++) { 7.272 - new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1; 7.273 - } 7.274 - System.arraycopy(bracketStart,0, new_bS,0, brackets); 7.275 - System.arraycopy(bracketEnd,0, new_bE,0, brackets); 7.276 - System.arraycopy(bracketMin,0, new_bM,0, brackets); 7.277 - System.arraycopy(bracketOpt,0, new_bO,0, brackets); 7.278 - bracketStart = new_bS; 7.279 - bracketEnd = new_bE; 7.280 - bracketMin = new_bM; 7.281 - bracketOpt = new_bO; 7.282 - maxBrackets = new_size; 7.283 - } 7.284 - 7.285 - /** 7.286 - * Match bracket {m,n} expression put results in bracket member variables 7.287 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.288 - */ 7.289 - void bracket() throws RESyntaxException 7.290 - { 7.291 - // Current character must be a '{' 7.292 - if (idx >= len || pattern.charAt(idx++) != '{') 7.293 - { 7.294 - internalError(); 7.295 - } 7.296 - 7.297 - // Next char must be a digit 7.298 - if (idx >= len || !Character.isDigit(pattern.charAt(idx))) 7.299 - { 7.300 - syntaxError("Expected digit"); 7.301 - } 7.302 - 7.303 - // Get min ('m' of {m,n}) number 7.304 - StringBuffer number = new StringBuffer(); 7.305 - while (idx < len && Character.isDigit(pattern.charAt(idx))) 7.306 - { 7.307 - number.append(pattern.charAt(idx++)); 7.308 - } 7.309 - try 7.310 - { 7.311 - bracketMin[brackets] = Integer.parseInt(number.toString()); 7.312 - } 7.313 - catch (NumberFormatException e) 7.314 - { 7.315 - syntaxError("Expected valid number"); 7.316 - } 7.317 - 7.318 - // If out of input, fail 7.319 - if (idx >= len) 7.320 - { 7.321 - syntaxError("Expected comma or right bracket"); 7.322 - } 7.323 - 7.324 - // If end of expr, optional limit is 0 7.325 - if (pattern.charAt(idx) == '}') 7.326 - { 7.327 - idx++; 7.328 - bracketOpt[brackets] = 0; 7.329 - return; 7.330 - } 7.331 - 7.332 - // Must have at least {m,} and maybe {m,n}. 7.333 - if (idx >= len || pattern.charAt(idx++) != ',') 7.334 - { 7.335 - syntaxError("Expected comma"); 7.336 - } 7.337 - 7.338 - // If out of input, fail 7.339 - if (idx >= len) 7.340 - { 7.341 - syntaxError("Expected comma or right bracket"); 7.342 - } 7.343 - 7.344 - // If {m,} max is unlimited 7.345 - if (pattern.charAt(idx) == '}') 7.346 - { 7.347 - idx++; 7.348 - bracketOpt[brackets] = bracketUnbounded; 7.349 - return; 7.350 - } 7.351 - 7.352 - // Next char must be a digit 7.353 - if (idx >= len || !Character.isDigit(pattern.charAt(idx))) 7.354 - { 7.355 - syntaxError("Expected digit"); 7.356 - } 7.357 - 7.358 - // Get max number 7.359 - number.setLength(0); 7.360 - while (idx < len && Character.isDigit(pattern.charAt(idx))) 7.361 - { 7.362 - number.append(pattern.charAt(idx++)); 7.363 - } 7.364 - try 7.365 - { 7.366 - bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets]; 7.367 - } 7.368 - catch (NumberFormatException e) 7.369 - { 7.370 - syntaxError("Expected valid number"); 7.371 - } 7.372 - 7.373 - // Optional repetitions must be >= 0 7.374 - if (bracketOpt[brackets] < 0) 7.375 - { 7.376 - syntaxError("Bad range"); 7.377 - } 7.378 - 7.379 - // Must have close brace 7.380 - if (idx >= len || pattern.charAt(idx++) != '}') 7.381 - { 7.382 - syntaxError("Missing close brace"); 7.383 - } 7.384 - } 7.385 - 7.386 - /** 7.387 - * Match an escape sequence. Handles quoted chars and octal escapes as well 7.388 - * as normal escape characters. Always advances the input stream by the 7.389 - * right amount. This code "understands" the subtle difference between an 7.390 - * octal escape and a backref. You can access the type of ESC_CLASS or 7.391 - * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1]. 7.392 - * @return ESC_* code or character if simple escape 7.393 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.394 - */ 7.395 - int escape() throws RESyntaxException 7.396 - { 7.397 - // "Shouldn't" happen 7.398 - if (pattern.charAt(idx) != '\\') 7.399 - { 7.400 - internalError(); 7.401 - } 7.402 - 7.403 - // Escape shouldn't occur as last character in string! 7.404 - if (idx + 1 == len) 7.405 - { 7.406 - syntaxError("Escape terminates string"); 7.407 - } 7.408 - 7.409 - // Switch on character after backslash 7.410 - idx += 2; 7.411 - char escapeChar = pattern.charAt(idx - 1); 7.412 - switch (escapeChar) 7.413 - { 7.414 - case RE.E_BOUND: 7.415 - case RE.E_NBOUND: 7.416 - return ESC_COMPLEX; 7.417 - 7.418 - case RE.E_ALNUM: 7.419 - case RE.E_NALNUM: 7.420 - case RE.E_SPACE: 7.421 - case RE.E_NSPACE: 7.422 - case RE.E_DIGIT: 7.423 - case RE.E_NDIGIT: 7.424 - return ESC_CLASS; 7.425 - 7.426 - case 'u': 7.427 - case 'x': 7.428 - { 7.429 - // Exact required hex digits for escape type 7.430 - int hexDigits = (escapeChar == 'u' ? 4 : 2); 7.431 - 7.432 - // Parse up to hexDigits characters from input 7.433 - int val = 0; 7.434 - for ( ; idx < len && hexDigits-- > 0; idx++) 7.435 - { 7.436 - // Get char 7.437 - char c = pattern.charAt(idx); 7.438 - 7.439 - // If it's a hexadecimal digit (0-9) 7.440 - if (c >= '0' && c <= '9') 7.441 - { 7.442 - // Compute new value 7.443 - val = (val << 4) + c - '0'; 7.444 - } 7.445 - else 7.446 - { 7.447 - // If it's a hexadecimal letter (a-f) 7.448 - c = Character.toLowerCase(c); 7.449 - if (c >= 'a' && c <= 'f') 7.450 - { 7.451 - // Compute new value 7.452 - val = (val << 4) + (c - 'a') + 10; 7.453 - } 7.454 - else 7.455 - { 7.456 - // If it's not a valid digit or hex letter, the escape must be invalid 7.457 - // because hexDigits of input have not been absorbed yet. 7.458 - syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar); 7.459 - } 7.460 - } 7.461 - } 7.462 - return val; 7.463 - } 7.464 - 7.465 - case 't': 7.466 - return '\t'; 7.467 - 7.468 - case 'n': 7.469 - return '\n'; 7.470 - 7.471 - case 'r': 7.472 - return '\r'; 7.473 - 7.474 - case 'f': 7.475 - return '\f'; 7.476 - 7.477 - case '0': 7.478 - case '1': 7.479 - case '2': 7.480 - case '3': 7.481 - case '4': 7.482 - case '5': 7.483 - case '6': 7.484 - case '7': 7.485 - case '8': 7.486 - case '9': 7.487 - 7.488 - // An octal escape starts with a 0 or has two digits in a row 7.489 - if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0') 7.490 - { 7.491 - // Handle \nnn octal escapes 7.492 - int val = escapeChar - '0'; 7.493 - if (idx < len && Character.isDigit(pattern.charAt(idx))) 7.494 - { 7.495 - val = ((val << 3) + (pattern.charAt(idx++) - '0')); 7.496 - if (idx < len && Character.isDigit(pattern.charAt(idx))) 7.497 - { 7.498 - val = ((val << 3) + (pattern.charAt(idx++) - '0')); 7.499 - } 7.500 - } 7.501 - return val; 7.502 - } 7.503 - 7.504 - // It's actually a backreference (\[1-9]), not an escape 7.505 - return ESC_BACKREF; 7.506 - 7.507 - default: 7.508 - 7.509 - // Simple quoting of a character 7.510 - return escapeChar; 7.511 - } 7.512 - } 7.513 - 7.514 - /** 7.515 - * Compile a character class 7.516 - * @return Index of class node 7.517 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.518 - */ 7.519 - int characterClass() throws RESyntaxException 7.520 - { 7.521 - // Check for bad calling or empty class 7.522 - if (pattern.charAt(idx) != '[') 7.523 - { 7.524 - internalError(); 7.525 - } 7.526 - 7.527 - // Check for unterminated or empty class 7.528 - if ((idx + 1) >= len || pattern.charAt(++idx) == ']') 7.529 - { 7.530 - syntaxError("Empty or unterminated class"); 7.531 - } 7.532 - 7.533 - // Check for POSIX character class 7.534 - if (idx < len && pattern.charAt(idx) == ':') 7.535 - { 7.536 - // Skip colon 7.537 - idx++; 7.538 - 7.539 - // POSIX character classes are denoted with lowercase ASCII strings 7.540 - int idxStart = idx; 7.541 - while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z') 7.542 - { 7.543 - idx++; 7.544 - } 7.545 - 7.546 - // Should be a ":]" to terminate the POSIX character class 7.547 - if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']') 7.548 - { 7.549 - // Get character class 7.550 - String charClass = pattern.substring(idxStart, idx); 7.551 - 7.552 - // Select the POSIX class id 7.553 - Character i = (Character)hashPOSIX.get(charClass); 7.554 - if (i != null) 7.555 - { 7.556 - // Move past colon and right bracket 7.557 - idx += 2; 7.558 - 7.559 - // Return new POSIX character class node 7.560 - return node(RE.OP_POSIXCLASS, i.charValue()); 7.561 - } 7.562 - syntaxError("Invalid POSIX character class '" + charClass + "'"); 7.563 - } 7.564 - syntaxError("Invalid POSIX character class syntax"); 7.565 - } 7.566 - 7.567 - // Try to build a class. Create OP_ANYOF node 7.568 - int ret = node(RE.OP_ANYOF, 0); 7.569 - 7.570 - // Parse class declaration 7.571 - char CHAR_INVALID = Character.MAX_VALUE; 7.572 - char last = CHAR_INVALID; 7.573 - char simpleChar = 0; 7.574 - boolean include = true; 7.575 - boolean definingRange = false; 7.576 - int idxFirst = idx; 7.577 - char rangeStart = Character.MIN_VALUE; 7.578 - char rangeEnd; 7.579 - RERange range = new RERange(); 7.580 - while (idx < len && pattern.charAt(idx) != ']') 7.581 - { 7.582 - 7.583 - switchOnCharacter: 7.584 - 7.585 - // Switch on character 7.586 - switch (pattern.charAt(idx)) 7.587 - { 7.588 - case '^': 7.589 - include = !include; 7.590 - if (idx == idxFirst) 7.591 - { 7.592 - range.include(Character.MIN_VALUE, Character.MAX_VALUE, true); 7.593 - } 7.594 - idx++; 7.595 - continue; 7.596 - 7.597 - case '\\': 7.598 - { 7.599 - // Escape always advances the stream 7.600 - int c; 7.601 - switch (c = escape ()) 7.602 - { 7.603 - case ESC_COMPLEX: 7.604 - case ESC_BACKREF: 7.605 - 7.606 - // Word boundaries and backrefs not allowed in a character class! 7.607 - syntaxError("Bad character class"); 7.608 - 7.609 - case ESC_CLASS: 7.610 - 7.611 - // Classes can't be an endpoint of a range 7.612 - if (definingRange) 7.613 - { 7.614 - syntaxError("Bad character class"); 7.615 - } 7.616 - 7.617 - // Handle specific type of class (some are ok) 7.618 - switch (pattern.charAt(idx - 1)) 7.619 - { 7.620 - case RE.E_NSPACE: 7.621 - case RE.E_NDIGIT: 7.622 - case RE.E_NALNUM: 7.623 - syntaxError("Bad character class"); 7.624 - 7.625 - case RE.E_SPACE: 7.626 - range.include('\t', include); 7.627 - range.include('\r', include); 7.628 - range.include('\f', include); 7.629 - range.include('\n', include); 7.630 - range.include('\b', include); 7.631 - range.include(' ', include); 7.632 - break; 7.633 - 7.634 - case RE.E_ALNUM: 7.635 - range.include('a', 'z', include); 7.636 - range.include('A', 'Z', include); 7.637 - range.include('_', include); 7.638 - 7.639 - // Fall through! 7.640 - 7.641 - case RE.E_DIGIT: 7.642 - range.include('0', '9', include); 7.643 - break; 7.644 - } 7.645 - 7.646 - // Make last char invalid (can't be a range start) 7.647 - last = CHAR_INVALID; 7.648 - break; 7.649 - 7.650 - default: 7.651 - 7.652 - // Escape is simple so treat as a simple char 7.653 - simpleChar = (char) c; 7.654 - break switchOnCharacter; 7.655 - } 7.656 - } 7.657 - continue; 7.658 - 7.659 - case '-': 7.660 - 7.661 - // Start a range if one isn't already started 7.662 - if (definingRange) 7.663 - { 7.664 - syntaxError("Bad class range"); 7.665 - } 7.666 - definingRange = true; 7.667 - 7.668 - // If no last character, start of range is 0 7.669 - rangeStart = (last == CHAR_INVALID ? 0 : last); 7.670 - 7.671 - // Premature end of range. define up to Character.MAX_VALUE 7.672 - if ((idx + 1) < len && pattern.charAt(++idx) == ']') 7.673 - { 7.674 - simpleChar = Character.MAX_VALUE; 7.675 - break; 7.676 - } 7.677 - continue; 7.678 - 7.679 - default: 7.680 - simpleChar = pattern.charAt(idx++); 7.681 - break; 7.682 - } 7.683 - 7.684 - // Handle simple character simpleChar 7.685 - if (definingRange) 7.686 - { 7.687 - // if we are defining a range make it now 7.688 - rangeEnd = simpleChar; 7.689 - 7.690 - // Actually create a range if the range is ok 7.691 - if (rangeStart >= rangeEnd) 7.692 - { 7.693 - syntaxError("Bad character class"); 7.694 - } 7.695 - range.include(rangeStart, rangeEnd, include); 7.696 - 7.697 - // We are done defining the range 7.698 - last = CHAR_INVALID; 7.699 - definingRange = false; 7.700 - } 7.701 - else 7.702 - { 7.703 - // If simple character and not start of range, include it 7.704 - if (idx >= len || pattern.charAt(idx) != '-') 7.705 - { 7.706 - range.include(simpleChar, include); 7.707 - } 7.708 - last = simpleChar; 7.709 - } 7.710 - } 7.711 - 7.712 - // Shouldn't be out of input 7.713 - if (idx == len) 7.714 - { 7.715 - syntaxError("Unterminated character class"); 7.716 - } 7.717 - 7.718 - // Absorb the ']' end of class marker 7.719 - idx++; 7.720 - 7.721 - // Emit character class definition 7.722 - instruction[ret + RE.offsetOpdata] = (char)range.num; 7.723 - for (int i = 0; i < range.num; i++) 7.724 - { 7.725 - emit((char)range.minRange[i]); 7.726 - emit((char)range.maxRange[i]); 7.727 - } 7.728 - return ret; 7.729 - } 7.730 - 7.731 - /** 7.732 - * Absorb an atomic character string. This method is a little tricky because 7.733 - * it can un-include the last character of string if a closure operator follows. 7.734 - * This is correct because *+? have higher precedence than concatentation (thus 7.735 - * ABC* means AB(C*) and NOT (ABC)*). 7.736 - * @return Index of new atom node 7.737 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.738 - */ 7.739 - int atom() throws RESyntaxException 7.740 - { 7.741 - // Create a string node 7.742 - int ret = node(RE.OP_ATOM, 0); 7.743 - 7.744 - // Length of atom 7.745 - int lenAtom = 0; 7.746 - 7.747 - // Loop while we've got input 7.748 - 7.749 - atomLoop: 7.750 - 7.751 - while (idx < len) 7.752 - { 7.753 - // Is there a next char? 7.754 - if ((idx + 1) < len) 7.755 - { 7.756 - char c = pattern.charAt(idx + 1); 7.757 - 7.758 - // If the next 'char' is an escape, look past the whole escape 7.759 - if (pattern.charAt(idx) == '\\') 7.760 - { 7.761 - int idxEscape = idx; 7.762 - escape(); 7.763 - if (idx < len) 7.764 - { 7.765 - c = pattern.charAt(idx); 7.766 - } 7.767 - idx = idxEscape; 7.768 - } 7.769 - 7.770 - // Switch on next char 7.771 - switch (c) 7.772 - { 7.773 - case '{': 7.774 - case '?': 7.775 - case '*': 7.776 - case '+': 7.777 - 7.778 - // If the next character is a closure operator and our atom is non-empty, the 7.779 - // current character should bind to the closure operator rather than the atom 7.780 - if (lenAtom != 0) 7.781 - { 7.782 - break atomLoop; 7.783 - } 7.784 - } 7.785 - } 7.786 - 7.787 - // Switch on current char 7.788 - switch (pattern.charAt(idx)) 7.789 - { 7.790 - case ']': 7.791 - case '^': 7.792 - case '$': 7.793 - case '.': 7.794 - case '[': 7.795 - case '(': 7.796 - case ')': 7.797 - case '|': 7.798 - break atomLoop; 7.799 - 7.800 - case '{': 7.801 - case '?': 7.802 - case '*': 7.803 - case '+': 7.804 - 7.805 - // We should have an atom by now 7.806 - if (lenAtom == 0) 7.807 - { 7.808 - // No atom before closure 7.809 - syntaxError("Missing operand to closure"); 7.810 - } 7.811 - break atomLoop; 7.812 - 7.813 - case '\\': 7.814 - 7.815 - { 7.816 - // Get the escaped character (advances input automatically) 7.817 - int idxBeforeEscape = idx; 7.818 - int c = escape(); 7.819 - 7.820 - // Check if it's a simple escape (as opposed to, say, a backreference) 7.821 - if ((c & ESC_MASK) == ESC_MASK) 7.822 - { 7.823 - // Not a simple escape, so backup to where we were before the escape. 7.824 - idx = idxBeforeEscape; 7.825 - break atomLoop; 7.826 - } 7.827 - 7.828 - // Add escaped char to atom 7.829 - emit((char) c); 7.830 - lenAtom++; 7.831 - } 7.832 - break; 7.833 - 7.834 - default: 7.835 - 7.836 - // Add normal character to atom 7.837 - emit(pattern.charAt(idx++)); 7.838 - lenAtom++; 7.839 - break; 7.840 - } 7.841 - } 7.842 - 7.843 - // This "shouldn't" happen 7.844 - if (lenAtom == 0) 7.845 - { 7.846 - internalError(); 7.847 - } 7.848 - 7.849 - // Emit the atom length into the program 7.850 - instruction[ret + RE.offsetOpdata] = (char)lenAtom; 7.851 - return ret; 7.852 - } 7.853 - 7.854 - /** 7.855 - * Match a terminal node. 7.856 - * @param flags Flags 7.857 - * @return Index of terminal node (closeable) 7.858 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.859 - */ 7.860 - int terminal(int[] flags) throws RESyntaxException 7.861 - { 7.862 - switch (pattern.charAt(idx)) 7.863 - { 7.864 - case RE.OP_EOL: 7.865 - case RE.OP_BOL: 7.866 - case RE.OP_ANY: 7.867 - return node(pattern.charAt(idx++), 0); 7.868 - 7.869 - case '[': 7.870 - return characterClass(); 7.871 - 7.872 - case '(': 7.873 - return expr(flags); 7.874 - 7.875 - case ')': 7.876 - syntaxError("Unexpected close paren"); 7.877 - 7.878 - case '|': 7.879 - internalError(); 7.880 - 7.881 - case ']': 7.882 - syntaxError("Mismatched class"); 7.883 - 7.884 - case 0: 7.885 - syntaxError("Unexpected end of input"); 7.886 - 7.887 - case '?': 7.888 - case '+': 7.889 - case '{': 7.890 - case '*': 7.891 - syntaxError("Missing operand to closure"); 7.892 - 7.893 - case '\\': 7.894 - { 7.895 - // Don't forget, escape() advances the input stream! 7.896 - int idxBeforeEscape = idx; 7.897 - 7.898 - // Switch on escaped character 7.899 - switch (escape()) 7.900 - { 7.901 - case ESC_CLASS: 7.902 - case ESC_COMPLEX: 7.903 - flags[0] &= ~NODE_NULLABLE; 7.904 - return node(RE.OP_ESCAPE, pattern.charAt(idx - 1)); 7.905 - 7.906 - case ESC_BACKREF: 7.907 - { 7.908 - char backreference = (char)(pattern.charAt(idx - 1) - '0'); 7.909 - if (parens <= backreference) 7.910 - { 7.911 - syntaxError("Bad backreference"); 7.912 - } 7.913 - flags[0] |= NODE_NULLABLE; 7.914 - return node(RE.OP_BACKREF, backreference); 7.915 - } 7.916 - 7.917 - default: 7.918 - 7.919 - // We had a simple escape and we want to have it end up in 7.920 - // an atom, so we back up and fall though to the default handling 7.921 - idx = idxBeforeEscape; 7.922 - flags[0] &= ~NODE_NULLABLE; 7.923 - break; 7.924 - } 7.925 - } 7.926 - } 7.927 - 7.928 - // Everything above either fails or returns. 7.929 - // If it wasn't one of the above, it must be the start of an atom. 7.930 - flags[0] &= ~NODE_NULLABLE; 7.931 - return atom(); 7.932 - } 7.933 - 7.934 - /** 7.935 - * Compile a possibly closured terminal 7.936 - * @param flags Flags passed by reference 7.937 - * @return Index of closured node 7.938 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.939 - */ 7.940 - int closure(int[] flags) throws RESyntaxException 7.941 - { 7.942 - // Before terminal 7.943 - int idxBeforeTerminal = idx; 7.944 - 7.945 - // Values to pass by reference to terminal() 7.946 - int[] terminalFlags = { NODE_NORMAL }; 7.947 - 7.948 - // Get terminal symbol 7.949 - int ret = terminal(terminalFlags); 7.950 - 7.951 - // Or in flags from terminal symbol 7.952 - flags[0] |= terminalFlags[0]; 7.953 - 7.954 - // Advance input, set NODE_NULLABLE flag and do sanity checks 7.955 - if (idx >= len) 7.956 - { 7.957 - return ret; 7.958 - } 7.959 - boolean greedy = true; 7.960 - char closureType = pattern.charAt(idx); 7.961 - switch (closureType) 7.962 - { 7.963 - case '?': 7.964 - case '*': 7.965 - 7.966 - // The current node can be null 7.967 - flags[0] |= NODE_NULLABLE; 7.968 - 7.969 - case '+': 7.970 - 7.971 - // Eat closure character 7.972 - idx++; 7.973 - 7.974 - case '{': 7.975 - 7.976 - // Don't allow blantant stupidity 7.977 - int opcode = instruction[ret + RE.offsetOpcode]; 7.978 - if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) 7.979 - { 7.980 - syntaxError("Bad closure operand"); 7.981 - } 7.982 - if ((terminalFlags[0] & NODE_NULLABLE) != 0) 7.983 - { 7.984 - syntaxError("Closure operand can't be nullable"); 7.985 - } 7.986 - break; 7.987 - } 7.988 - 7.989 - // If the next character is a '?', make the closure non-greedy (reluctant) 7.990 - if (idx < len && pattern.charAt(idx) == '?') 7.991 - { 7.992 - idx++; 7.993 - greedy = false; 7.994 - } 7.995 - 7.996 - if (greedy) 7.997 - { 7.998 - // Actually do the closure now 7.999 - switch (closureType) 7.1000 - { 7.1001 - case '{': 7.1002 - { 7.1003 - // We look for our bracket in the list 7.1004 - boolean found = false; 7.1005 - int i; 7.1006 - allocBrackets(); 7.1007 - for (i = 0; i < brackets; i++) 7.1008 - { 7.1009 - if (bracketStart[i] == idx) 7.1010 - { 7.1011 - found = true; 7.1012 - break; 7.1013 - } 7.1014 - } 7.1015 - 7.1016 - // If its not in the list we parse the {m,n} 7.1017 - if (!found) 7.1018 - { 7.1019 - if (brackets >= maxBrackets) 7.1020 - { 7.1021 - reallocBrackets(); 7.1022 - } 7.1023 - bracketStart[brackets] = idx; 7.1024 - bracket(); 7.1025 - bracketEnd[brackets] = idx; 7.1026 - i = brackets++; 7.1027 - } 7.1028 - 7.1029 - // Process min first 7.1030 - if (bracketMin[i]-- > 0) 7.1031 - { 7.1032 - if (bracketMin[i] > 0 || bracketOpt[i] != 0) { 7.1033 - // Rewind stream and run it through again - more matchers coming 7.1034 - for (int j = 0; j < brackets; j++) { 7.1035 - if (j != i && bracketStart[j] < idx 7.1036 - && bracketStart[j] >= idxBeforeTerminal) 7.1037 - { 7.1038 - brackets--; 7.1039 - bracketStart[j] = bracketStart[brackets]; 7.1040 - bracketEnd[j] = bracketEnd[brackets]; 7.1041 - bracketMin[j] = bracketMin[brackets]; 7.1042 - bracketOpt[j] = bracketOpt[brackets]; 7.1043 - } 7.1044 - } 7.1045 - 7.1046 - idx = idxBeforeTerminal; 7.1047 - } else { 7.1048 - // Bug #1030: No optinal matches - no need to rewind 7.1049 - idx = bracketEnd[i]; 7.1050 - } 7.1051 - break; 7.1052 - } 7.1053 - 7.1054 - // Do the right thing for maximum ({m,}) 7.1055 - if (bracketOpt[i] == bracketUnbounded) 7.1056 - { 7.1057 - // Drop through now and closure expression. 7.1058 - // We are done with the {m,} expr, so skip rest 7.1059 - closureType = '*'; 7.1060 - bracketOpt[i] = 0; 7.1061 - idx = bracketEnd[i]; 7.1062 - } 7.1063 - else 7.1064 - if (bracketOpt[i]-- > 0) 7.1065 - { 7.1066 - if (bracketOpt[i] > 0) 7.1067 - { 7.1068 - // More optional matchers - 'play it again sam!' 7.1069 - idx = idxBeforeTerminal; 7.1070 - } else { 7.1071 - // Bug #1030: We are done - this one is last and optional 7.1072 - idx = bracketEnd[i]; 7.1073 - } 7.1074 - // Drop through to optionally close 7.1075 - closureType = '?'; 7.1076 - } 7.1077 - else 7.1078 - { 7.1079 - // Rollback terminal - neither min nor opt matchers present 7.1080 - lenInstruction = ret; 7.1081 - node(RE.OP_NOTHING, 0); 7.1082 - 7.1083 - // We are done. skip the rest of {m,n} expr 7.1084 - idx = bracketEnd[i]; 7.1085 - break; 7.1086 - } 7.1087 - } 7.1088 - 7.1089 - // Fall through! 7.1090 - 7.1091 - case '?': 7.1092 - case '*': 7.1093 - 7.1094 - if (!greedy) 7.1095 - { 7.1096 - break; 7.1097 - } 7.1098 - 7.1099 - if (closureType == '?') 7.1100 - { 7.1101 - // X? is compiled as (X|) 7.1102 - nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X 7.1103 - setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option 7.1104 - int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING 7.1105 - setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING 7.1106 - setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node 7.1107 - } 7.1108 - 7.1109 - if (closureType == '*') 7.1110 - { 7.1111 - // X* is compiled as (X{gotoX}|) 7.1112 - nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X 7.1113 - setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option 7.1114 - setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto 7.1115 - setNextOfEnd(ret + RE.nodeSize, ret); // the start again 7.1116 - setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is 7.1117 - setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING 7.1118 - } 7.1119 - break; 7.1120 - 7.1121 - case '+': 7.1122 - { 7.1123 - // X+ is compiled as X({gotoX}|) 7.1124 - int branch; 7.1125 - branch = node(RE.OP_BRANCH, 0); // a new branch 7.1126 - setNextOfEnd(ret, branch); // is added to the end of X 7.1127 - setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start 7.1128 - setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option 7.1129 - setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING 7.1130 - } 7.1131 - break; 7.1132 - } 7.1133 - } 7.1134 - else 7.1135 - { 7.1136 - // Add end after closured subexpr 7.1137 - setNextOfEnd(ret, node(RE.OP_END, 0)); 7.1138 - 7.1139 - // Actually do the closure now 7.1140 - switch (closureType) 7.1141 - { 7.1142 - case '?': 7.1143 - nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); 7.1144 - break; 7.1145 - 7.1146 - case '*': 7.1147 - nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); 7.1148 - break; 7.1149 - 7.1150 - case '+': 7.1151 - nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret); 7.1152 - break; 7.1153 - } 7.1154 - 7.1155 - // Point to the expr after the closure 7.1156 - setNextOfEnd(ret, lenInstruction); 7.1157 - } 7.1158 - return ret; 7.1159 - } 7.1160 - 7.1161 - /** 7.1162 - * Compile one branch of an or operator (implements concatenation) 7.1163 - * @param flags Flags passed by reference 7.1164 - * @return Pointer to branch node 7.1165 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.1166 - */ 7.1167 - int branch(int[] flags) throws RESyntaxException 7.1168 - { 7.1169 - // Get each possibly closured piece and concat 7.1170 - int node; 7.1171 - int ret = node(RE.OP_BRANCH, 0); 7.1172 - int chain = -1; 7.1173 - int[] closureFlags = new int[1]; 7.1174 - boolean nullable = true; 7.1175 - while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')') 7.1176 - { 7.1177 - // Get new node 7.1178 - closureFlags[0] = NODE_NORMAL; 7.1179 - node = closure(closureFlags); 7.1180 - if (closureFlags[0] == NODE_NORMAL) 7.1181 - { 7.1182 - nullable = false; 7.1183 - } 7.1184 - 7.1185 - // If there's a chain, append to the end 7.1186 - if (chain != -1) 7.1187 - { 7.1188 - setNextOfEnd(chain, node); 7.1189 - } 7.1190 - 7.1191 - // Chain starts at current 7.1192 - chain = node; 7.1193 - } 7.1194 - 7.1195 - // If we don't run loop, make a nothing node 7.1196 - if (chain == -1) 7.1197 - { 7.1198 - node(RE.OP_NOTHING, 0); 7.1199 - } 7.1200 - 7.1201 - // Set nullable flag for this branch 7.1202 - if (nullable) 7.1203 - { 7.1204 - flags[0] |= NODE_NULLABLE; 7.1205 - } 7.1206 - return ret; 7.1207 - } 7.1208 - 7.1209 - /** 7.1210 - * Compile an expression with possible parens around it. Paren matching 7.1211 - * is done at this level so we can tie the branch tails together. 7.1212 - * @param flags Flag value passed by reference 7.1213 - * @return Node index of expression in instruction array 7.1214 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.1215 - */ 7.1216 - int expr(int[] flags) throws RESyntaxException 7.1217 - { 7.1218 - // Create open paren node unless we were called from the top level (which has no parens) 7.1219 - int paren = -1; 7.1220 - int ret = -1; 7.1221 - int closeParens = parens; 7.1222 - if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(') 7.1223 - { 7.1224 - // if its a cluster ( rather than a proper subexpression ie with backrefs ) 7.1225 - if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' ) 7.1226 - { 7.1227 - paren = 2; 7.1228 - idx += 3; 7.1229 - ret = node( RE.OP_OPEN_CLUSTER, 0 ); 7.1230 - } 7.1231 - else 7.1232 - { 7.1233 - paren = 1; 7.1234 - idx++; 7.1235 - ret = node(RE.OP_OPEN, parens++); 7.1236 - } 7.1237 - } 7.1238 - flags[0] &= ~NODE_TOPLEVEL; 7.1239 - 7.1240 - // Create a branch node 7.1241 - int branch = branch(flags); 7.1242 - if (ret == -1) 7.1243 - { 7.1244 - ret = branch; 7.1245 - } 7.1246 - else 7.1247 - { 7.1248 - setNextOfEnd(ret, branch); 7.1249 - } 7.1250 - 7.1251 - // Loop through branches 7.1252 - while (idx < len && pattern.charAt(idx) == '|') 7.1253 - { 7.1254 - idx++; 7.1255 - branch = branch(flags); 7.1256 - setNextOfEnd(ret, branch); 7.1257 - } 7.1258 - 7.1259 - // Create an ending node (either a close paren or an OP_END) 7.1260 - int end; 7.1261 - if ( paren > 0 ) 7.1262 - { 7.1263 - if (idx < len && pattern.charAt(idx) == ')') 7.1264 - { 7.1265 - idx++; 7.1266 - } 7.1267 - else 7.1268 - { 7.1269 - syntaxError("Missing close paren"); 7.1270 - } 7.1271 - if ( paren == 1 ) 7.1272 - { 7.1273 - end = node(RE.OP_CLOSE, closeParens); 7.1274 - } 7.1275 - else 7.1276 - { 7.1277 - end = node( RE.OP_CLOSE_CLUSTER, 0 ); 7.1278 - } 7.1279 - } 7.1280 - else 7.1281 - { 7.1282 - end = node(RE.OP_END, 0); 7.1283 - } 7.1284 - 7.1285 - // Append the ending node to the ret nodelist 7.1286 - setNextOfEnd(ret, end); 7.1287 - 7.1288 - // Hook the ends of each branch to the end node 7.1289 - int currentNode = ret; 7.1290 - int nextNodeOffset = instruction[ currentNode + RE.offsetNext ]; 7.1291 - // while the next node o 7.1292 - while ( nextNodeOffset != 0 && currentNode < lenInstruction ) 7.1293 - { 7.1294 - // If branch, make the end of the branch's operand chain point to the end node. 7.1295 - if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH ) 7.1296 - { 7.1297 - setNextOfEnd( currentNode + RE.nodeSize, end ); 7.1298 - } 7.1299 - nextNodeOffset = instruction[ currentNode + RE.offsetNext ]; 7.1300 - currentNode += nextNodeOffset; 7.1301 - } 7.1302 - 7.1303 - // Return the node list 7.1304 - return ret; 7.1305 - } 7.1306 - 7.1307 - /** 7.1308 - * Compiles a regular expression pattern into a program runnable by the pattern 7.1309 - * matcher class 'RE'. 7.1310 - * @param pattern Regular expression pattern to compile (see RECompiler class 7.1311 - * for details). 7.1312 - * @return A compiled regular expression program. 7.1313 - * @exception RESyntaxException Thrown if the regular expression has invalid syntax. 7.1314 - * @see RECompiler 7.1315 - * @see RE 7.1316 - */ 7.1317 - public REProgram compile(String pattern) throws RESyntaxException 7.1318 - { 7.1319 - // Initialize variables for compilation 7.1320 - this.pattern = pattern; // Save pattern in instance variable 7.1321 - len = pattern.length(); // Precompute pattern length for speed 7.1322 - idx = 0; // Set parsing index to the first character 7.1323 - lenInstruction = 0; // Set emitted instruction count to zero 7.1324 - parens = 1; // Set paren level to 1 (the implicit outer parens) 7.1325 - brackets = 0; // No bracketed closures yet 7.1326 - 7.1327 - // Initialize pass by reference flags value 7.1328 - int[] flags = { NODE_TOPLEVEL }; 7.1329 - 7.1330 - // Parse expression 7.1331 - expr(flags); 7.1332 - 7.1333 - // Should be at end of input 7.1334 - if (idx != len) 7.1335 - { 7.1336 - if (pattern.charAt(idx) == ')') 7.1337 - { 7.1338 - syntaxError("Unmatched close paren"); 7.1339 - } 7.1340 - syntaxError("Unexpected input remains"); 7.1341 - } 7.1342 - 7.1343 - // Return the result 7.1344 - char[] ins = new char[lenInstruction]; 7.1345 - System.arraycopy(instruction, 0, ins, 0, lenInstruction); 7.1346 - return new REProgram(parens, ins); 7.1347 - } 7.1348 - 7.1349 - /** 7.1350 - * Local, nested class for maintaining character ranges for character classes. 7.1351 - */ 7.1352 - class RERange 7.1353 - { 7.1354 - int size = 16; // Capacity of current range arrays 7.1355 - int[] minRange = new int[size]; // Range minima 7.1356 - int[] maxRange = new int[size]; // Range maxima 7.1357 - int num = 0; // Number of range array elements in use 7.1358 - 7.1359 - /** 7.1360 - * Deletes the range at a given index from the range lists 7.1361 - * @param index Index of range to delete from minRange and maxRange arrays. 7.1362 - */ 7.1363 - void delete(int index) 7.1364 - { 7.1365 - // Return if no elements left or index is out of range 7.1366 - if (num == 0 || index >= num) 7.1367 - { 7.1368 - return; 7.1369 - } 7.1370 - 7.1371 - // Move elements down 7.1372 - while (++index < num) 7.1373 - { 7.1374 - if (index - 1 >= 0) 7.1375 - { 7.1376 - minRange[index-1] = minRange[index]; 7.1377 - maxRange[index-1] = maxRange[index]; 7.1378 - } 7.1379 - } 7.1380 - 7.1381 - // One less element now 7.1382 - num--; 7.1383 - } 7.1384 - 7.1385 - /** 7.1386 - * Merges a range into the range list, coalescing ranges if possible. 7.1387 - * @param min Minimum end of range 7.1388 - * @param max Maximum end of range 7.1389 - */ 7.1390 - void merge(int min, int max) 7.1391 - { 7.1392 - // Loop through ranges 7.1393 - for (int i = 0; i < num; i++) 7.1394 - { 7.1395 - // Min-max is subsumed by minRange[i]-maxRange[i] 7.1396 - if (min >= minRange[i] && max <= maxRange[i]) 7.1397 - { 7.1398 - return; 7.1399 - } 7.1400 - 7.1401 - // Min-max subsumes minRange[i]-maxRange[i] 7.1402 - else if (min <= minRange[i] && max >= maxRange[i]) 7.1403 - { 7.1404 - delete(i); 7.1405 - merge(min, max); 7.1406 - return; 7.1407 - } 7.1408 - 7.1409 - // Min is in the range, but max is outside 7.1410 - else if (min >= minRange[i] && min <= maxRange[i]) 7.1411 - { 7.1412 - delete(i); 7.1413 - min = minRange[i]; 7.1414 - merge(min, max); 7.1415 - return; 7.1416 - } 7.1417 - 7.1418 - // Max is in the range, but min is outside 7.1419 - else if (max >= minRange[i] && max <= maxRange[i]) 7.1420 - { 7.1421 - delete(i); 7.1422 - max = maxRange[i]; 7.1423 - merge(min, max); 7.1424 - return; 7.1425 - } 7.1426 - } 7.1427 - 7.1428 - // Must not overlap any other ranges 7.1429 - if (num >= size) 7.1430 - { 7.1431 - size *= 2; 7.1432 - int[] newMin = new int[size]; 7.1433 - int[] newMax = new int[size]; 7.1434 - System.arraycopy(minRange, 0, newMin, 0, num); 7.1435 - System.arraycopy(maxRange, 0, newMax, 0, num); 7.1436 - minRange = newMin; 7.1437 - maxRange = newMax; 7.1438 - } 7.1439 - minRange[num] = min; 7.1440 - maxRange[num] = max; 7.1441 - num++; 7.1442 - } 7.1443 - 7.1444 - /** 7.1445 - * Removes a range by deleting or shrinking all other ranges 7.1446 - * @param min Minimum end of range 7.1447 - * @param max Maximum end of range 7.1448 - */ 7.1449 - void remove(int min, int max) 7.1450 - { 7.1451 - // Loop through ranges 7.1452 - for (int i = 0; i < num; i++) 7.1453 - { 7.1454 - // minRange[i]-maxRange[i] is subsumed by min-max 7.1455 - if (minRange[i] >= min && maxRange[i] <= max) 7.1456 - { 7.1457 - delete(i); 7.1458 - i--; 7.1459 - return; 7.1460 - } 7.1461 - 7.1462 - // min-max is subsumed by minRange[i]-maxRange[i] 7.1463 - else if (min >= minRange[i] && max <= maxRange[i]) 7.1464 - { 7.1465 - int minr = minRange[i]; 7.1466 - int maxr = maxRange[i]; 7.1467 - delete(i); 7.1468 - if (minr < min) 7.1469 - { 7.1470 - merge(minr, min - 1); 7.1471 - } 7.1472 - if (max < maxr) 7.1473 - { 7.1474 - merge(max + 1, maxr); 7.1475 - } 7.1476 - return; 7.1477 - } 7.1478 - 7.1479 - // minRange is in the range, but maxRange is outside 7.1480 - else if (minRange[i] >= min && minRange[i] <= max) 7.1481 - { 7.1482 - minRange[i] = max + 1; 7.1483 - return; 7.1484 - } 7.1485 - 7.1486 - // maxRange is in the range, but minRange is outside 7.1487 - else if (maxRange[i] >= min && maxRange[i] <= max) 7.1488 - { 7.1489 - maxRange[i] = min - 1; 7.1490 - return; 7.1491 - } 7.1492 - } 7.1493 - } 7.1494 - 7.1495 - /** 7.1496 - * Includes (or excludes) the range from min to max, inclusive. 7.1497 - * @param min Minimum end of range 7.1498 - * @param max Maximum end of range 7.1499 - * @param include True if range should be included. False otherwise. 7.1500 - */ 7.1501 - void include(int min, int max, boolean include) 7.1502 - { 7.1503 - if (include) 7.1504 - { 7.1505 - merge(min, max); 7.1506 - } 7.1507 - else 7.1508 - { 7.1509 - remove(min, max); 7.1510 - } 7.1511 - } 7.1512 - 7.1513 - /** 7.1514 - * Includes a range with the same min and max 7.1515 - * @param minmax Minimum and maximum end of range (inclusive) 7.1516 - * @param include True if range should be included. False otherwise. 7.1517 - */ 7.1518 - void include(char minmax, boolean include) 7.1519 - { 7.1520 - include(minmax, minmax, include); 7.1521 - } 7.1522 - } 7.1523 -}
8.1 --- a/src/com/sun/org/apache/regexp/internal/REDebugCompiler.java Sat Oct 24 16:18:47 2020 +0800 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,225 +0,0 @@ 8.4 -/* 8.5 - * reserved comment block 8.6 - * DO NOT REMOVE OR ALTER! 8.7 - */ 8.8 -/* 8.9 - * Copyright 1999-2004 The Apache Software Foundation. 8.10 - * 8.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 8.12 - * you may not use this file except in compliance with the License. 8.13 - * You may obtain a copy of the License at 8.14 - * 8.15 - * http://www.apache.org/licenses/LICENSE-2.0 8.16 - * 8.17 - * Unless required by applicable law or agreed to in writing, software 8.18 - * distributed under the License is distributed on an "AS IS" BASIS, 8.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 8.20 - * See the License for the specific language governing permissions and 8.21 - * limitations under the License. 8.22 - */ 8.23 - 8.24 -package com.sun.org.apache.regexp.internal; 8.25 - 8.26 -import java.io.PrintWriter; 8.27 -import java.util.Hashtable; 8.28 - 8.29 -/** 8.30 - * A subclass of RECompiler which can dump a regular expression program 8.31 - * for debugging purposes. 8.32 - * 8.33 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 8.34 - */ 8.35 -public class REDebugCompiler extends RECompiler 8.36 -{ 8.37 - /** 8.38 - * Mapping from opcodes to descriptive strings 8.39 - */ 8.40 - static Hashtable hashOpcode = new Hashtable(); 8.41 - static 8.42 - { 8.43 - hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR), "OP_RELUCTANTSTAR"); 8.44 - hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS), "OP_RELUCTANTPLUS"); 8.45 - hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE), "OP_RELUCTANTMAYBE"); 8.46 - hashOpcode.put(new Integer(RE.OP_END), "OP_END"); 8.47 - hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL"); 8.48 - hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL"); 8.49 - hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY"); 8.50 - hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF"); 8.51 - hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH"); 8.52 - hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM"); 8.53 - hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR"); 8.54 - hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS"); 8.55 - hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE"); 8.56 - hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING"); 8.57 - hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO"); 8.58 - hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE"); 8.59 - hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN"); 8.60 - hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE"); 8.61 - hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF"); 8.62 - hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS"); 8.63 - hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER), "OP_OPEN_CLUSTER"); 8.64 - hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER), "OP_CLOSE_CLUSTER"); 8.65 - } 8.66 - 8.67 - /** 8.68 - * Returns a descriptive string for an opcode. 8.69 - * @param opcode Opcode to convert to a string 8.70 - * @return Description of opcode 8.71 - */ 8.72 - String opcodeToString(char opcode) 8.73 - { 8.74 - // Get string for opcode 8.75 - String ret =(String)hashOpcode.get(new Integer(opcode)); 8.76 - 8.77 - // Just in case we have a corrupt program 8.78 - if (ret == null) 8.79 - { 8.80 - ret = "OP_????"; 8.81 - } 8.82 - return ret; 8.83 - } 8.84 - 8.85 - /** 8.86 - * Return a string describing a (possibly unprintable) character. 8.87 - * @param c Character to convert to a printable representation 8.88 - * @return String representation of character 8.89 - */ 8.90 - String charToString(char c) 8.91 - { 8.92 - // If it's unprintable, convert to '\###' 8.93 - if (c < ' ' || c > 127) 8.94 - { 8.95 - return "\\" + (int)c; 8.96 - } 8.97 - 8.98 - // Return the character as a string 8.99 - return String.valueOf(c); 8.100 - } 8.101 - 8.102 - /** 8.103 - * Returns a descriptive string for a node in a regular expression program. 8.104 - * @param node Node to describe 8.105 - * @return Description of node 8.106 - */ 8.107 - String nodeToString(int node) 8.108 - { 8.109 - // Get opcode and opdata for node 8.110 - char opcode = instruction[node + RE.offsetOpcode]; 8.111 - int opdata = (int)instruction[node + RE.offsetOpdata]; 8.112 - 8.113 - // Return opcode as a string and opdata value 8.114 - return opcodeToString(opcode) + ", opdata = " + opdata; 8.115 - } 8.116 - 8.117 - /** 8.118 - * Inserts a node with a given opcode and opdata at insertAt. The node relative next 8.119 - * pointer is initialized to 0. 8.120 - * @param opcode Opcode for new node 8.121 - * @param opdata Opdata for new node (only the low 16 bits are currently used) 8.122 - * @param insertAt Index at which to insert the new node in the program * / 8.123 - void nodeInsert(char opcode, int opdata, int insertAt) { 8.124 - System.out.println( "====> " + opcode + " " + opdata + " " + insertAt ); 8.125 - PrintWriter writer = new PrintWriter( System.out ); 8.126 - dumpProgram( writer ); 8.127 - super.nodeInsert( opcode, opdata, insertAt ); 8.128 - System.out.println( "====< " ); 8.129 - dumpProgram( writer ); 8.130 - writer.flush(); 8.131 - }/**/ 8.132 - 8.133 - 8.134 - /** 8.135 - * Appends a node to the end of a node chain 8.136 - * @param node Start of node chain to traverse 8.137 - * @param pointTo Node to have the tail of the chain point to * / 8.138 - void setNextOfEnd(int node, int pointTo) { 8.139 - System.out.println( "====> " + node + " " + pointTo ); 8.140 - PrintWriter writer = new PrintWriter( System.out ); 8.141 - dumpProgram( writer ); 8.142 - super.setNextOfEnd( node, pointTo ); 8.143 - System.out.println( "====< " ); 8.144 - dumpProgram( writer ); 8.145 - writer.flush(); 8.146 - }/**/ 8.147 - 8.148 - 8.149 - /** 8.150 - * Dumps the current program to a PrintWriter 8.151 - * @param p PrintWriter for program dump output 8.152 - */ 8.153 - public void dumpProgram(PrintWriter p) 8.154 - { 8.155 - // Loop through the whole program 8.156 - for (int i = 0; i < lenInstruction; ) 8.157 - { 8.158 - // Get opcode, opdata and next fields of current program node 8.159 - char opcode = instruction[i + RE.offsetOpcode]; 8.160 - char opdata = instruction[i + RE.offsetOpdata]; 8.161 - short next = (short)instruction[i + RE.offsetNext]; 8.162 - 8.163 - // Display the current program node 8.164 - p.print(i + ". " + nodeToString(i) + ", next = "); 8.165 - 8.166 - // If there's no next, say 'none', otherwise give absolute index of next node 8.167 - if (next == 0) 8.168 - { 8.169 - p.print("none"); 8.170 - } 8.171 - else 8.172 - { 8.173 - p.print(i + next); 8.174 - } 8.175 - 8.176 - // Move past node 8.177 - i += RE.nodeSize; 8.178 - 8.179 - // If character class 8.180 - if (opcode == RE.OP_ANYOF) 8.181 - { 8.182 - // Opening bracket for start of char class 8.183 - p.print(", ["); 8.184 - 8.185 - // Show each range in the char class 8.186 - int rangeCount = opdata; 8.187 - for (int r = 0; r < rangeCount; r++) 8.188 - { 8.189 - // Get first and last chars in range 8.190 - char charFirst = instruction[i++]; 8.191 - char charLast = instruction[i++]; 8.192 - 8.193 - // Print range as X-Y, unless range encompasses only one char 8.194 - if (charFirst == charLast) 8.195 - { 8.196 - p.print(charToString(charFirst)); 8.197 - } 8.198 - else 8.199 - { 8.200 - p.print(charToString(charFirst) + "-" + charToString(charLast)); 8.201 - } 8.202 - } 8.203 - 8.204 - // Annotate the end of the char class 8.205 - p.print("]"); 8.206 - } 8.207 - 8.208 - // If atom 8.209 - if (opcode == RE.OP_ATOM) 8.210 - { 8.211 - // Open quote 8.212 - p.print(", \""); 8.213 - 8.214 - // Print each character in the atom 8.215 - for (int len = opdata; len-- != 0; ) 8.216 - { 8.217 - p.print(charToString(instruction[i++])); 8.218 - } 8.219 - 8.220 - // Close quote 8.221 - p.print("\""); 8.222 - } 8.223 - 8.224 - // Print a newline 8.225 - p.println(""); 8.226 - } 8.227 - } 8.228 -}
9.1 --- a/src/com/sun/org/apache/regexp/internal/REProgram.java Sat Oct 24 16:18:47 2020 +0800 9.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 9.3 @@ -1,158 +0,0 @@ 9.4 -/* 9.5 - * reserved comment block 9.6 - * DO NOT REMOVE OR ALTER! 9.7 - */ 9.8 -/* 9.9 - * Copyright 1999-2004 The Apache Software Foundation. 9.10 - * 9.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 9.12 - * you may not use this file except in compliance with the License. 9.13 - * You may obtain a copy of the License at 9.14 - * 9.15 - * http://www.apache.org/licenses/LICENSE-2.0 9.16 - * 9.17 - * Unless required by applicable law or agreed to in writing, software 9.18 - * distributed under the License is distributed on an "AS IS" BASIS, 9.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9.20 - * See the License for the specific language governing permissions and 9.21 - * limitations under the License. 9.22 - */ 9.23 - 9.24 -package com.sun.org.apache.regexp.internal; 9.25 - 9.26 -import java.io.Serializable; 9.27 - 9.28 -/** 9.29 - * A class that holds compiled regular expressions. This is exposed mainly 9.30 - * for use by the recompile utility (which helps you produce precompiled 9.31 - * REProgram objects). You should not otherwise need to work directly with 9.32 - * this class. 9.33 -* 9.34 - * @see RE 9.35 - * @see RECompiler 9.36 - * 9.37 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 9.38 - */ 9.39 -public class REProgram implements Serializable 9.40 -{ 9.41 - static final int OPT_HASBACKREFS = 1; 9.42 - 9.43 - char[] instruction; // The compiled regular expression 'program' 9.44 - int lenInstruction; // The amount of the instruction buffer in use 9.45 - char[] prefix; // Prefix string optimization 9.46 - int flags; // Optimization flags (REProgram.OPT_*) 9.47 - int maxParens = -1; 9.48 - 9.49 - /** 9.50 - * Constructs a program object from a character array 9.51 - * @param instruction Character array with RE opcode instructions in it 9.52 - */ 9.53 - public REProgram(char[] instruction) 9.54 - { 9.55 - this(instruction, instruction.length); 9.56 - } 9.57 - 9.58 - /** 9.59 - * Constructs a program object from a character array 9.60 - * @param parens Count of parens in the program 9.61 - * @param instruction Character array with RE opcode instructions in it 9.62 - */ 9.63 - public REProgram(int parens, char[] instruction) 9.64 - { 9.65 - this(instruction, instruction.length); 9.66 - this.maxParens = parens; 9.67 - } 9.68 - 9.69 - /** 9.70 - * Constructs a program object from a character array 9.71 - * @param instruction Character array with RE opcode instructions in it 9.72 - * @param lenInstruction Amount of instruction array in use 9.73 - */ 9.74 - public REProgram(char[] instruction, int lenInstruction) 9.75 - { 9.76 - setInstructions(instruction, lenInstruction); 9.77 - } 9.78 - 9.79 - /** 9.80 - * Returns a copy of the current regular expression program in a character 9.81 - * array that is exactly the right length to hold the program. If there is 9.82 - * no program compiled yet, getInstructions() will return null. 9.83 - * @return A copy of the current compiled RE program 9.84 - */ 9.85 - public char[] getInstructions() 9.86 - { 9.87 - // Ensure program has been compiled! 9.88 - if (lenInstruction != 0) 9.89 - { 9.90 - // Return copy of program 9.91 - char[] ret = new char[lenInstruction]; 9.92 - System.arraycopy(instruction, 0, ret, 0, lenInstruction); 9.93 - return ret; 9.94 - } 9.95 - return null; 9.96 - } 9.97 - 9.98 - /** 9.99 - * Sets a new regular expression program to run. It is this method which 9.100 - * performs any special compile-time search optimizations. Currently only 9.101 - * two optimizations are in place - one which checks for backreferences 9.102 - * (so that they can be lazily allocated) and another which attempts to 9.103 - * find an prefix anchor string so that substantial amounts of input can 9.104 - * potentially be skipped without running the actual program. 9.105 - * @param instruction Program instruction buffer 9.106 - * @param lenInstruction Length of instruction buffer in use 9.107 - */ 9.108 - public void setInstructions(char[] instruction, int lenInstruction) 9.109 - { 9.110 - // Save reference to instruction array 9.111 - this.instruction = instruction; 9.112 - this.lenInstruction = lenInstruction; 9.113 - 9.114 - // Initialize other program-related variables 9.115 - flags = 0; 9.116 - prefix = null; 9.117 - 9.118 - // Try various compile-time optimizations if there's a program 9.119 - if (instruction != null && lenInstruction != 0) 9.120 - { 9.121 - // If the first node is a branch 9.122 - if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH) 9.123 - { 9.124 - // to the end node 9.125 - int next = instruction[0 + RE.offsetNext]; 9.126 - if (instruction[next + RE.offsetOpcode] == RE.OP_END) 9.127 - { 9.128 - // and the branch starts with an atom 9.129 - if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM) 9.130 - { 9.131 - // then get that atom as an prefix because there's no other choice 9.132 - int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata]; 9.133 - prefix = new char[lenAtom]; 9.134 - System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom); 9.135 - } 9.136 - } 9.137 - } 9.138 - 9.139 - BackrefScanLoop: 9.140 - 9.141 - // Check for backreferences 9.142 - for (int i = 0; i < lenInstruction; i += RE.nodeSize) 9.143 - { 9.144 - switch (instruction[i + RE.offsetOpcode]) 9.145 - { 9.146 - case RE.OP_ANYOF: 9.147 - i += (instruction[i + RE.offsetOpdata] * 2); 9.148 - break; 9.149 - 9.150 - case RE.OP_ATOM: 9.151 - i += instruction[i + RE.offsetOpdata]; 9.152 - break; 9.153 - 9.154 - case RE.OP_BACKREF: 9.155 - flags |= OPT_HASBACKREFS; 9.156 - break BackrefScanLoop; 9.157 - } 9.158 - } 9.159 - } 9.160 - } 9.161 -}
10.1 --- a/src/com/sun/org/apache/regexp/internal/RESyntaxException.java Sat Oct 24 16:18:47 2020 +0800 10.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 10.3 @@ -1,43 +0,0 @@ 10.4 -/* 10.5 - * reserved comment block 10.6 - * DO NOT REMOVE OR ALTER! 10.7 - */ 10.8 -/* 10.9 - * Copyright 1999-2004 The Apache Software Foundation. 10.10 - * 10.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 10.12 - * you may not use this file except in compliance with the License. 10.13 - * You may obtain a copy of the License at 10.14 - * 10.15 - * http://www.apache.org/licenses/LICENSE-2.0 10.16 - * 10.17 - * Unless required by applicable law or agreed to in writing, software 10.18 - * distributed under the License is distributed on an "AS IS" BASIS, 10.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10.20 - * See the License for the specific language governing permissions and 10.21 - * limitations under the License. 10.22 - */ 10.23 - 10.24 -package com.sun.org.apache.regexp.internal; 10.25 - 10.26 -/** 10.27 - * Exception thrown to indicate a syntax error in a regular expression. 10.28 - * This is a non-checked exception because you should only have problems compiling 10.29 - * a regular expression during development. 10.30 - * If you are making regular expresion programs dynamically then you can catch it 10.31 - * if you wish. But should not be forced to. 10.32 - * 10.33 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 10.34 - * @author <a href="mailto:gholam@xtra.co.nz>Michael McCallum</a> 10.35 - */ 10.36 -public class RESyntaxException extends RuntimeException 10.37 -{ 10.38 - /** 10.39 - * Constructor. 10.40 - * @param s Further description of the syntax error 10.41 - */ 10.42 - public RESyntaxException(String s) 10.43 - { 10.44 - super("Syntax error: " + s); 10.45 - } 10.46 -}
11.1 --- a/src/com/sun/org/apache/regexp/internal/RETest.java Sat Oct 24 16:18:47 2020 +0800 11.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 11.3 @@ -1,883 +0,0 @@ 11.4 -/* 11.5 - * reserved comment block 11.6 - * DO NOT REMOVE OR ALTER! 11.7 - */ 11.8 -/* 11.9 - * Copyright 1999-2004 The Apache Software Foundation. 11.10 - * 11.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 11.12 - * you may not use this file except in compliance with the License. 11.13 - * You may obtain a copy of the License at 11.14 - * 11.15 - * http://www.apache.org/licenses/LICENSE-2.0 11.16 - * 11.17 - * Unless required by applicable law or agreed to in writing, software 11.18 - * distributed under the License is distributed on an "AS IS" BASIS, 11.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11.20 - * See the License for the specific language governing permissions and 11.21 - * limitations under the License. 11.22 - */ 11.23 - 11.24 -package com.sun.org.apache.regexp.internal; 11.25 - 11.26 -import java.io.BufferedReader; 11.27 -import java.io.FileReader; 11.28 -import java.io.InputStreamReader; 11.29 -import java.io.PrintWriter; 11.30 -import java.io.File; 11.31 -import java.io.ByteArrayOutputStream; 11.32 -import java.io.ObjectOutputStream; 11.33 -import java.io.ByteArrayInputStream; 11.34 -import java.io.ObjectInputStream; 11.35 -import java.io.StringBufferInputStream; 11.36 -import java.io.StringReader; 11.37 -import java.io.IOException; 11.38 - 11.39 -/** 11.40 - * Data driven (and optionally interactive) testing harness to exercise regular 11.41 - * expression compiler and matching engine. 11.42 - * 11.43 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 11.44 - * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a> 11.45 - * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> 11.46 - */ 11.47 -public class RETest 11.48 -{ 11.49 - // True if we want to see output from success cases 11.50 - static final boolean showSuccesses = false; 11.51 - 11.52 - // A new line character. 11.53 - static final String NEW_LINE = System.getProperty( "line.separator" ); 11.54 - 11.55 - // Construct a debug compiler 11.56 - REDebugCompiler compiler = new REDebugCompiler(); 11.57 - 11.58 - /** 11.59 - * Main program entrypoint. If an argument is given, it will be compiled 11.60 - * and interactive matching will ensue. If no argument is given, the 11.61 - * file RETest.txt will be used as automated testing input. 11.62 - * @param args Command line arguments (optional regular expression) 11.63 - */ 11.64 - public static void main(String[] args) 11.65 - { 11.66 - try 11.67 - { 11.68 - if (!test( args )) { 11.69 - System.exit(1); 11.70 - } 11.71 - } 11.72 - catch (Exception e) 11.73 - { 11.74 - e.printStackTrace(); 11.75 - System.exit(1); 11.76 - } 11.77 - } 11.78 - 11.79 - /** 11.80 - * Testing entrypoint. 11.81 - * @param args Command line arguments 11.82 - * @exception Exception thrown in case of error 11.83 - */ 11.84 - public static boolean test( String[] args ) throws Exception 11.85 - { 11.86 - RETest test = new RETest(); 11.87 - // Run interactive tests against a single regexp 11.88 - if (args.length == 2) 11.89 - { 11.90 - test.runInteractiveTests(args[1]); 11.91 - } 11.92 - else if (args.length == 1) 11.93 - { 11.94 - // Run automated tests 11.95 - test.runAutomatedTests(args[0]); 11.96 - } 11.97 - else 11.98 - { 11.99 - System.out.println( "Usage: RETest ([-i] [regex]) ([/path/to/testfile.txt])" ); 11.100 - System.out.println( "By Default will run automated tests from file 'docs/RETest.txt' ..." ); 11.101 - System.out.println(); 11.102 - test.runAutomatedTests("docs/RETest.txt"); 11.103 - } 11.104 - return test.failures == 0; 11.105 - } 11.106 - 11.107 - /** 11.108 - * Constructor 11.109 - */ 11.110 - public RETest() 11.111 - { 11.112 - } 11.113 - 11.114 - /** 11.115 - * Compile and test matching against a single expression 11.116 - * @param expr Expression to compile and test 11.117 - */ 11.118 - void runInteractiveTests(String expr) 11.119 - { 11.120 - RE r = new RE(); 11.121 - try 11.122 - { 11.123 - // Compile expression 11.124 - r.setProgram(compiler.compile(expr)); 11.125 - 11.126 - // Show expression 11.127 - say("" + NEW_LINE + "" + expr + "" + NEW_LINE + ""); 11.128 - 11.129 - // Show program for compiled expression 11.130 - PrintWriter writer = new PrintWriter( System.out ); 11.131 - compiler.dumpProgram( writer ); 11.132 - writer.flush(); 11.133 - 11.134 - boolean running = true; 11.135 - // Test matching against compiled expression 11.136 - while ( running ) 11.137 - { 11.138 - // Read from keyboard 11.139 - BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 11.140 - System.out.print("> "); 11.141 - System.out.flush(); 11.142 - String match = br.readLine(); 11.143 - 11.144 - if ( match != null ) 11.145 - { 11.146 - // Try a match against the keyboard input 11.147 - if (r.match(match)) 11.148 - { 11.149 - say("Match successful."); 11.150 - } 11.151 - else 11.152 - { 11.153 - say("Match failed."); 11.154 - } 11.155 - 11.156 - // Show subparen registers 11.157 - showParens(r); 11.158 - } 11.159 - else 11.160 - { 11.161 - running = false; 11.162 - System.out.println(); 11.163 - } 11.164 - } 11.165 - } 11.166 - catch (Exception e) 11.167 - { 11.168 - say("Error: " + e.toString()); 11.169 - e.printStackTrace(); 11.170 - } 11.171 - } 11.172 - 11.173 - /** 11.174 - * Exit with a fatal error. 11.175 - * @param s Last famous words before exiting 11.176 - */ 11.177 - void die(String s) 11.178 - { 11.179 - say("FATAL ERROR: " + s); 11.180 - System.exit(-1); 11.181 - } 11.182 - 11.183 - /** 11.184 - * Fail with an error. Will print a big failure message to System.out. 11.185 - * 11.186 - * @param log Output before failure 11.187 - * @param s Failure description 11.188 - */ 11.189 - void fail(StringBuffer log, String s) 11.190 - { 11.191 - System.out.print(log.toString()); 11.192 - fail(s); 11.193 - } 11.194 - 11.195 - /** 11.196 - * Fail with an error. Will print a big failure message to System.out. 11.197 - * 11.198 - * @param s Failure description 11.199 - */ 11.200 - void fail(String s) 11.201 - { 11.202 - failures++; 11.203 - say("" + NEW_LINE + ""); 11.204 - say("*******************************************************"); 11.205 - say("********************* FAILURE! **********************"); 11.206 - say("*******************************************************"); 11.207 - say("" + NEW_LINE + ""); 11.208 - say(s); 11.209 - say(""); 11.210 - // make sure the writer gets flushed. 11.211 - if (compiler != null) { 11.212 - PrintWriter writer = new PrintWriter( System.out ); 11.213 - compiler.dumpProgram( writer ); 11.214 - writer.flush(); 11.215 - say("" + NEW_LINE + ""); 11.216 - } 11.217 - } 11.218 - 11.219 - /** 11.220 - * Say something to standard out 11.221 - * @param s What to say 11.222 - */ 11.223 - void say(String s) 11.224 - { 11.225 - System.out.println(s); 11.226 - } 11.227 - 11.228 - /** 11.229 - * Dump parenthesized subexpressions found by a regular expression matcher object 11.230 - * @param r Matcher object with results to show 11.231 - */ 11.232 - void showParens(RE r) 11.233 - { 11.234 - // Loop through each paren 11.235 - for (int i = 0; i < r.getParenCount(); i++) 11.236 - { 11.237 - // Show paren register 11.238 - say("$" + i + " = " + r.getParen(i)); 11.239 - } 11.240 - } 11.241 - 11.242 - /* 11.243 - * number in automated test 11.244 - */ 11.245 - int testCount = 0; 11.246 - 11.247 - /* 11.248 - * Count of failures in automated test 11.249 - */ 11.250 - int failures = 0; 11.251 - 11.252 - /** 11.253 - * Run automated tests in RETest.txt file (from Perl 4.0 test battery) 11.254 - * @exception Exception thrown in case of error 11.255 - */ 11.256 - void runAutomatedTests(String testDocument) throws Exception 11.257 - { 11.258 - long ms = System.currentTimeMillis(); 11.259 - 11.260 - // Some unit tests 11.261 - testPrecompiledRE(); 11.262 - testSplitAndGrep(); 11.263 - testSubst(); 11.264 - testOther(); 11.265 - 11.266 - // Test from script file 11.267 - File testInput = new File(testDocument); 11.268 - if (! testInput.exists()) { 11.269 - throw new Exception ("Could not find: " + testDocument); 11.270 - } 11.271 - 11.272 - BufferedReader br = new BufferedReader(new FileReader(testInput)); 11.273 - try 11.274 - { 11.275 - // While input is available, parse lines 11.276 - while (br.ready()) 11.277 - { 11.278 - RETestCase testcase = getNextTestCase(br); 11.279 - if (testcase != null) { 11.280 - testcase.runTest(); 11.281 - } 11.282 - } 11.283 - } 11.284 - finally 11.285 - { 11.286 - br.close(); 11.287 - } 11.288 - 11.289 - // Show match time 11.290 - say(NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms."); 11.291 - 11.292 - // Print final results 11.293 - if (failures > 0) { 11.294 - say("*************** THERE ARE FAILURES! *******************"); 11.295 - } 11.296 - say("Tests complete. " + testCount + " tests, " + failures + " failure(s)."); 11.297 - } 11.298 - 11.299 - /** 11.300 - * Run automated unit test 11.301 - * @exception Exception thrown in case of error 11.302 - */ 11.303 - void testOther() throws Exception 11.304 - { 11.305 - // Serialization test 1: Compile regexp and serialize/deserialize it 11.306 - RE r = new RE("(a*)b"); 11.307 - say("Serialized/deserialized (a*)b"); 11.308 - ByteArrayOutputStream out = new ByteArrayOutputStream(128); 11.309 - new ObjectOutputStream(out).writeObject(r); 11.310 - ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); 11.311 - r = (RE)new ObjectInputStream(in).readObject(); 11.312 - if (!r.match("aaab")) 11.313 - { 11.314 - fail("Did not match 'aaab' with deserialized RE."); 11.315 - } else { 11.316 - say("aaaab = true"); 11.317 - showParens(r); 11.318 - } 11.319 - 11.320 - // Serialization test 2: serialize/deserialize used regexp 11.321 - out.reset(); 11.322 - say("Deserialized (a*)b"); 11.323 - new ObjectOutputStream(out).writeObject(r); 11.324 - in = new ByteArrayInputStream(out.toByteArray()); 11.325 - r = (RE)new ObjectInputStream(in).readObject(); 11.326 - if (r.getParenCount() != 0) 11.327 - { 11.328 - fail("Has parens after deserialization."); 11.329 - } 11.330 - if (!r.match("aaab")) 11.331 - { 11.332 - fail("Did not match 'aaab' with deserialized RE."); 11.333 - } else { 11.334 - say("aaaab = true"); 11.335 - showParens(r); 11.336 - } 11.337 - 11.338 - // Test MATCH_CASEINDEPENDENT 11.339 - r = new RE("abc(\\w*)"); 11.340 - say("MATCH_CASEINDEPENDENT abc(\\w*)"); 11.341 - r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); 11.342 - say("abc(d*)"); 11.343 - if (!r.match("abcddd")) 11.344 - { 11.345 - fail("Did not match 'abcddd'."); 11.346 - } else { 11.347 - say("abcddd = true"); 11.348 - showParens(r); 11.349 - } 11.350 - 11.351 - if (!r.match("aBcDDdd")) 11.352 - { 11.353 - fail("Did not match 'aBcDDdd'."); 11.354 - } else { 11.355 - say("aBcDDdd = true"); 11.356 - showParens(r); 11.357 - } 11.358 - 11.359 - if (!r.match("ABCDDDDD")) 11.360 - { 11.361 - fail("Did not match 'ABCDDDDD'."); 11.362 - } else { 11.363 - say("ABCDDDDD = true"); 11.364 - showParens(r); 11.365 - } 11.366 - 11.367 - r = new RE("(A*)b\\1"); 11.368 - r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); 11.369 - if (!r.match("AaAaaaBAAAAAA")) 11.370 - { 11.371 - fail("Did not match 'AaAaaaBAAAAAA'."); 11.372 - } else { 11.373 - say("AaAaaaBAAAAAA = true"); 11.374 - showParens(r); 11.375 - } 11.376 - 11.377 - r = new RE("[A-Z]*"); 11.378 - r.setMatchFlags(RE.MATCH_CASEINDEPENDENT); 11.379 - if (!r.match("CaBgDe12")) 11.380 - { 11.381 - fail("Did not match 'CaBgDe12'."); 11.382 - } else { 11.383 - say("CaBgDe12 = true"); 11.384 - showParens(r); 11.385 - } 11.386 - 11.387 - // Test MATCH_MULTILINE. Test for eol/bol symbols. 11.388 - r = new RE("^abc$", RE.MATCH_MULTILINE); 11.389 - if (!r.match("\nabc")) { 11.390 - fail("\"\\nabc\" doesn't match \"^abc$\""); 11.391 - } 11.392 - if (!r.match("\rabc")) { 11.393 - fail("\"\\rabc\" doesn't match \"^abc$\""); 11.394 - } 11.395 - if (!r.match("\r\nabc")) { 11.396 - fail("\"\\r\\nabc\" doesn't match \"^abc$\""); 11.397 - } 11.398 - if (!r.match("\u0085abc")) { 11.399 - fail("\"\\u0085abc\" doesn't match \"^abc$\""); 11.400 - } 11.401 - if (!r.match("\u2028abc")) { 11.402 - fail("\"\\u2028abc\" doesn't match \"^abc$\""); 11.403 - } 11.404 - if (!r.match("\u2029abc")) { 11.405 - fail("\"\\u2029abc\" doesn't match \"^abc$\""); 11.406 - } 11.407 - 11.408 - // Test MATCH_MULTILINE. Test that '.' does not matches new line. 11.409 - r = new RE("^a.*b$", RE.MATCH_MULTILINE); 11.410 - if (r.match("a\nb")) { 11.411 - fail("\"a\\nb\" matches \"^a.*b$\""); 11.412 - } 11.413 - if (r.match("a\rb")) { 11.414 - fail("\"a\\rb\" matches \"^a.*b$\""); 11.415 - } 11.416 - if (r.match("a\r\nb")) { 11.417 - fail("\"a\\r\\nb\" matches \"^a.*b$\""); 11.418 - } 11.419 - if (r.match("a\u0085b")) { 11.420 - fail("\"a\\u0085b\" matches \"^a.*b$\""); 11.421 - } 11.422 - if (r.match("a\u2028b")) { 11.423 - fail("\"a\\u2028b\" matches \"^a.*b$\""); 11.424 - } 11.425 - if (r.match("a\u2029b")) { 11.426 - fail("\"a\\u2029b\" matches \"^a.*b$\""); 11.427 - } 11.428 - } 11.429 - 11.430 - private void testPrecompiledRE() 11.431 - { 11.432 - // Pre-compiled regular expression "a*b" 11.433 - char[] re1Instructions = 11.434 - { 11.435 - 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041, 11.436 - 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047, 11.437 - 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000, 11.438 - 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000, 11.439 - 0x0000, 11.440 - }; 11.441 - 11.442 - REProgram re1 = new REProgram(re1Instructions); 11.443 - 11.444 - // Simple test of pre-compiled regular expressions 11.445 - RE r = new RE(re1); 11.446 - say("a*b"); 11.447 - boolean result = r.match("aaab"); 11.448 - say("aaab = " + result); 11.449 - showParens(r); 11.450 - if (!result) { 11.451 - fail("\"aaab\" doesn't match to precompiled \"a*b\""); 11.452 - } 11.453 - 11.454 - result = r.match("b"); 11.455 - say("b = " + result); 11.456 - showParens(r); 11.457 - if (!result) { 11.458 - fail("\"b\" doesn't match to precompiled \"a*b\""); 11.459 - } 11.460 - 11.461 - result = r.match("c"); 11.462 - say("c = " + result); 11.463 - showParens(r); 11.464 - if (result) { 11.465 - fail("\"c\" matches to precompiled \"a*b\""); 11.466 - } 11.467 - 11.468 - result = r.match("ccccaaaaab"); 11.469 - say("ccccaaaaab = " + result); 11.470 - showParens(r); 11.471 - if (!result) { 11.472 - fail("\"ccccaaaaab\" doesn't match to precompiled \"a*b\""); 11.473 - } 11.474 - } 11.475 - 11.476 - private void testSplitAndGrep() 11.477 - { 11.478 - String[] expected = {"xxxx", "xxxx", "yyyy", "zzz"}; 11.479 - RE r = new RE("a*b"); 11.480 - String[] s = r.split("xxxxaabxxxxbyyyyaaabzzz"); 11.481 - for (int i = 0; i < expected.length && i < s.length; i++) { 11.482 - assertEquals("Wrong splitted part", expected[i], s[i]); 11.483 - } 11.484 - assertEquals("Wrong number of splitted parts", expected.length, 11.485 - s.length); 11.486 - 11.487 - r = new RE("x+"); 11.488 - expected = new String[] {"xxxx", "xxxx"}; 11.489 - s = r.grep(s); 11.490 - for (int i = 0; i < s.length; i++) 11.491 - { 11.492 - say("s[" + i + "] = " + s[i]); 11.493 - assertEquals("Grep fails", expected[i], s[i]); 11.494 - } 11.495 - assertEquals("Wrong number of string found by grep", expected.length, 11.496 - s.length); 11.497 - } 11.498 - 11.499 - private void testSubst() 11.500 - { 11.501 - RE r = new RE("a*b"); 11.502 - String expected = "-foo-garply-wacky-"; 11.503 - String actual = r.subst("aaaabfooaaabgarplyaaabwackyb", "-"); 11.504 - assertEquals("Wrong result of substitution in \"a*b\"", expected, actual); 11.505 - 11.506 - // Test subst() with backreferences 11.507 - r = new RE("http://[\\.\\w\\-\\?/~_@&=%]+"); 11.508 - actual = r.subst("visit us: http://www.apache.org!", 11.509 - "1234<a href=\"$0\">$0</a>", RE.REPLACE_BACKREFERENCES); 11.510 - assertEquals("Wrong subst() result", "visit us: 1234<a href=\"http://www.apache.org\">http://www.apache.org</a>!", actual); 11.511 - 11.512 - // Test subst() with backreferences without leading characters 11.513 - // before first backreference 11.514 - r = new RE("(.*?)=(.*)"); 11.515 - actual = r.subst("variable=value", 11.516 - "$1_test_$212", RE.REPLACE_BACKREFERENCES); 11.517 - assertEquals("Wrong subst() result", "variable_test_value12", actual); 11.518 - 11.519 - // Test subst() with NO backreferences 11.520 - r = new RE("^a$"); 11.521 - actual = r.subst("a", 11.522 - "b", RE.REPLACE_BACKREFERENCES); 11.523 - assertEquals("Wrong subst() result", "b", actual); 11.524 - 11.525 - // Test subst() with NO backreferences 11.526 - r = new RE("^a$", RE.MATCH_MULTILINE); 11.527 - actual = r.subst("\r\na\r\n", 11.528 - "b", RE.REPLACE_BACKREFERENCES); 11.529 - assertEquals("Wrong subst() result", "\r\nb\r\n", actual); 11.530 - } 11.531 - 11.532 - public void assertEquals(String message, String expected, String actual) 11.533 - { 11.534 - if (expected != null && !expected.equals(actual) 11.535 - || actual != null && !actual.equals(expected)) 11.536 - { 11.537 - fail(message + " (expected \"" + expected 11.538 - + "\", actual \"" + actual + "\")"); 11.539 - } 11.540 - } 11.541 - 11.542 - public void assertEquals(String message, int expected, int actual) 11.543 - { 11.544 - if (expected != actual) { 11.545 - fail(message + " (expected \"" + expected 11.546 - + "\", actual \"" + actual + "\")"); 11.547 - } 11.548 - } 11.549 - 11.550 - /** 11.551 - * Converts yesno string to boolean. 11.552 - * @param yesno string representation of expected result 11.553 - * @return true if yesno is "YES", false if yesno is "NO" 11.554 - * stops program otherwise. 11.555 - */ 11.556 - private boolean getExpectedResult(String yesno) 11.557 - { 11.558 - if ("NO".equals(yesno)) 11.559 - { 11.560 - return false; 11.561 - } 11.562 - else if ("YES".equals(yesno)) 11.563 - { 11.564 - return true; 11.565 - } 11.566 - else 11.567 - { 11.568 - // Bad test script 11.569 - die("Test script error!"); 11.570 - return false; //to please javac 11.571 - } 11.572 - } 11.573 - 11.574 - /** 11.575 - * Finds next test description in a given script. 11.576 - * @param br <code>BufferedReader</code> for a script file 11.577 - * @return strign tag for next test description 11.578 - * @exception IOException if some io problems occured 11.579 - */ 11.580 - private String findNextTest(BufferedReader br) throws IOException 11.581 - { 11.582 - String number = ""; 11.583 - 11.584 - while (br.ready()) 11.585 - { 11.586 - number = br.readLine(); 11.587 - if (number == null) 11.588 - { 11.589 - break; 11.590 - } 11.591 - number = number.trim(); 11.592 - if (number.startsWith("#")) 11.593 - { 11.594 - break; 11.595 - } 11.596 - if (!number.equals("")) 11.597 - { 11.598 - say("Script error. Line = " + number); 11.599 - System.exit(-1); 11.600 - } 11.601 - } 11.602 - return number; 11.603 - } 11.604 - 11.605 - /** 11.606 - * Creates testcase for the next test description in the script file. 11.607 - * @param br <code>BufferedReader</code> for script file. 11.608 - * @return a new tescase or null. 11.609 - * @exception IOException if some io problems occured 11.610 - */ 11.611 - private RETestCase getNextTestCase(BufferedReader br) throws IOException 11.612 - { 11.613 - // Find next re test case 11.614 - final String tag = findNextTest(br); 11.615 - 11.616 - // Are we done? 11.617 - if (!br.ready()) 11.618 - { 11.619 - return null; 11.620 - } 11.621 - 11.622 - // Get expression 11.623 - final String expr = br.readLine(); 11.624 - 11.625 - // Get test information 11.626 - final String matchAgainst = br.readLine(); 11.627 - final boolean badPattern = "ERR".equals(matchAgainst); 11.628 - boolean shouldMatch = false; 11.629 - int expectedParenCount = 0; 11.630 - String[] expectedParens = null; 11.631 - 11.632 - if (!badPattern) { 11.633 - shouldMatch = getExpectedResult(br.readLine().trim()); 11.634 - if (shouldMatch) { 11.635 - expectedParenCount = Integer.parseInt(br.readLine().trim()); 11.636 - expectedParens = new String[expectedParenCount]; 11.637 - for (int i = 0; i < expectedParenCount; i++) { 11.638 - expectedParens[i] = br.readLine(); 11.639 - } 11.640 - } 11.641 - } 11.642 - 11.643 - return new RETestCase(this, tag, expr, matchAgainst, badPattern, 11.644 - shouldMatch, expectedParens); 11.645 - } 11.646 -} 11.647 - 11.648 -final class RETestCase 11.649 -{ 11.650 - final private StringBuffer log = new StringBuffer(); 11.651 - final private int number; 11.652 - final private String tag; // number from script file 11.653 - final private String pattern; 11.654 - final private String toMatch; 11.655 - final private boolean badPattern; 11.656 - final private boolean shouldMatch; 11.657 - final private String[] parens; 11.658 - final private RETest test; 11.659 - private RE regexp; 11.660 - 11.661 - public RETestCase(RETest test, String tag, String pattern, 11.662 - String toMatch, boolean badPattern, 11.663 - boolean shouldMatch, String[] parens) 11.664 - { 11.665 - this.number = ++test.testCount; 11.666 - this.test = test; 11.667 - this.tag = tag; 11.668 - this.pattern = pattern; 11.669 - this.toMatch = toMatch; 11.670 - this.badPattern = badPattern; 11.671 - this.shouldMatch = shouldMatch; 11.672 - if (parens != null) { 11.673 - this.parens = new String[parens.length]; 11.674 - for (int i = 0; i < parens.length; i++) { 11.675 - this.parens[i] = parens[i]; 11.676 - } 11.677 - } else { 11.678 - this.parens = null; 11.679 - } 11.680 - } 11.681 - 11.682 - public void runTest() 11.683 - { 11.684 - test.say(tag + "(" + number + "): " + pattern); 11.685 - if (testCreation()) { 11.686 - testMatch(); 11.687 - } 11.688 - } 11.689 - 11.690 - boolean testCreation() 11.691 - { 11.692 - try 11.693 - { 11.694 - // Compile it 11.695 - regexp = new RE(); 11.696 - regexp.setProgram(test.compiler.compile(pattern)); 11.697 - // Expression didn't cause an expected error 11.698 - if (badPattern) 11.699 - { 11.700 - test.fail(log, "Was expected to be an error, but wasn't."); 11.701 - return false; 11.702 - } 11.703 - 11.704 - return true; 11.705 - } 11.706 - // Some expressions *should* cause exceptions to be thrown 11.707 - catch (Exception e) 11.708 - { 11.709 - // If it was supposed to be an error, report success and continue 11.710 - if (badPattern) 11.711 - { 11.712 - log.append(" Match: ERR\n"); 11.713 - success("Produces an error (" + e.toString() + "), as expected."); 11.714 - return false; 11.715 - } 11.716 - 11.717 - // Wasn't supposed to be an error 11.718 - String message = (e.getMessage() == null) ? e.toString() : e.getMessage(); 11.719 - test.fail(log, "Produces an unexpected exception \"" + message + "\""); 11.720 - e.printStackTrace(); 11.721 - } 11.722 - catch (Error e) 11.723 - { 11.724 - // Internal error happened 11.725 - test.fail(log, "Compiler threw fatal error \"" + e.getMessage() + "\""); 11.726 - e.printStackTrace(); 11.727 - } 11.728 - 11.729 - return false; 11.730 - } 11.731 - 11.732 - private void testMatch() 11.733 - { 11.734 - log.append(" Match against: '" + toMatch + "'\n"); 11.735 - // Try regular matching 11.736 - try 11.737 - { 11.738 - // Match against the string 11.739 - boolean result = regexp.match(toMatch); 11.740 - log.append(" Matched: " + (result ? "YES" : "NO") + "\n"); 11.741 - 11.742 - // Check result, parens, and iterators 11.743 - if (checkResult(result) && (!shouldMatch || checkParens())) 11.744 - { 11.745 - // test match(CharacterIterator, int) 11.746 - // for every CharacterIterator implementation. 11.747 - log.append(" Match using StringCharacterIterator\n"); 11.748 - if (!tryMatchUsingCI(new StringCharacterIterator(toMatch))) 11.749 - return; 11.750 - 11.751 - log.append(" Match using CharacterArrayCharacterIterator\n"); 11.752 - if (!tryMatchUsingCI(new CharacterArrayCharacterIterator(toMatch.toCharArray(), 0, toMatch.length()))) 11.753 - return; 11.754 - 11.755 - log.append(" Match using StreamCharacterIterator\n"); 11.756 - if (!tryMatchUsingCI(new StreamCharacterIterator(new StringBufferInputStream(toMatch)))) 11.757 - return; 11.758 - 11.759 - log.append(" Match using ReaderCharacterIterator\n"); 11.760 - if (!tryMatchUsingCI(new ReaderCharacterIterator(new StringReader(toMatch)))) 11.761 - return; 11.762 - } 11.763 - } 11.764 - // Matcher blew it 11.765 - catch(Exception e) 11.766 - { 11.767 - test.fail(log, "Matcher threw exception: " + e.toString()); 11.768 - e.printStackTrace(); 11.769 - } 11.770 - // Internal error 11.771 - catch(Error e) 11.772 - { 11.773 - test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\""); 11.774 - e.printStackTrace(); 11.775 - } 11.776 - } 11.777 - 11.778 - private boolean checkResult(boolean result) 11.779 - { 11.780 - // Write status 11.781 - if (result == shouldMatch) { 11.782 - success((shouldMatch ? "Matched" : "Did not match") 11.783 - + " \"" + toMatch + "\", as expected:"); 11.784 - return true; 11.785 - } else { 11.786 - if (shouldMatch) { 11.787 - test.fail(log, "Did not match \"" + toMatch + "\", when expected to."); 11.788 - } else { 11.789 - test.fail(log, "Matched \"" + toMatch + "\", when not expected to."); 11.790 - } 11.791 - return false; 11.792 - } 11.793 - } 11.794 - 11.795 - private boolean checkParens() 11.796 - { 11.797 - // Show subexpression registers 11.798 - if (RETest.showSuccesses) 11.799 - { 11.800 - test.showParens(regexp); 11.801 - } 11.802 - 11.803 - log.append(" Paren count: " + regexp.getParenCount() + "\n"); 11.804 - if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount())) 11.805 - { 11.806 - return false; 11.807 - } 11.808 - 11.809 - // Check registers against expected contents 11.810 - for (int p = 0; p < regexp.getParenCount(); p++) 11.811 - { 11.812 - log.append(" Paren " + p + ": " + regexp.getParen(p) + "\n"); 11.813 - 11.814 - // Compare expected result with actual 11.815 - if ("null".equals(parens[p]) && regexp.getParen(p) == null) 11.816 - { 11.817 - // Consider "null" in test file equal to null 11.818 - continue; 11.819 - } 11.820 - if (!assertEquals(log, "Wrong register " + p, parens[p], regexp.getParen(p))) 11.821 - { 11.822 - return false; 11.823 - } 11.824 - } 11.825 - 11.826 - return true; 11.827 - } 11.828 - 11.829 - boolean tryMatchUsingCI(CharacterIterator matchAgainst) 11.830 - { 11.831 - try { 11.832 - boolean result = regexp.match(matchAgainst, 0); 11.833 - log.append(" Match: " + (result ? "YES" : "NO") + "\n"); 11.834 - return checkResult(result) && (!shouldMatch || checkParens()); 11.835 - } 11.836 - // Matcher blew it 11.837 - catch(Exception e) 11.838 - { 11.839 - test.fail(log, "Matcher threw exception: " + e.toString()); 11.840 - e.printStackTrace(); 11.841 - } 11.842 - // Internal error 11.843 - catch(Error e) 11.844 - { 11.845 - test.fail(log, "Matcher threw fatal error \"" + e.getMessage() + "\""); 11.846 - e.printStackTrace(); 11.847 - } 11.848 - return false; 11.849 - } 11.850 - 11.851 - public boolean assertEquals(StringBuffer log, String message, String expected, String actual) 11.852 - { 11.853 - if (expected != null && !expected.equals(actual) 11.854 - || actual != null && !actual.equals(expected)) 11.855 - { 11.856 - test.fail(log, message + " (expected \"" + expected 11.857 - + "\", actual \"" + actual + "\")"); 11.858 - return false; 11.859 - } 11.860 - return true; 11.861 - } 11.862 - 11.863 - public boolean assertEquals(StringBuffer log, String message, int expected, int actual) 11.864 - { 11.865 - if (expected != actual) { 11.866 - test.fail(log, message + " (expected \"" + expected 11.867 - + "\", actual \"" + actual + "\")"); 11.868 - return false; 11.869 - } 11.870 - return true; 11.871 - } 11.872 - 11.873 - /** 11.874 - * Show a success 11.875 - * @param s Success story 11.876 - */ 11.877 - void success(String s) 11.878 - { 11.879 - if (RETest.showSuccesses) 11.880 - { 11.881 - test.say("" + RETest.NEW_LINE + "-----------------------" + RETest.NEW_LINE + ""); 11.882 - test.say("Expression #" + (number) + " \"" + pattern + "\" "); 11.883 - test.say("Success: " + s); 11.884 - } 11.885 - } 11.886 -}
12.1 --- a/src/com/sun/org/apache/regexp/internal/REUtil.java Sat Oct 24 16:18:47 2020 +0800 12.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 12.3 @@ -1,61 +0,0 @@ 12.4 -/* 12.5 - * reserved comment block 12.6 - * DO NOT REMOVE OR ALTER! 12.7 - */ 12.8 -/* 12.9 - * Copyright 1999-2004 The Apache Software Foundation. 12.10 - * 12.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 12.12 - * you may not use this file except in compliance with the License. 12.13 - * You may obtain a copy of the License at 12.14 - * 12.15 - * http://www.apache.org/licenses/LICENSE-2.0 12.16 - * 12.17 - * Unless required by applicable law or agreed to in writing, software 12.18 - * distributed under the License is distributed on an "AS IS" BASIS, 12.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12.20 - * See the License for the specific language governing permissions and 12.21 - * limitations under the License. 12.22 - */ 12.23 - 12.24 -package com.sun.org.apache.regexp.internal; 12.25 - 12.26 -/** 12.27 - * This is a class that contains utility helper methods for this package. 12.28 - * 12.29 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 12.30 - */ 12.31 -public class REUtil 12.32 -{ 12.33 - /** complex: */ 12.34 - private static final String complexPrefix = "complex:"; 12.35 - 12.36 - /** 12.37 - * Creates a regular expression, permitting simple or complex syntax 12.38 - * @param expression The expression, beginning with a prefix if it's complex or 12.39 - * having no prefix if it's simple 12.40 - * @param matchFlags Matching style flags 12.41 - * @return The regular expression object 12.42 - * @exception RESyntaxException thrown in case of error 12.43 - */ 12.44 - public static RE createRE(String expression, int matchFlags) throws RESyntaxException 12.45 - { 12.46 - if (expression.startsWith(complexPrefix)) 12.47 - { 12.48 - return new RE(expression.substring(complexPrefix.length()), matchFlags); 12.49 - } 12.50 - return new RE(RE.simplePatternToFullRegularExpression(expression), matchFlags); 12.51 - } 12.52 - 12.53 - /** 12.54 - * Creates a regular expression, permitting simple or complex syntax 12.55 - * @param expression The expression, beginning with a prefix if it's complex or 12.56 - * having no prefix if it's simple 12.57 - * @return The regular expression object 12.58 - * @exception RESyntaxException thrown in case of error 12.59 - */ 12.60 - public static RE createRE(String expression) throws RESyntaxException 12.61 - { 12.62 - return createRE(expression, RE.MATCH_NORMAL); 12.63 - } 12.64 -}
13.1 --- a/src/com/sun/org/apache/regexp/internal/ReaderCharacterIterator.java Sat Oct 24 16:18:47 2020 +0800 13.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 13.3 @@ -1,164 +0,0 @@ 13.4 -/* 13.5 - * reserved comment block 13.6 - * DO NOT REMOVE OR ALTER! 13.7 - */ 13.8 -/* 13.9 - * Copyright 1999-2004 The Apache Software Foundation. 13.10 - * 13.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 13.12 - * you may not use this file except in compliance with the License. 13.13 - * You may obtain a copy of the License at 13.14 - * 13.15 - * http://www.apache.org/licenses/LICENSE-2.0 13.16 - * 13.17 - * Unless required by applicable law or agreed to in writing, software 13.18 - * distributed under the License is distributed on an "AS IS" BASIS, 13.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13.20 - * See the License for the specific language governing permissions and 13.21 - * limitations under the License. 13.22 - */ 13.23 - 13.24 -package com.sun.org.apache.regexp.internal; 13.25 - 13.26 -import java.io.Reader; 13.27 -import java.io.IOException; 13.28 - 13.29 -/** 13.30 - * Encapsulates java.io.Reader as CharacterIterator 13.31 - * 13.32 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a> 13.33 - */ 13.34 -public final class ReaderCharacterIterator implements CharacterIterator 13.35 -{ 13.36 - /** Underlying reader */ 13.37 - private final Reader reader; 13.38 - 13.39 - /** Buffer of read chars */ 13.40 - private final StringBuffer buff; 13.41 - 13.42 - /** read end? */ 13.43 - private boolean closed; 13.44 - 13.45 - /** @param reader a Reader, which is parsed */ 13.46 - public ReaderCharacterIterator(Reader reader) 13.47 - { 13.48 - this.reader = reader; 13.49 - this.buff = new StringBuffer(512); 13.50 - this.closed = false; 13.51 - } 13.52 - 13.53 - /** @return a substring */ 13.54 - public String substring(int beginIndex, int endIndex) 13.55 - { 13.56 - try 13.57 - { 13.58 - ensure(endIndex); 13.59 - return buff.toString().substring(beginIndex, endIndex); 13.60 - } 13.61 - catch (IOException e) 13.62 - { 13.63 - throw new StringIndexOutOfBoundsException(e.getMessage()); 13.64 - } 13.65 - } 13.66 - 13.67 - /** @return a substring */ 13.68 - public String substring(int beginIndex) 13.69 - { 13.70 - try 13.71 - { 13.72 - readAll(); 13.73 - return buff.toString().substring(beginIndex); 13.74 - } 13.75 - catch (IOException e) 13.76 - { 13.77 - throw new StringIndexOutOfBoundsException(e.getMessage()); 13.78 - } 13.79 - } 13.80 - 13.81 - /** @return a character at the specified position. */ 13.82 - public char charAt(int pos) 13.83 - { 13.84 - try 13.85 - { 13.86 - ensure(pos); 13.87 - return buff.charAt(pos); 13.88 - } 13.89 - catch (IOException e) 13.90 - { 13.91 - throw new StringIndexOutOfBoundsException(e.getMessage()); 13.92 - } 13.93 - } 13.94 - 13.95 - /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */ 13.96 - public boolean isEnd(int pos) 13.97 - { 13.98 - if (buff.length() > pos) 13.99 - { 13.100 - return false; 13.101 - } 13.102 - else 13.103 - { 13.104 - try 13.105 - { 13.106 - ensure(pos); 13.107 - return (buff.length() <= pos); 13.108 - } 13.109 - catch (IOException e) 13.110 - { 13.111 - throw new StringIndexOutOfBoundsException(e.getMessage()); 13.112 - } 13.113 - } 13.114 - } 13.115 - 13.116 - /** Reads n characters from the stream and appends them to the buffer */ 13.117 - private int read(int n) throws IOException 13.118 - { 13.119 - if (closed) 13.120 - { 13.121 - return 0; 13.122 - } 13.123 - 13.124 - char[] c = new char[n]; 13.125 - int count = 0; 13.126 - int read = 0; 13.127 - 13.128 - do 13.129 - { 13.130 - read = reader.read(c); 13.131 - if (read < 0) // EOF 13.132 - { 13.133 - closed = true; 13.134 - break; 13.135 - } 13.136 - count += read; 13.137 - buff.append(c, 0, read); 13.138 - } 13.139 - while (count < n); 13.140 - 13.141 - return count; 13.142 - } 13.143 - 13.144 - /** Reads rest of the stream. */ 13.145 - private void readAll() throws IOException 13.146 - { 13.147 - while(! closed) 13.148 - { 13.149 - read(1000); 13.150 - } 13.151 - } 13.152 - 13.153 - /** Reads chars up to the idx */ 13.154 - private void ensure(int idx) throws IOException 13.155 - { 13.156 - if (closed) 13.157 - { 13.158 - return; 13.159 - } 13.160 - 13.161 - if (idx < buff.length()) 13.162 - { 13.163 - return; 13.164 - } 13.165 - read(idx + 1 - buff.length()); 13.166 - } 13.167 -}
14.1 --- a/src/com/sun/org/apache/regexp/internal/StreamCharacterIterator.java Sat Oct 24 16:18:47 2020 +0800 14.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 14.3 @@ -1,161 +0,0 @@ 14.4 -/* 14.5 - * reserved comment block 14.6 - * DO NOT REMOVE OR ALTER! 14.7 - */ 14.8 -/* 14.9 - * Copyright 1999-2004 The Apache Software Foundation. 14.10 - * 14.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 14.12 - * you may not use this file except in compliance with the License. 14.13 - * You may obtain a copy of the License at 14.14 - * 14.15 - * http://www.apache.org/licenses/LICENSE-2.0 14.16 - * 14.17 - * Unless required by applicable law or agreed to in writing, software 14.18 - * distributed under the License is distributed on an "AS IS" BASIS, 14.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14.20 - * See the License for the specific language governing permissions and 14.21 - * limitations under the License. 14.22 - */ 14.23 - 14.24 -package com.sun.org.apache.regexp.internal; 14.25 - 14.26 -import java.io.InputStream; 14.27 -import java.io.IOException; 14.28 - 14.29 -/** 14.30 - * Encapsulates java.io.InputStream as CharacterIterator. 14.31 - * 14.32 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a> 14.33 - */ 14.34 -public final class StreamCharacterIterator implements CharacterIterator 14.35 -{ 14.36 - /** Underlying is */ 14.37 - private final InputStream is; 14.38 - 14.39 - /** Buffer of read chars */ 14.40 - private final StringBuffer buff; 14.41 - 14.42 - /** read end? */ 14.43 - private boolean closed; 14.44 - 14.45 - /** @param is an InputStream, which is parsed */ 14.46 - public StreamCharacterIterator(InputStream is) 14.47 - { 14.48 - this.is = is; 14.49 - this.buff = new StringBuffer(512); 14.50 - this.closed = false; 14.51 - } 14.52 - 14.53 - /** @return a substring */ 14.54 - public String substring(int beginIndex, int endIndex) 14.55 - { 14.56 - try 14.57 - { 14.58 - ensure(endIndex); 14.59 - return buff.toString().substring(beginIndex, endIndex); 14.60 - } 14.61 - catch (IOException e) 14.62 - { 14.63 - throw new StringIndexOutOfBoundsException(e.getMessage()); 14.64 - } 14.65 - } 14.66 - 14.67 - /** @return a substring */ 14.68 - public String substring(int beginIndex) 14.69 - { 14.70 - try 14.71 - { 14.72 - readAll(); 14.73 - return buff.toString().substring(beginIndex); 14.74 - } 14.75 - catch (IOException e) 14.76 - { 14.77 - throw new StringIndexOutOfBoundsException(e.getMessage()); 14.78 - } 14.79 - } 14.80 - 14.81 - 14.82 - /** @return a character at the specified position. */ 14.83 - public char charAt(int pos) 14.84 - { 14.85 - try 14.86 - { 14.87 - ensure(pos); 14.88 - return buff.charAt(pos); 14.89 - } 14.90 - catch (IOException e) 14.91 - { 14.92 - throw new StringIndexOutOfBoundsException(e.getMessage()); 14.93 - } 14.94 - } 14.95 - 14.96 - /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */ 14.97 - public boolean isEnd(int pos) 14.98 - { 14.99 - if (buff.length() > pos) 14.100 - { 14.101 - return false; 14.102 - } 14.103 - else 14.104 - { 14.105 - try 14.106 - { 14.107 - ensure(pos); 14.108 - return (buff.length() <= pos); 14.109 - } 14.110 - catch (IOException e) 14.111 - { 14.112 - throw new StringIndexOutOfBoundsException(e.getMessage()); 14.113 - } 14.114 - } 14.115 - } 14.116 - 14.117 - /** Reads n characters from the stream and appends them to the buffer */ 14.118 - private int read(int n) throws IOException 14.119 - { 14.120 - if (closed) 14.121 - { 14.122 - return 0; 14.123 - } 14.124 - 14.125 - int c; 14.126 - int i = n; 14.127 - while (--i >= 0) 14.128 - { 14.129 - c = is.read(); 14.130 - if (c < 0) // EOF 14.131 - { 14.132 - closed = true; 14.133 - break; 14.134 - } 14.135 - buff.append((char) c); 14.136 - } 14.137 - return n - i; 14.138 - } 14.139 - 14.140 - /** Reads rest of the stream. */ 14.141 - private void readAll() throws IOException 14.142 - { 14.143 - while(! closed) 14.144 - { 14.145 - read(1000); 14.146 - } 14.147 - } 14.148 - 14.149 - /** Reads chars up to the idx */ 14.150 - private void ensure(int idx) throws IOException 14.151 - { 14.152 - if (closed) 14.153 - { 14.154 - return; 14.155 - } 14.156 - 14.157 - if (idx < buff.length()) 14.158 - { 14.159 - return; 14.160 - } 14.161 - 14.162 - read(idx + 1 - buff.length()); 14.163 - } 14.164 -}
15.1 --- a/src/com/sun/org/apache/regexp/internal/StringCharacterIterator.java Sat Oct 24 16:18:47 2020 +0800 15.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 15.3 @@ -1,62 +0,0 @@ 15.4 -/* 15.5 - * reserved comment block 15.6 - * DO NOT REMOVE OR ALTER! 15.7 - */ 15.8 -/* 15.9 - * Copyright 1999-2004 The Apache Software Foundation. 15.10 - * 15.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 15.12 - * you may not use this file except in compliance with the License. 15.13 - * You may obtain a copy of the License at 15.14 - * 15.15 - * http://www.apache.org/licenses/LICENSE-2.0 15.16 - * 15.17 - * Unless required by applicable law or agreed to in writing, software 15.18 - * distributed under the License is distributed on an "AS IS" BASIS, 15.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15.20 - * See the License for the specific language governing permissions and 15.21 - * limitations under the License. 15.22 - */ 15.23 - 15.24 -package com.sun.org.apache.regexp.internal; 15.25 - 15.26 -/** 15.27 - * Encapsulates String as CharacterIterator. 15.28 - * 15.29 - * @author <a href="mailto:ales.novak@netbeans.com">Ales Novak</a> 15.30 - */ 15.31 -public final class StringCharacterIterator implements CharacterIterator 15.32 -{ 15.33 - /** encapsulated */ 15.34 - private final String src; 15.35 - 15.36 - /** @param src - encapsulated String */ 15.37 - public StringCharacterIterator(String src) 15.38 - { 15.39 - this.src = src; 15.40 - } 15.41 - 15.42 - /** @return a substring */ 15.43 - public String substring(int beginIndex, int endIndex) 15.44 - { 15.45 - return src.substring(beginIndex, endIndex); 15.46 - } 15.47 - 15.48 - /** @return a substring */ 15.49 - public String substring(int beginIndex) 15.50 - { 15.51 - return src.substring(beginIndex); 15.52 - } 15.53 - 15.54 - /** @return a character at the specified position. */ 15.55 - public char charAt(int pos) 15.56 - { 15.57 - return src.charAt(pos); 15.58 - } 15.59 - 15.60 - /** @return <tt>true</tt> iff if the specified index is after the end of the character stream */ 15.61 - public boolean isEnd(int pos) 15.62 - { 15.63 - return (pos >= src.length()); 15.64 - } 15.65 -}
16.1 --- a/src/com/sun/org/apache/regexp/internal/recompile.java Sat Oct 24 16:18:47 2020 +0800 16.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 16.3 @@ -1,137 +0,0 @@ 16.4 -/* 16.5 - * reserved comment block 16.6 - * DO NOT REMOVE OR ALTER! 16.7 - */ 16.8 -/* 16.9 - * Copyright 1999-2004 The Apache Software Foundation. 16.10 - * 16.11 - * Licensed under the Apache License, Version 2.0 (the "License"); 16.12 - * you may not use this file except in compliance with the License. 16.13 - * You may obtain a copy of the License at 16.14 - * 16.15 - * http://www.apache.org/licenses/LICENSE-2.0 16.16 - * 16.17 - * Unless required by applicable law or agreed to in writing, software 16.18 - * distributed under the License is distributed on an "AS IS" BASIS, 16.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16.20 - * See the License for the specific language governing permissions and 16.21 - * limitations under the License. 16.22 - */ 16.23 - 16.24 -package com.sun.org.apache.regexp.internal; 16.25 - 16.26 -import com.sun.org.apache.regexp.internal.RECompiler; 16.27 -import com.sun.org.apache.regexp.internal.RESyntaxException; 16.28 - 16.29 -/** 16.30 - * 'recompile' is a command line tool that pre-compiles one or more regular expressions 16.31 - * for use with the regular expression matcher class 'RE'. For example, the command 16.32 - * "java recompile a*b" produces output like this: 16.33 - * 16.34 - * <pre> 16.35 - * 16.36 - * // Pre-compiled regular expression "a*b" 16.37 - * char[] re1Instructions = 16.38 - * { 16.39 - * 0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041, 16.40 - * 0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047, 16.41 - * 0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000, 16.42 - * 0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000, 16.43 - * 0x0000, 16.44 - * }; 16.45 - * 16.46 - * REProgram re1 = new REProgram(re1Instructions); 16.47 - * 16.48 - * </pre> 16.49 - * 16.50 - * By pasting this output into your code, you can construct a regular expression matcher 16.51 - * (RE) object directly from the pre-compiled data (the character array re1), thus avoiding 16.52 - * the overhead of compiling the expression at runtime. For example: 16.53 - * 16.54 - * <pre> 16.55 - * 16.56 - * RE r = new RE(re1); 16.57 - * 16.58 - * </pre> 16.59 - * 16.60 - * @see RE 16.61 - * @see RECompiler 16.62 - * 16.63 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> 16.64 - */ 16.65 -public class recompile 16.66 -{ 16.67 - /** 16.68 - * Main application entrypoint. 16.69 - * @param arg Command line arguments 16.70 - */ 16.71 - static public void main(String[] arg) 16.72 - { 16.73 - // Create a compiler object 16.74 - RECompiler r = new RECompiler(); 16.75 - 16.76 - // Print usage if arguments are incorrect 16.77 - if (arg.length <= 0 || arg.length % 2 != 0) 16.78 - { 16.79 - System.out.println("Usage: recompile <patternname> <pattern>"); 16.80 - System.exit(0); 16.81 - } 16.82 - 16.83 - // Loop through arguments, compiling each 16.84 - for (int i = 0; i < arg.length; i += 2) 16.85 - { 16.86 - try 16.87 - { 16.88 - // Compile regular expression 16.89 - String name = arg[i]; 16.90 - String pattern = arg[i+1]; 16.91 - String instructions = name + "PatternInstructions"; 16.92 - 16.93 - // Output program as a nice, formatted character array 16.94 - System.out.print("\n // Pre-compiled regular expression '" + pattern + "'\n" 16.95 - + " private static char[] " + instructions + " = \n {"); 16.96 - 16.97 - // Compile program for pattern 16.98 - REProgram program = r.compile(pattern); 16.99 - 16.100 - // Number of columns in output 16.101 - int numColumns = 7; 16.102 - 16.103 - // Loop through program 16.104 - char[] p = program.getInstructions(); 16.105 - for (int j = 0; j < p.length; j++) 16.106 - { 16.107 - // End of column? 16.108 - if ((j % numColumns) == 0) 16.109 - { 16.110 - System.out.print("\n "); 16.111 - } 16.112 - 16.113 - // Print character as padded hex number 16.114 - String hex = Integer.toHexString(p[j]); 16.115 - while (hex.length() < 4) 16.116 - { 16.117 - hex = "0" + hex; 16.118 - } 16.119 - System.out.print("0x" + hex + ", "); 16.120 - } 16.121 - 16.122 - // End of program block 16.123 - System.out.println("\n };"); 16.124 - System.out.println("\n private static RE " + name + "Pattern = new RE(new REProgram(" + instructions + "));"); 16.125 - } 16.126 - catch (RESyntaxException e) 16.127 - { 16.128 - System.out.println("Syntax error in expression \"" + arg[i] + "\": " + e.toString()); 16.129 - } 16.130 - catch (Exception e) 16.131 - { 16.132 - System.out.println("Unexpected exception: " + e.toString()); 16.133 - } 16.134 - catch (Error e) 16.135 - { 16.136 - System.out.println("Internal error: " + e.toString()); 16.137 - } 16.138 - } 16.139 - } 16.140 -}