src/com/sun/org/apache/regexp/internal/REProgram.java

changeset 2116
aaee9ae4799a
parent 2090
3b8ebb957957
parent 2115
ba503169016f
child 2117
a5f920b6d2b5
     1.1 --- a/src/com/sun/org/apache/regexp/internal/REProgram.java	Sat Oct 24 16:18:47 2020 +0800
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,158 +0,0 @@
     1.4 -/*
     1.5 - * reserved comment block
     1.6 - * DO NOT REMOVE OR ALTER!
     1.7 - */
     1.8 -/*
     1.9 - * Copyright 1999-2004 The Apache Software Foundation.
    1.10 - *
    1.11 - * Licensed under the Apache License, Version 2.0 (the "License");
    1.12 - * you may not use this file except in compliance with the License.
    1.13 - * You may obtain a copy of the License at
    1.14 - *
    1.15 - *     http://www.apache.org/licenses/LICENSE-2.0
    1.16 - *
    1.17 - * Unless required by applicable law or agreed to in writing, software
    1.18 - * distributed under the License is distributed on an "AS IS" BASIS,
    1.19 - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    1.20 - * See the License for the specific language governing permissions and
    1.21 - * limitations under the License.
    1.22 - */
    1.23 -
    1.24 -package com.sun.org.apache.regexp.internal;
    1.25 -
    1.26 -import java.io.Serializable;
    1.27 -
    1.28 -/**
    1.29 - * A class that holds compiled regular expressions.  This is exposed mainly
    1.30 - * for use by the recompile utility (which helps you produce precompiled
    1.31 - * REProgram objects). You should not otherwise need to work directly with
    1.32 - * this class.
    1.33 -*
    1.34 - * @see RE
    1.35 - * @see RECompiler
    1.36 - *
    1.37 - * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
    1.38 - */
    1.39 -public class REProgram implements Serializable
    1.40 -{
    1.41 -    static final int OPT_HASBACKREFS = 1;
    1.42 -
    1.43 -    char[] instruction;         // The compiled regular expression 'program'
    1.44 -    int lenInstruction;         // The amount of the instruction buffer in use
    1.45 -    char[] prefix;              // Prefix string optimization
    1.46 -    int flags;                  // Optimization flags (REProgram.OPT_*)
    1.47 -    int maxParens = -1;
    1.48 -
    1.49 -    /**
    1.50 -     * Constructs a program object from a character array
    1.51 -     * @param instruction Character array with RE opcode instructions in it
    1.52 -     */
    1.53 -    public REProgram(char[] instruction)
    1.54 -    {
    1.55 -        this(instruction, instruction.length);
    1.56 -    }
    1.57 -
    1.58 -    /**
    1.59 -     * Constructs a program object from a character array
    1.60 -     * @param parens Count of parens in the program
    1.61 -     * @param instruction Character array with RE opcode instructions in it
    1.62 -     */
    1.63 -    public REProgram(int parens, char[] instruction)
    1.64 -    {
    1.65 -        this(instruction, instruction.length);
    1.66 -        this.maxParens = parens;
    1.67 -    }
    1.68 -
    1.69 -    /**
    1.70 -     * Constructs a program object from a character array
    1.71 -     * @param instruction Character array with RE opcode instructions in it
    1.72 -     * @param lenInstruction Amount of instruction array in use
    1.73 -     */
    1.74 -    public REProgram(char[] instruction, int lenInstruction)
    1.75 -    {
    1.76 -        setInstructions(instruction, lenInstruction);
    1.77 -    }
    1.78 -
    1.79 -    /**
    1.80 -     * Returns a copy of the current regular expression program in a character
    1.81 -     * array that is exactly the right length to hold the program.  If there is
    1.82 -     * no program compiled yet, getInstructions() will return null.
    1.83 -     * @return A copy of the current compiled RE program
    1.84 -     */
    1.85 -    public char[] getInstructions()
    1.86 -    {
    1.87 -        // Ensure program has been compiled!
    1.88 -        if (lenInstruction != 0)
    1.89 -        {
    1.90 -            // Return copy of program
    1.91 -            char[] ret = new char[lenInstruction];
    1.92 -            System.arraycopy(instruction, 0, ret, 0, lenInstruction);
    1.93 -            return ret;
    1.94 -        }
    1.95 -        return null;
    1.96 -    }
    1.97 -
    1.98 -    /**
    1.99 -     * Sets a new regular expression program to run.  It is this method which
   1.100 -     * performs any special compile-time search optimizations.  Currently only
   1.101 -     * two optimizations are in place - one which checks for backreferences
   1.102 -     * (so that they can be lazily allocated) and another which attempts to
   1.103 -     * find a prefix anchor string so that substantial amounts of input can
   1.104 -     * potentially be skipped without running the actual program.
   1.105 -     * @param instruction Program instruction buffer
   1.106 -     * @param lenInstruction Length of instruction buffer in use
   1.107 -     */
   1.108 -    public void setInstructions(char[] instruction, int lenInstruction)
   1.109 -    {
   1.110 -        // Save reference to instruction array
   1.111 -        this.instruction = instruction;
   1.112 -        this.lenInstruction = lenInstruction;
   1.113 -
   1.114 -        // Initialize other program-related variables
   1.115 -        flags = 0;
   1.116 -        prefix = null;
   1.117 -
   1.118 -        // Try various compile-time optimizations if there's a program
   1.119 -        if (instruction != null && lenInstruction != 0)
   1.120 -        {
   1.121 -            // If the first node is a branch
   1.122 -            if (lenInstruction >= RE.nodeSize && instruction[0 + RE.offsetOpcode] == RE.OP_BRANCH)
   1.123 -            {
   1.124 -                // to the end node
   1.125 -                int next = instruction[0 + RE.offsetNext];
   1.126 -                if (instruction[next + RE.offsetOpcode] == RE.OP_END)
   1.127 -                {
   1.128 -                    // and the branch starts with an atom
   1.129 -                    if (lenInstruction >= (RE.nodeSize * 2) && instruction[RE.nodeSize + RE.offsetOpcode] == RE.OP_ATOM)
   1.130 -                    {
   1.131 -                        // then get that atom as a prefix because there's no other choice
   1.132 -                        int lenAtom = instruction[RE.nodeSize + RE.offsetOpdata];
   1.133 -                        prefix = new char[lenAtom];
   1.134 -                        System.arraycopy(instruction, RE.nodeSize * 2, prefix, 0, lenAtom);
   1.135 -                    }
   1.136 -                }
   1.137 -            }
   1.138 -
   1.139 -            BackrefScanLoop:
   1.140 -
   1.141 -            // Check for backreferences
   1.142 -            for (int i = 0; i < lenInstruction; i += RE.nodeSize)
   1.143 -            {
   1.144 -                switch (instruction[i + RE.offsetOpcode])
   1.145 -                {
   1.146 -                    case RE.OP_ANYOF:
   1.147 -                        i += (instruction[i + RE.offsetOpdata] * 2);
   1.148 -                        break;
   1.149 -
   1.150 -                    case RE.OP_ATOM:
   1.151 -                        i += instruction[i + RE.offsetOpdata];
   1.152 -                        break;
   1.153 -
   1.154 -                    case RE.OP_BACKREF:
   1.155 -                        flags |= OPT_HASBACKREFS;
   1.156 -                        break BackrefScanLoop;
   1.157 -                }
   1.158 -            }
   1.159 -        }
   1.160 -    }
   1.161 -}

mercurial