src/com/sun/org/apache/regexp/internal/REDebugCompiler.java

changeset 2116
aaee9ae4799a
parent 759
7ea027fae4d8
equal deleted inserted replaced
2090:3b8ebb957957 2116:aaee9ae4799a
1 /*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5 /*
6 * Copyright 1999-2004 The Apache Software Foundation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 package com.sun.org.apache.regexp.internal;
22
23 import java.io.PrintWriter;
24 import java.util.Hashtable;
25
26 /**
27 * A subclass of RECompiler which can dump a regular expression program
28 * for debugging purposes.
29 *
30 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
31 */
32 public class REDebugCompiler extends RECompiler
33 {
34 /**
35 * Mapping from opcodes to descriptive strings
36 */
37 static Hashtable hashOpcode = new Hashtable();
38 static
39 {
40 hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR), "OP_RELUCTANTSTAR");
41 hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS), "OP_RELUCTANTPLUS");
42 hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE), "OP_RELUCTANTMAYBE");
43 hashOpcode.put(new Integer(RE.OP_END), "OP_END");
44 hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL");
45 hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL");
46 hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY");
47 hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF");
48 hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH");
49 hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM");
50 hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR");
51 hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS");
52 hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE");
53 hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING");
54 hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO");
55 hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE");
56 hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN");
57 hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
58 hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF");
59 hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS");
60 hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER), "OP_OPEN_CLUSTER");
61 hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER), "OP_CLOSE_CLUSTER");
62 }
63
64 /**
65 * Returns a descriptive string for an opcode.
66 * @param opcode Opcode to convert to a string
67 * @return Description of opcode
68 */
69 String opcodeToString(char opcode)
70 {
71 // Get string for opcode
72 String ret =(String)hashOpcode.get(new Integer(opcode));
73
74 // Just in case we have a corrupt program
75 if (ret == null)
76 {
77 ret = "OP_????";
78 }
79 return ret;
80 }
81
82 /**
83 * Return a string describing a (possibly unprintable) character.
84 * @param c Character to convert to a printable representation
85 * @return String representation of character
86 */
87 String charToString(char c)
88 {
89 // If it's unprintable, convert to '\###'
90 if (c < ' ' || c > 127)
91 {
92 return "\\" + (int)c;
93 }
94
95 // Return the character as a string
96 return String.valueOf(c);
97 }
98
99 /**
100 * Returns a descriptive string for a node in a regular expression program.
101 * @param node Node to describe
102 * @return Description of node
103 */
104 String nodeToString(int node)
105 {
106 // Get opcode and opdata for node
107 char opcode = instruction[node + RE.offsetOpcode];
108 int opdata = (int)instruction[node + RE.offsetOpdata];
109
110 // Return opcode as a string and opdata value
111 return opcodeToString(opcode) + ", opdata = " + opdata;
112 }
113
114 /**
115 * Inserts a node with a given opcode and opdata at insertAt. The node relative next
116 * pointer is initialized to 0.
117 * @param opcode Opcode for new node
118 * @param opdata Opdata for new node (only the low 16 bits are currently used)
119 * @param insertAt Index at which to insert the new node in the program * /
120 void nodeInsert(char opcode, int opdata, int insertAt) {
121 System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
122 PrintWriter writer = new PrintWriter( System.out );
123 dumpProgram( writer );
124 super.nodeInsert( opcode, opdata, insertAt );
125 System.out.println( "====< " );
126 dumpProgram( writer );
127 writer.flush();
128 }/**/
129
130
131 /**
132 * Appends a node to the end of a node chain
133 * @param node Start of node chain to traverse
134 * @param pointTo Node to have the tail of the chain point to * /
135 void setNextOfEnd(int node, int pointTo) {
136 System.out.println( "====> " + node + " " + pointTo );
137 PrintWriter writer = new PrintWriter( System.out );
138 dumpProgram( writer );
139 super.setNextOfEnd( node, pointTo );
140 System.out.println( "====< " );
141 dumpProgram( writer );
142 writer.flush();
143 }/**/
144
145
146 /**
147 * Dumps the current program to a PrintWriter
148 * @param p PrintWriter for program dump output
149 */
150 public void dumpProgram(PrintWriter p)
151 {
152 // Loop through the whole program
153 for (int i = 0; i < lenInstruction; )
154 {
155 // Get opcode, opdata and next fields of current program node
156 char opcode = instruction[i + RE.offsetOpcode];
157 char opdata = instruction[i + RE.offsetOpdata];
158 short next = (short)instruction[i + RE.offsetNext];
159
160 // Display the current program node
161 p.print(i + ". " + nodeToString(i) + ", next = ");
162
163 // If there's no next, say 'none', otherwise give absolute index of next node
164 if (next == 0)
165 {
166 p.print("none");
167 }
168 else
169 {
170 p.print(i + next);
171 }
172
173 // Move past node
174 i += RE.nodeSize;
175
176 // If character class
177 if (opcode == RE.OP_ANYOF)
178 {
179 // Opening bracket for start of char class
180 p.print(", [");
181
182 // Show each range in the char class
183 int rangeCount = opdata;
184 for (int r = 0; r < rangeCount; r++)
185 {
186 // Get first and last chars in range
187 char charFirst = instruction[i++];
188 char charLast = instruction[i++];
189
190 // Print range as X-Y, unless range encompasses only one char
191 if (charFirst == charLast)
192 {
193 p.print(charToString(charFirst));
194 }
195 else
196 {
197 p.print(charToString(charFirst) + "-" + charToString(charLast));
198 }
199 }
200
201 // Annotate the end of the char class
202 p.print("]");
203 }
204
205 // If atom
206 if (opcode == RE.OP_ATOM)
207 {
208 // Open quote
209 p.print(", \"");
210
211 // Print each character in the atom
212 for (int len = opdata; len-- != 0; )
213 {
214 p.print(charToString(instruction[i++]));
215 }
216
217 // Close quote
218 p.print("\"");
219 }
220
221 // Print a newline
222 p.println("");
223 }
224 }
225 }

mercurial