/*
 * reserved comment block
 * DO NOT REMOVE OR ALTER!
 */
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
20 |
|
21 package com.sun.org.apache.regexp.internal; |
|
22 |
|
23 import java.io.PrintWriter; |
|
24 import java.util.Hashtable; |
|
25 |
|
26 /** |
|
27 * A subclass of RECompiler which can dump a regular expression program |
|
28 * for debugging purposes. |
|
29 * |
|
30 * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> |
|
31 */ |
|
32 public class REDebugCompiler extends RECompiler |
|
33 { |
|
34 /** |
|
35 * Mapping from opcodes to descriptive strings |
|
36 */ |
|
37 static Hashtable hashOpcode = new Hashtable(); |
|
38 static |
|
39 { |
|
40 hashOpcode.put(new Integer(RE.OP_RELUCTANTSTAR), "OP_RELUCTANTSTAR"); |
|
41 hashOpcode.put(new Integer(RE.OP_RELUCTANTPLUS), "OP_RELUCTANTPLUS"); |
|
42 hashOpcode.put(new Integer(RE.OP_RELUCTANTMAYBE), "OP_RELUCTANTMAYBE"); |
|
43 hashOpcode.put(new Integer(RE.OP_END), "OP_END"); |
|
44 hashOpcode.put(new Integer(RE.OP_BOL), "OP_BOL"); |
|
45 hashOpcode.put(new Integer(RE.OP_EOL), "OP_EOL"); |
|
46 hashOpcode.put(new Integer(RE.OP_ANY), "OP_ANY"); |
|
47 hashOpcode.put(new Integer(RE.OP_ANYOF), "OP_ANYOF"); |
|
48 hashOpcode.put(new Integer(RE.OP_BRANCH), "OP_BRANCH"); |
|
49 hashOpcode.put(new Integer(RE.OP_ATOM), "OP_ATOM"); |
|
50 hashOpcode.put(new Integer(RE.OP_STAR), "OP_STAR"); |
|
51 hashOpcode.put(new Integer(RE.OP_PLUS), "OP_PLUS"); |
|
52 hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE"); |
|
53 hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING"); |
|
54 hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO"); |
|
55 hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE"); |
|
56 hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN"); |
|
57 hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE"); |
|
58 hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF"); |
|
59 hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS"); |
|
60 hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER), "OP_OPEN_CLUSTER"); |
|
61 hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER), "OP_CLOSE_CLUSTER"); |
|
62 } |
|
63 |
|
64 /** |
|
65 * Returns a descriptive string for an opcode. |
|
66 * @param opcode Opcode to convert to a string |
|
67 * @return Description of opcode |
|
68 */ |
|
69 String opcodeToString(char opcode) |
|
70 { |
|
71 // Get string for opcode |
|
72 String ret =(String)hashOpcode.get(new Integer(opcode)); |
|
73 |
|
74 // Just in case we have a corrupt program |
|
75 if (ret == null) |
|
76 { |
|
77 ret = "OP_????"; |
|
78 } |
|
79 return ret; |
|
80 } |
|
81 |
|
82 /** |
|
83 * Return a string describing a (possibly unprintable) character. |
|
84 * @param c Character to convert to a printable representation |
|
85 * @return String representation of character |
|
86 */ |
|
87 String charToString(char c) |
|
88 { |
|
89 // If it's unprintable, convert to '\###' |
|
90 if (c < ' ' || c > 127) |
|
91 { |
|
92 return "\\" + (int)c; |
|
93 } |
|
94 |
|
95 // Return the character as a string |
|
96 return String.valueOf(c); |
|
97 } |
|
98 |
|
99 /** |
|
100 * Returns a descriptive string for a node in a regular expression program. |
|
101 * @param node Node to describe |
|
102 * @return Description of node |
|
103 */ |
|
104 String nodeToString(int node) |
|
105 { |
|
106 // Get opcode and opdata for node |
|
107 char opcode = instruction[node + RE.offsetOpcode]; |
|
108 int opdata = (int)instruction[node + RE.offsetOpdata]; |
|
109 |
|
110 // Return opcode as a string and opdata value |
|
111 return opcodeToString(opcode) + ", opdata = " + opdata; |
|
112 } |
|
113 |
|
114 /** |
|
115 * Inserts a node with a given opcode and opdata at insertAt. The node relative next |
|
116 * pointer is initialized to 0. |
|
117 * @param opcode Opcode for new node |
|
118 * @param opdata Opdata for new node (only the low 16 bits are currently used) |
|
119 * @param insertAt Index at which to insert the new node in the program * / |
|
120 void nodeInsert(char opcode, int opdata, int insertAt) { |
|
121 System.out.println( "====> " + opcode + " " + opdata + " " + insertAt ); |
|
122 PrintWriter writer = new PrintWriter( System.out ); |
|
123 dumpProgram( writer ); |
|
124 super.nodeInsert( opcode, opdata, insertAt ); |
|
125 System.out.println( "====< " ); |
|
126 dumpProgram( writer ); |
|
127 writer.flush(); |
|
128 }/**/ |
|
129 |
|
130 |
|
131 /** |
|
132 * Appends a node to the end of a node chain |
|
133 * @param node Start of node chain to traverse |
|
134 * @param pointTo Node to have the tail of the chain point to * / |
|
135 void setNextOfEnd(int node, int pointTo) { |
|
136 System.out.println( "====> " + node + " " + pointTo ); |
|
137 PrintWriter writer = new PrintWriter( System.out ); |
|
138 dumpProgram( writer ); |
|
139 super.setNextOfEnd( node, pointTo ); |
|
140 System.out.println( "====< " ); |
|
141 dumpProgram( writer ); |
|
142 writer.flush(); |
|
143 }/**/ |
|
144 |
|
145 |
|
146 /** |
|
147 * Dumps the current program to a PrintWriter |
|
148 * @param p PrintWriter for program dump output |
|
149 */ |
|
150 public void dumpProgram(PrintWriter p) |
|
151 { |
|
152 // Loop through the whole program |
|
153 for (int i = 0; i < lenInstruction; ) |
|
154 { |
|
155 // Get opcode, opdata and next fields of current program node |
|
156 char opcode = instruction[i + RE.offsetOpcode]; |
|
157 char opdata = instruction[i + RE.offsetOpdata]; |
|
158 short next = (short)instruction[i + RE.offsetNext]; |
|
159 |
|
160 // Display the current program node |
|
161 p.print(i + ". " + nodeToString(i) + ", next = "); |
|
162 |
|
163 // If there's no next, say 'none', otherwise give absolute index of next node |
|
164 if (next == 0) |
|
165 { |
|
166 p.print("none"); |
|
167 } |
|
168 else |
|
169 { |
|
170 p.print(i + next); |
|
171 } |
|
172 |
|
173 // Move past node |
|
174 i += RE.nodeSize; |
|
175 |
|
176 // If character class |
|
177 if (opcode == RE.OP_ANYOF) |
|
178 { |
|
179 // Opening bracket for start of char class |
|
180 p.print(", ["); |
|
181 |
|
182 // Show each range in the char class |
|
183 int rangeCount = opdata; |
|
184 for (int r = 0; r < rangeCount; r++) |
|
185 { |
|
186 // Get first and last chars in range |
|
187 char charFirst = instruction[i++]; |
|
188 char charLast = instruction[i++]; |
|
189 |
|
190 // Print range as X-Y, unless range encompasses only one char |
|
191 if (charFirst == charLast) |
|
192 { |
|
193 p.print(charToString(charFirst)); |
|
194 } |
|
195 else |
|
196 { |
|
197 p.print(charToString(charFirst) + "-" + charToString(charLast)); |
|
198 } |
|
199 } |
|
200 |
|
201 // Annotate the end of the char class |
|
202 p.print("]"); |
|
203 } |
|
204 |
|
205 // If atom |
|
206 if (opcode == RE.OP_ATOM) |
|
207 { |
|
208 // Open quote |
|
209 p.print(", \""); |
|
210 |
|
211 // Print each character in the atom |
|
212 for (int len = opdata; len-- != 0; ) |
|
213 { |
|
214 p.print(charToString(instruction[i++])); |
|
215 } |
|
216 |
|
217 // Close quote |
|
218 p.print("\""); |
|
219 } |
|
220 |
|
221 // Print a newline |
|
222 p.println(""); |
|
223 } |
|
224 } |
|
225 } |
|