src/jdk/nashorn/internal/runtime/regexp/joni/Regex.java

Mon, 03 Nov 2014 11:47:41 +0100

author
lagergren
date
Mon, 03 Nov 2014 11:47:41 +0100
changeset 1082
e1e27c4262be
parent 962
ac62e33a99b0
child 1205
4112748288bb
permissions
-rw-r--r--

8060204: Fix warnings in Joni and tests
Reviewed-by: hannesw, sundar, attila

hannesw@115 1 /*
hannesw@115 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
hannesw@115 3 * this software and associated documentation files (the "Software"), to deal in
hannesw@115 4 * the Software without restriction, including without limitation the rights to
hannesw@115 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
hannesw@115 6 * of the Software, and to permit persons to whom the Software is furnished to do
hannesw@115 7 * so, subject to the following conditions:
hannesw@115 8 *
hannesw@115 9 * The above copyright notice and this permission notice shall be included in all
hannesw@115 10 * copies or substantial portions of the Software.
hannesw@115 11 *
hannesw@115 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
hannesw@115 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
hannesw@115 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
hannesw@115 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
hannesw@115 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
hannesw@115 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
hannesw@115 18 * SOFTWARE.
hannesw@115 19 */
hannesw@115 20 package jdk.nashorn.internal.runtime.regexp.joni;
hannesw@115 21
hannesw@115 22 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
hannesw@115 23 import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState;
hannesw@115 24 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
hannesw@115 25 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
hannesw@115 26
lagergren@1082 27 @SuppressWarnings("javadoc")
hannesw@115 28 public final class Regex implements RegexState {
hannesw@115 29
hannesw@115 30 int[] code; /* compiled pattern */
hannesw@115 31 int codeLength;
hannesw@115 32 boolean stackNeeded;
hannesw@447 33 Object[] operands; /* e.g. shared CClassNode */
hannesw@115 34 int operandLength;
hannesw@115 35
hannesw@115 36 int numMem; /* used memory(...) num counted from 1 */
hannesw@115 37 int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
hannesw@115 38 int numNullCheck; /* OP_NULL_CHECK_START/END id counter */
hannesw@115 39 int captureHistory; /* (?@...) flag (1-31) */
hannesw@115 40 int btMemStart; /* need backtrack flag */
hannesw@115 41 int btMemEnd; /* need backtrack flag */
hannesw@115 42
hannesw@115 43 int stackPopLevel;
hannesw@115 44
hannesw@447 45 int[] repeatRangeLo;
hannesw@447 46 int[] repeatRangeHi;
hannesw@115 47
hannesw@193 48 WarnCallback warnings;
hannesw@193 49 MatcherFactory factory;
hannesw@273 50 protected Analyser analyser;
hannesw@115 51
hannesw@115 52 int options;
hannesw@115 53 final int caseFoldFlag;
hannesw@115 54
hannesw@115 55 /* optimization info (string search, char-map and anchors) */
hannesw@115 56 SearchAlgorithm searchAlgorithm; /* optimize flag */
hannesw@115 57 int thresholdLength; /* search str-length for apply optimize */
hannesw@115 58 int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
hannesw@115 59 int anchorDmin; /* (SEMI_)END_BUF anchor distance */
hannesw@115 60 int anchorDmax; /* (SEMI_)END_BUF anchor distance */
hannesw@115 61 int subAnchor; /* start-anchor for exact or map */
hannesw@115 62
hannesw@115 63 char[] exact;
hannesw@115 64 int exactP;
hannesw@115 65 int exactEnd;
hannesw@115 66
hannesw@115 67 byte[] map; /* used as BM skip or char-map */
hannesw@115 68 int[] intMap; /* BM skip for exact_len > 255 */
hannesw@115 69 int[] intMapBackward; /* BM skip for backward search */
hannesw@115 70 int dMin; /* min-distance of exact or map */
hannesw@115 71 int dMax; /* max-distance of exact or map */
hannesw@115 72
hannesw@115 73 char[][] templates;
hannesw@115 74 int templateNum;
hannesw@115 75
attila@962 76 public Regex(final CharSequence cs) {
hannesw@115 77 this(cs.toString());
hannesw@115 78 }
hannesw@115 79
attila@962 80 public Regex(final String str) {
hannesw@115 81 this(str.toCharArray(), 0, str.length(), 0);
hannesw@115 82 }
hannesw@115 83
attila@962 84 public Regex(final char[] chars) {
hannesw@115 85 this(chars, 0, chars.length, 0);
hannesw@115 86 }
hannesw@115 87
attila@962 88 public Regex(final char[] chars, final int p, final int end) {
hannesw@115 89 this(chars, p, end, 0);
hannesw@115 90 }
hannesw@115 91
attila@962 92 public Regex(final char[] chars, final int p, final int end, final int option) {
hannesw@115 93 this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT);
hannesw@115 94 }
hannesw@115 95
hannesw@115 96 // onig_new
attila@962 97 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) {
hannesw@115 98 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT);
hannesw@115 99 }
hannesw@115 100
attila@962 101 public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) {
hannesw@115 102 this(chars, p, end, option, Syntax.RUBY, warnings);
hannesw@115 103 }
hannesw@115 104
hannesw@115 105 // onig_new
attila@962 106 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) {
hannesw@115 107 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings);
hannesw@115 108 }
hannesw@115 109
hannesw@115 110 // onig_alloc_init
lagergren@1082 111 public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) {
lagergren@1082 112 int option = optionp;
hannesw@115 113
hannesw@115 114 if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) ==
hannesw@115 115 (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) {
hannesw@115 116 throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS);
hannesw@115 117 }
hannesw@115 118
hannesw@115 119 if ((option & Option.NEGATE_SINGLELINE) != 0) {
hannesw@115 120 option |= syntax.options;
hannesw@115 121 option &= ~Option.SINGLELINE;
hannesw@115 122 } else {
hannesw@115 123 option |= syntax.options;
hannesw@115 124 }
hannesw@115 125
hannesw@115 126 this.options = option;
hannesw@115 127 this.caseFoldFlag = caseFoldFlag;
hannesw@115 128 this.warnings = warnings;
hannesw@115 129
hannesw@193 130 this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end);
hannesw@193 131 this.analyser.compile();
hannesw@115 132
hannesw@115 133 this.warnings = null;
hannesw@115 134 }
hannesw@115 135
hannesw@846 136 public synchronized MatcherFactory compile() {
hannesw@193 137 if (factory == null && analyser != null) {
hannesw@846 138 new ArrayCompiler(analyser).compile();
hannesw@193 139 analyser = null; // only do this once
hannesw@193 140 }
hannesw@846 141 assert factory != null;
hannesw@846 142 return factory;
hannesw@193 143 }
hannesw@193 144
attila@962 145 public Matcher matcher(final char[] chars) {
hannesw@115 146 return matcher(chars, 0, chars.length);
hannesw@115 147 }
hannesw@115 148
attila@962 149 public Matcher matcher(final char[] chars, final int p, final int end) {
hannesw@846 150 MatcherFactory matcherFactory = factory;
hannesw@846 151 if (matcherFactory == null) {
hannesw@846 152 matcherFactory = compile();
hannesw@846 153 }
hannesw@846 154 return matcherFactory.create(this, chars, p, end);
hannesw@115 155 }
hannesw@115 156
hannesw@193 157 public WarnCallback getWarnings() {
hannesw@193 158 return warnings;
hannesw@193 159 }
hannesw@193 160
hannesw@115 161 public int numberOfCaptures() {
hannesw@115 162 return numMem;
hannesw@115 163 }
hannesw@115 164
hannesw@115 165 /* set skip map for Boyer-Moor search */
hannesw@115 166 void setupBMSkipMap() {
attila@962 167 final char[] chars = exact;
attila@962 168 final int p = exactP;
attila@962 169 final int end = exactEnd;
attila@962 170 final int len = end - p;
hannesw@115 171
hannesw@115 172 if (len < Config.CHAR_TABLE_SIZE) {
hannesw@115 173 // map/skip
lagergren@1082 174 if (map == null) {
lagergren@1082 175 map = new byte[Config.CHAR_TABLE_SIZE];
lagergren@1082 176 }
hannesw@115 177
lagergren@1082 178 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
lagergren@1082 179 map[i] = (byte)len;
lagergren@1082 180 }
lagergren@1082 181 for (int i=0; i<len-1; i++)
lagergren@1082 182 {
lagergren@1082 183 map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
lagergren@1082 184 }
hannesw@115 185 } else {
lagergren@1082 186 if (intMap == null) {
lagergren@1082 187 intMap = new int[Config.CHAR_TABLE_SIZE];
lagergren@1082 188 }
hannesw@115 189
lagergren@1082 190 for (int i=0; i<len-1; i++)
lagergren@1082 191 {
lagergren@1082 192 intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ??
lagergren@1082 193 }
hannesw@115 194 }
hannesw@115 195 }
hannesw@115 196
attila@962 197 void setExactInfo(final OptExactInfo e) {
lagergren@1082 198 if (e.length == 0) {
lagergren@1082 199 return;
lagergren@1082 200 }
hannesw@115 201
hannesw@115 202 // shall we copy that ?
hannesw@115 203 exact = e.chars;
hannesw@115 204 exactP = 0;
hannesw@115 205 exactEnd = e.length;
hannesw@115 206
hannesw@115 207 if (e.ignoreCase) {
hannesw@115 208 searchAlgorithm = new SearchAlgorithm.SLOW_IC(this);
hannesw@115 209 } else {
hannesw@115 210 if (e.length >= 2) {
hannesw@115 211 setupBMSkipMap();
hannesw@115 212 searchAlgorithm = SearchAlgorithm.BM;
hannesw@115 213 } else {
hannesw@115 214 searchAlgorithm = SearchAlgorithm.SLOW;
hannesw@115 215 }
hannesw@115 216 }
hannesw@115 217
hannesw@115 218 dMin = e.mmd.min;
hannesw@115 219 dMax = e.mmd.max;
hannesw@115 220
hannesw@115 221 if (dMin != MinMaxLen.INFINITE_DISTANCE) {
hannesw@115 222 thresholdLength = dMin + (exactEnd - exactP);
hannesw@115 223 }
hannesw@115 224 }
hannesw@115 225
attila@962 226 void setOptimizeMapInfo(final OptMapInfo m) {
hannesw@115 227 map = m.map;
hannesw@115 228
hannesw@115 229 searchAlgorithm = SearchAlgorithm.MAP;
hannesw@115 230 dMin = m.mmd.min;
hannesw@115 231 dMax = m.mmd.max;
hannesw@115 232
hannesw@115 233 if (dMin != MinMaxLen.INFINITE_DISTANCE) {
hannesw@115 234 thresholdLength = dMin + 1;
hannesw@115 235 }
hannesw@115 236 }
hannesw@115 237
attila@962 238 void setSubAnchor(final OptAnchorInfo anc) {
hannesw@115 239 subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE;
hannesw@115 240 subAnchor |= anc.rightAnchor & AnchorType.END_LINE;
hannesw@115 241 }
hannesw@115 242
hannesw@115 243 void clearOptimizeInfo() {
hannesw@115 244 searchAlgorithm = SearchAlgorithm.NONE;
hannesw@115 245 anchor = 0;
hannesw@115 246 anchorDmax = 0;
hannesw@115 247 anchorDmin = 0;
hannesw@115 248 subAnchor = 0;
hannesw@115 249
hannesw@115 250 exact = null;
hannesw@115 251 exactP = exactEnd = 0;
hannesw@115 252 }
hannesw@115 253
hannesw@115 254 public String optimizeInfoToString() {
attila@962 255 final StringBuilder s = new StringBuilder();
hannesw@447 256 s.append("optimize: ").append(searchAlgorithm.getName()).append("\n");
hannesw@447 257 s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor));
hannesw@115 258
hannesw@115 259 if ((anchor & AnchorType.END_BUF_MASK) != 0) {
hannesw@447 260 s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax));
hannesw@115 261 }
hannesw@115 262
hannesw@447 263 s.append("\n");
hannesw@115 264
hannesw@115 265 if (searchAlgorithm != SearchAlgorithm.NONE) {
hannesw@447 266 s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n");
hannesw@115 267 }
hannesw@115 268
hannesw@447 269 s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n");
hannesw@447 270 s.append("threshold length: ").append(thresholdLength).append("\n");
hannesw@115 271
hannesw@115 272 if (exact != null) {
hannesw@447 273 s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n");
hannesw@115 274 } else if (searchAlgorithm == SearchAlgorithm.MAP) {
hannesw@115 275 int n=0;
lagergren@1082 276 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
lagergren@1082 277 if (map[i] != 0) {
lagergren@1082 278 n++;
lagergren@1082 279 }
lagergren@1082 280 }
hannesw@115 281
hannesw@447 282 s.append("map: n = ").append(n).append("\n");
hannesw@115 283 if (n > 0) {
hannesw@115 284 int c=0;
hannesw@447 285 s.append("[");
hannesw@115 286 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) {
hannesw@115 287 if (map[i] != 0) {
hannesw@447 288 if (c > 0) {
hannesw@447 289 s.append(", ");
hannesw@447 290 }
hannesw@115 291 c++;
hannesw@115 292 // TODO if (enc.isPrint(i)
hannesw@447 293 s.append((char)i);
hannesw@115 294 }
hannesw@115 295 }
hannesw@447 296 s.append("]\n");
hannesw@115 297 }
hannesw@115 298 }
hannesw@115 299
hannesw@447 300 return s.toString();
hannesw@115 301 }
hannesw@115 302
hannesw@115 303 public int getOptions() {
hannesw@115 304 return options;
hannesw@115 305 }
hannesw@115 306
hannesw@273 307 public String dumpTree() {
hannesw@273 308 return analyser == null ? null : analyser.root.toString();
hannesw@115 309 }
hannesw@115 310
hannesw@273 311 public String dumpByteCode() {
hannesw@273 312 compile();
hannesw@273 313 return new ByteCodePrinter(this).byteCodeListToString();
hannesw@115 314 }
hannesw@115 315
hannesw@115 316 }

mercurial