Mon, 03 Nov 2014 11:47:41 +0100
8060204: Fix warnings in Joni and tests
Reviewed-by: hannesw, sundar, attila
hannesw@115 | 1 | /* |
hannesw@115 | 2 | * Permission is hereby granted, free of charge, to any person obtaining a copy of |
hannesw@115 | 3 | * this software and associated documentation files (the "Software"), to deal in |
hannesw@115 | 4 | * the Software without restriction, including without limitation the rights to |
hannesw@115 | 5 | * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies |
hannesw@115 | 6 | * of the Software, and to permit persons to whom the Software is furnished to do |
hannesw@115 | 7 | * so, subject to the following conditions: |
hannesw@115 | 8 | * |
hannesw@115 | 9 | * The above copyright notice and this permission notice shall be included in all |
hannesw@115 | 10 | * copies or substantial portions of the Software. |
hannesw@115 | 11 | * |
hannesw@115 | 12 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
hannesw@115 | 13 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
hannesw@115 | 14 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
hannesw@115 | 15 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
hannesw@115 | 16 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
hannesw@115 | 17 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
hannesw@115 | 18 | * SOFTWARE. |
hannesw@115 | 19 | */ |
hannesw@115 | 20 | package jdk.nashorn.internal.runtime.regexp.joni; |
hannesw@115 | 21 | |
hannesw@115 | 22 | import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; |
hannesw@115 | 23 | import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; |
hannesw@115 | 24 | import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; |
hannesw@115 | 25 | import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; |
hannesw@115 | 26 | |
lagergren@1082 | 27 | @SuppressWarnings("javadoc") |
hannesw@115 | 28 | public final class Regex implements RegexState { |
hannesw@115 | 29 | |
hannesw@115 | 30 | int[] code; /* compiled pattern */ |
hannesw@115 | 31 | int codeLength; |
hannesw@115 | 32 | boolean stackNeeded; |
hannesw@447 | 33 | Object[] operands; /* e.g. shared CClassNode */ |
hannesw@115 | 34 | int operandLength; |
hannesw@115 | 35 | |
hannesw@115 | 36 | int numMem; /* used memory(...) num counted from 1 */ |
hannesw@115 | 37 | int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ |
hannesw@115 | 38 | int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ |
hannesw@115 | 39 | int captureHistory; /* (?@...) flag (1-31) */ |
hannesw@115 | 40 | int btMemStart; /* need backtrack flag */ |
hannesw@115 | 41 | int btMemEnd; /* need backtrack flag */ |
hannesw@115 | 42 | |
hannesw@115 | 43 | int stackPopLevel; |
hannesw@115 | 44 | |
hannesw@447 | 45 | int[] repeatRangeLo; |
hannesw@447 | 46 | int[] repeatRangeHi; |
hannesw@115 | 47 | |
hannesw@193 | 48 | WarnCallback warnings; |
hannesw@193 | 49 | MatcherFactory factory; |
hannesw@273 | 50 | protected Analyser analyser; |
hannesw@115 | 51 | |
hannesw@115 | 52 | int options; |
hannesw@115 | 53 | final int caseFoldFlag; |
hannesw@115 | 54 | |
hannesw@115 | 55 | /* optimization info (string search, char-map and anchors) */ |
hannesw@115 | 56 | SearchAlgorithm searchAlgorithm; /* optimize flag */ |
hannesw@115 | 57 | int thresholdLength; /* search str-length for apply optimize */ |
hannesw@115 | 58 | int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ |
hannesw@115 | 59 | int anchorDmin; /* (SEMI_)END_BUF anchor distance */ |
hannesw@115 | 60 | int anchorDmax; /* (SEMI_)END_BUF anchor distance */ |
hannesw@115 | 61 | int subAnchor; /* start-anchor for exact or map */ |
hannesw@115 | 62 | |
hannesw@115 | 63 | char[] exact; |
hannesw@115 | 64 | int exactP; |
hannesw@115 | 65 | int exactEnd; |
hannesw@115 | 66 | |
hannesw@115 | 67 | byte[] map; /* used as BM skip or char-map */ |
hannesw@115 | 68 | int[] intMap; /* BM skip for exact_len > 255 */ |
hannesw@115 | 69 | int[] intMapBackward; /* BM skip for backward search */ |
hannesw@115 | 70 | int dMin; /* min-distance of exact or map */ |
hannesw@115 | 71 | int dMax; /* max-distance of exact or map */ |
hannesw@115 | 72 | |
hannesw@115 | 73 | char[][] templates; |
hannesw@115 | 74 | int templateNum; |
hannesw@115 | 75 | |
attila@962 | 76 | public Regex(final CharSequence cs) { |
hannesw@115 | 77 | this(cs.toString()); |
hannesw@115 | 78 | } |
hannesw@115 | 79 | |
attila@962 | 80 | public Regex(final String str) { |
hannesw@115 | 81 | this(str.toCharArray(), 0, str.length(), 0); |
hannesw@115 | 82 | } |
hannesw@115 | 83 | |
attila@962 | 84 | public Regex(final char[] chars) { |
hannesw@115 | 85 | this(chars, 0, chars.length, 0); |
hannesw@115 | 86 | } |
hannesw@115 | 87 | |
attila@962 | 88 | public Regex(final char[] chars, final int p, final int end) { |
hannesw@115 | 89 | this(chars, p, end, 0); |
hannesw@115 | 90 | } |
hannesw@115 | 91 | |
attila@962 | 92 | public Regex(final char[] chars, final int p, final int end, final int option) { |
hannesw@115 | 93 | this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT); |
hannesw@115 | 94 | } |
hannesw@115 | 95 | |
hannesw@115 | 96 | // onig_new |
attila@962 | 97 | public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) { |
hannesw@115 | 98 | this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT); |
hannesw@115 | 99 | } |
hannesw@115 | 100 | |
attila@962 | 101 | public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) { |
hannesw@115 | 102 | this(chars, p, end, option, Syntax.RUBY, warnings); |
hannesw@115 | 103 | } |
hannesw@115 | 104 | |
hannesw@115 | 105 | // onig_new |
attila@962 | 106 | public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) { |
hannesw@115 | 107 | this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings); |
hannesw@115 | 108 | } |
hannesw@115 | 109 | |
hannesw@115 | 110 | // onig_alloc_init |
lagergren@1082 | 111 | public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) { |
lagergren@1082 | 112 | int option = optionp; |
hannesw@115 | 113 | |
hannesw@115 | 114 | if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == |
hannesw@115 | 115 | (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { |
hannesw@115 | 116 | throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); |
hannesw@115 | 117 | } |
hannesw@115 | 118 | |
hannesw@115 | 119 | if ((option & Option.NEGATE_SINGLELINE) != 0) { |
hannesw@115 | 120 | option |= syntax.options; |
hannesw@115 | 121 | option &= ~Option.SINGLELINE; |
hannesw@115 | 122 | } else { |
hannesw@115 | 123 | option |= syntax.options; |
hannesw@115 | 124 | } |
hannesw@115 | 125 | |
hannesw@115 | 126 | this.options = option; |
hannesw@115 | 127 | this.caseFoldFlag = caseFoldFlag; |
hannesw@115 | 128 | this.warnings = warnings; |
hannesw@115 | 129 | |
hannesw@193 | 130 | this.analyser = new Analyser(new ScanEnvironment(this, syntax), chars, p, end); |
hannesw@193 | 131 | this.analyser.compile(); |
hannesw@115 | 132 | |
hannesw@115 | 133 | this.warnings = null; |
hannesw@115 | 134 | } |
hannesw@115 | 135 | |
hannesw@846 | 136 | public synchronized MatcherFactory compile() { |
hannesw@193 | 137 | if (factory == null && analyser != null) { |
hannesw@846 | 138 | new ArrayCompiler(analyser).compile(); |
hannesw@193 | 139 | analyser = null; // only do this once |
hannesw@193 | 140 | } |
hannesw@846 | 141 | assert factory != null; |
hannesw@846 | 142 | return factory; |
hannesw@193 | 143 | } |
hannesw@193 | 144 | |
attila@962 | 145 | public Matcher matcher(final char[] chars) { |
hannesw@115 | 146 | return matcher(chars, 0, chars.length); |
hannesw@115 | 147 | } |
hannesw@115 | 148 | |
attila@962 | 149 | public Matcher matcher(final char[] chars, final int p, final int end) { |
hannesw@846 | 150 | MatcherFactory matcherFactory = factory; |
hannesw@846 | 151 | if (matcherFactory == null) { |
hannesw@846 | 152 | matcherFactory = compile(); |
hannesw@846 | 153 | } |
hannesw@846 | 154 | return matcherFactory.create(this, chars, p, end); |
hannesw@115 | 155 | } |
hannesw@115 | 156 | |
hannesw@193 | 157 | public WarnCallback getWarnings() { |
hannesw@193 | 158 | return warnings; |
hannesw@193 | 159 | } |
hannesw@193 | 160 | |
hannesw@115 | 161 | public int numberOfCaptures() { |
hannesw@115 | 162 | return numMem; |
hannesw@115 | 163 | } |
hannesw@115 | 164 | |
hannesw@115 | 165 | /* set skip map for Boyer-Moor search */ |
hannesw@115 | 166 | void setupBMSkipMap() { |
attila@962 | 167 | final char[] chars = exact; |
attila@962 | 168 | final int p = exactP; |
attila@962 | 169 | final int end = exactEnd; |
attila@962 | 170 | final int len = end - p; |
hannesw@115 | 171 | |
hannesw@115 | 172 | if (len < Config.CHAR_TABLE_SIZE) { |
hannesw@115 | 173 | // map/skip |
lagergren@1082 | 174 | if (map == null) { |
lagergren@1082 | 175 | map = new byte[Config.CHAR_TABLE_SIZE]; |
lagergren@1082 | 176 | } |
hannesw@115 | 177 | |
lagergren@1082 | 178 | for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { |
lagergren@1082 | 179 | map[i] = (byte)len; |
lagergren@1082 | 180 | } |
lagergren@1082 | 181 | for (int i=0; i<len-1; i++) |
lagergren@1082 | 182 | { |
lagergren@1082 | 183 | map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ?? |
lagergren@1082 | 184 | } |
hannesw@115 | 185 | } else { |
lagergren@1082 | 186 | if (intMap == null) { |
lagergren@1082 | 187 | intMap = new int[Config.CHAR_TABLE_SIZE]; |
lagergren@1082 | 188 | } |
hannesw@115 | 189 | |
lagergren@1082 | 190 | for (int i=0; i<len-1; i++) |
lagergren@1082 | 191 | { |
lagergren@1082 | 192 | intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ?? |
lagergren@1082 | 193 | } |
hannesw@115 | 194 | } |
hannesw@115 | 195 | } |
hannesw@115 | 196 | |
attila@962 | 197 | void setExactInfo(final OptExactInfo e) { |
lagergren@1082 | 198 | if (e.length == 0) { |
lagergren@1082 | 199 | return; |
lagergren@1082 | 200 | } |
hannesw@115 | 201 | |
hannesw@115 | 202 | // shall we copy that ? |
hannesw@115 | 203 | exact = e.chars; |
hannesw@115 | 204 | exactP = 0; |
hannesw@115 | 205 | exactEnd = e.length; |
hannesw@115 | 206 | |
hannesw@115 | 207 | if (e.ignoreCase) { |
hannesw@115 | 208 | searchAlgorithm = new SearchAlgorithm.SLOW_IC(this); |
hannesw@115 | 209 | } else { |
hannesw@115 | 210 | if (e.length >= 2) { |
hannesw@115 | 211 | setupBMSkipMap(); |
hannesw@115 | 212 | searchAlgorithm = SearchAlgorithm.BM; |
hannesw@115 | 213 | } else { |
hannesw@115 | 214 | searchAlgorithm = SearchAlgorithm.SLOW; |
hannesw@115 | 215 | } |
hannesw@115 | 216 | } |
hannesw@115 | 217 | |
hannesw@115 | 218 | dMin = e.mmd.min; |
hannesw@115 | 219 | dMax = e.mmd.max; |
hannesw@115 | 220 | |
hannesw@115 | 221 | if (dMin != MinMaxLen.INFINITE_DISTANCE) { |
hannesw@115 | 222 | thresholdLength = dMin + (exactEnd - exactP); |
hannesw@115 | 223 | } |
hannesw@115 | 224 | } |
hannesw@115 | 225 | |
attila@962 | 226 | void setOptimizeMapInfo(final OptMapInfo m) { |
hannesw@115 | 227 | map = m.map; |
hannesw@115 | 228 | |
hannesw@115 | 229 | searchAlgorithm = SearchAlgorithm.MAP; |
hannesw@115 | 230 | dMin = m.mmd.min; |
hannesw@115 | 231 | dMax = m.mmd.max; |
hannesw@115 | 232 | |
hannesw@115 | 233 | if (dMin != MinMaxLen.INFINITE_DISTANCE) { |
hannesw@115 | 234 | thresholdLength = dMin + 1; |
hannesw@115 | 235 | } |
hannesw@115 | 236 | } |
hannesw@115 | 237 | |
attila@962 | 238 | void setSubAnchor(final OptAnchorInfo anc) { |
hannesw@115 | 239 | subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; |
hannesw@115 | 240 | subAnchor |= anc.rightAnchor & AnchorType.END_LINE; |
hannesw@115 | 241 | } |
hannesw@115 | 242 | |
hannesw@115 | 243 | void clearOptimizeInfo() { |
hannesw@115 | 244 | searchAlgorithm = SearchAlgorithm.NONE; |
hannesw@115 | 245 | anchor = 0; |
hannesw@115 | 246 | anchorDmax = 0; |
hannesw@115 | 247 | anchorDmin = 0; |
hannesw@115 | 248 | subAnchor = 0; |
hannesw@115 | 249 | |
hannesw@115 | 250 | exact = null; |
hannesw@115 | 251 | exactP = exactEnd = 0; |
hannesw@115 | 252 | } |
hannesw@115 | 253 | |
hannesw@115 | 254 | public String optimizeInfoToString() { |
attila@962 | 255 | final StringBuilder s = new StringBuilder(); |
hannesw@447 | 256 | s.append("optimize: ").append(searchAlgorithm.getName()).append("\n"); |
hannesw@447 | 257 | s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor)); |
hannesw@115 | 258 | |
hannesw@115 | 259 | if ((anchor & AnchorType.END_BUF_MASK) != 0) { |
hannesw@447 | 260 | s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax)); |
hannesw@115 | 261 | } |
hannesw@115 | 262 | |
hannesw@447 | 263 | s.append("\n"); |
hannesw@115 | 264 | |
hannesw@115 | 265 | if (searchAlgorithm != SearchAlgorithm.NONE) { |
hannesw@447 | 266 | s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n"); |
hannesw@115 | 267 | } |
hannesw@115 | 268 | |
hannesw@447 | 269 | s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n"); |
hannesw@447 | 270 | s.append("threshold length: ").append(thresholdLength).append("\n"); |
hannesw@115 | 271 | |
hannesw@115 | 272 | if (exact != null) { |
hannesw@447 | 273 | s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n"); |
hannesw@115 | 274 | } else if (searchAlgorithm == SearchAlgorithm.MAP) { |
hannesw@115 | 275 | int n=0; |
lagergren@1082 | 276 | for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { |
lagergren@1082 | 277 | if (map[i] != 0) { |
lagergren@1082 | 278 | n++; |
lagergren@1082 | 279 | } |
lagergren@1082 | 280 | } |
hannesw@115 | 281 | |
hannesw@447 | 282 | s.append("map: n = ").append(n).append("\n"); |
hannesw@115 | 283 | if (n > 0) { |
hannesw@115 | 284 | int c=0; |
hannesw@447 | 285 | s.append("["); |
hannesw@115 | 286 | for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { |
hannesw@115 | 287 | if (map[i] != 0) { |
hannesw@447 | 288 | if (c > 0) { |
hannesw@447 | 289 | s.append(", "); |
hannesw@447 | 290 | } |
hannesw@115 | 291 | c++; |
hannesw@115 | 292 | // TODO if (enc.isPrint(i) |
hannesw@447 | 293 | s.append((char)i); |
hannesw@115 | 294 | } |
hannesw@115 | 295 | } |
hannesw@447 | 296 | s.append("]\n"); |
hannesw@115 | 297 | } |
hannesw@115 | 298 | } |
hannesw@115 | 299 | |
hannesw@447 | 300 | return s.toString(); |
hannesw@115 | 301 | } |
hannesw@115 | 302 | |
hannesw@115 | 303 | public int getOptions() { |
hannesw@115 | 304 | return options; |
hannesw@115 | 305 | } |
hannesw@115 | 306 | |
hannesw@273 | 307 | public String dumpTree() { |
hannesw@273 | 308 | return analyser == null ? null : analyser.root.toString(); |
hannesw@115 | 309 | } |
hannesw@115 | 310 | |
hannesw@273 | 311 | public String dumpByteCode() { |
hannesw@273 | 312 | compile(); |
hannesw@273 | 313 | return new ByteCodePrinter(this).byteCodeListToString(); |
hannesw@115 | 314 | } |
hannesw@115 | 315 | |
hannesw@115 | 316 | } |