Mon, 03 Nov 2014 11:47:41 +0100
8060204: Fix warnings in Joni and tests
Reviewed-by: hannesw, sundar, attila
1 /*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20 package jdk.nashorn.internal.runtime.regexp.joni;
22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
23 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty;
27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
29 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
30 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
31 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
32 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
33 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
34 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
36 class ByteCodeMachine extends StackMachine {
37 private int bestLen; // return value
38 private int s = 0; // current char
40 private int range; // right range
41 private int sprev;
42 private int sstart;
43 private int sbegin;
45 private final int[] code; // byte code
46 private int ip; // instruction pointer
48 ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) {
49 super(regex, chars, p, end);
50 this.code = regex.code;
51 }
53 private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) {
54 int s1 = s1p;
55 int s2 = ps2.value;
56 final int end1 = s1 + mbLen;
58 while (s1 < end1) {
59 final char c1 = EncodingHelper.toLowerCase(chars[s1++]);
60 final char c2 = EncodingHelper.toLowerCase(chars[s2++]);
62 if (c1 != c2) {
63 return false;
64 }
65 }
66 ps2.value = s2;
67 return true;
68 }
70 private void debugMatchBegin() {
71 Config.log.println("match_at: " +
72 "str: " + str +
73 ", end: " + end +
74 ", start: " + this.sstart +
75 ", sprev: " + this.sprev);
76 Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str));
77 }
79 private void debugMatchLoop() {
80 if (Config.DEBUG_MATCH) {
81 Config.log.printf("%4d", (s - str)).print("> \"");
82 int q, i;
83 for (i=0, q=s; i<7 && q<end && s>=0; i++) {
84 if (q < end) {
85 Config.log.print(new String(new char[]{chars[q++]}));
86 }
87 }
88 final String string = q < end ? "...\"" : "\"";
89 q += string.length();
90 Config.log.print(string);
91 for (i=0; i<20-(q-s);i++) {
92 Config.log.print(" ");
93 }
94 final StringBuilder sb = new StringBuilder();
95 new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
96 Config.log.println(sb.toString());
97 }
98 }
100 @Override
101 protected final int matchAt(final int r, final int ss, final int sp) {
102 this.range = r;
103 this.sstart = ss;
104 this.sprev = sp;
106 stk = 0;
107 ip = 0;
109 if (Config.DEBUG_MATCH) {
110 debugMatchBegin();
111 }
113 init();
115 bestLen = -1;
116 s = ss;
118 final int[] c = this.code;
119 while (true) {
120 if (Config.DEBUG_MATCH) {
121 debugMatchLoop();
122 }
124 sbegin = s;
125 switch (c[ip++]) {
126 case OPCode.END: if (opEnd()) {
127 return finish();
128 } break;
129 case OPCode.EXACT1: opExact1(); break;
130 case OPCode.EXACT2: opExact2(); continue;
131 case OPCode.EXACT3: opExact3(); continue;
132 case OPCode.EXACT4: opExact4(); continue;
133 case OPCode.EXACT5: opExact5(); continue;
134 case OPCode.EXACTN: opExactN(); continue;
136 case OPCode.EXACT1_IC: opExact1IC(); break;
137 case OPCode.EXACTN_IC: opExactNIC(); continue;
139 case OPCode.CCLASS: opCClass(); break;
140 case OPCode.CCLASS_MB: opCClassMB(); break;
141 case OPCode.CCLASS_MIX: opCClassMIX(); break;
142 case OPCode.CCLASS_NOT: opCClassNot(); break;
143 case OPCode.CCLASS_MB_NOT: opCClassMBNot(); break;
144 case OPCode.CCLASS_MIX_NOT: opCClassMIXNot(); break;
145 case OPCode.CCLASS_NODE: opCClassNode(); break;
147 case OPCode.ANYCHAR: opAnyChar(); break;
148 case OPCode.ANYCHAR_ML: opAnyCharML(); break;
149 case OPCode.ANYCHAR_STAR: opAnyCharStar(); break;
150 case OPCode.ANYCHAR_ML_STAR: opAnyCharMLStar(); break;
151 case OPCode.ANYCHAR_STAR_PEEK_NEXT: opAnyCharStarPeekNext(); break;
152 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: opAnyCharMLStarPeekNext(); break;
154 case OPCode.WORD: opWord(); break;
155 case OPCode.NOT_WORD: opNotWord(); break;
156 case OPCode.WORD_BOUND: opWordBound(); continue;
157 case OPCode.NOT_WORD_BOUND: opNotWordBound(); continue;
158 case OPCode.WORD_BEGIN: opWordBegin(); continue;
159 case OPCode.WORD_END: opWordEnd(); continue;
161 case OPCode.BEGIN_BUF: opBeginBuf(); continue;
162 case OPCode.END_BUF: opEndBuf(); continue;
163 case OPCode.BEGIN_LINE: opBeginLine(); continue;
164 case OPCode.END_LINE: opEndLine(); continue;
165 case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
166 case OPCode.BEGIN_POSITION: opBeginPosition(); continue;
168 case OPCode.MEMORY_START_PUSH: opMemoryStartPush(); continue;
169 case OPCode.MEMORY_START: opMemoryStart(); continue;
170 case OPCode.MEMORY_END_PUSH: opMemoryEndPush(); continue;
171 case OPCode.MEMORY_END: opMemoryEnd(); continue;
172 case OPCode.MEMORY_END_PUSH_REC: opMemoryEndPushRec(); continue;
173 case OPCode.MEMORY_END_REC: opMemoryEndRec(); continue;
175 case OPCode.BACKREF1: opBackRef1(); continue;
176 case OPCode.BACKREF2: opBackRef2(); continue;
177 case OPCode.BACKREFN: opBackRefN(); continue;
178 case OPCode.BACKREFN_IC: opBackRefNIC(); continue;
179 case OPCode.BACKREF_MULTI: opBackRefMulti(); continue;
180 case OPCode.BACKREF_MULTI_IC: opBackRefMultiIC(); continue;
181 case OPCode.BACKREF_WITH_LEVEL: opBackRefAtLevel(); continue;
183 case OPCode.NULL_CHECK_START: opNullCheckStart(); continue;
184 case OPCode.NULL_CHECK_END: opNullCheckEnd(); continue;
185 case OPCode.NULL_CHECK_END_MEMST: opNullCheckEndMemST(); continue;
186 case OPCode.NULL_CHECK_END_MEMST_PUSH: opNullCheckEndMemSTPush(); continue;
188 case OPCode.JUMP: opJump(); continue;
189 case OPCode.PUSH: opPush(); continue;
191 case OPCode.POP: opPop(); continue;
192 case OPCode.PUSH_OR_JUMP_EXACT1: opPushOrJumpExact1(); continue;
193 case OPCode.PUSH_IF_PEEK_NEXT: opPushIfPeekNext(); continue;
195 case OPCode.REPEAT: opRepeat(); continue;
196 case OPCode.REPEAT_NG: opRepeatNG(); continue;
197 case OPCode.REPEAT_INC: opRepeatInc(); continue;
198 case OPCode.REPEAT_INC_SG: opRepeatIncSG(); continue;
199 case OPCode.REPEAT_INC_NG: opRepeatIncNG(); continue;
200 case OPCode.REPEAT_INC_NG_SG: opRepeatIncNGSG(); continue;
202 case OPCode.PUSH_POS: opPushPos(); continue;
203 case OPCode.POP_POS: opPopPos(); continue;
204 case OPCode.PUSH_POS_NOT: opPushPosNot(); continue;
205 case OPCode.FAIL_POS: opFailPos(); continue;
206 case OPCode.PUSH_STOP_BT: opPushStopBT(); continue;
207 case OPCode.POP_STOP_BT: opPopStopBT(); continue;
209 case OPCode.LOOK_BEHIND: opLookBehind(); continue;
210 case OPCode.PUSH_LOOK_BEHIND_NOT: opPushLookBehindNot(); continue;
211 case OPCode.FAIL_LOOK_BEHIND_NOT: opFailLookBehindNot(); continue;
213 case OPCode.FINISH:
214 return finish();
216 case OPCode.FAIL: opFail(); continue;
218 default:
219 throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
221 } // main switch
222 } // main while
223 }
225 private boolean opEnd() {
226 final int n = s - sstart;
228 if (n > bestLen) {
229 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
230 if (isFindLongest(regex.options)) {
231 if (n > msaBestLen) {
232 msaBestLen = n;
233 msaBestS = sstart;
234 } else {
235 // goto end_best_len;
236 return endBestLength();
237 }
238 }
239 } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
241 bestLen = n;
242 final Region region = msaRegion;
243 if (region != null) {
244 // USE_POSIX_REGION_OPTION ... else ...
245 region.beg[0] = msaBegin = sstart - str;
246 region.end[0] = msaEnd = s - str;
247 for (int i = 1; i <= regex.numMem; i++) {
248 // opt!
249 if (repeatStk[memEndStk + i] != INVALID_INDEX) {
250 region.beg[i] = bsAt(regex.btMemStart, i) ?
251 stack[repeatStk[memStartStk + i]].getMemPStr() - str :
252 repeatStk[memStartStk + i] - str;
255 region.end[i] = bsAt(regex.btMemEnd, i) ?
256 stack[repeatStk[memEndStk + i]].getMemPStr() :
257 repeatStk[memEndStk + i] - str;
259 } else {
260 region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
261 }
263 }
265 } else {
266 msaBegin = sstart - str;
267 msaEnd = s - str;
268 }
269 } else {
270 final Region region = msaRegion;
271 if (Config.USE_POSIX_API_REGION_OPTION) {
272 if (!isPosixRegion(regex.options)) {
273 if (region != null) {
274 region.clear();
275 } else {
276 msaBegin = msaEnd = 0;
277 }
278 }
279 } else {
280 if (region != null) {
281 region.clear();
282 } else {
283 msaBegin = msaEnd = 0;
284 }
285 } // USE_POSIX_REGION_OPTION
286 }
287 // end_best_len:
288 /* default behavior: return first-matching result. */
289 return endBestLength();
290 }
292 private boolean endBestLength() {
293 if (isFindCondition(regex.options)) {
294 if (isFindNotEmpty(regex.options) && s == sstart) {
295 bestLen = -1;
296 {opFail(); return false;} /* for retry */
297 }
298 if (isFindLongest(regex.options) && s < range) {
299 {opFail(); return false;} /* for retry */
300 }
301 }
302 // goto finish;
303 return true;
304 }
306 private void opExact1() {
307 if (s >= range || code[ip] != chars[s++]) {opFail(); return;}
308 //if (s > range) {opFail(); return;}
309 ip++;
310 sprev = sbegin; // break;
311 }
313 private void opExact2() {
314 if (s + 2 > range) {opFail(); return;}
315 if (code[ip] != chars[s]) {opFail(); return;}
316 ip++; s++;
317 if (code[ip] != chars[s]) {opFail(); return;}
318 sprev = s;
319 ip++; s++;
320 }
322 private void opExact3() {
323 if (s + 3 > range) {opFail(); return;}
324 if (code[ip] != chars[s]) {opFail(); return;}
325 ip++; s++;
326 if (code[ip] != chars[s]) {opFail(); return;}
327 ip++; s++;
328 if (code[ip] != chars[s]) {opFail(); return;}
329 sprev = s;
330 ip++; s++;
331 }
333 private void opExact4() {
334 if (s + 4 > range) {opFail(); return;}
335 if (code[ip] != chars[s]) {opFail(); return;}
336 ip++; s++;
337 if (code[ip] != chars[s]) {opFail(); return;}
338 ip++; s++;
339 if (code[ip] != chars[s]) {opFail(); return;}
340 ip++; s++;
341 if (code[ip] != chars[s]) {opFail(); return;}
342 sprev = s;
343 ip++; s++;
344 }
346 private void opExact5() {
347 if (s + 5 > range) {opFail(); return;}
348 if (code[ip] != chars[s]) {opFail(); return;}
349 ip++; s++;
350 if (code[ip] != chars[s]) {opFail(); return;}
351 ip++; s++;
352 if (code[ip] != chars[s]) {opFail(); return;}
353 ip++; s++;
354 if (code[ip] != chars[s]) {opFail(); return;}
355 ip++; s++;
356 if (code[ip] != chars[s]) {opFail(); return;}
357 sprev = s;
358 ip++; s++;
359 }
361 private void opExactN() {
362 int tlen = code[ip++];
363 if (s + tlen > range) {opFail(); return;}
365 if (Config.USE_STRING_TEMPLATES) {
366 final char[] bs = regex.templates[code[ip++]];
367 int ps = code[ip++];
369 while (tlen-- > 0) {
370 if (bs[ps++] != chars[s++]) {opFail(); return;}
371 }
373 } else {
374 while (tlen-- > 0) {
375 if (code[ip++] != chars[s++]) {opFail(); return;}
376 }
377 }
378 sprev = s - 1;
379 }
381 private void opExact1IC() {
382 if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
383 ip++;
384 sprev = sbegin; // break;
385 }
387 private void opExactNIC() {
388 int tlen = code[ip++];
389 if (s + tlen > range) {opFail(); return;}
391 if (Config.USE_STRING_TEMPLATES) {
392 final char[] bs = regex.templates[code[ip++]];
393 int ps = code[ip++];
395 while (tlen-- > 0) {
396 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
397 }
398 } else {
400 while (tlen-- > 0) {
401 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
402 }
403 }
404 sprev = s - 1;
405 }
407 private boolean isInBitSet() {
408 final int c = chars[s];
409 return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
410 }
412 private void opCClass() {
413 if (s >= range || !isInBitSet()) {opFail(); return;}
414 ip += BitSet.BITSET_SIZE;
415 s++;
416 sprev = sbegin; // break;
417 }
419 private boolean isInClassMB() {
420 final int tlen = code[ip++];
421 if (s >= range) {
422 return false;
423 }
424 final int ss = s;
425 s++;
426 final int c = chars[ss];
427 if (!EncodingHelper.isInCodeRange(code, ip, c)) {
428 return false;
429 }
430 ip += tlen;
431 return true;
432 }
434 private void opCClassMB() {
435 // beyond string check
436 if (s >= range || chars[s] <= 0xff) {opFail(); return;}
437 if (!isInClassMB()) {opFail(); return;} // not!!!
438 sprev = sbegin; // break;
439 }
441 private void opCClassMIX() {
442 if (s >= range) {opFail(); return;}
443 if (chars[s] > 0xff) {
444 ip += BitSet.BITSET_SIZE;
445 if (!isInClassMB()) {opFail(); return;}
446 } else {
447 if (!isInBitSet()) {opFail(); return;}
448 ip += BitSet.BITSET_SIZE;
449 final int tlen = code[ip++]; // by code range length
450 ip += tlen;
451 s++;
452 }
453 sprev = sbegin; // break;
454 }
456 private void opCClassNot() {
457 if (s >= range || isInBitSet()) {opFail(); return;}
458 ip += BitSet.BITSET_SIZE;
459 s++;
460 sprev = sbegin; // break;
461 }
463 private boolean isNotInClassMB() {
464 final int tlen = code[ip++];
466 if (!(s + 1 <= range)) {
467 if (s >= range) {
468 return false;
469 }
470 s = end;
471 ip += tlen;
472 return true;
473 }
475 final int ss = s;
476 s++;
477 final int c = chars[ss];
479 if (EncodingHelper.isInCodeRange(code, ip, c)) {
480 return false;
481 }
482 ip += tlen;
483 return true;
484 }
486 private void opCClassMBNot() {
487 if (s >= range) {opFail(); return;}
488 if (chars[s] <= 0xff) {
489 s++;
490 final int tlen = code[ip++];
491 ip += tlen;
492 sprev = sbegin; // break;
493 return;
494 }
495 if (!isNotInClassMB()) {opFail(); return;}
496 sprev = sbegin; // break;
497 }
499 private void opCClassMIXNot() {
500 if (s >= range) {opFail(); return;}
501 if (chars[s] > 0xff) {
502 ip += BitSet.BITSET_SIZE;
503 if (!isNotInClassMB()) {opFail(); return;}
504 } else {
505 if (isInBitSet()) {opFail(); return;}
506 ip += BitSet.BITSET_SIZE;
507 final int tlen = code[ip++];
508 ip += tlen;
509 s++;
510 }
511 sprev = sbegin; // break;
512 }
514 private void opCClassNode() {
515 if (s >= range) {opFail(); return;}
516 final CClassNode cc = (CClassNode)regex.operands[code[ip++]];
517 final int ss = s;
518 s++;
519 final int c = chars[ss];
520 if (!cc.isCodeInCCLength(c)) {opFail(); return;}
521 sprev = sbegin; // break;
522 }
524 private void opAnyChar() {
525 if (s >= range) {opFail(); return;}
526 if (isNewLine(chars[s])) {opFail(); return;}
527 s++;
528 sprev = sbegin; // break;
529 }
531 private void opAnyCharML() {
532 if (s >= range) {opFail(); return;}
533 s++;
534 sprev = sbegin; // break;
535 }
537 private void opAnyCharStar() {
538 final char[] ch = this.chars;
539 while (s < range) {
540 pushAlt(ip, s, sprev);
541 if (isNewLine(ch, s, end)) {opFail(); return;}
542 sprev = s;
543 s++;
544 }
545 sprev = sbegin; // break;
546 }
548 private void opAnyCharMLStar() {
549 while (s < range) {
550 pushAlt(ip, s, sprev);
551 sprev = s;
552 s++;
553 }
554 sprev = sbegin; // break;
555 }
557 private void opAnyCharStarPeekNext() {
558 final char c = (char)code[ip];
559 final char[] ch = this.chars;
561 while (s < range) {
562 final char b = ch[s];
563 if (c == b) {
564 pushAlt(ip + 1, s, sprev);
565 }
566 if (isNewLine(b)) {opFail(); return;}
567 sprev = s;
568 s++;
569 }
570 ip++;
571 sprev = sbegin; // break;
572 }
574 private void opAnyCharMLStarPeekNext() {
575 final char c = (char)code[ip];
576 final char[] ch = this.chars;
578 while (s < range) {
579 if (c == ch[s]) {
580 pushAlt(ip + 1, s, sprev);
581 }
582 sprev = s;
583 s++;
584 }
585 ip++;
586 sprev = sbegin; // break;
587 }
589 private void opWord() {
590 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
591 s++;
592 sprev = sbegin; // break;
593 }
595 private void opNotWord() {
596 if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;}
597 s++;
598 sprev = sbegin; // break;
599 }
601 private void opWordBound() {
602 if (s == str) {
603 if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
604 } else if (s == end) {
605 if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
606 } else {
607 if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
608 }
609 }
611 private void opNotWordBound() {
612 if (s == str) {
613 if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;}
614 } else if (s == end) {
615 if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
616 } else {
617 if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
618 }
619 }
621 private void opWordBegin() {
622 if (s < range && EncodingHelper.isWord(chars[s])) {
623 if (s == str || !EncodingHelper.isWord(chars[sprev])) {
624 return;
625 }
626 }
627 opFail();
628 }
630 private void opWordEnd() {
631 if (s != str && EncodingHelper.isWord(chars[sprev])) {
632 if (s == end || !EncodingHelper.isWord(chars[s])) {
633 return;
634 }
635 }
636 opFail();
637 }
639 private void opBeginBuf() {
640 if (s != str) {
641 opFail();
642 }
643 }
645 private void opEndBuf() {
646 if (s != end) {
647 opFail();
648 }
649 }
651 private void opBeginLine() {
652 if (s == str) {
653 if (isNotBol(msaOptions)) {
654 opFail();
655 }
656 return;
657 } else if (isNewLine(chars, sprev, end) && s != end) {
658 return;
659 }
660 opFail();
661 }
663 private void opEndLine() {
664 if (s == end) {
665 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
666 if (str == end || !isNewLine(chars, sprev, end)) {
667 if (isNotEol(msaOptions)) {
668 opFail();
669 }
670 }
671 return;
672 }
673 if (isNotEol(msaOptions)) {
674 opFail();
675 }
676 return;
677 } else if (isNewLine(chars, s, end)) {
678 return;
679 }
680 opFail();
681 }
683 private void opSemiEndBuf() {
684 if (s == end) {
685 if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
686 if (str == end || !isNewLine(chars, sprev, end)) {
687 if (isNotEol(msaOptions)) {
688 opFail();
689 }
690 }
691 return;
692 }
693 if (isNotEol(msaOptions)) {
694 opFail();
695 }
696 return;
697 } else if (isNewLine(chars, s, end) && s + 1 == end) {
698 return;
699 }
700 opFail();
701 }
703 private void opBeginPosition() {
704 if (s != msaStart) {
705 opFail();
706 }
707 }
709 private void opMemoryStartPush() {
710 final int mem = code[ip++];
711 pushMemStart(mem, s);
712 }
714 private void opMemoryStart() {
715 final int mem = code[ip++];
716 repeatStk[memStartStk + mem] = s;
717 }
719 private void opMemoryEndPush() {
720 final int mem = code[ip++];
721 pushMemEnd(mem, s);
722 }
724 private void opMemoryEnd() {
725 final int mem = code[ip++];
726 repeatStk[memEndStk + mem] = s;
727 }
729 private void opMemoryEndPushRec() {
730 final int mem = code[ip++];
731 final int stkp = getMemStart(mem); /* should be before push mem-end. */
732 pushMemEnd(mem, s);
733 repeatStk[memStartStk + mem] = stkp;
734 }
736 private void opMemoryEndRec() {
737 final int mem = code[ip++];
738 repeatStk[memEndStk + mem] = s;
739 final int stkp = getMemStart(mem);
741 if (BitStatus.bsAt(regex.btMemStart, mem)) {
742 repeatStk[memStartStk + mem] = stkp;
743 } else {
744 repeatStk[memStartStk + mem] = stack[stkp].getMemPStr();
745 }
747 pushMemEndMark(mem);
748 }
750 private boolean backrefInvalid(final int mem) {
751 return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX;
752 }
754 private int backrefStart(final int mem) {
755 return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem];
756 }
758 private int backrefEnd(final int mem) {
759 return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem];
760 }
762 private void backref(final int mem) {
763 /* if you want to remove following line,
764 you should check in parse and compile time. (numMem) */
765 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
767 int pstart = backrefStart(mem);
768 final int pend = backrefEnd(mem);
770 int n = pend - pstart;
771 if (s + n > range) {opFail(); return;}
772 sprev = s;
774 // STRING_CMP
775 while(n-- > 0) {
776 if (chars[pstart++] != chars[s++]) {opFail(); return;}
777 }
779 // beyond string check
780 if (sprev < range) {
781 while (sprev + 1 < s) {
782 sprev++;
783 }
784 }
785 }
787 private void opBackRef1() {
788 backref(1);
789 }
791 private void opBackRef2() {
792 backref(2);
793 }
795 private void opBackRefN() {
796 backref(code[ip++]);
797 }
799 private void opBackRefNIC() {
800 final int mem = code[ip++];
801 /* if you want to remove following line,
802 you should check in parse and compile time. (numMem) */
803 if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
805 final int pstart = backrefStart(mem);
806 final int pend = backrefEnd(mem);
808 final int n = pend - pstart;
809 if (s + n > range) {opFail(); return;}
810 sprev = s;
812 value = s;
813 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;}
814 s = value;
816 // if (sprev < chars.length)
817 while (sprev + 1 < s) {
818 sprev++;
819 }
820 }
822 private void opBackRefMulti() {
823 final int tlen = code[ip++];
825 int i;
826 loop:for (i=0; i<tlen; i++) {
827 final int mem = code[ip++];
828 if (backrefInvalid(mem)) {
829 continue;
830 }
832 int pstart = backrefStart(mem);
833 final int pend = backrefEnd(mem);
835 int n = pend - pstart;
836 if (s + n > range) {opFail(); return;}
838 sprev = s;
839 int swork = s;
841 while (n-- > 0) {
842 if (chars[pstart++] != chars[swork++]) {
843 continue loop;
844 }
845 }
847 s = swork;
849 // beyond string check
850 if (sprev < range) {
851 while (sprev + 1 < s) {
852 sprev++;
853 }
854 }
856 ip += tlen - i - 1; // * SIZE_MEMNUM (1)
857 break; /* success */
858 }
859 if (i == tlen) {opFail(); return;}
860 }
862 private void opBackRefMultiIC() {
863 final int tlen = code[ip++];
865 int i;
866 loop:for (i=0; i<tlen; i++) {
867 final int mem = code[ip++];
868 if (backrefInvalid(mem)) {
869 continue;
870 }
872 final int pstart = backrefStart(mem);
873 final int pend = backrefEnd(mem);
875 final int n = pend - pstart;
876 if (s + n > range) {opFail(); return;}
878 sprev = s;
880 value = s;
881 if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end))
882 {
883 continue loop; // STRING_CMP_VALUE_IC
884 }
885 s = value;
887 // if (sprev < chars.length)
888 while (sprev + 1 < s) {
889 sprev++;
890 }
892 ip += tlen - i - 1; // * SIZE_MEMNUM (1)
893 break; /* success */
894 }
895 if (i == tlen) {opFail(); return;}
896 }
898 private boolean memIsInMemp(final int mem, final int num, final int mempp) {
899 for (int i=0, memp = mempp; i<num; i++) {
900 final int m = code[memp++];
901 if (mem == m) {
902 return true;
903 }
904 }
905 return false;
906 }
908 // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
909 private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag,
910 final int nest, final int memNum, final int memp) {
911 int pend = -1;
912 int level = 0;
913 int k = stk - 1;
915 while (k >= 0) {
916 final StackEntry e = stack[k];
918 if (e.type == CALL_FRAME) {
919 level--;
920 } else if (e.type == RETURN) {
921 level++;
922 } else if (level == nest) {
923 if (e.type == MEM_START) {
924 if (memIsInMemp(e.getMemNum(), memNum, memp)) {
925 final int pstart = e.getMemPStr();
926 if (pend != -1) {
927 if (pend - pstart > end - s) {
928 return false; /* or goto next_mem; */
929 }
930 int p = pstart;
932 value = s;
933 if (ignoreCase) {
934 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) {
935 return false; /* or goto next_mem; */
936 }
937 } else {
938 while (p < pend) {
939 if (chars[p++] != chars[value++]) {
940 return false; /* or goto next_mem; */
941 }
942 }
943 }
944 s = value;
946 return true;
947 }
948 }
949 } else if (e.type == MEM_END) {
950 if (memIsInMemp(e.getMemNum(), memNum, memp)) {
951 pend = e.getMemPStr();
952 }
953 }
954 }
955 k--;
956 }
957 return false;
958 }
960 private void opBackRefAtLevel() {
961 final int ic = code[ip++];
962 final int level = code[ip++];
963 final int tlen = code[ip++];
965 sprev = s;
966 if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
967 while (sprev + 1 < s) {
968 sprev++;
969 }
970 ip += tlen; // * SIZE_MEMNUM
971 } else {
972 {opFail(); return;}
973 }
974 }
976 private void opNullCheckStart() {
977 final int mem = code[ip++];
978 pushNullCheckStart(mem, s);
979 }
981 private void nullCheckFound() {
982 // null_check_found:
983 /* empty loop founded, skip next instruction */
984 switch(code[ip++]) {
985 case OPCode.JUMP:
986 case OPCode.PUSH:
987 ip++; // p += SIZE_RELADDR;
988 break;
989 case OPCode.REPEAT_INC:
990 case OPCode.REPEAT_INC_NG:
991 case OPCode.REPEAT_INC_SG:
992 case OPCode.REPEAT_INC_NG_SG:
993 ip++; // p += SIZE_MEMNUM;
994 break;
995 default:
996 throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
997 } // switch
998 }
1000 private void opNullCheckEnd() {
1001 final int mem = code[ip++];
1002 final int isNull = nullCheck(mem, s); /* mem: null check id */
1004 if (isNull != 0) {
1005 if (Config.DEBUG_MATCH) {
1006 Config.log.println("NULL_CHECK_END: skip id:" + mem + ", s:" + s);
1007 }
1009 nullCheckFound();
1010 }
1011 }
1013 // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
1014 private void opNullCheckEndMemST() {
1015 final int mem = code[ip++]; /* mem: null check id */
1016 final int isNull = nullCheckMemSt(mem, s);
1018 if (isNull != 0) {
1019 if (Config.DEBUG_MATCH) {
1020 Config.log.println("NULL_CHECK_END_MEMST: skip id:" + mem + ", s:" + s);
1021 }
1023 if (isNull == -1) {opFail(); return;}
1024 nullCheckFound();
1025 }
1026 }
1028 // USE_SUBEXP_CALL
1029 private void opNullCheckEndMemSTPush() {
1030 final int mem = code[ip++]; /* mem: null check id */
1032 int isNull;
1033 if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
1034 isNull = nullCheckMemStRec(mem, s);
1035 } else {
1036 isNull = nullCheckRec(mem, s);
1037 }
1039 if (isNull != 0) {
1040 if (Config.DEBUG_MATCH) {
1041 Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip id:" + mem + ", s:" + s);
1042 }
1044 if (isNull == -1) {opFail(); return;}
1045 nullCheckFound();
1046 } else {
1047 pushNullCheckEnd(mem);
1048 }
1049 }
1051 private void opJump() {
1052 ip += code[ip] + 1;
1053 }
1055 private void opPush() {
1056 final int addr = code[ip++];
1057 pushAlt(ip + addr, s, sprev);
1058 }
1060 private void opPop() {
1061 popOne();
1062 }
1064 private void opPushOrJumpExact1() {
1065 final int addr = code[ip++];
1066 // beyond string check
1067 if (s < range && code[ip] == chars[s]) {
1068 ip++;
1069 pushAlt(ip + addr, s, sprev);
1070 return;
1071 }
1072 ip += addr + 1;
1073 }
1075 private void opPushIfPeekNext() {
1076 final int addr = code[ip++];
1077 // beyond string check
1078 if (s < range && code[ip] == chars[s]) {
1079 ip++;
1080 pushAlt(ip + addr, s, sprev);
1081 return;
1082 }
1083 ip++;
1084 }
1086 private void opRepeat() {
1087 final int mem = code[ip++]; /* mem: OP_REPEAT ID */
1088 final int addr= code[ip++];
1090 // ensure1();
1091 repeatStk[mem] = stk;
1092 pushRepeat(mem, ip);
1094 if (regex.repeatRangeLo[mem] == 0) { // lower
1095 pushAlt(ip + addr, s, sprev);
1096 }
1097 }
1099 private void opRepeatNG() {
1100 final int mem = code[ip++]; /* mem: OP_REPEAT ID */
1101 final int addr= code[ip++];
1103 // ensure1();
1104 repeatStk[mem] = stk;
1105 pushRepeat(mem, ip);
1107 if (regex.repeatRangeLo[mem] == 0) {
1108 pushAlt(ip, s, sprev);
1109 ip += addr;
1110 }
1111 }
1113 private void repeatInc(final int mem, final int si) {
1114 final StackEntry e = stack[si];
1116 e.increaseRepeatCount();
1118 if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
1119 /* end of repeat. Nothing to do. */
1120 } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
1121 pushAlt(ip, s, sprev);
1122 ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
1123 } else {
1124 ip = e.getRepeatPCode();
1125 }
1126 pushRepeatInc(si);
1127 }
1129 private void opRepeatInc() {
1130 final int mem = code[ip++]; /* mem: OP_REPEAT ID */
1131 final int si = repeatStk[mem];
1132 repeatInc(mem, si);
1133 }
1135 private void opRepeatIncSG() {
1136 final int mem = code[ip++]; /* mem: OP_REPEAT ID */
1137 final int si = getRepeat(mem);
1138 repeatInc(mem, si);
1139 }
1141 private void repeatIncNG(final int mem, final int si) {
1142 final StackEntry e = stack[si];
1144 e.increaseRepeatCount();
1146 if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
1147 if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
1148 final int pcode = e.getRepeatPCode();
1149 pushRepeatInc(si);
1150 pushAlt(pcode, s, sprev);
1151 } else {
1152 ip = e.getRepeatPCode();
1153 pushRepeatInc(si);
1154 }
1155 } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) {
1156 pushRepeatInc(si);
1157 }
1158 }
1160 private void opRepeatIncNG() {
1161 final int mem = code[ip++];
1162 final int si = repeatStk[mem];
1163 repeatIncNG(mem, si);
1164 }
1166 private void opRepeatIncNGSG() {
1167 final int mem = code[ip++];
1168 final int si = getRepeat(mem);
1169 repeatIncNG(mem, si);
1170 }
1172 private void opPushPos() {
1173 pushPos(s, sprev);
1174 }
1176 private void opPopPos() {
1177 final StackEntry e = stack[posEnd()];
1178 s = e.getStatePStr();
1179 sprev= e.getStatePStrPrev();
1180 }
1182 private void opPushPosNot() {
1183 final int addr = code[ip++];
1184 pushPosNot(ip + addr, s, sprev);
1185 }
1187 private void opFailPos() {
1188 popTilPosNot();
1189 opFail();
1190 }
1192 private void opPushStopBT() {
1193 pushStopBT();
1194 }
1196 private void opPopStopBT() {
1197 stopBtEnd();
1198 }
1200 private void opLookBehind() {
1201 final int tlen = code[ip++];
1202 s = EncodingHelper.stepBack(str, s, tlen);
1203 if (s == -1) {opFail(); return;}
1204 sprev = EncodingHelper.prevCharHead(str, s);
1205 }
1207 private void opPushLookBehindNot() {
1208 final int addr = code[ip++];
1209 final int tlen = code[ip++];
1210 final int q = EncodingHelper.stepBack(str, s, tlen);
1211 if (q == -1) {
1212 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
1213 If you want to change to fail, replace following line. */
1214 ip += addr;
1215 // return FAIL;
1216 } else {
1217 pushLookBehindNot(ip + addr, s, sprev);
1218 s = q;
1219 sprev = EncodingHelper.prevCharHead(str, s);
1220 }
1221 }
1223 private void opFailLookBehindNot() {
1224 popTilLookBehindNot();
1225 opFail();
1226 }
1228 private void opFail() {
1229 if (stack == null) {
1230 ip = regex.codeLength - 1;
1231 return;
1232 }
1235 final StackEntry e = pop();
1236 ip = e.getStatePCode();
1237 s = e.getStatePStr();
1238 sprev = e.getStatePStrPrev();
1239 }
1241 private int finish() {
1242 return bestLen;
1243 }
1244 }