src/jdk/nashorn/internal/runtime/regexp/joni/ByteCodeMachine.java

Mon, 03 Nov 2014 11:47:41 +0100

author
lagergren
date
Mon, 03 Nov 2014 11:47:41 +0100
changeset 1082
e1e27c4262be
parent 962
ac62e33a99b0
child 1205
4112748288bb
child 1270
dff9f4cfafd9
permissions
-rw-r--r--

8060204: Fix warnings in Joni and tests
Reviewed-by: hannesw, sundar, attila

     1 /*
     2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
     3  * this software and associated documentation files (the "Software"), to deal in
     4  * the Software without restriction, including without limitation the rights to
     5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
     6  * of the Software, and to permit persons to whom the Software is furnished to do
     7  * so, subject to the following conditions:
     8  *
     9  * The above copyright notice and this permission notice shall be included in all
    10  * copies or substantial portions of the Software.
    11  *
    12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    18  * SOFTWARE.
    19  */
    20 package jdk.nashorn.internal.runtime.regexp.joni;
    22 import static jdk.nashorn.internal.runtime.regexp.joni.BitStatus.bsAt;
    23 import static jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper.isNewLine;
    24 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindCondition;
    25 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
    26 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindNotEmpty;
    27 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotBol;
    28 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isNotEol;
    29 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isPosixRegion;
    30 import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
    31 import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
    32 import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
    33 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
    34 import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
    36 class ByteCodeMachine extends StackMachine {
    37     private int bestLen;          // return value
    38     private int s = 0;            // current char
    40     private int range;            // right range
    41     private int sprev;
    42     private int sstart;
    43     private int sbegin;
    45     private final int[] code;       // byte code
    46     private int ip;                 // instruction pointer
    /**
     * Creates a byte code interpreter for {@code regex} over the given input.
     *
     * @param regex compiled regex whose {@code code} array is executed
     * @param chars input characters
     * @param p     forwarded unchanged to the superclass constructor
     * @param end   forwarded unchanged to the superclass constructor
     */
    ByteCodeMachine(final Regex regex, final char[] chars, final int p, final int end) {
        super(regex, chars, p, end);
        code = regex.code;
    }
    53     private boolean stringCmpIC(final int caseFlodFlag, final int s1p, final IntHolder ps2, final int mbLen, final int textEnd) {
    54         int s1 = s1p;
    55         int s2 = ps2.value;
    56         final int end1 = s1 + mbLen;
    58         while (s1 < end1) {
    59             final char c1 = EncodingHelper.toLowerCase(chars[s1++]);
    60             final char c2 = EncodingHelper.toLowerCase(chars[s2++]);
    62             if (c1 != c2) {
    63                 return false;
    64             }
    65         }
    66         ps2.value = s2;
    67         return true;
    68     }
    70     private void debugMatchBegin() {
    71         Config.log.println("match_at: " +
    72                 "str: " + str +
    73                 ", end: " + end +
    74                 ", start: " + this.sstart +
    75                 ", sprev: " + this.sprev);
    76         Config.log.println("size: " + (end - str) + ", start offset: " + (this.sstart - str));
    77     }
    79     private void debugMatchLoop() {
    80         if (Config.DEBUG_MATCH) {
    81             Config.log.printf("%4d", (s - str)).print("> \"");
    82             int q, i;
    83             for (i=0, q=s; i<7 && q<end && s>=0; i++) {
    84                 if (q < end) {
    85                     Config.log.print(new String(new char[]{chars[q++]}));
    86                 }
    87             }
    88             final String string = q < end ? "...\"" : "\"";
    89             q += string.length();
    90             Config.log.print(string);
    91             for (i=0; i<20-(q-s);i++) {
    92                 Config.log.print(" ");
    93             }
    94             final StringBuilder sb = new StringBuilder();
    95             new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
    96             Config.log.println(sb.toString());
    97         }
    98     }
   100     @Override
   101     protected final int matchAt(final int r, final int ss, final int sp) {
   102         this.range = r;
   103         this.sstart = ss;
   104         this.sprev = sp;
   106         stk = 0;
   107         ip = 0;
   109         if (Config.DEBUG_MATCH) {
   110             debugMatchBegin();
   111         }
   113         init();
   115         bestLen = -1;
   116         s = ss;
   118         final int[] c = this.code;
   119         while (true) {
   120             if (Config.DEBUG_MATCH) {
   121                 debugMatchLoop();
   122             }
   124             sbegin = s;
   125             switch (c[ip++]) {
   126                 case OPCode.END:    if (opEnd()) {
   127                     return finish();
   128                 }                  break;
   129                 case OPCode.EXACT1:                     opExact1();                break;
   130                 case OPCode.EXACT2:                     opExact2();                continue;
   131                 case OPCode.EXACT3:                     opExact3();                continue;
   132                 case OPCode.EXACT4:                     opExact4();                continue;
   133                 case OPCode.EXACT5:                     opExact5();                continue;
   134                 case OPCode.EXACTN:                     opExactN();                continue;
   136                 case OPCode.EXACT1_IC:                  opExact1IC();              break;
   137                 case OPCode.EXACTN_IC:                  opExactNIC();              continue;
   139                 case OPCode.CCLASS:                     opCClass();                break;
   140                 case OPCode.CCLASS_MB:                  opCClassMB();              break;
   141                 case OPCode.CCLASS_MIX:                 opCClassMIX();             break;
   142                 case OPCode.CCLASS_NOT:                 opCClassNot();             break;
   143                 case OPCode.CCLASS_MB_NOT:              opCClassMBNot();           break;
   144                 case OPCode.CCLASS_MIX_NOT:             opCClassMIXNot();          break;
   145                 case OPCode.CCLASS_NODE:                opCClassNode();            break;
   147                 case OPCode.ANYCHAR:                    opAnyChar();               break;
   148                 case OPCode.ANYCHAR_ML:                 opAnyCharML();             break;
   149                 case OPCode.ANYCHAR_STAR:               opAnyCharStar();           break;
   150                 case OPCode.ANYCHAR_ML_STAR:            opAnyCharMLStar();         break;
   151                 case OPCode.ANYCHAR_STAR_PEEK_NEXT:     opAnyCharStarPeekNext();   break;
   152                 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:  opAnyCharMLStarPeekNext(); break;
   154                 case OPCode.WORD:                       opWord();                  break;
   155                 case OPCode.NOT_WORD:                   opNotWord();               break;
   156                 case OPCode.WORD_BOUND:                 opWordBound();             continue;
   157                 case OPCode.NOT_WORD_BOUND:             opNotWordBound();          continue;
   158                 case OPCode.WORD_BEGIN:                 opWordBegin();             continue;
   159                 case OPCode.WORD_END:                   opWordEnd();               continue;
   161                 case OPCode.BEGIN_BUF:                  opBeginBuf();              continue;
   162                 case OPCode.END_BUF:                    opEndBuf();                continue;
   163                 case OPCode.BEGIN_LINE:                 opBeginLine();             continue;
   164                 case OPCode.END_LINE:                   opEndLine();               continue;
   165                 case OPCode.SEMI_END_BUF:               opSemiEndBuf();            continue;
   166                 case OPCode.BEGIN_POSITION:             opBeginPosition();         continue;
   168                 case OPCode.MEMORY_START_PUSH:          opMemoryStartPush();       continue;
   169                 case OPCode.MEMORY_START:               opMemoryStart();           continue;
   170                 case OPCode.MEMORY_END_PUSH:            opMemoryEndPush();         continue;
   171                 case OPCode.MEMORY_END:                 opMemoryEnd();             continue;
   172                 case OPCode.MEMORY_END_PUSH_REC:        opMemoryEndPushRec();      continue;
   173                 case OPCode.MEMORY_END_REC:             opMemoryEndRec();          continue;
   175                 case OPCode.BACKREF1:                   opBackRef1();              continue;
   176                 case OPCode.BACKREF2:                   opBackRef2();              continue;
   177                 case OPCode.BACKREFN:                   opBackRefN();              continue;
   178                 case OPCode.BACKREFN_IC:                opBackRefNIC();            continue;
   179                 case OPCode.BACKREF_MULTI:              opBackRefMulti();          continue;
   180                 case OPCode.BACKREF_MULTI_IC:           opBackRefMultiIC();        continue;
   181                 case OPCode.BACKREF_WITH_LEVEL:         opBackRefAtLevel();        continue;
   183                 case OPCode.NULL_CHECK_START:           opNullCheckStart();        continue;
   184                 case OPCode.NULL_CHECK_END:             opNullCheckEnd();          continue;
   185                 case OPCode.NULL_CHECK_END_MEMST:       opNullCheckEndMemST();     continue;
   186                 case OPCode.NULL_CHECK_END_MEMST_PUSH:  opNullCheckEndMemSTPush(); continue;
   188                 case OPCode.JUMP:                       opJump();                  continue;
   189                 case OPCode.PUSH:                       opPush();                  continue;
   191                 case OPCode.POP:                        opPop();                   continue;
   192                 case OPCode.PUSH_OR_JUMP_EXACT1:        opPushOrJumpExact1();      continue;
   193                 case OPCode.PUSH_IF_PEEK_NEXT:          opPushIfPeekNext();        continue;
   195                 case OPCode.REPEAT:                     opRepeat();                continue;
   196                 case OPCode.REPEAT_NG:                  opRepeatNG();              continue;
   197                 case OPCode.REPEAT_INC:                 opRepeatInc();             continue;
   198                 case OPCode.REPEAT_INC_SG:              opRepeatIncSG();           continue;
   199                 case OPCode.REPEAT_INC_NG:              opRepeatIncNG();           continue;
   200                 case OPCode.REPEAT_INC_NG_SG:           opRepeatIncNGSG();         continue;
   202                 case OPCode.PUSH_POS:                   opPushPos();               continue;
   203                 case OPCode.POP_POS:                    opPopPos();                continue;
   204                 case OPCode.PUSH_POS_NOT:               opPushPosNot();            continue;
   205                 case OPCode.FAIL_POS:                   opFailPos();               continue;
   206                 case OPCode.PUSH_STOP_BT:               opPushStopBT();            continue;
   207                 case OPCode.POP_STOP_BT:                opPopStopBT();             continue;
   209                 case OPCode.LOOK_BEHIND:                opLookBehind();            continue;
   210                 case OPCode.PUSH_LOOK_BEHIND_NOT:       opPushLookBehindNot();     continue;
   211                 case OPCode.FAIL_LOOK_BEHIND_NOT:       opFailLookBehindNot();     continue;
   213                 case OPCode.FINISH:
   214                     return finish();
   216                 case OPCode.FAIL:                       opFail();                  continue;
   218                 default:
   219                     throw new InternalException(ErrorMessages.ERR_UNDEFINED_BYTECODE);
   221             } // main switch
   222         } // main while
   223     }
   225     private boolean opEnd() {
   226         final int n = s - sstart;
   228         if (n > bestLen) {
   229             if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
   230                 if (isFindLongest(regex.options)) {
   231                     if (n > msaBestLen) {
   232                         msaBestLen = n;
   233                         msaBestS = sstart;
   234                     } else {
   235                         // goto end_best_len;
   236                         return endBestLength();
   237                     }
   238                 }
   239             } // USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
   241             bestLen = n;
   242             final Region region = msaRegion;
   243             if (region != null) {
   244                 // USE_POSIX_REGION_OPTION ... else ...
   245                 region.beg[0] = msaBegin = sstart - str;
   246                 region.end[0] = msaEnd   = s      - str;
   247                 for (int i = 1; i <= regex.numMem; i++) {
   248                     // opt!
   249                     if (repeatStk[memEndStk + i] != INVALID_INDEX) {
   250                         region.beg[i] = bsAt(regex.btMemStart, i) ?
   251                                         stack[repeatStk[memStartStk + i]].getMemPStr() - str :
   252                                         repeatStk[memStartStk + i] - str;
   255                         region.end[i] = bsAt(regex.btMemEnd, i) ?
   256                                         stack[repeatStk[memEndStk + i]].getMemPStr() :
   257                                         repeatStk[memEndStk + i] - str;
   259                     } else {
   260                         region.beg[i] = region.end[i] = Region.REGION_NOTPOS;
   261                     }
   263                 }
   265             } else {
   266                 msaBegin = sstart - str;
   267                 msaEnd   = s      - str;
   268             }
   269         } else {
   270             final Region region = msaRegion;
   271             if (Config.USE_POSIX_API_REGION_OPTION) {
   272                 if (!isPosixRegion(regex.options)) {
   273                     if (region != null) {
   274                         region.clear();
   275                     } else {
   276                         msaBegin = msaEnd = 0;
   277                     }
   278                 }
   279             } else {
   280                 if (region != null) {
   281                     region.clear();
   282                 } else {
   283                     msaBegin = msaEnd = 0;
   284                 }
   285             } // USE_POSIX_REGION_OPTION
   286         }
   287         // end_best_len:
   288         /* default behavior: return first-matching result. */
   289         return endBestLength();
   290     }
   292     private boolean endBestLength() {
   293         if (isFindCondition(regex.options)) {
   294             if (isFindNotEmpty(regex.options) && s == sstart) {
   295                 bestLen = -1;
   296                 {opFail(); return false;} /* for retry */
   297             }
   298             if (isFindLongest(regex.options) && s < range) {
   299                 {opFail(); return false;} /* for retry */
   300             }
   301         }
   302         // goto finish;
   303         return true;
   304     }
   306     private void opExact1() {
   307         if (s >= range || code[ip] != chars[s++]) {opFail(); return;}
   308         //if (s > range) {opFail(); return;}
   309         ip++;
   310         sprev = sbegin; // break;
   311     }
   313     private void opExact2() {
   314         if (s + 2 > range) {opFail(); return;}
   315         if (code[ip] != chars[s]) {opFail(); return;}
   316         ip++; s++;
   317         if (code[ip] != chars[s]) {opFail(); return;}
   318         sprev = s;
   319         ip++; s++;
   320     }
   322     private void opExact3() {
   323         if (s + 3 > range) {opFail(); return;}
   324         if (code[ip] != chars[s]) {opFail(); return;}
   325         ip++; s++;
   326         if (code[ip] != chars[s]) {opFail(); return;}
   327         ip++; s++;
   328         if (code[ip] != chars[s]) {opFail(); return;}
   329         sprev = s;
   330         ip++; s++;
   331     }
   333     private void opExact4() {
   334         if (s + 4 > range) {opFail(); return;}
   335         if (code[ip] != chars[s]) {opFail(); return;}
   336         ip++; s++;
   337         if (code[ip] != chars[s]) {opFail(); return;}
   338         ip++; s++;
   339         if (code[ip] != chars[s]) {opFail(); return;}
   340         ip++; s++;
   341         if (code[ip] != chars[s]) {opFail(); return;}
   342         sprev = s;
   343         ip++; s++;
   344     }
   346     private void opExact5() {
   347         if (s + 5 > range) {opFail(); return;}
   348         if (code[ip] != chars[s]) {opFail(); return;}
   349         ip++; s++;
   350         if (code[ip] != chars[s]) {opFail(); return;}
   351         ip++; s++;
   352         if (code[ip] != chars[s]) {opFail(); return;}
   353         ip++; s++;
   354         if (code[ip] != chars[s]) {opFail(); return;}
   355         ip++; s++;
   356         if (code[ip] != chars[s]) {opFail(); return;}
   357         sprev = s;
   358         ip++; s++;
   359     }
   361     private void opExactN() {
   362         int tlen = code[ip++];
   363         if (s + tlen > range) {opFail(); return;}
   365         if (Config.USE_STRING_TEMPLATES) {
   366             final char[] bs = regex.templates[code[ip++]];
   367             int ps = code[ip++];
   369             while (tlen-- > 0) {
   370                 if (bs[ps++] != chars[s++]) {opFail(); return;}
   371             }
   373         } else {
   374             while (tlen-- > 0) {
   375                 if (code[ip++] != chars[s++]) {opFail(); return;}
   376             }
   377         }
   378         sprev = s - 1;
   379     }
   381     private void opExact1IC() {
   382         if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
   383         ip++;
   384         sprev = sbegin; // break;
   385     }
   387     private void opExactNIC() {
   388         int tlen = code[ip++];
   389         if (s + tlen > range) {opFail(); return;}
   391         if (Config.USE_STRING_TEMPLATES) {
   392             final char[] bs = regex.templates[code[ip++]];
   393             int ps = code[ip++];
   395             while (tlen-- > 0) {
   396                 if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
   397             }
   398         } else {
   400             while (tlen-- > 0) {
   401                 if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
   402             }
   403         }
   404         sprev = s - 1;
   405     }
   407     private boolean isInBitSet() {
   408         final int c = chars[s];
   409         return (c <= 0xff && (code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0);
   410     }
   412     private void opCClass() {
   413         if (s >= range || !isInBitSet()) {opFail(); return;}
   414         ip += BitSet.BITSET_SIZE;
   415         s++;
   416         sprev = sbegin; // break;
   417     }
   419     private boolean isInClassMB() {
   420         final int tlen = code[ip++];
   421         if (s >= range) {
   422             return false;
   423         }
   424         final int ss = s;
   425         s++;
   426         final int c = chars[ss];
   427         if (!EncodingHelper.isInCodeRange(code, ip, c)) {
   428             return false;
   429         }
   430         ip += tlen;
   431         return true;
   432     }
   434     private void opCClassMB() {
   435         // beyond string check
   436         if (s >= range || chars[s] <= 0xff) {opFail(); return;}
   437         if (!isInClassMB()) {opFail(); return;} // not!!!
   438         sprev = sbegin; // break;
   439     }
   441     private void opCClassMIX() {
   442         if (s >= range) {opFail(); return;}
   443         if (chars[s] > 0xff) {
   444             ip += BitSet.BITSET_SIZE;
   445             if (!isInClassMB()) {opFail(); return;}
   446         } else {
   447             if (!isInBitSet()) {opFail(); return;}
   448             ip += BitSet.BITSET_SIZE;
   449             final int tlen = code[ip++]; // by code range length
   450             ip += tlen;
   451             s++;
   452         }
   453         sprev = sbegin; // break;
   454     }
   456     private void opCClassNot() {
   457         if (s >= range || isInBitSet()) {opFail(); return;}
   458         ip += BitSet.BITSET_SIZE;
   459         s++;
   460         sprev = sbegin; // break;
   461     }
   463     private boolean isNotInClassMB() {
   464         final int tlen = code[ip++];
   466         if (!(s + 1 <= range)) {
   467             if (s >= range) {
   468                 return false;
   469             }
   470             s = end;
   471             ip += tlen;
   472             return true;
   473         }
   475         final int ss = s;
   476         s++;
   477         final int c = chars[ss];
   479         if (EncodingHelper.isInCodeRange(code, ip, c)) {
   480             return false;
   481         }
   482         ip += tlen;
   483         return true;
   484     }
   486     private void opCClassMBNot() {
   487         if (s >= range) {opFail(); return;}
   488         if (chars[s] <= 0xff) {
   489             s++;
   490             final int tlen = code[ip++];
   491             ip += tlen;
   492             sprev = sbegin; // break;
   493             return;
   494         }
   495         if (!isNotInClassMB()) {opFail(); return;}
   496         sprev = sbegin; // break;
   497     }
   499     private void opCClassMIXNot() {
   500         if (s >= range) {opFail(); return;}
   501         if (chars[s] > 0xff) {
   502             ip += BitSet.BITSET_SIZE;
   503             if (!isNotInClassMB()) {opFail(); return;}
   504         } else {
   505             if (isInBitSet()) {opFail(); return;}
   506             ip += BitSet.BITSET_SIZE;
   507             final int tlen = code[ip++];
   508             ip += tlen;
   509             s++;
   510         }
   511         sprev = sbegin; // break;
   512     }
   514     private void opCClassNode() {
   515         if (s >= range) {opFail(); return;}
   516         final CClassNode cc = (CClassNode)regex.operands[code[ip++]];
   517         final int ss = s;
   518         s++;
   519         final int c = chars[ss];
   520         if (!cc.isCodeInCCLength(c)) {opFail(); return;}
   521         sprev = sbegin; // break;
   522     }
   524     private void opAnyChar() {
   525         if (s >= range) {opFail(); return;}
   526         if (isNewLine(chars[s])) {opFail(); return;}
   527         s++;
   528         sprev = sbegin; // break;
   529     }
   531     private void opAnyCharML() {
   532         if (s >= range) {opFail(); return;}
   533         s++;
   534         sprev = sbegin; // break;
   535     }
   537     private void opAnyCharStar() {
   538         final char[] ch = this.chars;
   539         while (s < range) {
   540             pushAlt(ip, s, sprev);
   541             if (isNewLine(ch, s, end)) {opFail(); return;}
   542             sprev = s;
   543             s++;
   544         }
   545         sprev = sbegin; // break;
   546     }
   548     private void opAnyCharMLStar() {
   549         while (s < range) {
   550             pushAlt(ip, s, sprev);
   551             sprev = s;
   552             s++;
   553         }
   554         sprev = sbegin; // break;
   555     }
   557     private void opAnyCharStarPeekNext() {
   558         final char c = (char)code[ip];
   559         final char[] ch = this.chars;
   561         while (s < range) {
   562             final char b = ch[s];
   563             if (c == b) {
   564                 pushAlt(ip + 1, s, sprev);
   565             }
   566             if (isNewLine(b)) {opFail(); return;}
   567             sprev = s;
   568             s++;
   569         }
   570         ip++;
   571         sprev = sbegin; // break;
   572     }
   574     private void opAnyCharMLStarPeekNext() {
   575         final char c = (char)code[ip];
   576         final char[] ch = this.chars;
   578         while (s < range) {
   579             if (c == ch[s]) {
   580                 pushAlt(ip + 1, s, sprev);
   581             }
   582             sprev = s;
   583             s++;
   584         }
   585         ip++;
   586         sprev = sbegin; // break;
   587     }
   589     private void opWord() {
   590         if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
   591         s++;
   592         sprev = sbegin; // break;
   593     }
   595     private void opNotWord() {
   596         if (s >= range || EncodingHelper.isWord(chars[s])) {opFail(); return;}
   597         s++;
   598         sprev = sbegin; // break;
   599     }
   601     private void opWordBound() {
   602         if (s == str) {
   603             if (s >= range || !EncodingHelper.isWord(chars[s])) {opFail(); return;}
   604         } else if (s == end) {
   605             if (sprev >= end || !EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
   606         } else {
   607             if (EncodingHelper.isWord(chars[s]) == EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
   608         }
   609     }
   611     private void opNotWordBound() {
   612         if (s == str) {
   613             if (s < range && EncodingHelper.isWord(chars[s])) {opFail(); return;}
   614         } else if (s == end) {
   615             if (sprev < end && EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
   616         } else {
   617             if (EncodingHelper.isWord(chars[s]) != EncodingHelper.isWord(chars[sprev])) {opFail(); return;}
   618         }
   619     }
   621     private void opWordBegin() {
   622         if (s < range && EncodingHelper.isWord(chars[s])) {
   623             if (s == str || !EncodingHelper.isWord(chars[sprev])) {
   624                 return;
   625             }
   626         }
   627         opFail();
   628     }
   630     private void opWordEnd() {
   631         if (s != str && EncodingHelper.isWord(chars[sprev])) {
   632             if (s == end || !EncodingHelper.isWord(chars[s])) {
   633                 return;
   634             }
   635         }
   636         opFail();
   637     }
   639     private void opBeginBuf() {
   640         if (s != str) {
   641             opFail();
   642         }
   643     }
   645     private void opEndBuf() {
   646         if (s != end) {
   647             opFail();
   648         }
   649     }
   651     private void opBeginLine() {
   652         if (s == str) {
   653             if (isNotBol(msaOptions)) {
   654                 opFail();
   655             }
   656             return;
   657         } else if (isNewLine(chars, sprev, end) && s != end) {
   658             return;
   659         }
   660         opFail();
   661     }
   663     private void opEndLine()  {
   664         if (s == end) {
   665             if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
   666                 if (str == end || !isNewLine(chars, sprev, end)) {
   667                     if (isNotEol(msaOptions)) {
   668                         opFail();
   669                     }
   670                 }
   671                 return;
   672             }
   673             if (isNotEol(msaOptions)) {
   674                 opFail();
   675             }
   676             return;
   677         } else if (isNewLine(chars, s, end)) {
   678             return;
   679         }
   680         opFail();
   681     }
   683     private void opSemiEndBuf() {
   684         if (s == end) {
   685             if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
   686                 if (str == end || !isNewLine(chars, sprev, end)) {
   687                     if (isNotEol(msaOptions)) {
   688                         opFail();
   689                     }
   690                 }
   691                 return;
   692             }
   693             if (isNotEol(msaOptions)) {
   694                 opFail();
   695             }
   696             return;
   697         } else if (isNewLine(chars, s, end) && s + 1 == end) {
   698             return;
   699         }
   700         opFail();
   701     }
   703     private void opBeginPosition() {
   704         if (s != msaStart) {
   705             opFail();
   706         }
   707     }
   709     private void opMemoryStartPush() {
   710         final int mem = code[ip++];
   711         pushMemStart(mem, s);
   712     }
   714     private void opMemoryStart() {
   715         final int mem = code[ip++];
   716         repeatStk[memStartStk + mem] = s;
   717     }
   719     private void opMemoryEndPush() {
   720         final int mem = code[ip++];
   721         pushMemEnd(mem, s);
   722     }
   724     private void opMemoryEnd() {
   725         final int mem = code[ip++];
   726         repeatStk[memEndStk + mem] = s;
   727     }
   729     private void opMemoryEndPushRec() {
   730         final int mem = code[ip++];
   731         final int stkp = getMemStart(mem); /* should be before push mem-end. */
   732         pushMemEnd(mem, s);
   733         repeatStk[memStartStk + mem] = stkp;
   734     }
   736     private void opMemoryEndRec() {
   737         final int mem = code[ip++];
   738         repeatStk[memEndStk + mem] = s;
   739         final int stkp = getMemStart(mem);
   741         if (BitStatus.bsAt(regex.btMemStart, mem)) {
   742             repeatStk[memStartStk + mem] = stkp;
   743         } else {
   744             repeatStk[memStartStk + mem] = stack[stkp].getMemPStr();
   745         }
   747         pushMemEndMark(mem);
   748     }
   750     private boolean backrefInvalid(final int mem) {
   751         return repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX;
   752     }
   754     private int backrefStart(final int mem) {
   755         return bsAt(regex.btMemStart, mem) ? stack[repeatStk[memStartStk + mem]].getMemPStr() : repeatStk[memStartStk + mem];
   756     }
   758     private int backrefEnd(final int mem) {
   759         return bsAt(regex.btMemEnd, mem) ? stack[repeatStk[memEndStk + mem]].getMemPStr() : repeatStk[memEndStk + mem];
   760     }
   762     private void backref(final int mem) {
   763         /* if you want to remove following line,
   764         you should check in parse and compile time. (numMem) */
   765         if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
   767         int pstart = backrefStart(mem);
   768         final int pend = backrefEnd(mem);
   770         int n = pend - pstart;
   771         if (s + n > range) {opFail(); return;}
   772         sprev = s;
   774         // STRING_CMP
   775         while(n-- > 0) {
   776             if (chars[pstart++] != chars[s++]) {opFail(); return;}
   777         }
   779         // beyond string check
   780         if (sprev < range) {
   781             while (sprev + 1 < s) {
   782                 sprev++;
   783             }
   784         }
   785     }
   787     private void opBackRef1() {
   788         backref(1);
   789     }
   791     private void opBackRef2() {
   792         backref(2);
   793     }
   795     private void opBackRefN() {
   796         backref(code[ip++]);
   797     }
   799     private void opBackRefNIC() {
   800         final int mem = code[ip++];
   801         /* if you want to remove following line,
   802         you should check in parse and compile time. (numMem) */
   803         if (mem > regex.numMem || backrefInvalid(mem)) {opFail(); return;}
   805         final int pstart = backrefStart(mem);
   806         final int pend = backrefEnd(mem);
   808         final int n = pend - pstart;
   809         if (s + n > range) {opFail(); return;}
   810         sprev = s;
   812         value = s;
   813         if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end)) {opFail(); return;}
   814         s = value;
   816         // if (sprev < chars.length)
   817         while (sprev + 1 < s) {
   818             sprev++;
   819         }
   820     }
   822     private void opBackRefMulti() {
   823         final int tlen = code[ip++];
   825         int i;
   826         loop:for (i=0; i<tlen; i++) {
   827             final int mem = code[ip++];
   828             if (backrefInvalid(mem)) {
   829                 continue;
   830             }
   832             int pstart = backrefStart(mem);
   833             final int pend = backrefEnd(mem);
   835             int n = pend - pstart;
   836             if (s + n > range) {opFail(); return;}
   838             sprev = s;
   839             int swork = s;
   841             while (n-- > 0) {
   842                 if (chars[pstart++] != chars[swork++]) {
   843                     continue loop;
   844                 }
   845             }
   847             s = swork;
   849             // beyond string check
   850             if (sprev < range) {
   851                 while (sprev + 1 < s) {
   852                     sprev++;
   853                 }
   854             }
   856             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
   857             break; /* success */
   858         }
   859         if (i == tlen) {opFail(); return;}
   860     }
   862     private void opBackRefMultiIC() {
   863         final int tlen = code[ip++];
   865         int i;
   866         loop:for (i=0; i<tlen; i++) {
   867             final int mem = code[ip++];
   868             if (backrefInvalid(mem)) {
   869                 continue;
   870             }
   872             final int pstart = backrefStart(mem);
   873             final int pend = backrefEnd(mem);
   875             final int n = pend - pstart;
   876             if (s + n > range) {opFail(); return;}
   878             sprev = s;
   880             value = s;
   881             if (!stringCmpIC(regex.caseFoldFlag, pstart, this, n, end))
   882              {
   883                 continue loop; // STRING_CMP_VALUE_IC
   884             }
   885             s = value;
   887             // if (sprev < chars.length)
   888             while (sprev + 1 < s) {
   889                 sprev++;
   890             }
   892             ip += tlen - i  - 1; // * SIZE_MEMNUM (1)
   893             break;  /* success */
   894         }
   895         if (i == tlen) {opFail(); return;}
   896     }
   898     private boolean memIsInMemp(final int mem, final int num, final int mempp) {
   899         for (int i=0, memp = mempp; i<num; i++) {
   900             final int m = code[memp++];
   901             if (mem == m) {
   902                 return true;
   903             }
   904         }
   905         return false;
   906     }
   908     // USE_BACKREF_AT_LEVEL // (s) and (end) implicit
   909     private boolean backrefMatchAtNestedLevel(final boolean ignoreCase, final int caseFoldFlag,
   910                                               final int nest, final int memNum, final int memp) {
   911         int pend = -1;
   912         int level = 0;
   913         int k = stk - 1;
   915         while (k >= 0) {
   916             final StackEntry e = stack[k];
   918             if (e.type == CALL_FRAME) {
   919                 level--;
   920             } else if (e.type == RETURN) {
   921                 level++;
   922             } else if (level == nest) {
   923                 if (e.type == MEM_START) {
   924                     if (memIsInMemp(e.getMemNum(), memNum, memp)) {
   925                         final int pstart = e.getMemPStr();
   926                         if (pend != -1) {
   927                             if (pend - pstart > end - s) {
   928                                 return false; /* or goto next_mem; */
   929                             }
   930                             int p = pstart;
   932                             value = s;
   933                             if (ignoreCase) {
   934                                 if (!stringCmpIC(caseFoldFlag, pstart, this, pend - pstart, end)) {
   935                                     return false; /* or goto next_mem; */
   936                                 }
   937                             } else {
   938                                 while (p < pend) {
   939                                     if (chars[p++] != chars[value++]) {
   940                                         return false; /* or goto next_mem; */
   941                                     }
   942                                 }
   943                             }
   944                             s = value;
   946                             return true;
   947                         }
   948                     }
   949                 } else if (e.type == MEM_END) {
   950                     if (memIsInMemp(e.getMemNum(), memNum, memp)) {
   951                         pend = e.getMemPStr();
   952                     }
   953                 }
   954             }
   955             k--;
   956         }
   957         return false;
   958     }
   960     private void opBackRefAtLevel() {
   961         final int ic      = code[ip++];
   962         final int level   = code[ip++];
   963         final int tlen    = code[ip++];
   965         sprev = s;
   966         if (backrefMatchAtNestedLevel(ic != 0, regex.caseFoldFlag, level, tlen, ip)) { // (s) and (end) implicit
   967             while (sprev + 1 < s) {
   968                 sprev++;
   969             }
   970             ip += tlen; // * SIZE_MEMNUM
   971         } else {
   972             {opFail(); return;}
   973         }
   974     }
   976     private void opNullCheckStart() {
   977         final int mem = code[ip++];
   978         pushNullCheckStart(mem, s);
   979     }
   981     private void nullCheckFound() {
   982         // null_check_found:
   983         /* empty loop founded, skip next instruction */
   984         switch(code[ip++]) {
   985         case OPCode.JUMP:
   986         case OPCode.PUSH:
   987             ip++;       // p += SIZE_RELADDR;
   988             break;
   989         case OPCode.REPEAT_INC:
   990         case OPCode.REPEAT_INC_NG:
   991         case OPCode.REPEAT_INC_SG:
   992         case OPCode.REPEAT_INC_NG_SG:
   993             ip++;        // p += SIZE_MEMNUM;
   994             break;
   995         default:
   996             throw new InternalException(ErrorMessages.ERR_UNEXPECTED_BYTECODE);
   997         } // switch
   998     }
  1000     private void opNullCheckEnd() {
  1001         final int mem = code[ip++];
  1002         final int isNull = nullCheck(mem, s); /* mem: null check id */
  1004         if (isNull != 0) {
  1005             if (Config.DEBUG_MATCH) {
  1006                 Config.log.println("NULL_CHECK_END: skip  id:" + mem + ", s:" + s);
  1009             nullCheckFound();
  1013     // USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
  1014     private void opNullCheckEndMemST() {
  1015         final int mem = code[ip++];   /* mem: null check id */
  1016         final int isNull = nullCheckMemSt(mem, s);
  1018         if (isNull != 0) {
  1019             if (Config.DEBUG_MATCH) {
  1020                 Config.log.println("NULL_CHECK_END_MEMST: skip  id:" + mem + ", s:" + s);
  1023             if (isNull == -1) {opFail(); return;}
  1024             nullCheckFound();
  1028     // USE_SUBEXP_CALL
  1029     private void opNullCheckEndMemSTPush() {
  1030         final int mem = code[ip++];   /* mem: null check id */
  1032         int isNull;
  1033         if (Config.USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT) {
  1034             isNull = nullCheckMemStRec(mem, s);
  1035         } else {
  1036             isNull = nullCheckRec(mem, s);
  1039         if (isNull != 0) {
  1040             if (Config.DEBUG_MATCH) {
  1041                 Config.log.println("NULL_CHECK_END_MEMST_PUSH: skip  id:" + mem + ", s:" + s);
  1044             if (isNull == -1) {opFail(); return;}
  1045             nullCheckFound();
  1046         } else {
  1047             pushNullCheckEnd(mem);
  1051     private void opJump() {
  1052         ip += code[ip] + 1;
  1055     private void opPush() {
  1056         final int addr = code[ip++];
  1057         pushAlt(ip + addr, s, sprev);
  1060     private void opPop() {
  1061         popOne();
  1064     private void opPushOrJumpExact1() {
  1065         final int addr = code[ip++];
  1066         // beyond string check
  1067         if (s < range && code[ip] == chars[s]) {
  1068             ip++;
  1069             pushAlt(ip + addr, s, sprev);
  1070             return;
  1072         ip += addr + 1;
  1075     private void opPushIfPeekNext() {
  1076         final int addr = code[ip++];
  1077         // beyond string check
  1078         if (s < range && code[ip] == chars[s]) {
  1079             ip++;
  1080             pushAlt(ip + addr, s, sprev);
  1081             return;
  1083         ip++;
  1086     private void opRepeat() {
  1087         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
  1088         final int addr= code[ip++];
  1090         // ensure1();
  1091         repeatStk[mem] = stk;
  1092         pushRepeat(mem, ip);
  1094         if (regex.repeatRangeLo[mem] == 0) { // lower
  1095             pushAlt(ip + addr, s, sprev);
  1099     private void opRepeatNG() {
  1100         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
  1101         final int addr= code[ip++];
  1103         // ensure1();
  1104         repeatStk[mem] = stk;
  1105         pushRepeat(mem, ip);
  1107         if (regex.repeatRangeLo[mem] == 0) {
  1108             pushAlt(ip, s, sprev);
  1109             ip += addr;
  1113     private void repeatInc(final int mem, final int si) {
  1114         final StackEntry e = stack[si];
  1116         e.increaseRepeatCount();
  1118         if (e.getRepeatCount() >= regex.repeatRangeHi[mem]) {
  1119             /* end of repeat. Nothing to do. */
  1120         } else if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
  1121             pushAlt(ip, s, sprev);
  1122             ip = e.getRepeatPCode(); /* Don't use stkp after PUSH. */
  1123         } else {
  1124             ip = e.getRepeatPCode();
  1126         pushRepeatInc(si);
  1129     private void opRepeatInc() {
  1130         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
  1131         final int si = repeatStk[mem];
  1132         repeatInc(mem, si);
  1135     private void opRepeatIncSG() {
  1136         final int mem = code[ip++];   /* mem: OP_REPEAT ID */
  1137         final int si = getRepeat(mem);
  1138         repeatInc(mem, si);
  1141     private void repeatIncNG(final int mem, final int si) {
  1142         final StackEntry e = stack[si];
  1144         e.increaseRepeatCount();
  1146         if (e.getRepeatCount() < regex.repeatRangeHi[mem]) {
  1147             if (e.getRepeatCount() >= regex.repeatRangeLo[mem]) {
  1148                 final int pcode = e.getRepeatPCode();
  1149                 pushRepeatInc(si);
  1150                 pushAlt(pcode, s, sprev);
  1151             } else {
  1152                 ip = e.getRepeatPCode();
  1153                 pushRepeatInc(si);
  1155         } else if (e.getRepeatCount() == regex.repeatRangeHi[mem]) {
  1156             pushRepeatInc(si);
  1160     private void opRepeatIncNG() {
  1161         final int mem = code[ip++];
  1162         final int si = repeatStk[mem];
  1163         repeatIncNG(mem, si);
  1166     private void opRepeatIncNGSG() {
  1167         final int mem = code[ip++];
  1168         final int si = getRepeat(mem);
  1169         repeatIncNG(mem, si);
  1172     private void opPushPos() {
  1173         pushPos(s, sprev);
  1176     private void opPopPos() {
  1177         final StackEntry e = stack[posEnd()];
  1178         s    = e.getStatePStr();
  1179         sprev= e.getStatePStrPrev();
  1182     private void opPushPosNot() {
  1183         final int addr = code[ip++];
  1184         pushPosNot(ip + addr, s, sprev);
  1187     private void opFailPos() {
  1188         popTilPosNot();
  1189         opFail();
  1192     private void opPushStopBT() {
  1193         pushStopBT();
  1196     private void opPopStopBT() {
  1197         stopBtEnd();
  1200     private void opLookBehind() {
  1201         final int tlen = code[ip++];
  1202         s = EncodingHelper.stepBack(str, s, tlen);
  1203         if (s == -1) {opFail(); return;}
  1204         sprev = EncodingHelper.prevCharHead(str, s);
  1207     private void opPushLookBehindNot() {
  1208         final int addr = code[ip++];
  1209         final int tlen = code[ip++];
  1210         final int q = EncodingHelper.stepBack(str, s, tlen);
  1211         if (q == -1) {
  1212             /* too short case -> success. ex. /(?<!XXX)a/.match("a")
  1213             If you want to change to fail, replace following line. */
  1214             ip += addr;
  1215             // return FAIL;
  1216         } else {
  1217             pushLookBehindNot(ip + addr, s, sprev);
  1218             s = q;
  1219             sprev = EncodingHelper.prevCharHead(str, s);
  1223     private void opFailLookBehindNot() {
  1224         popTilLookBehindNot();
  1225         opFail();
  1228     private void opFail() {
  1229         if (stack == null) {
  1230             ip = regex.codeLength - 1;
  1231             return;
  1235         final StackEntry e = pop();
  1236         ip    = e.getStatePCode();
  1237         s     = e.getStatePStr();
  1238         sprev = e.getStatePStrPrev();
  1241     private int finish() {
  1242         return bestLen;

mercurial