diff options
-rw-r--r-- | src/cmd/sam/regexp.c | 108 |
1 files changed, 49 insertions, 59 deletions
diff --git a/src/cmd/sam/regexp.c b/src/cmd/sam/regexp.c index b213d2f3..8beaa036 100644 --- a/src/cmd/sam/regexp.c +++ b/src/cmd/sam/regexp.c @@ -527,56 +527,26 @@ classmatch(int classno, int c, int negate) } /* - * Add inst to the list [l, l+NLIST), but only if it is not there already. - * These work lists are stored and processed in increasing - * order of sep->p[0], so if the inst is there already, the one - * there already is a more left match and takes priority. - * (This works for backward searches too, because there - * is no explicit comparison.) + * Note optimization in addinst: + * *l must be pending when addinst called; if *l has been looked + * at already, the optimization is a bug. */ -Ilist* -addinst1(Ilist *l, Inst *inst, Rangeset *sep) -{ - Ilist *p; - - for(p = l; p->inst; p++) - if(p->inst==inst) - return 0; - - if(p == l+NLIST) - return l+NLIST; - - p->inst = inst; - p->se = *sep; - (p+1)->inst = 0; - return p; -} - int addinst(Ilist *l, Inst *inst, Rangeset *sep) { - Ilist *ap; - - ap = addinst1(l, inst, sep); - if(ap == 0) - return 0; - if(ap == l+NLIST) - return -1; - - /* - * Added inst to list at ap. - * Expand any ORs right now, so that entire - * work list ends up being sorted by increasing sep->p[0]. - */ - for(; ap->inst; ap++){ - if(ap->inst->type == OR){ - if(addinst1(l, ap->inst->left, &ap->se) == l+NLIST) - return -1; - if(addinst1(l, ap->inst->right, &ap->se) == l+NLIST) - return -1; + Ilist *p; + + for(p = l; p->inst; p++){ + if(p->inst==inst){ + if((sep)->p[0].p1 < p->se.p[0].p1) + p->se= *sep; /* this would be bug */ + return 0; /* It's already there */ } } - return 0; + p->inst = inst; + p->se= *sep; + (p+1)->inst = 0; + return 1; } int @@ -586,13 +556,13 @@ execute(File *f, Posn startp, Posn eof) Inst *inst; Ilist *tlp; Posn p = startp; + int nnl = 0, ntl; int c; int wrapped = 0; int startchar = startinst->type<OPERATOR? startinst->type : 0; list[0][0].inst = list[1][0].inst = 0; sel.p[0].p1 = -1; - nl = list[0]; /* Execute machine once for each character */ for(;;p++){ doloop: @@ -611,18 +581,21 @@ execute(File *f, Posn startp, Posn eof) default: goto Return; } - }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nl->inst==0) + }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0) break; /* fast check for first char */ - if(startchar && nl->inst==0 && c!=startchar) + if(startchar && nnl==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = 0; + ntl = nnl; + nnl = 0; if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){ /* Add first instruction to this list */ sempty.p[0].p1 = p; - if(addinst(tl, startinst, &sempty) < 0) + if(addinst(tl, startinst, &sempty)) + if(++ntl >= NLIST) Overflow: error(Eoverflow); } @@ -633,7 +606,8 @@ execute(File *f, Posn startp, Posn eof) default: /* regular character */ if(inst->type==c){ Addinst: - if(addinst(nl, inst->next, &tlp->se) < 0) + if(addinst(nl, inst->next, &tlp->se)) + if(++nnl >= NLIST) goto Overflow; } break; @@ -671,8 +645,13 @@ execute(File *f, Posn startp, Posn eof) goto Addinst; break; case OR: - /* already expanded; just a placeholder */ - break; + /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) + if(++ntl >= NLIST) + goto Overflow; + /* efficiency: advance and re-evaluate */ + inst = inst->left; + goto Switchstmt; case END: /* Match! */ tlp->se.p[0].p2 = p; newmatch(&tlp->se); @@ -702,13 +681,13 @@ bexecute(File *f, Posn startp) Inst *inst; Ilist *tlp; Posn p = startp; + int nnl = 0, ntl; int c; int wrapped = 0; int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0; list[0][0].inst = list[1][0].inst = 0; sel.p[0].p1= -1; - nl = list[0]; /* Execute machine once for each character, including terminal NUL */ for(;;--p){ doloop: @@ -727,18 +706,22 @@ bexecute(File *f, Posn startp) default: goto Return; } - }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nl->inst==0) + }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0) break; /* fast check for first char */ - if(startchar && nl->inst==0 && c!=startchar) + if(startchar && nnl==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = 0; + ntl = nnl; + nnl = 0; if(sel.p[0].p1<0 && (!wrapped || p>startp)){ /* Add first instruction to this list */ - sempty.p[0].p1 = p; - if(addinst(tl, bstartinst, &sempty) < 0) + /* the minus is so the optimizations in addinst work */ + sempty.p[0].p1 = -p; + if(addinst(tl, bstartinst, &sempty)) + if(++ntl >= NLIST) Overflow: error(Eoverflow); } @@ -749,7 +732,8 @@ bexecute(File *f, Posn startp) default: /* regular character */ if(inst->type == c){ Addinst: - if(addinst(nl, inst->next, &tlp->se) < 0) + if(addinst(nl, inst->next, &tlp->se)) + if(++nnl >= NLIST) goto Overflow; } break; @@ -787,9 +771,15 @@ bexecute(File *f, Posn startp) goto Addinst; break; case OR: - /* already expanded; just a placeholder */ - break; + /* evaluate right choice later */ + if(addinst(tl, inst->right, &tlp->se)) + if(++ntl >= NLIST) + goto Overflow; + /* efficiency: advance and re-evaluate */ + inst = inst->left; + goto Switchstmt; case END: /* Match! */ + tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus sign */ tlp->se.p[0].p2 = p; bnewmatch(&tlp->se); break; |