diff options
author | Russ Cox <rsc@swtch.com> | 2007-12-07 17:33:41 -0500 |
---|---|---|
committer | Russ Cox <rsc@swtch.com> | 2007-12-07 17:33:41 -0500 |
commit | 6d08a0f548c3d3eda199ee91a83aacd1f895718c (patch) | |
tree | a6f1c990d39d8da40fba6681ac9013023d840786 | |
parent | 2deda14e4268e7e8af4910d453db73e210d3eb58 (diff) | |
download | plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.tar.gz plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.tar.bz2 plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.zip |
libregexp: revert regexp fix
-rw-r--r-- | src/libregexp/regaux.c | 105 | ||||
-rw-r--r-- | src/libregexp/regcomp.c | 10 | ||||
-rw-r--r-- | src/libregexp/regcomp.h | 6 | ||||
-rw-r--r-- | src/libregexp/regexec.c | 48 | ||||
-rw-r--r-- | src/libregexp/rregexec.c | 61 |
5 files changed, 113 insertions, 117 deletions
diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c index 39e67725..b854b5ac 100644 --- a/src/libregexp/regaux.c +++ b/src/libregexp/regaux.c @@ -23,89 +23,90 @@ _renewmatch(Resub *mp, int ms, Resublist *sp) } /* - * Add ip to the list [lp, elp], but only if it is not there already. - * These work lists are stored and processed in increasing - * order of sp[0], so if the ip is there already, the one that's - * there already is a more left match and takes priority. + * Note optimization in _renewthread: + * *lp must be pending when _renewthread called; if *l has been looked + * at already, the optimization is a bug. */ -static Relist* -_renewthread1(Relist *lp, /* Relist to add to */ - Relist *elp, /* limit pointer for Relist */ +extern Relist* +_renewthread(Relist *lp, /* _relist to add to */ Reinst *ip, /* instruction to add */ int ms, Resublist *sep) /* pointers to subexpressions */ { Relist *p; - for(p=lp; p->inst; p++) - if(p->inst == ip) + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sep->m[0].s.sp < p->se.m[0].s.sp){ + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } return 0; - - if(p == elp) /* refuse to overflow buffer */ - return elp; - + } + } p->inst = ip; if(ms > 1) p->se = *sep; else p->se.m[0] = sep->m[0]; - (p+1)->inst = 0; + (++p)->inst = 0; return p; } -extern int -_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep) -{ - Relist *ap; - - ap = _renewthread1(lp, elp, ip, ms, sep); - if(ap == 0) - return 0; - if(ap == elp) - return -1; - - /* - * Added ip to list at ap. - * Expand any ORs right now, so that entire - * work list ends up being sorted by increasing m[0].sp. - */ - for(; ap->inst; ap++){ - if(ap->inst->type == OR){ - if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp) - return -1; - if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp) - return -1; - } - } - return 0; -} - /* * same as renewthread, but called with * initial empty start pointer. */ -extern int +extern Relist* _renewemptythread(Relist *lp, /* _relist to add to */ - Relist *elp, Reinst *ip, /* instruction to add */ int ms, char *sp) /* pointers to subexpressions */ { - Resublist sep; - + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sp < p->se.m[0].s.sp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + } + return 0; + } + } + p->inst = ip; if(ms > 1) - memset(&sep, 0, sizeof sep); - sep.m[0].s.sp = sp; - sep.m[0].e.ep = 0; - return _renewthread(lp, elp, ip, ms, &sep); + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + (++p)->inst = 0; + return p; } -extern int +extern Relist* _rrenewemptythread(Relist *lp, /* _relist to add to */ - Relist *elp, Reinst *ip, /* instruction to add */ int ms, Rune *rsp) /* pointers to subexpressions */ { - return _renewemptythread(lp, elp, ip, ms, (char*)rsp); + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(rsp < p->se.m[0].s.rsp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + (++p)->inst = 0; + return p; } diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c index ba0175ff..b8286dc7 100644 --- a/src/libregexp/regcomp.c +++ b/src/libregexp/regcomp.c @@ -232,7 +232,7 @@ optimize(Reprog *pp) int size; Reprog *npp; Reclass *cl; - int diff, proglen; + int diff; /* * get rid of NOOP chains @@ -249,13 +249,10 @@ optimize(Reprog *pp) * necessary. Reallocate to the actual space used * and then relocate the code. */ - proglen = freep - pp->firstinst; - size = sizeof(Reprog) + proglen*sizeof(Reinst); + size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); npp = realloc(pp, size); - if(npp==0 || npp==pp){ - pp->proglen = proglen; + if(npp==0 || npp==pp) return pp; - } diff = (char *)npp - (char *)pp; freep = (Reinst *)((char *)freep + diff); for(inst=npp->firstinst; inst<freep; inst++){ @@ -276,7 +273,6 @@ optimize(Reprog *pp) *(char**)(void*)&inst->u2.left += diff; } *(char**)(void*)&npp->startinst += diff; - npp->proglen = proglen; return npp; } diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h index fde99f54..6c88cd09 100644 --- a/src/libregexp/regcomp.h +++ b/src/libregexp/regcomp.h @@ -68,7 +68,7 @@ struct Reljunk Rune* reol; }; -extern int _renewthread(Relist*, Relist*, Reinst*, int, Resublist*); +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); extern void _renewmatch(Resub*, int, Resublist*); -extern int _renewemptythread(Relist*, Relist*, Reinst*, int, char*); -extern int _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c index 10d93f0c..c04182a1 100644 --- a/src/libregexp/regexec.c +++ b/src/libregexp/regexec.c @@ -2,6 +2,7 @@ #include "regexp9.h" #include "regcomp.h" + /* * return 0 if no match * >0 if a match @@ -12,14 +13,16 @@ regexec1(Reprog *progp, /* program to run */ char *bol, /* string to run machine on */ Resub *mp, /* subexpression elements */ int ms, /* number of elements at mp */ - Reljunk *j) + Reljunk *j +) { int flag=0; Reinst *inst; Relist *tlp; char *s; - int i, checkstart, n; + int i, checkstart; Rune r, *rp, *ep; + int n; Relist* tl; /* This list, next list */ Relist* nl; Relist* tle; /* ends of this and next list */ @@ -45,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */ switch(j->starttype) { case RUNE: p = utfrune(s, j->startchar); - if(p == 0 || (j->eol && p >= j->eol)) + if(p == 0 || s == j->eol) return match; s = p; break; @@ -53,7 +56,7 @@ regexec1(Reprog *progp, /* program to run */ if(s == bol) break; p = utfrune(s, '\n'); - if(p == 0 || (j->eol && p+1 >= j->eol)) + if(p == 0 || s == j->eol) return match; s = p+1; break; @@ -74,16 +77,17 @@ regexec1(Reprog *progp, /* program to run */ /* Add first instruction to current list */ if(match == 0) - _renewemptythread(tl, tle, progp->startinst, ms, s); + _renewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ - for(tlp=tl; tlp->inst; tlp++){ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ for(inst = tlp->inst; ; inst = inst->u2.next){ switch(inst->type){ case RUNE: /* regular character */ - if(inst->u1.r == r) - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(inst->u1.r == r){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; + } break; case LBRA: tlp->se.m[inst->u1.subid].s.sp = s; @@ -93,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */ continue; case ANY: if(r != '\n') - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case ANYNL: - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case BOL: @@ -112,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */ ep = inst->u1.cp->end; for(rp = inst->u1.cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; } @@ -123,12 +127,15 @@ regexec1(Reprog *progp, /* program to run */ if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case OR: - /* expanded during renewthread; just a place holder */ - break; + /* evaluate right choice later */ + if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; case END: /* Match! */ match = 1; tlp->se.m[0].e.ep = s; @@ -158,18 +165,19 @@ regexec2(Reprog *progp, /* program to run */ int rv; Relist *relist0, *relist1; - relist0 = malloc((progp->proglen+1)*sizeof(Relist)); + /* mark space */ + relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); if(relist0 == nil) return -1; - relist1 = malloc((progp->proglen+1)*sizeof(Relist)); + relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); if(relist1 == nil){ free(relist1); return -1; } j->relist[0] = relist0; j->relist[1] = relist1; - j->reliste[0] = relist0 + progp->proglen; - j->reliste[1] = relist1 + progp->proglen; + j->reliste[0] = relist0 + BIGLISTSIZE - 2; + j->reliste[1] = relist1 + BIGLISTSIZE - 2; rv = regexec1(progp, bol, mp, ms, j); free(relist0); @@ -210,8 +218,8 @@ regexec(Reprog *progp, /* program to run */ /* mark space */ j.relist[0] = relist0; j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 1; - j.reliste[1] = relist1 + nelem(relist1) - 1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; rv = regexec1(progp, bol, mp, ms, &j); if(rv >= 0) diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c index 907ddef3..ec7907da 100644 --- a/src/libregexp/rregexec.c +++ b/src/libregexp/rregexec.c @@ -9,9 +9,9 @@ */ static int rregexec1(Reprog *progp, /* program to run */ - Rune *bol, /* string to run machine on */ - Resub *mp, /* subexpression elements */ - int ms, /* number of elements at mp */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ Reljunk *j) { int flag=0; @@ -28,7 +28,7 @@ rregexec1(Reprog *progp, /* program to run */ Rune *p; match = 0; - checkstart = j->starttype; + checkstart = j->startchar; if(mp) for(i=0; i<ms; i++) { mp[i].s.rsp = 0; @@ -46,7 +46,7 @@ rregexec1(Reprog *progp, /* program to run */ switch(j->starttype) { case RUNE: p = runestrchr(s, j->startchar); - if(p == 0 || (j->reol && p >= j->reol)) + if(p == 0 || p == j->reol) return match; s = p; break; @@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */ if(s == bol) break; p = runestrchr(s, '\n'); - if(p == 0 || (j->reol && p+1 >= j->reol)) + if(p == 0 || s == j->reol) return match; s = p+1; break; @@ -71,16 +71,15 @@ rregexec1(Reprog *progp, /* program to run */ nl->inst = 0; /* Add first instruction to current list */ - if(match == 0) - _rrenewemptythread(tl, tle, progp->startinst, ms, s); + _rrenewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ for(tlp=tl; tlp->inst; tlp++){ - for(inst = tlp->inst; ; inst = inst->u2.next){ + for(inst=tlp->inst; ; inst = inst->u2.next){ switch(inst->type){ case RUNE: /* regular character */ if(inst->u1.r == r) - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case LBRA: @@ -91,11 +90,11 @@ rregexec1(Reprog *progp, /* program to run */ continue; case ANY: if(r != '\n') - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case ANYNL: - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case BOL: @@ -110,7 +109,7 @@ rregexec1(Reprog *progp, /* program to run */ ep = inst->u1.cp->end; for(rp = inst->u1.cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; } @@ -121,12 +120,15 @@ rregexec1(Reprog *progp, /* program to run */ if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) - if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case OR: - /* expanded during renewthread; just a place holder */ - break; + /* evaluate right choice later */ + if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; case END: /* Match! */ match = 1; tlp->se.m[0].e.rep = s; @@ -139,7 +141,7 @@ rregexec1(Reprog *progp, /* program to run */ } if(s == j->reol) break; - checkstart = j->starttype && nl->inst==0; + checkstart = j->startchar && nl->inst==0; s++; }while(r); return match; @@ -153,26 +155,15 @@ rregexec2(Reprog *progp, /* program to run */ Reljunk *j ) { - int rv; - Relist *relist0, *relist1; + Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; - relist0 = malloc((progp->proglen+1)*sizeof(Relist)); - if(relist0 == nil) - return -1; - relist1 = malloc((progp->proglen+1)*sizeof(Relist)); - if(relist1 == nil){ - free(relist1); - return -1; - } + /* mark space */ j->relist[0] = relist0; j->relist[1] = relist1; - j->reliste[0] = relist0 + progp->proglen; - j->reliste[1] = relist1 + progp->proglen; + j->reliste[0] = relist0 + nelem(relist0) - 2; + j->reliste[1] = relist1 + nelem(relist1) - 2; - rv = rregexec1(progp, bol, mp, ms, j); - free(relist0); - free(relist1); - return rv; + return rregexec1(progp, bol, mp, ms, j); } extern int @@ -208,8 +199,8 @@ rregexec(Reprog *progp, /* program to run */ /* mark space */ j.relist[0] = relist0; j.relist[1] = relist1; - j.reliste[0] = relist0 + nelem(relist0) - 1; - j.reliste[1] = relist1 + nelem(relist1) - 1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; rv = rregexec1(progp, bol, mp, ms, &j); if(rv >= 0) |