about | summary | refs | log | tree | commit | diff
path: root/src/libregexp
diff options
context:
space:
mode:
author Russ Cox <rsc@swtch.com> 2007-12-07 17:33:41 -0500
committer Russ Cox <rsc@swtch.com> 2007-12-07 17:33:41 -0500
commit 6d08a0f548c3d3eda199ee91a83aacd1f895718c (patch)
tree a6f1c990d39d8da40fba6681ac9013023d840786 /src/libregexp
parent 2deda14e4268e7e8af4910d453db73e210d3eb58 (diff)
download plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.tar.gz
plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.tar.bz2
plan9port-6d08a0f548c3d3eda199ee91a83aacd1f895718c.zip
libregexp: revert regexp fix
Diffstat (limited to 'src/libregexp')
-rw-r--r-- src/libregexp/regaux.c 105
-rw-r--r-- src/libregexp/regcomp.c 10
-rw-r--r-- src/libregexp/regcomp.h 6
-rw-r--r-- src/libregexp/regexec.c 48
-rw-r--r-- src/libregexp/rregexec.c 61
5 files changed, 113 insertions, 117 deletions
diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c
index 39e67725..b854b5ac 100644
--- a/src/libregexp/regaux.c
+++ b/src/libregexp/regaux.c
@@ -23,89 +23,90 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
}
/*
- * Add ip to the list [lp, elp], but only if it is not there already.
- * These work lists are stored and processed in increasing
- * order of sp[0], so if the ip is there already, the one that's
- * there already is a more left match and takes priority.
+ * Note optimization in _renewthread:
+ * *lp must be pending when _renewthread called; if *l has been looked
+ * at already, the optimization is a bug.
*/
-static Relist*
-_renewthread1(Relist *lp, /* Relist to add to */
- Relist *elp, /* limit pointer for Relist */
+extern Relist*
+_renewthread(Relist *lp, /* _relist to add to */
Reinst *ip, /* instruction to add */
int ms,
Resublist *sep) /* pointers to subexpressions */
{
Relist *p;
- for(p=lp; p->inst; p++)
- if(p->inst == ip)
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(sep->m[0].s.sp < p->se.m[0].s.sp){
+ if(ms > 1)
+ p->se = *sep;
+ else
+ p->se.m[0] = sep->m[0];
+ }
return 0;
-
- if(p == elp) /* refuse to overflow buffer */
- return elp;
-
+ }
+ }
p->inst = ip;
if(ms > 1)
p->se = *sep;
else
p->se.m[0] = sep->m[0];
- (p+1)->inst = 0;
+ (++p)->inst = 0;
return p;
}
-extern int
-_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
-{
- Relist *ap;
-
- ap = _renewthread1(lp, elp, ip, ms, sep);
- if(ap == 0)
- return 0;
- if(ap == elp)
- return -1;
-
- /*
- * Added ip to list at ap.
- * Expand any ORs right now, so that entire
- * work list ends up being sorted by increasing m[0].sp.
- */
- for(; ap->inst; ap++){
- if(ap->inst->type == OR){
- if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
- return -1;
- if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
- return -1;
- }
- }
- return 0;
-}
-
/*
* same as renewthread, but called with
* initial empty start pointer.
*/
-extern int
+extern Relist*
_renewemptythread(Relist *lp, /* _relist to add to */
- Relist *elp,
Reinst *ip, /* instruction to add */
int ms,
char *sp) /* pointers to subexpressions */
{
- Resublist sep;
-
+ Relist *p;
+
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(sp < p->se.m[0].s.sp) {
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.sp = sp;
+ }
+ return 0;
+ }
+ }
+ p->inst = ip;
if(ms > 1)
- memset(&sep, 0, sizeof sep);
- sep.m[0].s.sp = sp;
- sep.m[0].e.ep = 0;
- return _renewthread(lp, elp, ip, ms, &sep);
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.sp = sp;
+ (++p)->inst = 0;
+ return p;
}
-extern int
+extern Relist*
_rrenewemptythread(Relist *lp, /* _relist to add to */
- Relist *elp,
Reinst *ip, /* instruction to add */
int ms,
Rune *rsp) /* pointers to subexpressions */
{
- return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
+ Relist *p;
+
+ for(p=lp; p->inst; p++){
+ if(p->inst == ip){
+ if(rsp < p->se.m[0].s.rsp) {
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ }
+ return 0;
+ }
+ }
+ p->inst = ip;
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ (++p)->inst = 0;
+ return p;
}
diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
index ba0175ff..b8286dc7 100644
--- a/src/libregexp/regcomp.c
+++ b/src/libregexp/regcomp.c
@@ -232,7 +232,7 @@ optimize(Reprog *pp)
int size;
Reprog *npp;
Reclass *cl;
- int diff, proglen;
+ int diff;
/*
* get rid of NOOP chains
@@ -249,13 +249,10 @@ optimize(Reprog *pp)
* necessary. Reallocate to the actual space used
* and then relocate the code.
*/
- proglen = freep - pp->firstinst;
- size = sizeof(Reprog) + proglen*sizeof(Reinst);
+ size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
npp = realloc(pp, size);
- if(npp==0 || npp==pp){
- pp->proglen = proglen;
+ if(npp==0 || npp==pp)
return pp;
- }
diff = (char *)npp - (char *)pp;
freep = (Reinst *)((char *)freep + diff);
for(inst=npp->firstinst; inst<freep; inst++){
@@ -276,7 +273,6 @@ optimize(Reprog *pp)
*(char**)(void*)&inst->u2.left += diff;
}
*(char**)(void*)&npp->startinst += diff;
- npp->proglen = proglen;
return npp;
}
diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
index fde99f54..6c88cd09 100644
--- a/src/libregexp/regcomp.h
+++ b/src/libregexp/regcomp.h
@@ -68,7 +68,7 @@ struct Reljunk
Rune* reol;
};
-extern int _renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
+extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
extern void _renewmatch(Resub*, int, Resublist*);
-extern int _renewemptythread(Relist*, Relist*, Reinst*, int, char*);
-extern int _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
+extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
+extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c
index 10d93f0c..c04182a1 100644
--- a/src/libregexp/regexec.c
+++ b/src/libregexp/regexec.c
@@ -2,6 +2,7 @@
#include "regexp9.h"
#include "regcomp.h"
+
/*
* return 0 if no match
* >0 if a match
@@ -12,14 +13,16 @@ regexec1(Reprog *progp, /* program to run */
char *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
- Reljunk *j)
+ Reljunk *j
+)
{
int flag=0;
Reinst *inst;
Relist *tlp;
char *s;
- int i, checkstart, n;
+ int i, checkstart;
Rune r, *rp, *ep;
+ int n;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
@@ -45,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
switch(j->starttype) {
case RUNE:
p = utfrune(s, j->startchar);
- if(p == 0 || (j->eol && p >= j->eol))
+ if(p == 0 || s == j->eol)
return match;
s = p;
break;
@@ -53,7 +56,7 @@ regexec1(Reprog *progp, /* program to run */
if(s == bol)
break;
p = utfrune(s, '\n');
- if(p == 0 || (j->eol && p+1 >= j->eol))
+ if(p == 0 || s == j->eol)
return match;
s = p+1;
break;
@@ -74,16 +77,17 @@ regexec1(Reprog *progp, /* program to run */
/* Add first instruction to current list */
if(match == 0)
- _renewemptythread(tl, tle, progp->startinst, ms, s);
+ _renewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
- for(tlp=tl; tlp->inst; tlp++){
+ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
for(inst = tlp->inst; ; inst = inst->u2.next){
switch(inst->type){
case RUNE: /* regular character */
- if(inst->u1.r == r)
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(inst->u1.r == r){
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
+ }
break;
case LBRA:
tlp->se.m[inst->u1.subid].s.sp = s;
@@ -93,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
continue;
case ANY:
if(r != '\n')
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
@@ -112,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
}
@@ -123,12 +127,15 @@ regexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
- /* expanded during renewthread; just a place holder */
- break;
+ /* evaluate right choice later */
+ if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
+ return -1;
+ /* efficiency: advance and re-evaluate */
+ continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].e.ep = s;
@@ -158,18 +165,19 @@ regexec2(Reprog *progp, /* program to run */
int rv;
Relist *relist0, *relist1;
- relist0 = malloc((progp->proglen+1)*sizeof(Relist));
+ /* mark space */
+ relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist0 == nil)
return -1;
- relist1 = malloc((progp->proglen+1)*sizeof(Relist));
+ relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
if(relist1 == nil){
free(relist1);
return -1;
}
j->relist[0] = relist0;
j->relist[1] = relist1;
- j->reliste[0] = relist0 + progp->proglen;
- j->reliste[1] = relist1 + progp->proglen;
+ j->reliste[0] = relist0 + BIGLISTSIZE - 2;
+ j->reliste[1] = relist1 + BIGLISTSIZE - 2;
rv = regexec1(progp, bol, mp, ms, j);
free(relist0);
@@ -210,8 +218,8 @@ regexec(Reprog *progp, /* program to run */
/* mark space */
j.relist[0] = relist0;
j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 1;
- j.reliste[1] = relist1 + nelem(relist1) - 1;
+ j.reliste[0] = relist0 + nelem(relist0) - 2;
+ j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = regexec1(progp, bol, mp, ms, &j);
if(rv >= 0)
diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
index 907ddef3..ec7907da 100644
--- a/src/libregexp/rregexec.c
+++ b/src/libregexp/rregexec.c
@@ -9,9 +9,9 @@
*/
static int
rregexec1(Reprog *progp, /* program to run */
- Rune *bol, /* string to run machine on */
- Resub *mp, /* subexpression elements */
- int ms, /* number of elements at mp */
+ Rune *bol, /* string to run machine on */
+ Resub *mp, /* subexpression elements */
+ int ms, /* number of elements at mp */
Reljunk *j)
{
int flag=0;
@@ -28,7 +28,7 @@ rregexec1(Reprog *progp, /* program to run */
Rune *p;
match = 0;
- checkstart = j->starttype;
+ checkstart = j->startchar;
if(mp)
for(i=0; i<ms; i++) {
mp[i].s.rsp = 0;
@@ -46,7 +46,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(j->starttype) {
case RUNE:
p = runestrchr(s, j->startchar);
- if(p == 0 || (j->reol && p >= j->reol))
+ if(p == 0 || p == j->reol)
return match;
s = p;
break;
@@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
if(s == bol)
break;
p = runestrchr(s, '\n');
- if(p == 0 || (j->reol && p+1 >= j->reol))
+ if(p == 0 || s == j->reol)
return match;
s = p+1;
break;
@@ -71,16 +71,15 @@ rregexec1(Reprog *progp, /* program to run */
nl->inst = 0;
/* Add first instruction to current list */
- if(match == 0)
- _rrenewemptythread(tl, tle, progp->startinst, ms, s);
+ _rrenewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){
- for(inst = tlp->inst; ; inst = inst->u2.next){
+ for(inst=tlp->inst; ; inst = inst->u2.next){
switch(inst->type){
case RUNE: /* regular character */
if(inst->u1.r == r)
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case LBRA:
@@ -91,11 +90,11 @@ rregexec1(Reprog *progp, /* program to run */
continue;
case ANY:
if(r != '\n')
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
@@ -110,7 +109,7 @@ rregexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
}
@@ -121,12 +120,15 @@ rregexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
- if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
- /* expanded during renewthread; just a place holder */
- break;
+ /* evaluate right choice later */
+ if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
+ return -1;
+ /* efficiency: advance and re-evaluate */
+ continue;
case END: /* Match! */
match = 1;
tlp->se.m[0].e.rep = s;
@@ -139,7 +141,7 @@ rregexec1(Reprog *progp, /* program to run */
}
if(s == j->reol)
break;
- checkstart = j->starttype && nl->inst==0;
+ checkstart = j->startchar && nl->inst==0;
s++;
}while(r);
return match;
@@ -153,26 +155,15 @@ rregexec2(Reprog *progp, /* program to run */
Reljunk *j
)
{
- int rv;
- Relist *relist0, *relist1;
+ Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
- relist0 = malloc((progp->proglen+1)*sizeof(Relist));
- if(relist0 == nil)
- return -1;
- relist1 = malloc((progp->proglen+1)*sizeof(Relist));
- if(relist1 == nil){
- free(relist1);
- return -1;
- }
+ /* mark space */
j->relist[0] = relist0;
j->relist[1] = relist1;
- j->reliste[0] = relist0 + progp->proglen;
- j->reliste[1] = relist1 + progp->proglen;
+ j->reliste[0] = relist0 + nelem(relist0) - 2;
+ j->reliste[1] = relist1 + nelem(relist1) - 2;
- rv = rregexec1(progp, bol, mp, ms, j);
- free(relist0);
- free(relist1);
- return rv;
+ return rregexec1(progp, bol, mp, ms, j);
}
extern int
@@ -208,8 +199,8 @@ rregexec(Reprog *progp, /* program to run */
/* mark space */
j.relist[0] = relist0;
j.relist[1] = relist1;
- j.reliste[0] = relist0 + nelem(relist0) - 1;
- j.reliste[1] = relist1 + nelem(relist1) - 1;
+ j.reliste[0] = relist0 + nelem(relist0) - 2;
+ j.reliste[1] = relist1 + nelem(relist1) - 2;
rv = rregexec1(progp, bol, mp, ms, &j);
if(rv >= 0)