From 62390091b7ff715e9336cc69a3ee41a6c458221e Mon Sep 17 00:00:00 2001 From: rsc Date: Fri, 5 Mar 2004 05:13:56 +0000 Subject: Update libregexp with recent bug fixes from Plan 9. --- src/libregexp/lib9.h | 8 ++------ src/libregexp/lib9.h.std | 6 ++++++ src/libregexp/mkfile | 1 - src/libregexp/regaux.c | 46 +++++++++++++++++++++++++++++++++++++++++----- src/libregexp/regcomp.c | 10 ++++------ src/libregexp/regcomp.h | 6 +++--- src/libregexp/regerror.c | 2 +- src/libregexp/regexec.c | 42 +++++++++++++++++++++++++++--------------- src/libregexp/regsub.c | 3 ++- src/libregexp/rregaux.c | 26 -------------------------- src/libregexp/rregexec.c | 20 ++++++++++---------- src/libregexp/rregsub.c | 22 +++++++++++++++------- 12 files changed, 111 insertions(+), 81 deletions(-) create mode 100644 src/libregexp/lib9.h.std delete mode 100644 src/libregexp/rregaux.c diff --git a/src/libregexp/lib9.h b/src/libregexp/lib9.h index d022656d..07a1f976 100644 --- a/src/libregexp/lib9.h +++ b/src/libregexp/lib9.h @@ -1,6 +1,2 @@ -#include -#include -#include -#include -#include - +#include +#include diff --git a/src/libregexp/lib9.h.std b/src/libregexp/lib9.h.std new file mode 100644 index 00000000..d022656d --- /dev/null +++ b/src/libregexp/lib9.h.std @@ -0,0 +1,6 @@ +#include +#include +#include +#include +#include + diff --git a/src/libregexp/mkfile b/src/libregexp/mkfile index 8cb740cc..ad482240 100644 --- a/src/libregexp/mkfile +++ b/src/libregexp/mkfile @@ -9,7 +9,6 @@ OFILES=\ regexec.$O\ regsub.$O\ regaux.$O\ - rregaux.$O\ rregexec.$O\ rregsub.$O\ diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c index 956c1eb0..b854b5ac 100644 --- a/src/libregexp/regaux.c +++ b/src/libregexp/regaux.c @@ -30,19 +30,27 @@ _renewmatch(Resub *mp, int ms, Resublist *sp) extern Relist* _renewthread(Relist *lp, /* _relist to add to */ Reinst *ip, /* instruction to add */ + int ms, Resublist *sep) /* pointers to subexpressions */ { Relist *p; for(p=lp; p->inst; p++){ if(p->inst == ip){ - if((sep)->m[0].s.sp < p->se.m[0].s.sp) - p->se = *sep; + if(sep->m[0].s.sp < p->se.m[0].s.sp){ + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } return 0; } } p->inst = ip; - p->se = *sep; + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; (++p)->inst = 0; return p; } @@ -54,6 +62,7 @@ _renewthread(Relist *lp, /* _relist to add to */ extern Relist* _renewemptythread(Relist *lp, /* _relist to add to */ Reinst *ip, /* instruction to add */ + int ms, char *sp) /* pointers to subexpressions */ { Relist *p; @@ -61,16 +70,43 @@ _renewemptythread(Relist *lp, /* _relist to add to */ for(p=lp; p->inst; p++){ if(p->inst == ip){ if(sp < p->se.m[0].s.sp) { - memset((void *)&p->se, 0, sizeof(p->se)); + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); p->se.m[0].s.sp = sp; } return 0; } } p->inst = ip; - memset((void *)&p->se, 0, sizeof(p->se)); + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); p->se.m[0].s.sp = sp; (++p)->inst = 0; return p; } +extern Relist* +_rrenewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Rune *rsp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(rsp < p->se.m[0].s.rsp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + (++p)->inst = 0; + return p; +} diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c index 6c6939c6..796efc41 100644 --- a/src/libregexp/regcomp.c +++ b/src/libregexp/regcomp.c @@ -15,8 +15,6 @@ struct Node Reinst* last; }Node; -Reprog RePrOg; - #define NSTACK 20 static Node andstack[NSTACK]; static Node *andp; @@ -252,7 +250,7 @@ optimize(Reprog *pp) * and then relocate the code. */ size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); - npp = (Reprog *)realloc(pp, size); + npp = realloc(pp, size); if(npp==0 || npp==pp) return pp; diff = (char *)npp - (char *)pp; @@ -303,12 +301,12 @@ dump(Reprog *pp) print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); if(l->type == RUNE) - print("\t%C\n", l->r); + print("\t%C\n", l->u1.r); else if(l->type == CCLASS || l->type == NCCLASS){ print("\t["); if(l->type == NCCLASS) print("^"); - for(p = l->cp->spans; p < l->cp->end; p += 2) + for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2) if(p[0] == p[1]) print("%C", p[0]); else @@ -477,7 +475,7 @@ regcomp1(char *s, int literal, int dot_type) Reprog *pp; /* get memory for the program */ - pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); + pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); if(pp == 0){ regerror("out of memory"); return 0; diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h index a6728b0e..6c88cd09 100644 --- a/src/libregexp/regcomp.h +++ b/src/libregexp/regcomp.h @@ -68,7 +68,7 @@ struct Reljunk Rune* reol; }; -extern Relist* _renewthread(Relist*, Reinst*, Resublist*); +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); extern void _renewmatch(Resub*, int, Resublist*); -extern Relist* _renewemptythread(Relist*, Reinst*, char*); -extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/src/libregexp/regerror.c b/src/libregexp/regerror.c index 2cd1e3e2..99ff0c74 100644 --- a/src/libregexp/regerror.c +++ b/src/libregexp/regerror.c @@ -10,5 +10,5 @@ regerror(char *s) strcat(buf, s); strcat(buf, "\n"); write(2, buf, strlen(buf)); - exit(1); + exits("regerr"); } diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c index c9f1eba2..62ab1a31 100644 --- a/src/libregexp/regexec.c +++ b/src/libregexp/regexec.c @@ -48,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */ switch(j->starttype) { case RUNE: p = utfrune(s, j->startchar); - if(p == 0) + if(p == 0 || s == j->eol) return match; s = p; break; @@ -56,14 +56,14 @@ regexec1(Reprog *progp, /* program to run */ if(s == bol) break; p = utfrune(s, '\n'); - if(p == 0) + if(p == 0 || s == j->eol) return match; s = p; break; } } r = *(uchar*)s; - if(r < (Rune)Runeself) + if(r < Runeself) n = 1; else n = chartorune(&r, s); @@ -77,7 +77,7 @@ regexec1(Reprog *progp, /* program to run */ /* Add first instruction to current list */ if(match == 0) - _renewemptythread(tl, progp->startinst, s); + _renewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ @@ -85,7 +85,7 @@ regexec1(Reprog *progp, /* program to run */ switch(inst->type){ case RUNE: /* regular character */ if(inst->u1.r == r){ - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; } break; @@ -97,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */ continue; case ANY: if(r != '\n') - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case ANYNL: - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case BOL: @@ -116,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */ ep = inst->u1.cp->end; for(rp = inst->u1.cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; } @@ -127,12 +127,12 @@ regexec1(Reprog *progp, /* program to run */ if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case OR: /* evaluate right choice later */ - if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; @@ -162,15 +162,27 @@ regexec2(Reprog *progp, /* program to run */ Reljunk *j ) { - Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE]; + int rv; + Relist *relist0, *relist1; /* mark space */ + relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist0 == nil) + return -1; + relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist1 == nil){ + free(relist1); + return -1; + } j->relist[0] = relist0; j->relist[1] = relist1; - j->reliste[0] = relist0 + nelem(relist0) - 2; - j->reliste[1] = relist1 + nelem(relist1) - 2; + j->reliste[0] = relist0 + BIGLISTSIZE - 2; + j->reliste[1] = relist1 + BIGLISTSIZE - 2; - return regexec1(progp, bol, mp, ms, j); + rv = regexec1(progp, bol, mp, ms, j); + free(relist0); + free(relist1); + return rv; } extern int @@ -196,7 +208,7 @@ regexec(Reprog *progp, /* program to run */ } j.starttype = 0; j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { j.starttype = RUNE; j.startchar = progp->startinst->u1.r; } diff --git a/src/libregexp/regsub.c b/src/libregexp/regsub.c index 6de2c957..ab5ad99d 100644 --- a/src/libregexp/regsub.c +++ b/src/libregexp/regsub.c @@ -53,9 +53,10 @@ regsub(char *sp, /* source string */ ssp < mp[0].e.ep; ssp++) if(dp < ep) *dp++ = *ssp; - }else + }else{ if(dp < ep) *dp++ = *sp; + } sp++; } *dp = '\0'; diff --git a/src/libregexp/rregaux.c b/src/libregexp/rregaux.c deleted file mode 100644 index f4cb0068..00000000 --- a/src/libregexp/rregaux.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "lib9.h" -#include "regexp9.h" -#include "regcomp.h" - -extern Relist* -_rrenewemptythread(Relist *lp, /* _relist to add to */ - Reinst *ip, /* instruction to add */ - Rune *rsp) /* pointers to subexpressions */ -{ - Relist *p; - - for(p=lp; p->inst; p++){ - if(p->inst == ip){ - if(rsp < p->se.m[0].s.rsp) { - memset((void *)&p->se, 0, sizeof(p->se)); - p->se.m[0].s.rsp = rsp; - } - return 0; - } - } - p->inst = ip; - memset((void *)&p->se, 0, sizeof(p->se)); - p->se.m[0].s.rsp = rsp; - (++p)->inst = 0; - return p; -} diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c index e96c9721..eece0eb9 100644 --- a/src/libregexp/rregexec.c +++ b/src/libregexp/rregexec.c @@ -45,7 +45,7 @@ rregexec1(Reprog *progp, /* program to run */ switch(j->starttype) { case RUNE: while(*s != j->startchar) { - if(*s == 0) + if(*s == 0 || s == j->reol) return match; s++; } @@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */ if(s == bol) break; while(*s != '\n') { - if(*s == 0) + if(*s == 0 || s == j->reol) return match; s++; } @@ -72,7 +72,7 @@ rregexec1(Reprog *progp, /* program to run */ nl->inst = 0; /* Add first instruction to current list */ - _rrenewemptythread(tl, progp->startinst, s); + _rrenewemptythread(tl, progp->startinst, ms, s); /* Execute machine until current list is empty */ for(tlp=tl; tlp->inst; tlp++){ @@ -80,7 +80,7 @@ rregexec1(Reprog *progp, /* program to run */ switch(inst->type){ case RUNE: /* regular character */ if(inst->u1.r == r) - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case LBRA: @@ -91,11 +91,11 @@ rregexec1(Reprog *progp, /* program to run */ continue; case ANY: if(r != '\n') - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case ANYNL: - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case BOL: @@ -110,7 +110,7 @@ rregexec1(Reprog *progp, /* program to run */ ep = inst->u1.cp->end; for(rp = inst->u1.cp->spans; rp < ep; rp += 2) if(r >= rp[0] && r <= rp[1]){ - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; } @@ -121,12 +121,12 @@ rregexec1(Reprog *progp, /* program to run */ if(r >= rp[0] && r <= rp[1]) break; if(rp == ep) - if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) return -1; break; case OR: /* evaluate right choice later */ - if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) return -1; /* efficiency: advance and re-evaluate */ continue; @@ -190,7 +190,7 @@ rregexec(Reprog *progp, /* program to run */ } j.starttype = 0; j.startchar = 0; - if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { j.starttype = RUNE; j.startchar = progp->startinst->u1.r; } diff --git a/src/libregexp/rregsub.c b/src/libregexp/rregsub.c index 15f3c174..5a4a564d 100644 --- a/src/libregexp/rregsub.c +++ b/src/libregexp/rregsub.c @@ -5,12 +5,14 @@ extern void rregsub(Rune *sp, /* source string */ Rune *dp, /* destination string */ + int dlen, Resub *mp, /* subexpression elements */ int ms) /* number of elements pointed to by mp */ { - Rune *ssp; + Rune *ssp, *ep; int i; + ep = dp+(dlen/sizeof(Rune))-1; while(*sp != '\0'){ if(*sp == '\\'){ switch(*++sp){ @@ -29,16 +31,19 @@ rregsub(Rune *sp, /* source string */ for(ssp = mp[i].s.rsp; ssp < mp[i].e.rep; ssp++) - *dp++ = *ssp; + if(dp < ep) + *dp++ = *ssp; break; case '\\': - *dp++ = '\\'; + if(dp < ep) + *dp++ = '\\'; break; case '\0': sp--; break; default: - *dp++ = *sp; + if(dp < ep) + *dp++ = *sp; break; } }else if(*sp == '&'){ @@ -46,9 +51,12 @@ rregsub(Rune *sp, /* source string */ if(mp[0].s.rsp != 0) for(ssp = mp[0].s.rsp; ssp < mp[0].e.rep; ssp++) - *dp++ = *ssp; - }else - *dp++ = *sp; + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } sp++; } *dp = '\0'; -- cgit v1.2.3