From 3d31240bfdaee03aff28103b530ff593e9ddbbc5 Mon Sep 17 00:00:00 2001 From: David du Colombier <0intro@gmail.com> Date: Tue, 2 Jul 2013 06:39:17 +0200 Subject: libregexp: update from Plan 9 R=rsc https://codereview.appspot.com/10690044 --- src/libregexp/regcomp.c | 23 +++++++++++++++++------ src/libregexp/regcomp.h | 9 +-------- src/libregexp/regsub.c | 7 +++---- src/libregexp/test.c | 3 +-- src/libregexp/test2.c | 7 +++---- 5 files changed, 25 insertions(+), 24 deletions(-) (limited to 'src/libregexp') diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c index 0dabd4f0..09678755 100644 --- a/src/libregexp/regcomp.c +++ b/src/libregexp/regcomp.c @@ -15,6 +15,12 @@ struct Node Reinst* last; }Node; +/* max character classes per program is nelem(reprog->class) */ +static Reprog *reprog; + +/* max rune ranges per character class is nelem(classp->spans)/2 */ +#define NCCRUNE nelem(classp->spans) + #define NSTACK 20 static Node andstack[NSTACK]; static Node *andp; @@ -321,8 +327,8 @@ dump(Reprog *pp) static Reclass* newclass(void) { - if(nclass >= NCLASS) - regerr2("too many character classes; limit", NCLASS+'0'); + if(nclass >= nelem(reprog->class)) + rcerror("too many character classes; increase Reprog.class size"); return &(classp[nclass++]); } @@ -407,7 +413,7 @@ bldcclass(void) } /* parse class into a set of spans */ - for(; ep<&r[NCCRUNE];){ + while(ep < &r[NCCRUNE-1]){ if(rune == 0){ rcerror("malformed '[]'"); return 0; @@ -431,6 +437,10 @@ bldcclass(void) } quoted = nextc(&rune); } + if(ep >= &r[NCCRUNE-1]) { + rcerror("char class too large; increase Reclass.spans size"); + return 0; + } /* sort on span start */ for(p = r; p < ep; p += 2){ @@ -454,9 +464,10 @@ bldcclass(void) np[0] = *p++; np[1] = *p++; for(; p < ep; p += 2) - if(p[0] <= np[1]){ - if(p[1] > np[1]) - np[1] = p[1]; + /* overlapping or adjacent ranges? 
*/ + if(p[0] <= np[1] + 1){ + if(p[1] >= np[1]) + np[1] = p[1]; /* coalesce */ } else { np += 2; np[0] = p[0]; diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h index 6c88cd09..4b9a483b 100644 --- a/src/libregexp/regcomp.h +++ b/src/libregexp/regcomp.h @@ -12,13 +12,6 @@ struct Resublist Resub m[NSUBEXP]; }; -/* max character classes per program */ -extern Reprog RePrOg; -#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) - -/* max rune ranges per character class */ -#define NCCRUNE (sizeof(Reclass)/sizeof(Rune)) - /* * Actions and Tokens (Reinst types) * @@ -48,7 +41,7 @@ extern Reprog RePrOg; * regexec execution lists */ #define LISTSIZE 10 -#define BIGLISTSIZE (10*LISTSIZE) +#define BIGLISTSIZE (25*LISTSIZE) typedef struct Relist Relist; struct Relist { diff --git a/src/libregexp/regsub.c b/src/libregexp/regsub.c index ab5ad99d..579d1243 100644 --- a/src/libregexp/regsub.c +++ b/src/libregexp/regsub.c @@ -27,7 +27,7 @@ regsub(char *sp, /* source string */ case '8': case '9': i = *sp-'0'; - if(mp[i].s.sp != 0 && mp!=0 && ms>i) + if(mp!=0 && mp[i].s.sp != 0 && ms>i) for(ssp = mp[i].s.sp; ssp < mp[i].e.ep; ssp++) @@ -46,9 +46,8 @@ regsub(char *sp, /* source string */ *dp++ = *sp; break; } - }else if(*sp == '&'){ - if(mp[0].s.sp != 0 && mp!=0 && ms>0) - if(mp[0].s.sp != 0) + }else if(*sp == '&'){ + if(mp!=0 && mp[0].s.sp != 0 && ms>0) for(ssp = mp[0].s.sp; ssp < mp[0].e.ep; ssp++) if(dp < ep) diff --git a/src/libregexp/test.c b/src/libregexp/test.c index 83533ee6..f6bea534 100644 --- a/src/libregexp/test.c +++ b/src/libregexp/test.c @@ -22,17 +22,16 @@ struct x t[] = { { 0, 0, 0 }, }; +int main(int ac, char **av) { Resub rs[10]; char dst[128]; - int n; struct x *tp; for(tp = t; tp->re; tp++) tp->p = regcomp(tp->re); - for(tp = t; tp->re; tp++){ print("%s VIA %s", av[1], tp->re); memset(rs, 0, sizeof rs); diff --git a/src/libregexp/test2.c b/src/libregexp/test2.c index 150953e4..62d5213a 100644 --- a/src/libregexp/test2.c +++ b/src/libregexp/test2.c 
@@ -1,20 +1,19 @@ #include "lib9.h" #include <regexp9.h> - +int main(int ac, char **av) { Resub rs[10]; Reprog *p; char *s; - int i; p = regcomp("[^a-z]"); s = "\n"; if(regexec(p, s, rs, 10)) - print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep); + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); s = "0"; if(regexec(p, s, rs, 10)) - print("%s %lux %lux %lux\n", s, s, rs[0].sp, rs[0].ep); + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); exit(0); } -- cgit v1.2.3