diff options
Diffstat (limited to 'src/cmd/grep/comp.c')
-rw-r--r-- | src/cmd/grep/comp.c | 277 |
1 files changed, 277 insertions, 0 deletions
diff --git a/src/cmd/grep/comp.c b/src/cmd/grep/comp.c new file mode 100644 index 00000000..4a3f3e8f --- /dev/null +++ b/src/cmd/grep/comp.c @@ -0,0 +1,277 @@ +#include "grep.h" + +/* + * incremental compiler. + * add the branch c to the + * state s. + */ +void +increment(State *s, int c) +{ + int i; + State *t, **tt; + Re *re1, *re2; + + nfollow = 0; + gen++; + matched = 0; + for(i=0; i<s->count; i++) + fol1(s->re[i], c); + qsort(follow, nfollow, sizeof(*follow), fcmp); + for(tt=&state0; t = *tt;) { + if(t->count > nfollow) { + tt = &t->linkleft; + goto cont; + } + if(t->count < nfollow) { + tt = &t->linkright; + goto cont; + } + for(i=0; i<nfollow; i++) { + re1 = t->re[i]; + re2 = follow[i]; + if(re1 > re2) { + tt = &t->linkleft; + goto cont; + } + if(re1 < re2) { + tt = &t->linkright; + goto cont; + } + } + if(!!matched && !t->match) { + tt = &t->linkleft; + goto cont; + } + if(!matched && !!t->match) { + tt = &t->linkright; + goto cont; + } + s->next[c] = t; + return; + cont:; + } + + t = sal(nfollow); + *tt = t; + for(i=0; i<nfollow; i++) { + re1 = follow[i]; + t->re[i] = re1; + } + s->next[c] = t; + t->match = matched; +} + +int +fcmp(const void *va, const void *vb) +{ + Re **aa, **bb; + Re *a, *b; + + aa = (Re**)va; + bb = (Re**)vb; + a = *aa; + b = *bb; + if(a > b) + return 1; + if(a < b) + return -1; + return 0; +} + +void +fol1(Re *r, int c) +{ + Re *r1; + +loop: + if(r->gen == gen) + return; + if(nfollow >= maxfollow) + error("nfollow"); + r->gen = gen; + switch(r->type) { + default: + error("fol1"); + + case Tcase: + if(c >= 0 && c < 256) + if(r1 = r->cases[c]) + follow[nfollow++] = r1; + if(r = r->next) + goto loop; + break; + + case Talt: + case Tor: + fol1(r->alt, c); + r = r->next; + goto loop; + + case Tbegin: + if(c == '\n' || c == Cbegin) + follow[nfollow++] = r->next; + break; + + case Tend: + if(c == '\n') + matched = 1; + break; + + case Tclass: + if(c >= r->lo && c <= r->hi) + follow[nfollow++] = r->next; + break; + } +} + +Rune tab1[] = +{ + 0x007f, + 0x07ff, +}; +Rune tab2[] = +{ + 0x003f, + 0x0fff, +}; + +Re2 +rclass(Rune p0, Rune p1) +{ + char xc0[6], xc1[6]; + int i, n, m; + Re2 x; + + if(p0 > p1) + return re2char(0xff, 0xff); // no match + + /* + * bust range into same length + * character sequences + */ + for(i=0; i<nelem(tab1); i++) { + m = tab1[i]; + if(p0 <= m && p1 > m) + return re2or(rclass(p0, m), rclass(m+1, p1)); + } + + /* + * bust range into part of a single page + * or into full pages + */ + for(i=0; i<nelem(tab2); i++) { + m = tab2[i]; + if((p0 & ~m) != (p1 & ~m)) { + if((p0 & m) != 0) + return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1)); + if((p1 & m) != m) + return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1)); + } + } + + n = runetochar(xc0, &p0); + i = runetochar(xc1, &p1); + if(i != n) + error("length"); + + x = re2char(xc0[0], xc1[0]); + for(i=1; i<n; i++) + x = re2cat(x, re2char(xc0[i], xc1[i])); + return x; +} + +int +pcmp(const void *va, const void *vb) +{ + int n; + Rune *a, *b; + + a = (Rune*)va; + b = (Rune*)vb; + + n = a[0] - b[0]; + if(n) + return n; + return a[1] - b[1]; +} + +/* + * convert character chass into + * run-pair ranges of matches. + * this is 10646/utf specific and + * needs to be changed for some + * other input character set. + * this is the key to a fast + * regular search of characters + * by looking at sequential bytes. + */ +Re2 +re2class(char *s) +{ + Rune pairs[200], *p, *q, ov; + int nc; + Re2 x; + + nc = 0; + if(*s == '^') { + nc = 1; + s++; + } + + p = pairs; + s += chartorune(p, s); + for(;;) { + if(*p == '\\') + s += chartorune(p, s); + if(*p == 0) + break; + p[1] = *p; + p += 2; + s += chartorune(p, s); + if(*p != '-') + continue; + s += chartorune(p, s); + if(*p == '\\') + s += chartorune(p, s); + if(*p == 0) + break; + p[-1] = *p; + s += chartorune(p, s); + } + *p = 0; + qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); + + q = pairs; + for(p=pairs+2; *p; p+=2) { + if(p[0] > p[1]) + continue; + if(p[0] > q[1] || p[1] < q[0]) { + q[2] = p[0]; + q[3] = p[1]; + q += 2; + continue; + } + if(p[0] < q[0]) + q[0] = p[0]; + if(p[1] > q[1]) + q[1] = p[1]; + } + q[2] = 0; + + p = pairs; + if(nc) { + x = rclass(0, p[0]-1); + ov = p[1]+1; + for(p+=2; *p; p+=2) { + x = re2or(x, rclass(ov, p[0]-1)); + ov = p[1]+1; + } + x = re2or(x, rclass(ov, 0xffff)); + } else { + x = rclass(p[0], p[1]); + for(p+=2; *p; p+=2) + x = re2or(x, rclass(p[0], p[1])); + } + return x; +} |