From c8af1ab17b72f500c27688598dbb893f09f62c53 Mon Sep 17 00:00:00 2001 From: rsc Date: Mon, 19 Apr 2004 18:13:05 +0000 Subject: tweaks --- src/cmd/look.c | 342 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 src/cmd/look.c (limited to 'src') diff --git a/src/cmd/look.c b/src/cmd/look.c new file mode 100644 index 00000000..5b300868 --- /dev/null +++ b/src/cmd/look.c @@ -0,0 +1,342 @@ +#include +#include +#include + /* Macros for Rune support of ctype.h-like functions */ + +#define isupper(r) (L'A' <= (r) && (r) <= L'Z') +#define islower(r) (L'a' <= (r) && (r) <= L'z') +#define isalpha(r) (isupper(r) || islower(r)) +#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) + +#define isdigit(r) (L'0' <= (r) && (r) <= L'9') + +#define isalnum(r) (isalpha(r) || isdigit(r)) + +#define isspace(r) ((r) == L' ' || (r) == L'\t' \ + || (0x0A <= (r) && (r) <= 0x0D)) + +#define tolower(r) ((r)-'A'+'a') + +#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) + +#define WORDSIZ 4000 +char *filename = "#9/lib/words"; +Biobuf *dfile; +Biobuf bout; +Biobuf bin; + +int fold; +int direc; +int exact; +int iflag; +int rev = 1; /*-1 for reverse-ordered file, not implemented*/ +int (*compare)(Rune*, Rune*); +Rune tab = '\t'; +Rune entry[WORDSIZ]; +Rune word[WORDSIZ]; +Rune key[50], orig[50]; +Rune latin_fold_tab[] = +{ +/* Table to fold latin 1 characters to ASCII equivalents + based at Rune value 0xc0 + + À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï + Ð Ñ Ò Ó Ô Õ Ö × + Ø Ù Ú Û Ü Ý Þ ß + à á â ã ä å æ ç + è é ê ë ì í î ï + ð ñ ò ó ô õ ö ÷ + ø ù ú û ü ý þ ÿ +*/ + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', + 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , + 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', + 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , + 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', +}; + +int locate(void); +int acomp(Rune*, Rune*); +int getword(Biobuf*, Rune *rp, int n); +void torune(char*, Rune*); +void rcanon(Rune*, Rune*); +int ncomp(Rune*, Rune*); + +void +main(int argc, char *argv[]) +{ + int n; + + filename = unsharp(filename); + + Binit(&bin, 0, OREAD); + Binit(&bout, 1, OWRITE); + compare = acomp; + ARGBEGIN{ + case 'd': + direc++; + break; + case 'f': + fold++; + break; + case 'i': + iflag++; + break; + case 'n': + compare = ncomp; + break; + case 't': + chartorune(&tab,ARGF()); + break; + case 'x': + exact++; + break; + default: + fprint(2, "%s: bad option %c\n", argv0, ARGC()); + fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0); + exits("usage"); + } ARGEND + if(!iflag){ + if(argc >= 1) { + torune(argv[0], orig); + argv++; + argc--; + } else + iflag++; + } + if(argc < 1) { + direc++; + fold++; + } else + filename = argv[0]; + if (!iflag) + rcanon(orig, key); + dfile = Bopen(filename, OREAD); + if(dfile == 0) { + fprint(2, "look: can't open %s\n", filename); + exits("no dictionary"); + } + if(!iflag) + if(!locate()) + exits("not found"); + do { + if(iflag) { + Bflush(&bout); + if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) + exits(0); + rcanon(orig, key); + if(!locate()) + continue; + } + if (!exact || !acomp(word, key)) + Bprint(&bout, "%S\n", entry); + while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -1: + if(exact) + break; + case 0: + if (!exact || !acomp(word, orig)) + Bprint(&bout, "%S\n", entry); + continue; + } + break; + } + } while(iflag); + exits(0); +} + +int +locate(void) +{ + long top, bot, mid; + long c; + int n; + + bot = 0; + top = Bseek(dfile, 0L, 2); + for(;;) { + mid = (top+bot) / 2; + Bseek(dfile, mid, 0); + do + c = Bgetrune(dfile); + while(c>=0 && c!='\n'); + mid = Boffset(dfile); + if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) + break; + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -2: + case -1: + case 0: + if(top <= mid) + break; + top = mid; + continue; + case 1: + case 2: + bot = mid; + continue; + } + break; + } + Bseek(dfile, bot, 0); + while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -2: + return 0; + case -1: + if(exact) + return 0; + case 0: + return 1; + case 1: + case 2: + continue; + } + } + return 0; +} + +/* + * acomp(s, t) returns: + * -2 if s strictly precedes t + * -1 if s is a prefix of t + * 0 if s is the same as t + * 1 if t is a prefix of s + * 2 if t strictly precedes s + */ + +int +acomp(Rune *s, Rune *t) +{ + int cs, ct; + + for(;;) { + cs = *s; + ct = *t; + if(cs != ct) + break; + if(cs == 0) + return 0; + s++; + t++; + } + if(cs == 0) + return -1; + if(ct == 0) + return 1; + if(cs < ct) + return -2; + return 2; +} + +void +torune(char *old, Rune *new) +{ + do old += chartorune(new, old); + while(*new++); +} + +void +rcanon(Rune *old, Rune *new) +{ + Rune r; + + while((r = *old++) && r != tab) { + if (islatin1(r) && latin_fold_tab[r-0xc0]) + r = latin_fold_tab[r-0xc0]; + if(direc) + if(!(isalnum(r) || r == L' ' || r == L'\t')) + continue; + if(fold) + if(isupper(r)) + r = tolower(r); + *new++ = r; + } + *new = 0; +} + +int +ncomp(Rune *s, Rune *t) +{ + Rune *is, *it, *js, *jt; + int a, b; + int ssgn, tsgn; + + while(isspace(*s)) + s++; + while(isspace(*t)) + t++; + ssgn = tsgn = -2*rev; + if(*s == '-') { + s++; + ssgn = -ssgn; + } + if(*t == '-') { + t++; + tsgn = -tsgn; + } + for(is = s; isdigit(*is); is++) + ; + for(it = t; isdigit(*it); it++) + ; + js = is; + jt = it; + a = 0; + if(ssgn == tsgn) + while(it>t && is>s) + if(b = *--it - *--is) + a = b; + while(is > s) + if(*--is != '0') + return -ssgn; + while(it > t) + if(*--it != '0') + return tsgn; + if(a) + return sgn(a)*ssgn; + if(*(s=js) == '.') + s++; + if(*(t=jt) == '.') + t++; + if(ssgn == tsgn) + while(isdigit(*s) && isdigit(*t)) + if(a = *t++ - *s++) + return sgn(a)*ssgn; + while(isdigit(*s)) + if(*s++ != '0') + return -ssgn; + while(isdigit(*t)) + if(*t++ != '0') + return tsgn; + return 0; +} + +int +getword(Biobuf *f, Rune *rp, int n) +{ + long c; + + while(n-- > 0) { + c = Bgetrune(f); + if(c < 0) + return 0; + if(c == '\n') { + *rp = L'\0'; + return 1; + } + *rp++ = c; + } + fprint(2, "Look: word too long. Bailing out.\n"); + return 0; +} -- cgit v1.2.3