/*
 * look - print the lines of a sorted file that begin with a given key.
 *
 * The key comes from the command line or, with -i (or when no key is
 * given), one key per line from fd 0.  The file is binary-searched for
 * the first candidate line and then scanned forward while lines match.
 */
#include <u.h>
#include <libc.h>
#include <bio.h>

/* Macros for Rune support of ctype.h-like functions */

#undef isupper
#undef islower
#undef isalpha
#undef isdigit
#undef isalnum
#undef isspace
#undef tolower

#define	isupper(r)	('A' <= (r) && (r) <= 'Z')
#define	islower(r)	('a' <= (r) && (r) <= 'z')
#define	isalpha(r)	(isupper(r) || islower(r))
#define	islatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)

#define	isdigit(r)	('0' <= (r) && (r) <= '9')

#define	isalnum(r)	(isalpha(r) || isdigit(r))

#define	isspace(r)	((r) == ' ' || (r) == '\t' \
			|| (0x0A <= (r) && (r) <= 0x0D))

/* only valid for arguments that satisfy isupper() */
#define	tolower(r)	((r)-'A'+'a')

#define	sgn(v)		((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))

#define	WORDSIZ	4000	/* capacity, in Runes, of a file line */
#define	KEYSIZ	50	/* capacity, in Runes, of a search key */

char	*filename = "#9/lib/words";
Biobuf	*dfile;		/* the sorted file being searched */
Biobuf	bout;		/* output, on fd 1 */
Biobuf	bin;		/* keys for -i mode, on fd 0 */

int	fold;		/* -f: map upper case to lower case */
int	direc;		/* -d: keep only letters, digits, blanks and tabs */
int	exact;		/* -x: print only exact matches */
int	iflag;		/* -i: read keys from fd 0 */
int	rev = 1;	/*-1 for reverse-ordered file, not implemented*/
int	(*compare)(Rune*, Rune*);	/* acomp, or ncomp with -n */
Rune	tab = '\t';	/* field terminator; changed by -t */
Rune	entry[WORDSIZ];	/* current line of dfile, as read */
Rune	word[WORDSIZ];	/* canonical form of entry */
Rune	key[KEYSIZ];	/* canonical form of orig */
Rune	orig[KEYSIZ];	/* search key, as given */
Rune	latin_fold_tab[] =
{
/*	Table to fold latin 1 characters to ASCII equivalents
			based at Rune value 0xc0

	 À    Á    Â    Ã    Ä    Å    Æ    Ç
	 È    É    Ê    Ë    Ì    Í    Î    Ï
	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
	 à    á    â    ã    ä    å    æ    ç
	 è    é    ê    ë    ì    í    î    ï
	 ð    ñ    ò    ó    ô    õ    ö    ÷
	 ø    ù    ú    û    ü    ý    þ    ÿ
*/
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
};

int	locate(void);
int	acomp(Rune*, Rune*);
int	getword(Biobuf*, Rune *rp, int n);
void	torune(char*, Rune*);
void	rcanon(Rune*, Rune*);
int	ncomp(Rune*, Rune*);

/*
 * Print the usage message on fd 2 and exit with status "usage".
 */
static void
usage(void)
{
	fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
	exits("usage");
}

/*
 * Parse options, pick up the key and file name, then print every
 * line of the file whose canonical form matches the key.  In -i mode
 * the search is repeated for each key read from fd 0 until getword
 * fails on it.
 */
void
main(int argc, char *argv[])
{
	int n;
	char *targ;

	/* NOTE(review): presumably expands the "#9" prefix of the default path - confirm */
	filename = unsharp(filename);

	Binit(&bin, 0, OREAD);
	Binit(&bout, 1, OWRITE);
	compare = acomp;
	ARGBEGIN{
	case 'd':
		direc++;
		break;
	case 'f':
		fold++;
		break;
	case 'i':
		iflag++;
		break;
	case 'n':
		compare = ncomp;
		break;
	case 't':
		/* ARGF yields 0 when -t is the last argument; don't dereference it */
		targ = ARGF();
		if(targ == 0)
			usage();
		chartorune(&tab, targ);
		break;
	case 'x':
		exact++;
		break;
	default:
		fprint(2, "%s: bad option %c\n", argv0, ARGC());
		usage();
	}ARGEND

	if(!iflag){
		if(argc >= 1) {
			/* torune is unbounded: make sure key plus terminator fits in orig */
			if(utflen(argv[0]) >= KEYSIZ) {
				fprint(2, "%s: key too long\n", argv0);
				exits("key too long");
			}
			torune(argv[0], orig);
			argv++;
			argc--;
		} else
			iflag++;	/* no key on the command line: read keys from fd 0 */
	}
	if(argc < 1) {
		/* no file named: search the default file with -d and -f in effect */
		direc++;
		fold++;
	} else
		filename = argv[0];
	if(!iflag)
		rcanon(orig, key);
	dfile = Bopen(filename, OREAD);
	if(dfile == 0) {
		fprint(2, "look: can't open %s\n", filename);
		exits("no dictionary");
	}
	if(!iflag)
		if(!locate())
			exits("not found");
	do {
		if(iflag) {
			Bflush(&bout);
			if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
				exits(0);
			rcanon(orig, key);
			if(!locate())
				continue;
		}
		/* locate() left the first matching line in entry/word */
		if(!exact || !acomp(word, key))
			Bprint(&bout, "%S\n", entry);
		while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
			rcanon(entry, word);
			n = compare(key, word);
			switch(n) {
			case -1:
				if(exact)
					break;
				/* fall through */
			case 0:
				/*
				 * word is canonical, so test it against the
				 * canonical key (not orig), exactly as is done
				 * for the first hit above.
				 */
				if(!exact || !acomp(word, key))
					Bprint(&bout, "%S\n", entry);
				continue;
			}
			break;	/* first non-matching line ends the run */
		}
	} while(iflag);
	exits(0);
}

/*
 * Position dfile at the first line matching key.
 *
 * A binary search over byte offsets narrows [bot, top]; each probe is
 * moved forward to the start of the next line before it is read.  A
 * forward scan from bot then finds the first line that is not ordered
 * before key.
 *
 * Returns 1 with that line in entry (and its canonical form in word)
 * when key is a prefix of it, or equal to it; with -x only equality
 * counts.  Returns 0 otherwise.
 */
int
locate(void)
{
	vlong top, bot, mid;
	int c;
	int n;

	bot = 0;
	top = Bseek(dfile, 0L, 2);
	for(;;) {
		mid = (top+bot) / 2;
		Bseek(dfile, mid, 0);
		/* discard the (possibly partial) line the probe landed in */
		do
			c = Bgetrune(dfile);
		while(c>=0 && c!='\n');
		mid = Boffset(dfile);
		if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
			break;
		rcanon(entry, word);
		n = compare(key, word);
		switch(n) {
		case -2:
		case -1:
		case 0:
			/* key is at or before this line */
			if(top <= mid)
				break;
			top = mid;
			continue;
		case 1:
		case 2:
			/* key is after this line */
			bot = mid;
			continue;
		}
		break;
	}
	Bseek(dfile, bot, 0);
	while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
		rcanon(entry, word);
		n = compare(key, word);
		switch(n) {
		case -2:
			return 0;
		case -1:
			if(exact)
				return 0;
			/* fall through */
		case 0:
			return 1;
		case 1:
		case 2:
			continue;
		}
	}
	return 0;
}

/*
 *	acomp(s, t) returns:
 *		-2 if s strictly precedes t
 *		-1 if s is a prefix of t
 *		0 if s is the same as t
 *		1 if t is a prefix of s
 *		2 if t strictly precedes s
 */
int
acomp(Rune *s, Rune *t)
{
	int cs, ct;

	/* advance to the first position where the strings differ */
	for(;;) {
		cs = *s;
		ct = *t;
		if(cs != ct)
			break;
		if(cs == 0)
			return 0;
		s++;
		t++;
	}
	if(cs == 0)
		return -1;
	if(ct == 0)
		return 1;
	if(cs < ct)
		return -2;
	return 2;
}

/*
 * Convert the NUL-terminated UTF string old into Runes at new,
 * including the terminating zero Rune.  No bound is applied: the
 * caller must ensure new can hold utflen(old)+1 Runes.
 */
void
torune(char *old, Rune *new)
{
	do
		old += chartorune(new, old);
	while(*new++);
}

/*
 * Write the canonical (comparison) form of old into new.
 *
 * Copying stops at the end of old or at the first tab Rune.  Latin-1
 * letters that have an entry in latin_fold_tab are replaced by it;
 * with direc set, anything other than a letter, digit, blank or '\t'
 * is dropped; with fold set, upper case is mapped to lower case.
 * The result is never longer than old.
 */
void
rcanon(Rune *old, Rune *new)
{
	Rune r;

	while((r = *old++) && r != tab) {
		if(islatin1(r) && latin_fold_tab[r-0xc0])
			r = latin_fold_tab[r-0xc0];
		if(direc)
			if(!(isalnum(r) || r == ' ' || r == '\t'))
				continue;
		if(fold)
			if(isupper(r))
				r = tolower(r);
		*new++ = r;
	}
	*new = 0;
}

/*
 * Numeric comparison, used with -n.
 *
 * Each string is read as: optional white space, optional '-', digits,
 * optional '.', digits.  With rev == 1 the result is -2 when s is
 * numerically less than t, 2 when it is greater and 0 when the two
 * are numerically equal; the "prefix" values -1 and 1 of acomp are
 * never returned.
 */
int
ncomp(Rune *s, Rune *t)
{
	Rune *is, *it, *js, *jt;
	int a, b;
	int ssgn, tsgn;

	while(isspace(*s))
		s++;
	while(isspace(*t))
		t++;
	/* -2*rev stands for a non-negative number; a leading '-' flips it */
	ssgn = tsgn = -2*rev;
	if(*s == '-') {
		s++;
		ssgn = -ssgn;
	}
	if(*t == '-') {
		t++;
		tsgn = -tsgn;
	}
	/* is and it end up just past the integer digits */
	for(is = s; isdigit(*is); is++)
		;
	for(it = t; isdigit(*it); it++)
		;
	js = is;
	jt = it;
	a = 0;
	/*
	 * Walk the integer parts right to left over their common width;
	 * a keeps the difference at the most significant differing digit.
	 */
	if(ssgn == tsgn)
		while(it>t && is>s)
			if(b = *--it - *--is)
				a = b;
	/* any non-zero digit left over in one number decides the result */
	while(is > s)
		if(*--is != '0')
			return -ssgn;
	while(it > t)
		if(*--it != '0')
			return tsgn;
	if(a)
		return sgn(a)*ssgn;
	/* integer parts are equal: compare the fractions left to right */
	if(*(s=js) == '.')
		s++;
	if(*(t=jt) == '.')
		t++;
	if(ssgn == tsgn)
		while(isdigit(*s) && isdigit(*t))
			if(a = *t++ - *s++)
				return sgn(a)*ssgn;
	while(isdigit(*s))
		if(*s++ != '0')
			return -ssgn;
	while(isdigit(*t))
		if(*t++ != '0')
			return tsgn;
	return 0;
}

/*
 * Read one line from f into rp, which has room for n Runes, replacing
 * the newline with a terminating zero.
 *
 * Returns 1 on success.  Returns 0 if Bgetrune fails before a newline
 * is seen (any partial line is discarded), or if the line does not
 * fit in n Runes, in which case a diagnostic is printed and rp is
 * left unterminated.
 */
int
getword(Biobuf *f, Rune *rp, int n)
{
	long c;

	while(n-- > 0) {
		c = Bgetrune(f);
		if(c < 0)
			return 0;
		if(c == '\n') {
			*rp = '\0';
			return 1;
		}
		*rp++ = c;
	}
	fprint(2, "Look: word too long.  Bailing out.\n");
	return 0;
}