#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Table of known dictionaries, terminated by an all-zero entry.
 * Each entry gives a short name, a description, two file names
 * (presumably the data file and its index -- see Dict in dict.h)
 * and the three format-specific routines for that dictionary
 * (next-offset, print-entry, print-key).
 */
Dict dicts[] = {
	{"oed",		"Oxford English Dictionary, 2nd Ed.",
	 "oed2",	"oed2index",
	 oednextoff,	oedprintentry,		oedprintkey},
	{"ahd",		"American Heritage Dictionary, 2nd College Ed.",
	 "ahd/DICT.DB",	"ahd/index",
	 ahdnextoff,	ahdprintentry,		ahdprintkey},
	{"pgw",		"Project Gutenberg Webster Dictionary",
	 "pgw",	"pgwindex",
	 pgwnextoff,	pgwprintentry,		pgwprintkey},
	{"thesaurus",	"Collins Thesaurus",
	 "thesaurus",	"thesindex",
	 thesnextoff,	thesprintentry,	thesprintkey},
	{"roget",		"Project Gutenberg Roget's Thesaurus",
	 "roget",	"rogetindex",
	 rogetnextoff,	rogetprintentry,	rogetprintkey},

	{"ce",		"Gendai Chinese->English",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/ceindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ceh",		"Gendai Chinese->English (Hanzi index)",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/cehindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ec",		"Gendai English->Chinese",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/ecindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"dae",		"Gyldendal Danish->English",
	 "world/gylddata/sandic30.dat",
	 "world/gylddata/daeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"eda",		"Gyldendal English->Danish",
	 "world/gylddata/sandic29.dat",
	 "world/gylddata/edaindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"due",		"Wolters-Noordhoff Dutch->English",
	 "world/woltdata/sandic07.dat",
	 "world/woltdata/deindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"edu",		"Wolters-Noordhoff English->Dutch",
	 "world/woltdata/sandic06.dat",
	 "world/woltdata/edindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"fie",		"WSOY Finnish->English",
	 "world/werndata/sandic32.dat",
	 "world/werndata/fieindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"efi",		"WSOY English->Finnish",
	 "world/werndata/sandic31.dat",
	 "world/werndata/efiindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"fe",		"Collins French->English",
	 "fe",	"feindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"ef",		"Collins English->French",
	 "ef",	"efindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"ge",		"Collins German->English",
	 "ge",	"geindex",
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
	{"eg",		"Collins English->German",
	 "eg",	"egindex",
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},

	{"ie",		"Collins Italian->English",
	 "ie",	"ieindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"ei",		"Collins English->Italian",
	 "ei",	"eiindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"je",		"Sanshusha Japanese->English",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/jeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"jek",		"Sanshusha Japanese->English (Kanji index)",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/jekindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ej",		"Sanshusha English->Japanese",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/ejindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"tjeg",	"Sanshusha technical Japanese->English,German",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tjegindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tjegk",	"Sanshusha technical Japanese->English,German (Kanji index)",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tjegkindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tegj",	"Sanshusha technical English->German,Japanese",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tegjindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tgje",	"Sanshusha technical German->Japanese,English",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tgjeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"ne",		"Kunnskapforlaget Norwegian->English",
	 "world/kunndata/sandic28.dat",
	 "world/kunndata/neindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"en",		"Kunnskapforlaget English->Norwegian",
	 "world/kunndata/sandic27.dat",
	 "world/kunndata/enindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"re",		"Leon Ungier Russian->English",
	 "re",	"reindex",
	 simplenextoff,	simpleprintentry,	simpleprintkey},
	{"er",		"Leon Ungier English->Russian",
	 "re",	"erindex",
	 simplenextoff,	simpleprintentry,	simpleprintkey},

	{"se",		"Collins Spanish->English",
	 "se",	"seindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"es",		"Collins English->Spanish",
	 "es",	"esindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"swe",		"Esselte Studium Swedish->English",
	 "world/essedata/sandic34.dat",
	 "world/essedata/sweindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"esw",		"Esselte Studium English->Swedish",
	 "world/essedata/sandic33.dat",
	 "world/essedata/eswindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"movie",	"Movies -- by title",
	 "movie/data",	"movtindex",
	 movienextoff,	movieprintentry,	movieprintkey},
	{"moviea",	"Movies -- by actor",
	 "movie/data",	"movaindex",
	 movienextoff,	movieprintentry,	movieprintkey},
	{"movied",	"Movies -- by director",
	 "movie/data",	"movdindex",
	 movienextoff,	movieprintentry,	movieprintkey},

	{"slang",	"English Slang",
	 "slang",	"slangindex",
	 slangnextoff,	slangprintentry,	slangprintkey},

	{"robert",	"Robert Électronique",
	 "robert/_pointers",	"robert/_index",
	 robertnextoff,	robertindexentry,	robertprintkey},
	{"robertv",	"Robert Électronique - formes des verbes",
	 "robert/flex.rob",	"robert/_flexindex",
	 robertnextflex,	robertflexentry,	robertprintkey},

	{0, 0, 0, 0, 0}
};

/*
 * One row of the accent table: an accent rune and a zero-terminated
 * list of <base character, accented character> pairs for it.
 */
typedef struct Lig Lig;
struct Lig {
	Rune	start;		/* accent rune */
	Rune	pairs[100];	/* <char,accented version> pairs */
};

/*
 * Accent table, indexed by (accent code - LIGS); see liglookup.
 * A row whose pair list is just {0} has no precomposed forms.
 */
/* keep in sync with dict.h */
static Lig ligtab[Nligs] = {
	{0xb4,	{0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107,
		 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd,
		 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a,
		 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3,
		 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b,
		 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd,
		 0x5a, 0x179, 0x7a, 0x17a, 0}},
	{0x2cb,	{0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8,
		 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2,
		 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
	{0xa8,	{0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb,
		 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6,
		 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
	{0xb8,	{0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136,
		 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145,
		 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e,
		 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
	{0x2dc,	{0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129,
		 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5,
		 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
	{0x2d8,	{0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115,
		 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d,
		 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c,
		 0x75, 0x16d, 0}},
	{0x2da,	{0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
	{0x2d9,	{0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117,
		 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f,
		 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
	{0x2e,	{0}},
	{0x2322,	{0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109,
		 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d,
		 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee,
		 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4,
		 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb,
		 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176,
		 0x79, 0x177, 0}},
	{0x32f,	{0}},
	{0x2db,	{0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119,
		 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172,
		 0x75, 0x173, 0}},
	{0xaf,	{0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113,
		 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c,
		 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
	{0x2c7,	{0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f,
		 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e,
		 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159,
		 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165,
		 0x5a, 0x17d, 0x7a, 0x17e, 0}},
	{0x2bd,	{0}},
	{0x2bc,	{0}},
	{0x32e,	{0}}
};

/*
 * Multi-rune sequences, each a zero-terminated string of at most
 * four runes.  Row order presumably matches codes declared in
 * dict.h (Nmulti rows) -- keep in sync with that header.
 */
Rune multitab[Nmulti][5] = {
	{0x2bd, 0x3b1, 0},
	{0x2bc, 0x3b1, 0},
	{0x61, 0x6e, 0x64, 0},
	{0x61, 0x2f, 0x71, 0},
	{0x3c, 0x7c, 0},
	{0x2e, 0x2e, 0},
	{0x2e, 0x2e, 0x2e, 0},
	{0x2bd, 0x3b5, 0},
	{0x2bc, 0x3b5, 0},
	{0x2014, 0x2014, 0},
	{0x2bd, 0x3b7, 0},
	{0x2bc, 0x3b7, 0},
	{0x2bd, 0x3b9, 0},
	{0x2bc, 0x3b9, 0},
	{0x63, 0x74, 0},
	{0x66, 0x66, 0},
	{0x66, 0x66, 0x69, 0},
	{0x66, 0x66, 0x6c, 0},
	{0x66, 0x6c, 0},
	{0x66, 0x69, 0},
	{0x26b, 0x26b, 0},
	{0x73, 0x74, 0},
	{0x2bd, 0x3bf, 0},
	{0x2bc, 0x3bf, 0},
	{0x6f, 0x72, 0},
	{0x2bd, 0x3c1, 0},
	{0x2bc, 0x3c1, 0},
	{0x7e, 0x7e, 0},
	{0x2bd, 0x3c5, 0},
	{0x2bc, 0x3c5, 0},
	{0x2bd, 0x3c9, 0},
	{0x2bc, 0x3c9, 0},
	{0x6f, 0x65, 0},
	{0x20, 0x20, 0}
};

/* ASCII upper-case test, Latin-1 accented-range test, ASCII upper->lower */
#define	risupper(r)	(0x41 <= (r) && (r) <= 0x5a)
#define	rislatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)
#define	rtolower(r)	((r)-'A'+'a')

static Rune latin_fold_tab[] =
{
/*	Table to fold latin 1 characters to ASCII equivalents
	based at Rune value 0xc0.  A 0 entry means "leave unchanged".

	 À    Á    Â    Ã    Ä    Å    Æ    Ç
	 È    É    Ê    Ë    Ì    Í    Î    Ï
	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
	 à    á    â    ã    ä    å    æ    ç
	 è    é    ê    ë    ì    í    î    ï
	 ð    ñ    ò    ó    ô    õ    ö    ÷
	 ø    ù    ú    û    ü    ý    þ    ÿ
*/
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y'
};

/* stack of saved translation tables and its depth; see changett */
static Rune	*ttabstack[20];
static int	ntt;

/*
 * tab is an array of n Assoc's, sorted by key.
 * Look for key in tab, and return corresponding val
 * or -1 if not there
 */
long
lookassoc(Assoc *tab, int n, char *key)
{
	Assoc *q;
	long i, low, high;
	int r;

	/* binary search; the answer, if any, lies strictly between low and high */
	for(low = -1, high = n; high > low+1; ){
		i = (high+low)/2;
		q = &tab[i];
		if((r=strcmp(key, q->key))<0)
			high = i;
		else if(r == 0)
			return q->val;
		else
			low=i;
	}
	return -1;
}

/*
 * Same as lookassoc, but for a table of n Nassoc's sorted by
 * numeric key.  Returns the matching val, or -1 if not there.
 */
long
looknassoc(Nassoc *tab, int n, long key)
{
	Nassoc *q;
	long i, low, high;

	for(low = -1, high = n; high > low+1; ){
		i = (high+low)/2;
		q = &tab[i];
		if(key < q->key)
			high = i;
		else if(key == q->key)
			return q->val;
		else
			low=i;
	}
	return -1;
}

/*
 * Format a message (print(2)-style fmt and arguments) and write it
 * to file descriptor 2 as "argv0: message\n".  The formatted text
 * is limited to the size of the local buffer.
 */
void
err(char *fmt, ...)
{
	char buf[1000];
	va_list v;

	va_start(v, fmt);
	vsnprint(buf, sizeof(buf), fmt, v);
	va_end(v);
	fprint(2, "%s: %s\n", argv0, buf);
}

/*
 * Write the rune r to bout, keeping track of line length
 * and breaking the lines (at blanks) when they get too long
 */
void
outrune(long r)
{
	if(outinhibit)
		return;
	if(++linelen > breaklen && r == 0x20) {
		/* the blank itself is replaced by the newline */
		Bputc(bout, '\n');
		linelen = 0;
	} else
		Bputrune(bout, r);
}

/*
 * Write the null-terminated rune string rp via outrune.
 */
void
outrunes(Rune *rp)
{
	Rune r;

	while((r = *rp++) != 0)
		outrune(r);
}

/* like outrune, but when arg is known to be a char */
void
outchar(int c)
{
	if(outinhibit)
		return;
	if(++linelen > breaklen && c == ' ') {
		c ='\n';
		linelen = 0;
	}
	Bputc(bout, c);
}

/*
 * Write the null-terminated string s via outchar.
 */
void
outchars(char *s)
{
	char c;

	while((c = *s++) != 0)
		outchar(c);
}

/*
 * Formatted output through outchars (so line breaking applies).
 * The formatted text is limited to the size of the local buffer.
 */
void
outprint(char *fmt, ...)
{
	char buf[1000];
	va_list v;

	va_start(v, fmt);
	vsnprint(buf, sizeof(buf), fmt, v);
	va_end(v);
	outchars(buf);
}

/*
 * Write the bytes in [b, e) via outchar, turning newlines into
 * blanks and collapsing runs of blanks into a single blank.
 */
void
outpiece(char *b, char *e)
{
	int c, lastc;

	lastc = 0;
	while(b < e) {
		c = *b++;
		if(c == '\n')
			c = ' ';
		if(!(c == ' ' && lastc == ' '))
			outchar(c);
		lastc = c;
	}
}

/*
 * Go to new line if not already there; indent if ind != 0.
 * If ind > 1, leave a blank line too.
 * Slight hack: assume if current line is only one or two
 * characters long, then they were spaces.
 *
 * The indent is two columns: every indenting path leaves
 * linelen == 2, so each path must bring the line up to exactly
 * two leading blanks (two after a fresh newline or on an empty
 * line, one more when a single character is already there).
 */
void
outnl(int ind)
{
	if(outinhibit)
		return;
	if(ind) {
		if(ind > 1) {
			/* finish any real content first, then blank line + indent */
			if(linelen > 2)
				Bputc(bout, '\n');
			Bprint(bout, "\n  ");
		} else if(linelen == 0)
			Bprint(bout, "  ");
		else if(linelen == 1)
			Bputc(bout, ' ');
		else if(linelen != 2)
			Bprint(bout, "\n  ");
		linelen = 2;
	} else {
		if(linelen) {
			Bputc(bout, '\n');
			linelen = 0;
		}
	}
}

/*
 * Fold the runes in null-terminated rp.
 * Use the sort(1) definition of folding (uppercase to lowercase,
 * latin1-accented characters to corresponding unaccented chars)
 */
void
fold(Rune *rp)
{
	Rune r;

	while((r = *rp) != 0) {
		/* a 0 table entry means the rune has no ASCII equivalent */
		if (rislatin1(r) && latin_fold_tab[r-0xc0])
				r = latin_fold_tab[r-0xc0];
		if(risupper(r))
			r = rtolower(r);
		*rp++ = r;
	}
}

/*
 * Like fold, but put folded result into new
 * (assumed to have enough space).
 * old is a regular expression, but we know that
 * metacharacters aren't affected
 */
void
foldre(char *new, char *old)
{
	Rune r;

	while(*old) {
		old += chartorune(&r, old);
		if (rislatin1(r) && latin_fold_tab[r-0xc0])
				r = latin_fold_tab[r-0xc0];
		if(risupper(r))
			r = rtolower(r);
		new += runetochar(new, &r);
	}
	*new = 0;
}

/*
 *	acomp(s, t) returns:
 *		-2 if s strictly precedes t
 *		-1 if s is a prefix of t
 *		0 if s is the same as t
 *		1 if t is a prefix of s
 *		2 if t strictly precedes s
 */
int
acomp(Rune *s, Rune *t)
{
	int cs, ct;

	/* skip the common prefix */
	for(;;) {
		cs = *s;
		ct = *t;
		if(cs != ct)
			break;
		if(cs == 0)
			return 0;
		s++;
		t++;
	}
	if(cs == 0)
		return -1;
	if(ct == 0)
		return 1;
	if(cs < ct)
		return -2;
	return 2;
}

/*
 * Copy null terminated Runes from 'from' to 'to'.
 */
void
runescpy(Rune *to, Rune *from)
{
	while((*to++ = *from++) != 0)
		continue;
}

/*
 * Conversion of unsigned number to long, no overflow detection.
 * Reads leading decimal digits of r and stops at the first
 * non-digit rune; returns 0 if there are no leading digits.
 */
long
runetol(Rune *r)
{
	int c;
	long n;

	n = 0;
	for(;; r++){
		c = *r;
		if(0x30<=c && c<=0x39)
			c -= '0';
		else
			break;
		n = n*10 + c;
	}
	return n;
}

/*
 * See if there is a rune corresponding to the accented
 * version of r with accent acc (acc in [LIGS..LIGE-1]),
 * and return it if so, else return NONE.
 */
Rune
liglookup(Rune acc, Rune r)
{
	Rune *p;

	if(acc < LIGS || acc >= LIGE)
		return NONE;
	/* pairs is a zero-terminated list of <char, accented char> */
	for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
		if(*p == r)
			return *(p+1);
	return NONE;
}

/*
 * Maintain a translation table stack (a translation table
 * is an array of Runes indexed by bytes or 7-bit bytes).
 * If starting is true, push the curtab onto the stack
 * and return newtab; else pop the top of the stack and
 * return it.
 * If curtab is 0, initialize the stack and return.
 * On overflow or underflow the stack is left alone and curtab
 * is returned (with a diagnostic if debug is set).
 */
Rune *
changett(Rune *curtab, Rune *newtab, int starting)
{
	if(curtab == 0) {
		ntt = 0;
		return 0;
	}
	if(starting) {
		if(ntt >= asize(ttabstack)) {
			if(debug)
				err("translation stack overflow");
			return curtab;
		}
		ttabstack[ntt++] = curtab;
		return newtab;
	} else {
		if(ntt == 0) {
			if(debug)
				err("translation stack underflow");
			return curtab;
		}
		return ttabstack[--ntt];
	}
}