diff options
author | rsc <devnull@localhost> | 2003-11-25 03:37:45 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2003-11-25 03:37:45 +0000 |
commit | 08708877939323c1e1cb87210193ec25fc472ff7 (patch) | |
tree | bd34e2144a3e9532ab228619d7ae8d4a0078aeeb /src/cmd/dict/pcollins.c | |
parent | 091f74d0a0db5ba1e098a518922525cb032a97b4 (diff) | |
download | plan9port-08708877939323c1e1cb87210193ec25fc472ff7.tar.gz plan9port-08708877939323c1e1cb87210193ec25fc472ff7.tar.bz2 plan9port-08708877939323c1e1cb87210193ec25fc472ff7.zip |
add dict
Diffstat (limited to 'src/cmd/dict/pcollins.c')
-rw-r--r-- | src/cmd/dict/pcollins.c | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/src/cmd/dict/pcollins.c b/src/cmd/dict/pcollins.c new file mode 100644 index 00000000..83ee3abc --- /dev/null +++ b/src/cmd/dict/pcollins.c @@ -0,0 +1,226 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include "dict.h" + +/* + * Routines for handling dictionaries in the "Paperback Collins" + * format (with tags surrounded by >....<) + */ +enum { + Buflen=1000, +}; + +/* More special runes */ +enum { + B = MULTIE+1, /* bold */ + H, /* headword start */ + I, /* italics */ + Ps, /* pronunciation start */ + Pe, /* pronunciation end */ + R, /* roman */ + X, /* headword end */ +}; + +/* Assoc tables must be sorted on first field */ + +static Assoc tagtab[] = { + {"AA", 0xc5}, + {"AC", LACU}, + {"B", B}, + {"CE", LCED}, + {"CI", LFRN}, + {"Di", 0x131}, + {"EL", 0x2d}, + {"GR", LGRV}, + {"H", H}, + {"I", I}, + {"OE", 0x152}, + {"R", R}, + {"TI", LTIL}, + {"UM", LUML}, + {"X", X}, + {"[", Ps}, + {"]", Pe}, + {"ac", LACU}, + {"ce", LCED}, + {"ci", LFRN}, + {"gr", LGRV}, + {"oe", 0x153}, + {"supe", 0x65}, /* should be raised */ + {"supo", 0x6f}, /* should be raised */ + {"ti", LTIL}, + {"um", LUML}, + {"{", Ps}, + {"~", 0x7e}, + {"~~", MTT}, +}; + +static Rune normtab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, 0x20, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'', + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, +/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, TAGE, 0x3d, TAGS, 0x3f, +/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f, +/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, +/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; + +static char *gettag(char *, char *); + +static Entry curentry; +static char tag[Buflen]; +#define cursize (curentry.end-curentry.start) + +void +pcollprintentry(Entry e, int cmd) +{ + char *p, *pe; + long r, rprev, t, rlig; + int saveoi; + Rune *transtab; + + p = e.start; + pe = e.end; + transtab = normtab; + rprev = NONE; + changett(0, 0, 0); + curentry = e; + saveoi = 0; + if(cmd == 'h') + outinhibit = 1; + while(p < pe) { + if(cmd == 'r') { + outchar(*p++); + continue; + } + r = transtab[(*p++)&0x7F]; + if(r < NONE) { + /* Emit the rune, but buffer in case of ligature */ + if(rprev != NONE) + outrune(rprev); + rprev = r; + } else if(r == TAGS) { + p = gettag(p, pe); + t = lookassoc(tagtab, asize(tagtab), tag); + if(t == -1) { + if(debug && !outinhibit) + err("tag %ld %d %s", + e.doff, cursize, tag); + continue; + } + if(t < NONE) { + if(rprev != NONE) + outrune(rprev); + rprev = t; + } else if(t >= LIGS && t < LIGE) { + /* handle possible ligature */ + rlig = liglookup(t, rprev); + if(rlig != NONE) + rprev = rlig; /* overwrite rprev */ + else { + /* could print accent, but let's not */ + if(rprev != NONE) outrune(rprev); + rprev = NONE; + } + } else if(t >= MULTI && t < MULTIE) { + if(rprev != NONE) { + outrune(rprev); + rprev = NONE; + } + outrunes(multitab[t-MULTI]); + } else { + if(rprev != NONE) { + outrune(rprev); + rprev = NONE; + } + switch(t){ + case H: + if(cmd == 'h') + outinhibit = 0; + else + outnl(0); + break; + case X: + if(cmd == 'h') + outinhibit = 1; + else + outchars(". "); + break; + case Ps: + /* don't know enough of pron. key yet */ + saveoi = outinhibit; + outinhibit = 1; + break; + case Pe: + outinhibit = saveoi; + break; + } + } + } + } + if(cmd == 'h') + outinhibit = 0; + outnl(0); +} + +long +pcollnextoff(long fromoff) +{ + long a; + char *p; + + a = Bseek(bdict, fromoff, 0); + if(a < 0) + return -1; + for(;;) { + p = Brdline(bdict, '\n'); + if(!p) + break; + if(p[0] == '>' && p[1] == 'H' && p[2] == '<') + return (Boffset(bdict)-Blinelen(bdict)); + } + return -1; +} + +void +pcollprintkey(void) +{ + Bprint(bout, "No pronunciation key yet\n"); +} + +/* + * f points just after '>'; fe points at end of entry. + * Expect next characters from bin to match: + * [^ <]+< + * tag + * Accumulate the tag in tag[]. + * Return pointer to after final '<'. + */ +static char * +gettag(char *f, char *fe) +{ + char *t; + int c, i; + + t = tag; + i = Buflen; + while(--i > 0) { + c = *f++; + if(c == '<' || f == fe) + break; + *t++ = c; + } + *t = 0; + return f; +} |