diff options
author | rsc <devnull@localhost> | 2003-11-25 03:37:45 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2003-11-25 03:37:45 +0000 |
commit | 08708877939323c1e1cb87210193ec25fc472ff7 (patch) | |
tree | bd34e2144a3e9532ab228619d7ae8d4a0078aeeb /src/cmd/dict/oed.c | |
parent | 091f74d0a0db5ba1e098a518922525cb032a97b4 (diff) | |
download | plan9port-08708877939323c1e1cb87210193ec25fc472ff7.tar.gz plan9port-08708877939323c1e1cb87210193ec25fc472ff7.tar.bz2 plan9port-08708877939323c1e1cb87210193ec25fc472ff7.zip |
add dict
Diffstat (limited to 'src/cmd/dict/oed.c')
-rw-r--r-- | src/cmd/dict/oed.c | 1425 |
1 files changed, 1425 insertions, 0 deletions
diff --git a/src/cmd/dict/oed.c b/src/cmd/dict/oed.c new file mode 100644 index 00000000..868eb486 --- /dev/null +++ b/src/cmd/dict/oed.c @@ -0,0 +1,1425 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include "dict.h" + +enum { + Buflen=1000, + Maxaux=5, +}; + +/* Possible tags */ +enum { + A, /* author in quote (small caps) */ + B, /* bold */ + Ba, /* author inside bib */ + Bch, /* builtup chem component */ + Bib, /* surrounds word 'in' for bibliographic ref */ + Bl, /* bold */ + Bo, /* bond over */ + Bu, /* bond under */ + Cb, /* ? block of stuff (indent) */ + Cf, /* cross ref to another entry (italics) */ + Chem, /* chemistry formula */ + Co, /* over (preceding sum, integral, etc.) */ + Col, /* column of table (aux just may be r) */ + Cu, /* under (preceding sum, integral, etc.) */ + Dat, /* date */ + Db, /* def block? indent */ + Dn, /* denominator of fraction */ + E, /* main entry */ + Ed, /* editor's comments (in [...]) */ + Etym, /* etymology (in [...]) */ + Fq, /* frequency count (superscript) */ + Form, /* formula */ + Fr, /* fraction (contains <nu>, then <dn>) */ + Gk, /* greek (transliteration) */ + Gr, /* grammar? (e.g., around 'pa.' in 'pa. pple.') */ + Hg, /* headword group */ + Hm, /* homonym (superscript) */ + Hw, /* headword (bold) */ + I, /* italics */ + Il, /* italic list? */ + In, /* inferior (subscript) */ + L, /* row of col of table */ + La, /* status or usage label (italic) */ + Lc, /* chapter/verse sort of thing for works */ + N, /* note (smaller type) */ + Nu, /* numerator of fraction */ + Ov, /* needs overline */ + P, /* paragraph (indent) */ + Ph, /* pronunciation (transliteration) */ + Pi, /* pile (frac without line) */ + Pqp, /* subblock of quote */ + Pr, /* pronunciation (in (...)) */ + Ps, /* position (e.g., adv.) 
(italic) */ + Pt, /* part (in lc) */ + Q, /* quote in quote block */ + Qd, /* quote date (bold) */ + Qig, /* quote number (greek) */ + Qla, /* status or usage label in quote (italic) */ + Qp, /* quote block (small type, indent) */ + Qsn, /* quote number */ + Qt, /* quote words */ + R, /* roman type style */ + Rx, /* relative cross reference (e.g., next) */ + S, /* another form? (italic) */ + S0, /* sense (sometimes surrounds several sx's) */ + S1, /* sense (aux num: indented bold letter) */ + S2, /* sense (aux num: indented bold capital rom num) */ + S3, /* sense (aux num: indented number of asterisks) */ + S4, /* sense (aux num: indented bold number) */ + S5, /* sense (aux num: indented number of asterisks) */ + S6, /* subsense (aux num: bold letter) */ + S7a, /* subsense (aux num: letter) */ + S7n, /* subsense (aux num: roman numeral) */ + Sc, /* small caps */ + Sgk, /* subsense (aux num: transliterated greek) */ + Sn, /* sense of subdefinition (aux num: roman letter) */ + Ss, /* sans serif */ + Ssb, /* sans serif bold */ + Ssi, /* sans serif italic */ + Su, /* superior (superscript) */ + Sub, /* subdefinition */ + Table, /* table (aux cols=number of columns) */ + Tt, /* title? (italics) */ + Vd, /* numeric label for variant form */ + Ve, /* variant entry */ + Vf, /* variant form (light bold) */ + Vfl, /* list of vf's (starts with Also or Forms) */ + W, /* work (e.g., Beowulf) (italics) */ + X, /* cross reference to main word (small caps) */ + Xd, /* cross reference to quotation by date */ + Xi, /* internal cross reference ? (italic) */ + Xid, /* cross reference identifer, in quote ? 
*/ + Xs, /* cross reference sense (lower number) */ + Xr, /* list of x's */ + Ntag /* end of tags */ +}; + +/* Assoc tables must be sorted on first field */ + +static Assoc tagtab[] = { + {"a", A}, + {"b", B}, + {"ba", Ba}, + {"bch", Bch}, + {"bib", Bib}, + {"bl", Bl}, + {"bo", Bo}, + {"bu", Bu}, + {"cb", Cb}, + {"cf", Cf}, + {"chem", Chem}, + {"co", Co}, + {"col", Col}, + {"cu", Cu}, + {"dat", Dat}, + {"db", Db}, + {"dn", Dn}, + {"e", E}, + {"ed", Ed}, + {"et", Etym}, + {"etym", Etym}, + {"form", Form}, + {"fq", Fq}, + {"fr", Fr}, + {"frac", Fr}, + {"gk", Gk}, + {"gr", Gr}, + {"hg", Hg}, + {"hm", Hm}, + {"hw", Hw}, + {"i", I}, + {"il", Il}, + {"in", In}, + {"l", L}, + {"la", La}, + {"lc", Lc}, + {"n", N}, + {"nu", Nu}, + {"ov", Ov}, + {"p", P}, + {"ph", Ph}, + {"pi", Pi}, + {"pqp", Pqp}, + {"pr", Pr}, + {"ps", Ps}, + {"pt", Pt}, + {"q", Q}, + {"qd", Qd}, + {"qig", Qig}, + {"qla", Qla}, + {"qp", Qp}, + {"qsn", Qsn}, + {"qt", Qt}, + {"r", R}, + {"rx", Rx}, + {"s", S}, + {"s0", S0}, + {"s1", S1}, + {"s2", S2}, + {"s3", S3}, + {"s4", S4}, + {"s5", S5}, + {"s6", S6}, + {"s7a", S7a}, + {"s7n", S7n}, + {"sc", Sc}, + {"sgk", Sgk}, + {"sn", Sn}, + {"ss", Ss,}, + {"ssb", Ssb}, + {"ssi", Ssi}, + {"su", Su}, + {"sub", Sub}, + {"table", Table}, + {"tt", Tt}, + {"vd", Vd}, + {"ve", Ve}, + {"vf", Vf}, + {"vfl", Vfl}, + {"w", W}, + {"x", X}, + {"xd", Xd}, + {"xi", Xi}, + {"xid", Xid}, + {"xr", Xr}, + {"xs", Xs}, +}; + +/* Possible tag auxilliary info */ +enum { + Cols, /* number of columns in a table */ + Num, /* letter or number, for a sense */ + St, /* status (e.g., obs) */ + Naux +}; + +static Assoc auxtab[] = { + {"cols", Cols}, + {"num", Num}, + {"st", St} +}; + +static Assoc spectab[] = { + {"3on4", 0xbe}, + {"Aacu", 0xc1}, + {"Aang", 0xc5}, + {"Abarab", 0x100}, + {"Acirc", 0xc2}, + {"Ae", 0xc6}, + {"Agrave", 0xc0}, + {"Alpha", 0x391}, + {"Amac", 0x100}, + {"Asg", 0x1b7}, /* Unicyle. 
Cf "Sake" */ + {"Auml", 0xc4}, + {"Beta", 0x392}, + {"Cced", 0xc7}, + {"Chacek", 0x10c}, + {"Chi", 0x3a7}, + {"Chirho", 0x2627}, /* Chi Rho U+2627 */ + {"Csigma", 0x3da}, + {"Delta", 0x394}, + {"Eacu", 0xc9}, + {"Ecirc", 0xca}, + {"Edh", 0xd0}, + {"Epsilon", 0x395}, + {"Eta", 0x397}, + {"Gamma", 0x393}, + {"Iacu", 0xcd}, + {"Icirc", 0xce}, + {"Imac", 0x12a}, + {"Integ", 0x222b}, + {"Iota", 0x399}, + {"Kappa", 0x39a}, + {"Koppa", 0x3de}, + {"Lambda", 0x39b}, + {"Lbar", 0x141}, + {"Mu", 0x39c}, + {"Naira", 0x4e}, /* should have bar through */ + {"Nplus", 0x4e}, /* should have plus above */ + {"Ntilde", 0xd1}, + {"Nu", 0x39d}, + {"Oacu", 0xd3}, + {"Obar", 0xd8}, + {"Ocirc", 0xd4}, + {"Oe", 0x152}, + {"Omega", 0x3a9}, + {"Omicron", 0x39f}, + {"Ouml", 0xd6}, + {"Phi", 0x3a6}, + {"Pi", 0x3a0}, + {"Psi", 0x3a8}, + {"Rho", 0x3a1}, + {"Sacu", 0x15a}, + {"Sigma", 0x3a3}, + {"Summ", 0x2211}, + {"Tau", 0x3a4}, + {"Th", 0xde}, + {"Theta", 0x398}, + {"Tse", 0x426}, + {"Uacu", 0xda}, + {"Ucirc", 0xdb}, + {"Upsilon", 0x3a5}, + {"Uuml", 0xdc}, + {"Wyn", 0x1bf}, /* wynn U+01BF */ + {"Xi", 0x39e}, + {"Ygh", 0x1b7}, /* Yogh U+01B7 */ + {"Zeta", 0x396}, + {"Zh", 0x1b7}, /* looks like Yogh. 
Cf "Sake" */ + {"a", 0x61}, /* ante */ + {"aacu", 0xe1}, + {"aang", 0xe5}, + {"aasper", MAAS}, + {"abreve", 0x103}, + {"acirc", 0xe2}, + {"acu", LACU}, + {"ae", 0xe6}, + {"agrave", 0xe0}, + {"ahook", 0x105}, + {"alenis", MALN}, + {"alpha", 0x3b1}, + {"amac", 0x101}, + {"amp", 0x26}, + {"and", MAND}, + {"ang", LRNG}, + {"angle", 0x2220}, + {"ankh", 0x2625}, /* ankh U+2625 */ + {"ante", 0x61}, /* before (year) */ + {"aonq", MAOQ}, + {"appreq", 0x2243}, + {"aquar", 0x2652}, + {"arDadfull", 0x636}, /* Dad U+0636 */ + {"arHa", 0x62d}, /* haa U+062D */ + {"arTa", 0x62a}, /* taa U+062A */ + {"arain", 0x639}, /* ain U+0639 */ + {"arainfull", 0x639}, /* ain U+0639 */ + {"aralif", 0x627}, /* alef U+0627 */ + {"arba", 0x628}, /* baa U+0628 */ + {"arha", 0x647}, /* ha U+0647 */ + {"aries", 0x2648}, + {"arnun", 0x646}, /* noon U+0646 */ + {"arnunfull", 0x646}, /* noon U+0646 */ + {"arpa", 0x647}, /* ha U+0647 */ + {"arqoph", 0x642}, /* qaf U+0642 */ + {"arshinfull", 0x634}, /* sheen U+0634 */ + {"arta", 0x62a}, /* taa U+062A */ + {"artafull", 0x62a}, /* taa U+062A */ + {"artha", 0x62b}, /* thaa U+062B */ + {"arwaw", 0x648}, /* waw U+0648 */ + {"arya", 0x64a}, /* ya U+064A */ + {"aryafull", 0x64a}, /* ya U+064A */ + {"arzero", 0x660}, /* indic zero U+0660 */ + {"asg", 0x292}, /* unicycle character. 
Cf "hallow" */ + {"asper", LASP}, + {"assert", 0x22a2}, + {"astm", 0x2042}, /* asterism: should be upside down */ + {"at", 0x40}, + {"atilde", 0xe3}, + {"auml", 0xe4}, + {"ayin", 0x639}, /* arabic ain U+0639 */ + {"b1", 0x2d}, /* single bond */ + {"b2", 0x3d}, /* double bond */ + {"b3", 0x2261}, /* triple bond */ + {"bbar", 0x180}, /* b with bar U+0180 */ + {"beta", 0x3b2}, + {"bigobl", 0x2f}, + {"blC", 0x43}, /* should be black letter */ + {"blJ", 0x4a}, /* should be black letter */ + {"blU", 0x55}, /* should be black letter */ + {"blb", 0x62}, /* should be black letter */ + {"blozenge", 0x25ca}, /* U+25CA; should be black */ + {"bly", 0x79}, /* should be black letter */ + {"bra", MBRA}, + {"brbl", LBRB}, + {"breve", LBRV}, + {"bslash", L'\\'}, + {"bsquare", 0x25a0}, /* black square U+25A0 */ + {"btril", 0x25c0}, /* U+25C0 */ + {"btrir", 0x25b6}, /* U+25B6 */ + {"c", 0x63}, /* circa */ + {"cab", 0x232a}, + {"cacu", 0x107}, + {"canc", 0x264b}, + {"capr", 0x2651}, + {"caret", 0x5e}, + {"cb", 0x7d}, + {"cbigb", 0x7d}, + {"cbigpren", 0x29}, + {"cbigsb", 0x5d}, + {"cced", 0xe7}, + {"cdil", LCED}, + {"cdsb", 0x301b}, /* ]] U+301b */ + {"cent", 0xa2}, + {"chacek", 0x10d}, + {"chi", 0x3c7}, + {"circ", LRNG}, + {"circa", 0x63}, /* about (year) */ + {"circbl", 0x325}, /* ring below accent U+0325 */ + {"circle", 0x25cb}, /* U+25CB */ + {"circledot", 0x2299}, + {"click", 0x296}, + {"club", 0x2663}, + {"comtime", 0x43}, + {"conj", 0x260c}, + {"cprt", 0xa9}, + {"cq", '\''}, + {"cqq", 0x201d}, + {"cross", 0x2720}, /* maltese cross U+2720 */ + {"crotchet", 0x2669}, + {"csb", 0x5d}, + {"ctilde", 0x63}, /* +tilde */ + {"ctlig", MLCT}, + {"cyra", 0x430}, + {"cyre", 0x435}, + {"cyrhard", 0x44a}, + {"cyrjat", 0x463}, + {"cyrm", 0x43c}, + {"cyrn", 0x43d}, + {"cyrr", 0x440}, + {"cyrsoft", 0x44c}, + {"cyrt", 0x442}, + {"cyry", 0x44b}, + {"dag", 0x2020}, + {"dbar", 0x111}, + {"dblar", 0x21cb}, + {"dblgt", 0x226b}, + {"dbllt", 0x226a}, + {"dced", 0x64}, /* +cedilla */ + {"dd", MDD}, + 
{"ddag", 0x2021}, + {"ddd", MDDD}, + {"decr", 0x2193}, + {"deg", 0xb0}, + {"dele", 0x64}, /* should be dele */ + {"delta", 0x3b4}, + {"descnode", 0x260b}, /* descending node U+260B */ + {"diamond", 0x2662}, + {"digamma", 0x3dd}, + {"div", 0xf7}, + {"dlessi", 0x131}, + {"dlessj1", 0x6a}, /* should be dotless */ + {"dlessj2", 0x6a}, /* should be dotless */ + {"dlessj3", 0x6a}, /* should be dotless */ + {"dollar", 0x24}, + {"dotab", LDOT}, + {"dotbl", LDTB}, + {"drachm", 0x292}, + {"dubh", 0x2d}, + {"eacu", 0xe9}, + {"earth", 0x2641}, + {"easper", MEAS}, + {"ebreve", 0x115}, + {"ecirc", 0xea}, + {"edh", 0xf0}, + {"egrave", 0xe8}, + {"ehacek", 0x11b}, + {"ehook", 0x119}, + {"elem", 0x220a}, + {"elenis", MELN}, + {"em", 0x2014}, + {"emac", 0x113}, + {"emem", MEMM}, + {"en", 0x2013}, + {"epsilon", 0x3b5}, + {"equil", 0x21cb}, + {"ergo", 0x2234}, + {"es", MES}, + {"eszett", 0xdf}, + {"eta", 0x3b7}, + {"eth", 0xf0}, + {"euml", 0xeb}, + {"expon", 0x2191}, + {"fact", 0x21}, + {"fata", 0x251}, + {"fatpara", 0xb6}, /* should have fatter, filled in bowl */ + {"female", 0x2640}, + {"ffilig", MLFFI}, + {"fflig", MLFF}, + {"ffllig", MLFFL}, + {"filig", MLFI}, + {"flat", 0x266d}, + {"fllig", MLFL}, + {"frE", 0x45}, /* should be curly */ + {"frL", L'L'}, /* should be curly */ + {"frR", 0x52}, /* should be curly */ + {"frakB", 0x42}, /* should have fraktur style */ + {"frakG", 0x47}, + {"frakH", 0x48}, + {"frakI", 0x49}, + {"frakM", 0x4d}, + {"frakU", 0x55}, + {"frakX", 0x58}, + {"frakY", 0x59}, + {"frakh", 0x68}, + {"frbl", LFRB}, + {"frown", LFRN}, + {"fs", 0x20}, + {"fsigma", 0x3c2}, + {"gAacu", 0xc1}, /* should be Α+acute */ + {"gaacu", 0x3b1}, /* +acute */ + {"gabreve", 0x3b1}, /* +breve */ + {"gafrown", 0x3b1}, /* +frown */ + {"gagrave", 0x3b1}, /* +grave */ + {"gamac", 0x3b1}, /* +macron */ + {"gamma", 0x3b3}, + {"gauml", 0x3b1}, /* +umlaut */ + {"ge", 0x2267}, + {"geacu", 0x3b5}, /* +acute */ + {"gegrave", 0x3b5}, /* +grave */ + {"ghacu", 0x3b7}, /* +acute */ + {"ghfrown", 
0x3b7}, /* +frown */ + {"ghgrave", 0x3b7}, /* +grave */ + {"ghmac", 0x3b7}, /* +macron */ + {"giacu", 0x3b9}, /* +acute */ + {"gibreve", 0x3b9}, /* +breve */ + {"gifrown", 0x3b9}, /* +frown */ + {"gigrave", 0x3b9}, /* +grave */ + {"gimac", 0x3b9}, /* +macron */ + {"giuml", 0x3b9}, /* +umlaut */ + {"glagjat", 0x467}, + {"glots", 0x2c0}, + {"goacu", 0x3bf}, /* +acute */ + {"gobreve", 0x3bf}, /* +breve */ + {"grave", LGRV}, + {"gt", 0x3e}, + {"guacu", 0x3c5}, /* +acute */ + {"gufrown", 0x3c5}, /* +frown */ + {"gugrave", 0x3c5}, /* +grave */ + {"gumac", 0x3c5}, /* +macron */ + {"guuml", 0x3c5}, /* +umlaut */ + {"gwacu", 0x3c9}, /* +acute */ + {"gwfrown", 0x3c9}, /* +frown */ + {"gwgrave", 0x3c9}, /* +grave */ + {"hacek", LHCK}, + {"halft", 0x2308}, + {"hash", 0x23}, + {"hasper", MHAS}, + {"hatpath", 0x5b2}, /* hataf patah U+05B2 */ + {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */ + {"hatseg", 0x5b1}, /* hataf segol U+05B1 */ + {"hbar", 0x127}, + {"heart", 0x2661}, + {"hebaleph", 0x5d0}, /* aleph U+05D0 */ + {"hebayin", 0x5e2}, /* ayin U+05E2 */ + {"hebbet", 0x5d1}, /* bet U+05D1 */ + {"hebbeth", 0x5d1}, /* bet U+05D1 */ + {"hebcheth", 0x5d7}, /* bet U+05D7 */ + {"hebdaleth", 0x5d3}, /* dalet U+05D3 */ + {"hebgimel", 0x5d2}, /* gimel U+05D2 */ + {"hebhe", 0x5d4}, /* he U+05D4 */ + {"hebkaph", 0x5db}, /* kaf U+05DB */ + {"heblamed", 0x5dc}, /* lamed U+05DC */ + {"hebmem", 0x5de}, /* mem U+05DE */ + {"hebnun", 0x5e0}, /* nun U+05E0 */ + {"hebnunfin", 0x5df}, /* final nun U+05DF */ + {"hebpe", 0x5e4}, /* pe U+05E4 */ + {"hebpedag", 0x5e3}, /* final pe? U+05E3 */ + {"hebqoph", 0x5e7}, /* qof U+05E7 */ + {"hebresh", 0x5e8}, /* resh U+05E8 */ + {"hebshin", 0x5e9}, /* shin U+05E9 */ + {"hebtav", 0x5ea}, /* tav U+05EA */ + {"hebtsade", 0x5e6}, /* tsadi U+05E6 */ + {"hebwaw", 0x5d5}, /* vav? U+05D5 */ + {"hebyod", 0x5d9}, /* yod U+05D9 */ + {"hebzayin", 0x5d6}, /* zayin U+05D6 */ + {"hgz", 0x292}, /* ??? 
Cf "alet" */
	/*
	 * spectab is searched by key, so entries must stay in strict
	 * strcmp() order ("Assoc tables must be sorted on first field").
	 * "ifflig" sorts before "ifilig"; the two were previously swapped.
	 */
	{"hireq", 0x5b4},	/* U+05B4 */
	{"hlenis", MHLN},
	{"hook", LOGO},
	{"horizE", 0x45},	/* should be on side */
	{"horizP", 0x50},	/* should be on side */
	{"horizS", 0x223d},
	{"horizT", 0x22a3},
	{"horizb", 0x7b},	/* should be underbrace */
	{"ia", 0x3b1},
	{"iacu", 0xed},
	{"iasper", MIAS},
	{"ib", 0x3b2},
	{"ibar", 0x268},
	{"ibreve", 0x12d},
	{"icirc", 0xee},
	{"id", 0x3b4},
	{"ident", 0x2261},
	{"ie", 0x3b5},
	{"ifflig", MLFF},
	{"ifilig", MLFI},
	{"ig", 0x3b3},
	{"igrave", 0xec},
	{"ih", 0x3b7},
	{"ii", 0x3b9},
	{"ik", 0x3ba},
	{"ilenis", MILN},
	{"imac", 0x12b},
	{"implies", 0x21d2},
	{"index", 0x261e},
	{"infin", 0x221e},
	{"integ", 0x222b},
	{"intsec", 0x2229},
	{"invpri", 0x2cf},
	{"iota", 0x3b9},
	{"iq", 0x3c8},
	{"istlig", MLST},
	{"isub", 0x3f5},	/* iota below accent */
	{"iuml", 0xef},
	{"iz", 0x3b6},
	{"jup", 0x2643},
	{"kappa", 0x3ba},
	{"koppa", 0x3df},
	{"lambda", 0x3bb},
	{"lar", 0x2190},
	{"lbar", 0x142},
	{"le", 0x2266},
	{"lenis", LLEN},
	{"leo", 0x264c},
	{"lhalfbr", 0x2308},
	{"lhshoe", 0x2283},
	{"libra", 0x264e},
	{"llswing", MLLS},
	{"lm", 0x2d0},
	{"logicand", 0x2227},
	{"logicor", 0x2228},
	{"longs", 0x283},
	{"lrar", 0x2194},
	{"lt", 0x3c},
	{"ltappr", 0x227e},
	{"ltflat", 0x2220},
	{"lumlbl", 0x6c},	/* +umlaut below */
	{"mac", LMAC},
	{"male", 0x2642},
	{"mc", 0x63},	/* should be raised */
	{"merc", 0x263f},	/* mercury U+263F */
	{"min", 0x2212},
	{"moonfq", 0x263d},	/* first quarter moon U+263D */
	{"moonlq", 0x263e},	/* last quarter moon U+263E */
	{"msylab", 0x6d},	/* +sylab (ˌ) */
	{"mu", 0x3bc},
	{"nacu", 0x144},
	{"natural", 0x266e},
	{"neq", 0x2260},
	{"nfacu", 0x2032},
	{"nfasper", 0x2bd},
	{"nfbreve", 0x2d8},
	{"nfced", 0xb8},
	{"nfcirc", 0x2c6},
	{"nffrown", 0x2322},
	{"nfgra", 0x2cb},
	{"nfhacek", 0x2c7},
	{"nfmac", 0xaf},
	{"nftilde", 0x2dc},
	{"nfuml", 0xa8},
	{"ng", 0x14b},
	{"not", 0xac},
	{"notelem", 0x2209},
{"ntilde", 0xf1}, + {"nu", 0x3bd}, + {"oab", 0x2329}, + {"oacu", 0xf3}, + {"oasper", MOAS}, + {"ob", 0x7b}, + {"obar", 0xf8}, + {"obigb", 0x7b}, /* should be big */ + {"obigpren", 0x28}, + {"obigsb", 0x5b}, /* should be big */ + {"obreve", 0x14f}, + {"ocirc", 0xf4}, + {"odsb", 0x301a}, /* [[ U+301A */ + {"oe", 0x153}, + {"oeamp", 0x26}, + {"ograve", 0xf2}, + {"ohook", 0x6f}, /* +hook */ + {"olenis", MOLN}, + {"omac", 0x14d}, + {"omega", 0x3c9}, + {"omicron", 0x3bf}, + {"ope", 0x25b}, + {"opp", 0x260d}, + {"oq", 0x60}, + {"oqq", 0x201c}, + {"or", MOR}, + {"osb", 0x5b}, + {"otilde", 0xf5}, + {"ouml", 0xf6}, + {"ounce", 0x2125}, /* ounce U+2125 */ + {"ovparen", 0x2322}, /* should be sideways ( */ + {"p", 0x2032}, + {"pa", 0x2202}, + {"page", 0x50}, + {"pall", 0x28e}, + {"paln", 0x272}, + {"par", PAR}, + {"para", 0xb6}, + {"pbar", 0x70}, /* +bar */ + {"per", 0x2118}, /* per U+2118 */ + {"phi", 0x3c6}, + {"phi2", 0x3d5}, + {"pi", 0x3c0}, + {"pisces", 0x2653}, + {"planck", 0x127}, + {"plantinJ", 0x4a}, /* should be script */ + {"pm", 0xb1}, + {"pmil", 0x2030}, + {"pp", 0x2033}, + {"ppp", 0x2034}, + {"prop", 0x221d}, + {"psi", 0x3c8}, + {"pstlg", 0xa3}, + {"q", 0x3f}, /* should be raised */ + {"qamets", 0x5b3}, /* U+05B3 */ + {"quaver", 0x266a}, + {"rar", 0x2192}, + {"rasper", MRAS}, + {"rdot", 0xb7}, + {"recipe", 0x211e}, /* U+211E */ + {"reg", 0xae}, + {"revC", 0x186}, /* open O U+0186 */ + {"reva", 0x252}, + {"revc", 0x254}, + {"revope", 0x25c}, + {"revr", 0x279}, + {"revsc", 0x2d2}, /* upside-down semicolon */ + {"revv", 0x28c}, + {"rfa", 0x6f}, /* +hook (Cf "goal") */ + {"rhacek", 0x159}, + {"rhalfbr", 0x2309}, + {"rho", 0x3c1}, + {"rhshoe", 0x2282}, + {"rlenis", MRLN}, + {"rsylab", 0x72}, /* +sylab */ + {"runash", 0x46}, /* should be runic 'ash' */ + {"rvow", 0x2d4}, + {"sacu", 0x15b}, + {"sagit", 0x2650}, + {"sampi", 0x3e1}, + {"saturn", 0x2644}, + {"sced", 0x15f}, + {"schwa", 0x259}, + {"scorpio", 0x264f}, + {"scrA", 0x41}, /* should be script */ + {"scrC", 0x43}, 
+ {"scrE", 0x45}, + {"scrF", 0x46}, + {"scrI", 0x49}, + {"scrJ", 0x4a}, + {"scrL", L'L'}, + {"scrO", 0x4f}, + {"scrP", 0x50}, + {"scrQ", 0x51}, + {"scrS", 0x53}, + {"scrT", 0x54}, + {"scrb", 0x62}, + {"scrd", 0x64}, + {"scrh", 0x68}, + {"scrl", 0x6c}, + {"scruple", 0x2108}, /* U+2108 */ + {"sdd", 0x2d0}, + {"sect", 0xa7}, + {"semE", 0x2203}, + {"sh", 0x283}, + {"shacek", 0x161}, + {"sharp", 0x266f}, + {"sheva", 0x5b0}, /* U+05B0 */ + {"shti", 0x26a}, + {"shtsyll", 0x222a}, + {"shtu", 0x28a}, + {"sidetri", 0x22b2}, + {"sigma", 0x3c3}, + {"since", 0x2235}, + {"slge", 0x2265}, /* should have slanted line under */ + {"slle", 0x2264}, /* should have slanted line under */ + {"sm", 0x2c8}, + {"smm", 0x2cc}, + {"spade", 0x2660}, + {"sqrt", 0x221a}, + {"square", 0x25a1}, /* U+25A1 */ + {"ssChi", 0x3a7}, /* should be sans serif */ + {"ssIota", 0x399}, + {"ssOmicron", 0x39f}, + {"ssPi", 0x3a0}, + {"ssRho", 0x3a1}, + {"ssSigma", 0x3a3}, + {"ssTau", 0x3a4}, + {"star", 0x2a}, + {"stlig", MLST}, + {"sup2", 0x2072}, + {"supgt", 0x2c3}, + {"suplt", 0x2c2}, + {"sur", 0x2b3}, + {"swing", 0x223c}, + {"tau", 0x3c4}, + {"taur", 0x2649}, + {"th", 0xfe}, + {"thbar", 0xfe}, /* +bar */ + {"theta", 0x3b8}, + {"thinqm", 0x3f}, /* should be thinner */ + {"tilde", LTIL}, + {"times", 0xd7}, + {"tri", 0x2206}, + {"trli", 0x2016}, + {"ts", 0x2009}, + {"uacu", 0xfa}, + {"uasper", MUAS}, + {"ubar", 0x75}, /* +bar */ + {"ubreve", 0x16d}, + {"ucirc", 0xfb}, + {"udA", 0x2200}, + {"udT", 0x22a5}, + {"uda", 0x250}, + {"udh", 0x265}, + {"udqm", 0xbf}, + {"udpsi", 0x22d4}, + {"udtr", 0x2207}, + {"ugrave", 0xf9}, + {"ulenis", MULN}, + {"umac", 0x16b}, + {"uml", LUML}, + {"undl", 0x2cd}, /* underline accent */ + {"union", 0x222a}, + {"upsilon", 0x3c5}, + {"uuml", 0xfc}, + {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */ + {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */ + {"vb", 0x7c}, + {"vddd", 0x22ee}, + {"versicle2", 0x2123}, /* U+2123 */ + {"vinc", 0xaf}, + {"virgo", 0x264d}, + {"vpal", 0x25f}, + {"vvf", 
0x263}, + {"wasper", MWAS}, + {"wavyeq", 0x2248}, + {"wlenis", MWLN}, + {"wyn", 0x1bf}, /* wynn U+01BF */ + {"xi", 0x3be}, + {"yacu", 0xfd}, + {"ycirc", 0x177}, + {"ygh", 0x292}, + {"ymac", 0x79}, /* +macron */ + {"yuml", 0xff}, + {"zced", 0x7a}, /* +cedilla */ + {"zeta", 0x3b6}, + {"zh", 0x292}, + {"zhacek", 0x17e}, +}; +/* + The following special characters don't have close enough + equivalents in Unicode, so aren't in the above table. + 22n 2^(2^n) Cf Fermat + 2on4 2/4 + 3on8 3/8 + Bantuo Bantu O. Cf Otshi-herero + Car C with circular arrow on top + albrtime cut-time: C with vertical line + ardal Cf dental + bantuo Bantu o. Cf Otshi-herero + bbc1 single chem bond below + bbc2 double chem bond below + bbl1 chem bond like / + bbl2 chem bond like // + bbr1 chem bond like \ + bbr2 chem bond \\ + bcop1 copper symbol. Cf copper + bcop2 copper symbol. Cf copper + benchm Cf benchmark + btc1 single chem bond above + btc2 double chem bond above + btl1 chem bond like \ + btl2 chem bond like \\ + btr1 chem bond like / + btr2 chem bond line // + burman Cf Burman + devph sanskrit letter. Cf ph + devrfls sanskrit letter. Cf cerebral + duplong[12] musical note + egchi early form of chi + eggamma[12] early form of gamma + egiota early form of iota + egkappa early form of kappa + eglambda early form of lambda + egmu[12] early form of mu + egnu[12] early form of nu + egpi[123] early form of pi + egrho[12] early form of rho + egsampi early form of sampi + egsan early form of san + egsigma[12] early form of sigma + egxi[123] early form of xi + elatS early form of S + elatc[12] early form of C + elatg[12] early form of G + glagjeri Slavonic Glagolitic jeri + glagjeru Slavonic Glagolitic jeru + hypolem hypolemisk (line with underdot) + lhrbr lower half } + longmord long mordent + mbwvow backwards scretched C. Cf retract. + mord music symbol. Cf mordent + mostra Cf direct + ohgcirc old form of circumflex + oldbeta old form of β. Cf perturbate + oldsemibr[12] old forms of semibreve. 
Cf prolation + ormg old form of g. Cf G + para[12345] form of ¶ + pauseo musical pause sign + pauseu musical pause sign + pharyng Cf pharyngal + ragr Black letter ragged r + repetn musical repeat. Cf retort + segno musical segno sign + semain[12] semitic ain + semhe semitic he + semheth semitic heth + semkaph semitic kaph + semlamed[12] semitic lamed + semmem semitic mem + semnum semitic nun + sempe semitic pe + semqoph[123] semitic qoph + semresh semitic resh + semtav[1234] semitic tav + semyod semitic yod + semzayin[123] semitic zayin + shtlong[12] U with underbar. Cf glyconic + sigmatau σ,τ combination + squaver sixteenth note + sqbreve square musical breve note + swast swastika + uhrbr upper half of big } + versicle1 Cf versicle + */ + + +static Rune normtab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, +/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, +/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f, +/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, +/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; +static Rune phtab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 
0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'', + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, +/*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37, + 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f, +/*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47, + 0x48, 0x26a, 0x4a, 0x4b, L'L', 0x4d, 0x14b, 0x254, +/*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57, + 0x58, 0x59, 0x292, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, +/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; +static Rune grtab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, +/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f, +/*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, + 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, +/*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9, + 0x3a7, 0x3a8, 0x396, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3, + 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, +/*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9, + 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; +static Rune subtab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', + 0x208d, 0x208e, 0x2a, 0x208a, 
0x2c, 0x208b, 0x2e, 0x2f, +/*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087, + 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f, +/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f, +/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, +/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; +static Rune suptab[128] = { + /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/ +/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, + NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE, +/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'', + 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f, +/*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, + 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f, +/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f, +/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f, +/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, +/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE, +}; + +static int tagstarts; +static char tag[Buflen]; +static int naux; +static char auxname[Maxaux][Buflen]; +static char auxval[Maxaux][Buflen]; +static char spec[Buflen]; +static uchar *auxstate[Naux]; /* vals for most recent tag */ +static Entry curentry; +#define cursize (curentry.end-curentry.start) + +static char *getspec(char *, char *); +static char *gettag(char *, char *); +static void dostatus(void); + +/* + * cmd is one of: + * 'p': normal 
print + * 'h': just print headwords + * 'P': print raw + */ +void +oedprintentry(Entry e, int cmd) +{ + char *p, *pe; + int t, a, i; + long r, rprev, rlig; + Rune *transtab; + + p = e.start; + pe = e.end; + transtab = normtab; + rprev = NONE; + changett(0, 0, 0); + curentry = e; + if(cmd == 'h') + outinhibit = 1; + while(p < pe) { + if(cmd == 'r') { + outchar(*p++); + continue; + } + r = transtab[(*p++)&0x7F]; + if(r < NONE) { + /* Emit the rune, but buffer in case of ligature */ + if(rprev != NONE) + outrune(rprev); + rprev = r; + } else if(r == SPCS) { + /* Start of special character name */ + p = getspec(p, pe); + r = lookassoc(spectab, asize(spectab), spec); + if(r == -1) { + if(debug) + err("spec %ld %d %s", + e.doff, cursize, spec); + r = 0xfffd; + } + if(r >= LIGS && r < LIGE) { + /* handle possible ligature */ + rlig = liglookup(r, rprev); + if(rlig != NONE) + rprev = rlig; /* overwrite rprev */ + else { + /* could print accent, but let's not */ + if(rprev != NONE) outrune(rprev); + rprev = NONE; + } + } else if(r >= MULTI && r < MULTIE) { + if(rprev != NONE) { + outrune(rprev); + rprev = NONE; + } + outrunes(multitab[r-MULTI]); + } else if(r == PAR) { + if(rprev != NONE) { + outrune(rprev); + rprev = NONE; + } + outnl(1); + } else { + if(rprev != NONE) outrune(rprev); + rprev = r; + } + } else if(r == TAGS) { + /* Start of tag name */ + if(rprev != NONE) { + outrune(rprev); + rprev = NONE; + } + p = gettag(p, pe); + t = lookassoc(tagtab, asize(tagtab), tag); + if(t == -1) { + if(debug) + err("tag %ld %d %s", + e.doff, cursize, tag); + continue; + } + for(i = 0; i < Naux; i++) + auxstate[i] = 0; + for(i = 0; i < naux; i++) { + a = lookassoc(auxtab, asize(auxtab), auxname[i]); + if(a == -1) { + if(debug) + err("aux %ld %d %s", + e.doff, cursize, auxname[i]); + } else + auxstate[a] = auxval[i]; + } + switch(t){ + case E: + case Ve: + outnl(0); + if(tagstarts) + dostatus(); + break; + case Ed: + case Etym: + outchar(tagstarts? 
'[' : ']'); + break; + case Pr: + outchar(tagstarts? '(' : ')'); + break; + case In: + transtab = changett(transtab, subtab, tagstarts); + break; + case Hm: + case Su: + case Fq: + transtab = changett(transtab, suptab, tagstarts); + break; + case Gk: + transtab = changett(transtab, grtab, tagstarts); + break; + case Ph: + transtab = changett(transtab, phtab, tagstarts); + break; + case Hw: + if(cmd == 'h') { + if(!tagstarts) + outchar(' '); + outinhibit = !tagstarts; + } + break; + case S0: + case S1: + case S2: + case S3: + case S4: + case S5: + case S6: + case S7a: + case S7n: + case Sn: + case Sgk: + if(tagstarts) { + outnl(2); + dostatus(); + if(auxstate[Num]) { + if(t == S3 || t == S5) { + i = atoi(auxstate[Num]); + while(i--) + outchar('*'); + outchars(" "); + } else if(t == S7a || t == S7n || t == Sn) { + outchar('('); + outchars(auxstate[Num]); + outchars(") "); + } else if(t == Sgk) { + i = grtab[auxstate[Num][0]]; + if(i != NONE) + outrune(i); + outchars(". "); + } else { + outchars(auxstate[Num]); + outchars(". "); + } + } + } + break; + case Cb: + case Db: + case Qp: + case P: + if(tagstarts) + outnl(1); + break; + case Table: + /* + * Todo: gather columns, justify them, etc. + * For now, just let colums come out as rows + */ + if(!tagstarts) + outnl(0); + break; + case Col: + if(tagstarts) + outnl(0); + break; + case Dn: + if(tagstarts) + outchar('/'); + break; + } + } + } + if(cmd == 'h') { + outinhibit = 0; + outnl(0); + } +} + +/* + * Return offset into bdict where next oed entry after fromoff starts. 
+ * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...> + */ +long +oednextoff(long fromoff) +{ + long a, n; + int c; + + a = Bseek(bdict, fromoff, 0); + if(a < 0) + return -1; + n = 0; + for(;;) { + c = Bgetc(bdict); + if(c < 0) + break; + if(c == '<') { + c = Bgetc(bdict); + if(c == 'e') { + c = Bgetc(bdict); + if(c == '>' || c == ' ') + n = 3; + } else if(c == 'v' && Bgetc(bdict) == 'e') { + c = Bgetc(bdict); + if(c == '>' || c == ' ') + n = 4; + } + if(n) + break; + } + } + return (Boffset(bdict)-n); +} + +static char *prkey = +"KEY TO THE PRONUNCIATION\n" +"\n" +"I. CONSONANTS\n" +"b, d, f, k, l, m, n, p, t, v, z: usual English values\n" +"\n" +"g as in go (gəʊ)\n" +"h ... ho! (həʊ)\n" +"r ... run (rʌn), terrier (ˈtɛriə(r))\n" +"(r)... her (hɜː(r))\n" +"s ... see (siː), success (səkˈsɜs)\n" +"w ... wear (wɛə(r))\n" +"hw ... when (hwɛn)\n" +"j ... yes (jɛs)\n" +"θ ... thin (θin), bath (bɑːθ)\n" +"ð ... then (ðɛn), bathe (beɪð)\n" +"ʃ ... shop (ʃɒp), dish (dɪʃ)\n" +"tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n" +"ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n" +"dʒ ... judge (dʒʌdʒ)\n" +"ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n" +"ŋg ... finger (ˈfiŋgə(r))\n" +"\n" +"Foreign\n" +"ʎ as in It. seraglio (serˈraʎo)\n" +"ɲ ... Fr. cognac (kɔɲak)\n" +"x ... Ger. ach (ax), Sc. loch (lɒx)\n" +"ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n" +"ɣ ... North Ger. sagen (ˈzaːɣən)\n" +"c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n" +"ɥ ... Fr. cuisine (kɥizin)\n" +"\n" +"II. VOWELS AND DIPTHONGS\n" +"\n" +"Short\n" +"ɪ as in pit (pɪt), -ness (-nɪs)\n" +"ɛ ... pet (pɛt), Fr. sept (sɛt)\n" +"æ ... pat (pæt)\n" +"ʌ ... putt (pʌt)\n" +"ɒ ... pot (pɒt)\n" +"ʊ ... put (pʊt)\n" +"ə ... another (əˈnʌðə(r))\n" +"(ə)... beaten (ˈbiːt(ə)n)\n" +"i ... Fr. si (si)\n" +"e ... Fr. bébé (bebe)\n" +"a ... Fr. mari (mari)\n" +"ɑ ... Fr. bâtiment (bɑtimã)\n" +"ɔ ... Fr. homme (ɔm)\n" +"o ... Fr. eau (o)\n" +"ø ... Fr. peu (pø)\n" +"œ ... Fr. boeuf (bœf), coeur (kœr)\n" +"u ... Fr. douce (dus)\n" +"ʏ ... 
Ger. Müller (ˈmʏlər)\n" +"y ... Fr. du (dy)\n" +"\n" +"Long\n" +"iː as in bean (biːn)\n" +"ɑː ... barn (bɑːn)\n" +"ɔː ... born (bɔːn)\n" +"uː ... boon (buːn)\n" +"ɜː ... burn (bɜːn)\n" +"eː ... Ger. Schnee (ʃneː)\n" +"ɛː ... Ger. Fähre (ˈfɛːrə)\n" +"aː ... Ger. Tag (taːk)\n" +"oː ... Ger. Sohn (zoːn)\n" +"øː ... Ger. Goethe (gøːtə)\n" +"yː ... Ger. grün (gryːn)\n" +"\n" +"Nasal\n" +"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n" +"ã ... Fr. franc (frã)\n" +"ɔ˜ ... Fr. bon (bɔ˜n)\n" +"œ˜ ... Fr. un (œ˜)\n" +"\n" +"Dipthongs, etc.\n" +"eɪ as in bay (beɪ)\n" +"aɪ ... buy (baɪ)\n" +"ɔɪ ... boy (bɔɪ)\n" +"əʊ ... no (nəʊ)\n" +"aʊ ... now (naʊ)\n" +"ɪə ... peer (pɪə(r))\n" +"ɛə ... pair (pɛə(r))\n" +"ʊə ... tour (tʊə(r))\n" +"ɔə ... boar (bɔə(r))\n" +"\n" +"III. STRESS\n" +"\n" +"Main stress: ˈ preceding stressed syllable\n" +"Secondary stress: ˌ preceding stressed syllable\n" +"\n" +"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"; +/* TODO: find transcriptions of foreign consonents, œ, ʏ, nasals */ + +void +oedprintkey(void) +{ + Bprint(bout, "%s", prkey); +} + +/* + * f points just after a '&', fe points at end of entry. + * Accumulate the special name, starting after the & + * and continuing until the next '.', in spec[]. + * Return pointer to char after '.'. + */ +static char * +getspec(char *f, char *fe) +{ + char *t; + int c, i; + + t = spec; + i = sizeof spec; + while(--i > 0) { + c = *f++; + if(c == '.' || f == fe) + break; + *t++ = c; + } + *t = 0; + return f; +} + +/* + * f points just after '<'; fe points at end of entry. + * Expect next characters from bin to match: + * [/][^ >]+( [^>=]+=[^ >]+)*> + * tag auxname auxval + * Accumulate the tag and its auxilliary information in + * tag[], auxname[][] and auxval[][]. + * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0. + * Set naux to the number of aux pairs found. + * Return pointer to after final '>'. 
+ */ +static char * +gettag(char *f, char *fe) +{ + char *t; + int c, i; + + t = tag; + c = *f++; + if(c == '/') + tagstarts = 0; + else { + tagstarts = 1; + *t++ = c; + } + i = Buflen; + naux = 0; + while(--i > 0) { + c = *f++; + if(c == '>' || f == fe) + break; + if(c == ' ') { + *t = 0; + t = auxname[naux]; + i = Buflen; + if(naux < Maxaux-1) + naux++; + } else if(naux && c == '=') { + *t = 0; + t = auxval[naux-1]; + i = Buflen; + } else + *t++ = c; + } + *t = 0; + return f; +} + +static void +dostatus(void) +{ + char *s; + + s = auxstate[St]; + if(s) { + if(strcmp(s, "obs") == 0) + outrune(0x2020); + else if(strcmp(s, "ali") == 0) + outrune(0x2016); + else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0) + outrune(0xb6); + else if(strcmp(s, "xref") == 0) + {/* nothing */} + else if(debug) + err("status %ld %d %s", curentry.doff, cursize, s); + } +} |