#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/* Sizes of the static parsing buffers (tag[], spec[], auxname[][], auxval[][]) */
enum {
	Buflen=1000,	/* bytes in each name/value buffer */
	Maxaux=5,	/* number of auxname[]/auxval[] slots per tag */
};

/*
 * Possible tags.
 * One identifier per markup tag; tagtab maps the tag's text
 * to these values and oedprintentry switches on them.
 * Ntag is the number of tag identifiers.
 */
enum {
	A,		/* author in quote (small caps) */
	B,		/* bold */
	Ba,		/* author inside bib */
	Bch,		/* builtup chem component */
	Bib,		/* surrounds word 'in' for bibliographic ref */
	Bl,		/* bold */
	Bo,		/* bond over */
	Bu,		/* bond under */
	Cb,		/* ? block of stuff (indent) */
	Cf,		/* cross ref to another entry (italics) */
	Chem,		/* chemistry formula */
	Co,		/* over (preceding sum, integral, etc.) */
	Col,		/* column of table (aux just may be r) */
	Cu,		/* under (preceding sum, integral, etc.) */
	Dat,		/* date */
	Db,		/* def block? indent */
	Dn,		/* denominator of fraction */
	E,		/* main entry */
	Ed,		/* editor's comments (in [...]) */
	Etym,		/* etymology (in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (contains <nu>, then <dn>) */
	Gk,		/* greek (transliteration) */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* headword group */
	Hm,		/* homonym (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italics */
	Il,		/* italic list? */
	In,		/* inferior (subscript) */
	L,		/* row of col of table */
	La,		/* status or usage label (italic) */
	Lc,		/* chapter/verse sort of thing for works */
	N,		/* note (smaller type) */
	Nu,		/* numerator of fraction */
	Ov,		/* needs overline */
	P,		/* paragraph (indent) */
	Ph,		/* pronunciation (transliteration) */
	Pi,		/* pile (frac without line) */
	Pqp,		/* subblock of quote */
	Pr,		/* pronunciation (in (...)) */
	Ps,		/* position (e.g., adv.) (italic) */
	Pt,		/* part (in lc) */
	Q,		/* quote in quote block */
	Qd,		/* quote date (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* status or usage label in quote (italic) */
	Qp,		/* quote block (small type, indent) */
	Qsn,		/* quote number */
	Qt,		/* quote words */
	R,		/* roman type style */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (sometimes surrounds several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital rom num) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small caps */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* sans serif bold */
	Ssi,		/* sans serif italic */
	Su,		/* superior (superscript) */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols=number of columns) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label for variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (starts with Also or Forms) */
	W,		/* work (e.g., Beowulf) (italics) */
	X,		/* cross reference to main word (small caps) */
	Xd,		/* cross reference to quotation by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* cross reference sense (lower number) */
	Xr,		/* list of x's */
	Ntag		/* end of tags */
};

/* Assoc tables must be sorted on first field */

/*
 * Tag name (as it appears after '<' or '</') to tag identifier.
 * Searched with lookassoc() in oedprintentry; keys must stay in
 * strcmp order.  "et"/"etym" both give Etym and "fr"/"frac"
 * both give Fr.
 */
static Assoc tagtab[] = {
	{"a",		A},
	{"b",		B},
	{"ba",		Ba},
	{"bch",		Bch},
	{"bib",		Bib},
	{"bl",		Bl},
	{"bo",		Bo},
	{"bu",		Bu},
	{"cb",		Cb},
	{"cf",		Cf},
	{"chem",	Chem},
	{"co",		Co},
	{"col",		Col},
	{"cu",		Cu},
	{"dat",		Dat},
	{"db",		Db},
	{"dn",		Dn},
	{"e",		E},
	{"ed",		Ed},
	{"et",		Etym},
	{"etym",	Etym},
	{"form",	Form},
	{"fq",		Fq},
	{"fr",		Fr},
	{"frac",	Fr},
	{"gk",		Gk},
	{"gr",		Gr},
	{"hg",		Hg},
	{"hm",		Hm},
	{"hw",		Hw},
	{"i",		I},
	{"il",		Il},
	{"in",		In},
	{"l",		L},
	{"la",		La},
	{"lc",		Lc},
	{"n",		N},
	{"nu",		Nu},
	{"ov",		Ov},
	{"p",		P},
	{"ph",		Ph},
	{"pi",		Pi},
	{"pqp",		Pqp},
	{"pr",		Pr},
	{"ps",		Ps},
	{"pt",		Pt},
	{"q",		Q},
	{"qd",		Qd},
	{"qig",		Qig},
	{"qla",		Qla},
	{"qp",		Qp},
	{"qsn",		Qsn},
	{"qt",		Qt},
	{"r",		R},
	{"rx",		Rx},
	{"s",		S},
	{"s0",		S0},
	{"s1",		S1},
	{"s2",		S2},
	{"s3",		S3},
	{"s4",		S4},
	{"s5",		S5},
	{"s6",		S6},
	{"s7a",		S7a},
	{"s7n",		S7n},
	{"sc",		Sc},
	{"sgk",		Sgk},
	{"sn",		Sn},
	{"ss",		Ss,},
	{"ssb",		Ssb},
	{"ssi",		Ssi},
	{"su",		Su},
	{"sub",		Sub},
	{"table",	Table},
	{"tt",		Tt},
	{"vd",		Vd},
	{"ve",		Ve},
	{"vf",		Vf},
	{"vfl",		Vfl},
	{"w",		W},
	{"x",		X},
	{"xd",		Xd},
	{"xi",		Xi},
	{"xid",		Xid},
	{"xr",		Xr},
	{"xs",		Xs},
};

/*
 * Possible tag auxiliary info (the name=value pairs inside a tag).
 * These values index auxstate[]; Naux is the number of kinds.
 */
enum {
	Cols,		/* number of columns in a table */
	Num,		/* letter or number, for a sense */
	St,		/* status (e.g., obs) */
	Naux
};

/*
 * Auxiliary (attribute) name to auxstate[] index.
 * Searched with lookassoc(); keys must stay in strcmp order.
 */
static Assoc auxtab[] = {
	{"cols",	Cols},
	{"num",		Num},
	{"st",		St}
};

/*
 * Special character name (the text between '&' and '.') to rune.
 * Values may also be markers from dict.h (ligature/accent codes,
 * multi-rune codes, PAR) that oedprintentry expands.
 *
 * Searched with lookassoc(), so the keys MUST be in strcmp
 * (byte value) order: digits, then upper case, then lower case.
 * An entry out of order may never be found.
 */
static Assoc spectab[] = {
	{"3on4",	0xbe},
	{"Aacu",	0xc1},
	{"Aang",	0xc5},
	{"Abarab",	0x100},
	{"Acirc",	0xc2},
	{"Ae",		0xc6},
	{"Agrave",	0xc0},
	{"Alpha",	0x391},
	{"Amac",	0x100},
	{"Asg",		0x1b7},		/* Unicycle. Cf "Sake" */
	{"Auml",	0xc4},
	{"Beta",	0x392},
	{"Cced",	0xc7},
	{"Chacek",	0x10c},
	{"Chi",		0x3a7},
	{"Chirho",	0x2627},		/* Chi Rho U+2627 */
	{"Csigma",	0x3da},
	{"Delta",	0x394},
	{"Eacu",	0xc9},
	{"Ecirc",	0xca},
	{"Edh",		0xd0},
	{"Epsilon",	0x395},
	{"Eta",		0x397},
	{"Gamma",	0x393},
	{"Iacu",	0xcd},
	{"Icirc",	0xce},
	{"Imac",	0x12a},
	{"Integ",	0x222b},
	{"Iota",	0x399},
	{"Kappa",	0x39a},
	{"Koppa",	0x3de},
	{"Lambda",	0x39b},
	{"Lbar",	0x141},
	{"Mu",		0x39c},
	{"Naira",	0x4e},		/* should have bar through */
	{"Nplus",	0x4e},		/* should have plus above */
	{"Ntilde",	0xd1},
	{"Nu",		0x39d},
	{"Oacu",	0xd3},
	{"Obar",	0xd8},
	{"Ocirc",	0xd4},
	{"Oe",		0x152},
	{"Omega",	0x3a9},
	{"Omicron",	0x39f},
	{"Ouml",	0xd6},
	{"Phi",		0x3a6},
	{"Pi",		0x3a0},
	{"Psi",		0x3a8},
	{"Rho",		0x3a1},
	{"Sacu",	0x15a},
	{"Sigma",	0x3a3},
	{"Summ",	0x2211},
	{"Tau",		0x3a4},
	{"Th",		0xde},
	{"Theta",	0x398},
	{"Tse",		0x426},
	{"Uacu",	0xda},
	{"Ucirc",	0xdb},
	{"Upsilon",	0x3a5},
	{"Uuml",	0xdc},
	{"Wyn",		0x1bf},		/* wynn U+01BF */
	{"Xi",		0x39e},
	{"Ygh",		0x1b7},		/* Yogh	U+01B7 */
	{"Zeta",	0x396},
	{"Zh",		0x1b7},		/* looks like Yogh. Cf "Sake" */
	{"a",		0x61},		/* ante */
	{"aacu",	0xe1},
	{"aang",	0xe5},
	{"aasper",	MAAS},
	{"abreve",	0x103},
	{"acirc",	0xe2},
	{"acu",		LACU},
	{"ae",		0xe6},
	{"agrave",	0xe0},
	{"ahook",	0x105},
	{"alenis",	MALN},
	{"alpha",	0x3b1},
	{"amac",	0x101},
	{"amp",		0x26},
	{"and",		MAND},
	{"ang",		LRNG},
	{"angle",	0x2220},
	{"ankh",	0x2625},		/* ankh U+2625 */
	{"ante",	0x61},		/* before (year) */
	{"aonq",	MAOQ},
	{"appreq",	0x2243},
	{"aquar",	0x2652},
	{"arDadfull",	0x636},		/* Dad U+0636 */
	{"arHa",	0x62d},		/* haa U+062D */
	{"arTa",	0x62a},		/* taa U+062A */
	{"arain",	0x639},		/* ain U+0639 */
	{"arainfull",	0x639},		/* ain U+0639 */
	{"aralif",	0x627},		/* alef U+0627 */
	{"arba",	0x628},		/* baa U+0628 */
	{"arha",	0x647},		/* ha U+0647 */
	{"aries",	0x2648},
	{"arnun",	0x646},		/* noon U+0646 */
	{"arnunfull",	0x646},		/* noon U+0646 */
	{"arpa",	0x647},		/* ha U+0647 */
	{"arqoph",	0x642},		/* qaf U+0642 */
	{"arshinfull",	0x634},		/* sheen U+0634 */
	{"arta",	0x62a},		/* taa U+062A */
	{"artafull",	0x62a},		/* taa U+062A */
	{"artha",	0x62b},		/* thaa U+062B */
	{"arwaw",	0x648},		/* waw U+0648 */
	{"arya",	0x64a},		/* ya U+064A */
	{"aryafull",	0x64a},		/* ya U+064A */
	{"arzero",	0x660},		/* indic zero U+0660 */
	{"asg",		0x292},		/* unicycle character. Cf "hallow" */
	{"asper",	LASP},
	{"assert",	0x22a2},
	{"astm",	0x2042},		/* asterism: should be upside down */
	{"at",		0x40},
	{"atilde",	0xe3},
	{"auml",	0xe4},
	{"ayin",	0x639},		/* arabic ain U+0639 */
	{"b1",		0x2d},		/* single bond */
	{"b2",		0x3d},		/* double bond */
	{"b3",		0x2261},		/* triple bond */
	{"bbar",	0x180},		/* b with bar U+0180 */
	{"beta",	0x3b2},
	{"bigobl",	0x2f},
	{"blC",		0x43},		/* should be black letter */
	{"blJ",		0x4a},		/* should be black letter */
	{"blU",		0x55},		/* should be black letter */
	{"blb",		0x62},		/* should be black letter */
	{"blozenge",	0x25ca},		/* U+25CA; should be black */
	{"bly",		0x79},		/* should be black letter */
	{"bra",		MBRA},
	{"brbl",	LBRB},
	{"breve",	LBRV},
	{"bslash",	'\\'},
	{"bsquare",	0x25a0},		/* black square U+25A0 */
	{"btril",	0x25c0},		/* U+25C0 */
	{"btrir",	0x25b6},		/* U+25B6 */
	{"c",		0x63},		/* circa */
	{"cab",		0x232a},
	{"cacu",	0x107},
	{"canc",	0x264b},
	{"capr",	0x2651},
	{"caret",	0x5e},
	{"cb",		0x7d},
	{"cbigb",	0x7d},
	{"cbigpren",	0x29},
	{"cbigsb",	0x5d},
	{"cced",	0xe7},
	{"cdil",	LCED},
	{"cdsb",	0x301b},		/* ]] U+301b */
	{"cent",	0xa2},
	{"chacek",	0x10d},
	{"chi",		0x3c7},
	{"circ",	LRNG},
	{"circa",	0x63},		/* about (year) */
	{"circbl",	0x325},		/* ring below accent U+0325 */
	{"circle",	0x25cb},		/* U+25CB */
	{"circledot",	0x2299},
	{"click",	0x296},
	{"club",	0x2663},
	{"comtime",	0x43},
	{"conj",	0x260c},
	{"cprt",	0xa9},
	{"cq",		'\''},
	{"cqq",		0x201d},
	{"cross",	0x2720},		/* maltese cross U+2720 */
	{"crotchet",	0x2669},
	{"csb",		0x5d},
	{"ctilde",	0x63},		/* +tilde */
	{"ctlig",	MLCT},
	{"cyra",	0x430},
	{"cyre",	0x435},
	{"cyrhard",	0x44a},
	{"cyrjat",	0x463},
	{"cyrm",	0x43c},
	{"cyrn",	0x43d},
	{"cyrr",	0x440},
	{"cyrsoft",	0x44c},
	{"cyrt",	0x442},
	{"cyry",	0x44b},
	{"dag",		0x2020},
	{"dbar",	0x111},
	{"dblar",	0x21cb},
	{"dblgt",	0x226b},
	{"dbllt",	0x226a},
	{"dced",	0x64},		/* +cedilla */
	{"dd",		MDD},
	{"ddag",	0x2021},
	{"ddd",		MDDD},
	{"decr",	0x2193},
	{"deg",		0xb0},
	{"dele",	0x64},		/* should be dele */
	{"delta",	0x3b4},
	{"descnode",	0x260b},		/* descending node U+260B */
	{"diamond",	0x2662},
	{"digamma",	0x3dd},
	{"div",		0xf7},
	{"dlessi",	0x131},
	{"dlessj1",	0x6a},		/* should be dotless */
	{"dlessj2",	0x6a},		/* should be dotless */
	{"dlessj3",	0x6a},		/* should be dotless */
	{"dollar",	0x24},
	{"dotab",	LDOT},
	{"dotbl",	LDTB},
	{"drachm",	0x292},
	{"dubh",	0x2d},
	{"eacu",	0xe9},
	{"earth",	0x2641},
	{"easper",	MEAS},
	{"ebreve",	0x115},
	{"ecirc",	0xea},
	{"edh",		0xf0},
	{"egrave",	0xe8},
	{"ehacek",	0x11b},
	{"ehook",	0x119},
	{"elem",	0x220a},
	{"elenis",	MELN},
	{"em",		0x2014},
	{"emac",	0x113},
	{"emem",	MEMM},
	{"en",		0x2013},
	{"epsilon",	0x3b5},
	{"equil",	0x21cb},
	{"ergo",	0x2234},
	{"es",		MES},
	{"eszett",	0xdf},
	{"eta",		0x3b7},
	{"eth",		0xf0},
	{"euml",	0xeb},
	{"expon",	0x2191},
	{"fact",	0x21},
	{"fata",	0x251},
	{"fatpara",	0xb6},		/* should have fatter, filled in bowl */
	{"female",	0x2640},
	{"ffilig",	MLFFI},
	{"fflig",	MLFF},
	{"ffllig",	MLFFL},
	{"filig",	MLFI},
	{"flat",	0x266d},
	{"fllig",	MLFL},
	{"frE",		0x45},		/* should be curly */
	{"frL",		'L'},		/* should be curly */
	{"frR",		0x52},		/* should be curly */
	{"frakB",	0x42},		/* should have fraktur style */
	{"frakG",	0x47},
	{"frakH",	0x48},
	{"frakI",	0x49},
	{"frakM",	0x4d},
	{"frakU",	0x55},
	{"frakX",	0x58},
	{"frakY",	0x59},
	{"frakh",	0x68},
	{"frbl",	LFRB},
	{"frown",	LFRN},
	{"fs",		0x20},
	{"fsigma",	0x3c2},
	{"gAacu",	0xc1},		/* should be Α+acute */
	{"gaacu",	0x3b1},		/* +acute */
	{"gabreve",	0x3b1},		/* +breve */
	{"gafrown",	0x3b1},		/* +frown */
	{"gagrave",	0x3b1},		/* +grave */
	{"gamac",	0x3b1},		/* +macron */
	{"gamma",	0x3b3},
	{"gauml",	0x3b1},		/* +umlaut */
	{"ge",		0x2267},
	{"geacu",	0x3b5},		/* +acute */
	{"gegrave",	0x3b5},		/* +grave */
	{"ghacu",	0x3b7},		/* +acute */
	{"ghfrown",	0x3b7},		/* +frown */
	{"ghgrave",	0x3b7},		/* +grave */
	{"ghmac",	0x3b7},		/* +macron */
	{"giacu",	0x3b9},		/* +acute */
	{"gibreve",	0x3b9},		/* +breve */
	{"gifrown",	0x3b9},		/* +frown */
	{"gigrave",	0x3b9},		/* +grave */
	{"gimac",	0x3b9},		/* +macron */
	{"giuml",	0x3b9},		/* +umlaut */
	{"glagjat",	0x467},
	{"glots",	0x2c0},
	{"goacu",	0x3bf},		/* +acute */
	{"gobreve",	0x3bf},		/* +breve */
	{"grave",	LGRV},
	{"gt",		0x3e},
	{"guacu",	0x3c5},		/* +acute */
	{"gufrown",	0x3c5},		/* +frown */
	{"gugrave",	0x3c5},		/* +grave */
	{"gumac",	0x3c5},		/* +macron */
	{"guuml",	0x3c5},		/* +umlaut */
	{"gwacu",	0x3c9},		/* +acute */
	{"gwfrown",	0x3c9},		/* +frown */
	{"gwgrave",	0x3c9},		/* +grave */
	{"hacek",	LHCK},
	{"halft",	0x2308},
	{"hash",	0x23},
	{"hasper",	MHAS},
	{"hatpath",	0x5b2},		/* hataf patah U+05B2 */
	{"hatqam",	0x5b3},		/* hataf qamats U+05B3 */
	{"hatseg",	0x5b1},		/* hataf segol U+05B1 */
	{"hbar",	0x127},
	{"heart",	0x2661},
	{"hebaleph",	0x5d0},		/* aleph U+05D0 */
	{"hebayin",	0x5e2},		/* ayin U+05E2 */
	{"hebbet",	0x5d1},		/* bet U+05D1 */
	{"hebbeth",	0x5d1},		/* bet U+05D1 */
	{"hebcheth",	0x5d7},		/* het U+05D7 */
	{"hebdaleth",	0x5d3},		/* dalet U+05D3 */
	{"hebgimel",	0x5d2},		/* gimel U+05D2 */
	{"hebhe",	0x5d4},		/* he U+05D4 */
	{"hebkaph",	0x5db},		/* kaf U+05DB */
	{"heblamed",	0x5dc},		/* lamed U+05DC */
	{"hebmem",	0x5de},		/* mem U+05DE */
	{"hebnun",	0x5e0},		/* nun U+05E0 */
	{"hebnunfin",	0x5df},		/* final nun U+05DF */
	{"hebpe",	0x5e4},		/* pe U+05E4 */
	{"hebpedag",	0x5e3},		/* final pe? U+05E3 */
	{"hebqoph",	0x5e7},		/* qof U+05E7 */
	{"hebresh",	0x5e8},		/* resh U+05E8 */
	{"hebshin",	0x5e9},		/* shin U+05E9 */
	{"hebtav",	0x5ea},		/* tav U+05EA */
	{"hebtsade",	0x5e6},		/* tsadi U+05E6 */
	{"hebwaw",	0x5d5},		/* vav? U+05D5 */
	{"hebyod",	0x5d9},		/* yod U+05D9 */
	{"hebzayin",	0x5d6},		/* zayin U+05D6 */
	{"hgz",		0x292},		/* ??? Cf "alet" */
	{"hireq",	0x5b4},		/* U+05B4 */
	{"hlenis",	MHLN},
	{"hook",	LOGO},
	{"horizE",	0x45},		/* should be on side */
	{"horizP",	0x50},		/* should be on side */
	{"horizS",	0x223d},
	{"horizT",	0x22a3},
	{"horizb",	0x7b},		/* should be underbrace */
	{"ia",		0x3b1},
	{"iacu",	0xed},
	{"iasper",	MIAS},
	{"ib",		0x3b2},
	{"ibar",	0x268},
	{"ibreve",	0x12d},
	{"icirc",	0xee},
	{"id",		0x3b4},
	{"ident",	0x2261},
	{"ie",		0x3b5},
	{"ifflig",	MLFF},
	{"ifilig",	MLFI},
	{"ig",		0x3b3},
	{"igrave",	0xec},
	{"ih",		0x3b7},
	{"ii",		0x3b9},
	{"ik",		0x3ba},
	{"ilenis",	MILN},
	{"imac",	0x12b},
	{"implies",	0x21d2},
	{"index",	0x261e},
	{"infin",	0x221e},
	{"integ",	0x222b},
	{"intsec",	0x2229},
	{"invpri",	0x2cf},
	{"iota",	0x3b9},
	{"iq",		0x3c8},
	{"istlig",	MLST},
	{"isub",	0x3f5},		/* iota below accent */
	{"iuml",	0xef},
	{"iz",		0x3b6},
	{"jup",		0x2643},
	{"kappa",	0x3ba},
	{"koppa",	0x3df},
	{"lambda",	0x3bb},
	{"lar",		0x2190},
	{"lbar",	0x142},
	{"le",		0x2266},
	{"lenis",	LLEN},
	{"leo",		0x264c},
	{"lhalfbr",	0x2308},
	{"lhshoe",	0x2283},
	{"libra",	0x264e},
	{"llswing",	MLLS},
	{"lm",		0x2d0},
	{"logicand",	0x2227},
	{"logicor",	0x2228},
	{"longs",	0x283},
	{"lrar",	0x2194},
	{"lt",		0x3c},
	{"ltappr",	0x227e},
	{"ltflat",	0x2220},
	{"lumlbl",	0x6c},		/* +umlaut below */
	{"mac",		LMAC},
	{"male",	0x2642},
	{"mc",		0x63},		/* should be raised */
	{"merc",	0x263f},		/* mercury U+263F */
	{"min",		0x2212},
	{"moonfq",	0x263d},		/* first quarter moon U+263D */
	{"moonlq",	0x263e},		/* last quarter moon U+263E */
	{"msylab",	0x6d},		/* +sylab (ˌ) */
	{"mu",		0x3bc},
	{"nacu",	0x144},
	{"natural",	0x266e},
	{"neq",		0x2260},
	{"nfacu",	0x2032},
	{"nfasper",	0x2bd},
	{"nfbreve",	0x2d8},
	{"nfced",	0xb8},
	{"nfcirc",	0x2c6},
	{"nffrown",	0x2322},
	{"nfgra",	0x2cb},
	{"nfhacek",	0x2c7},
	{"nfmac",	0xaf},
	{"nftilde",	0x2dc},
	{"nfuml",	0xa8},
	{"ng",		0x14b},
	{"not",		0xac},
	{"notelem",	0x2209},
	{"ntilde",	0xf1},
	{"nu",		0x3bd},
	{"oab",		0x2329},
	{"oacu",	0xf3},
	{"oasper",	MOAS},
	{"ob",		0x7b},
	{"obar",	0xf8},
	{"obigb",	0x7b},		/* should be big */
	{"obigpren",	0x28},
	{"obigsb",	0x5b},		/* should be big */
	{"obreve",	0x14f},
	{"ocirc",	0xf4},
	{"odsb",	0x301a},		/* [[ U+301A */
	{"oe",		0x153},
	{"oeamp",	0x26},
	{"ograve",	0xf2},
	{"ohook",	0x6f},		/* +hook */
	{"olenis",	MOLN},
	{"omac",	0x14d},
	{"omega",	0x3c9},
	{"omicron",	0x3bf},
	{"ope",		0x25b},
	{"opp",		0x260d},
	{"oq",		0x60},
	{"oqq",		0x201c},
	{"or",		MOR},
	{"osb",		0x5b},
	{"otilde",	0xf5},
	{"ouml",	0xf6},
	{"ounce",	0x2125},		/* ounce U+2125 */
	{"ovparen",	0x2322},		/* should be sideways ( */
	{"p",		0x2032},
	{"pa",		0x2202},
	{"page",	0x50},
	{"pall",	0x28e},
	{"paln",	0x272},
	{"par",		PAR},
	{"para",	0xb6},
	{"pbar",	0x70},		/* +bar */
	{"per",		0x2118},		/* per U+2118 */
	{"phi",		0x3c6},
	{"phi2",	0x3d5},
	{"pi",		0x3c0},
	{"pisces",	0x2653},
	{"planck",	0x127},
	{"plantinJ",	0x4a},		/* should be script */
	{"pm",		0xb1},
	{"pmil",	0x2030},
	{"pp",		0x2033},
	{"ppp",		0x2034},
	{"prop",	0x221d},
	{"psi",		0x3c8},
	{"pstlg",	0xa3},
	{"q",		0x3f},		/* should be raised */
	{"qamets",	0x5b3},		/* U+05B3 */
	{"quaver",	0x266a},
	{"rar",		0x2192},
	{"rasper",	MRAS},
	{"rdot",	0xb7},
	{"recipe",	0x211e},		/* U+211E */
	{"reg",		0xae},
	{"revC",	0x186},		/* open O U+0186 */
	{"reva",	0x252},
	{"revc",	0x254},
	{"revope",	0x25c},
	{"revr",	0x279},
	{"revsc",	0x2d2},		/* upside-down semicolon */
	{"revv",	0x28c},
	{"rfa",		0x6f},		/* +hook (Cf "goal") */
	{"rhacek",	0x159},
	{"rhalfbr",	0x2309},
	{"rho",		0x3c1},
	{"rhshoe",	0x2282},
	{"rlenis",	MRLN},
	{"rsylab",	0x72},		/* +sylab */
	{"runash",	0x46},		/* should be runic 'ash' */
	{"rvow",	0x2d4},
	{"sacu",	0x15b},
	{"sagit",	0x2650},
	{"sampi",	0x3e1},
	{"saturn",	0x2644},
	{"sced",	0x15f},
	{"schwa",	0x259},
	{"scorpio",	0x264f},
	{"scrA",	0x41},		/* should be script */
	{"scrC",	0x43},
	{"scrE",	0x45},
	{"scrF",	0x46},
	{"scrI",	0x49},
	{"scrJ",	0x4a},
	{"scrL",	'L'},
	{"scrO",	0x4f},
	{"scrP",	0x50},
	{"scrQ",	0x51},
	{"scrS",	0x53},
	{"scrT",	0x54},
	{"scrb",	0x62},
	{"scrd",	0x64},
	{"scrh",	0x68},
	{"scrl",	0x6c},
	{"scruple",	0x2108},		/* U+2108 */
	{"sdd",		0x2d0},
	{"sect",	0xa7},
	{"semE",	0x2203},
	{"sh",		0x283},
	{"shacek",	0x161},
	{"sharp",	0x266f},
	{"sheva",	0x5b0},		/* U+05B0 */
	{"shti",	0x26a},
	{"shtsyll",	0x222a},
	{"shtu",	0x28a},
	{"sidetri",	0x22b2},
	{"sigma",	0x3c3},
	{"since",	0x2235},
	{"slge",	0x2265},		/* should have slanted line under */
	{"slle",	0x2264},		/* should have slanted line under */
	{"sm",		0x2c8},
	{"smm",		0x2cc},
	{"spade",	0x2660},
	{"sqrt",	0x221a},
	{"square",	0x25a1},		/* U+25A1 */
	{"ssChi",	0x3a7},		/* should be sans serif */
	{"ssIota",	0x399},
	{"ssOmicron",	0x39f},
	{"ssPi",	0x3a0},
	{"ssRho",	0x3a1},
	{"ssSigma",	0x3a3},
	{"ssTau",	0x3a4},
	{"star",	0x2a},
	{"stlig",	MLST},
	{"sup2",	0xb2},		/* superscript two U+00B2 (U+2072 is unassigned) */
	{"supgt",	0x2c3},
	{"suplt",	0x2c2},
	{"sur",		0x2b3},
	{"swing",	0x223c},
	{"tau",		0x3c4},
	{"taur",	0x2649},
	{"th",		0xfe},
	{"thbar",	0xfe},		/* +bar */
	{"theta",	0x3b8},
	{"thinqm",	0x3f},		/* should be thinner */
	{"tilde",	LTIL},
	{"times",	0xd7},
	{"tri",		0x2206},
	{"trli",	0x2016},
	{"ts",		0x2009},
	{"uacu",	0xfa},
	{"uasper",	MUAS},
	{"ubar",	0x75},		/* +bar */
	{"ubreve",	0x16d},
	{"ucirc",	0xfb},
	{"udA",		0x2200},
	{"udT",		0x22a5},
	{"uda",		0x250},
	{"udh",		0x265},
	{"udpsi",	0x22d4},
	{"udqm",	0xbf},
	{"udtr",	0x2207},
	{"ugrave",	0xf9},
	{"ulenis",	MULN},
	{"umac",	0x16b},
	{"uml",		LUML},
	{"undl",	0x2cd},		/* underline accent */
	{"union",	0x222a},
	{"upsilon",	0x3c5},
	{"uuml",	0xfc},
	{"vavpath",	0x5d5},		/* vav U+05D5 (+patah) */
	{"vavsheva",	0x5d5},		/* vav U+05D5 (+sheva) */
	{"vb",		0x7c},
	{"vddd",	0x22ee},
	{"versicle2",	0x2123},		/* U+2123 */
	{"vinc",	0xaf},
	{"virgo",	0x264d},
	{"vpal",	0x25f},
	{"vvf",		0x263},
	{"wasper",	MWAS},
	{"wavyeq",	0x2248},
	{"wlenis",	MWLN},
	{"wyn",		0x1bf},		/* wynn U+01BF */
	{"xi",		0x3be},
	{"yacu",	0xfd},
	{"ycirc",	0x177},
	{"ygh",		0x292},
	{"ymac",	0x79},		/* +macron */
	{"yuml",	0xff},
	{"zced",	0x7a},		/* +cedilla */
	{"zeta",	0x3b6},
	{"zh",		0x292},
	{"zhacek",	0x17e},
};
/*
   The following special characters don't have close enough
   equivalents in Unicode, so aren't in the above table.
	22n		2^(2^n) Cf Fermat
	2on4		2/4
	3on8		3/8
	Bantuo		Bantu O. Cf Otshi-herero
	Car		C with circular arrow on top
	albrtime 	cut-time: C with vertical line
	ardal		Cf dental
	bantuo		Bantu o. Cf Otshi-herero
	bbc1		single chem bond below
	bbc2		double chem bond below
	bbl1		chem bond like /
	bbl2		chem bond like //
	bbr1		chem bond like \
	bbr2		chem bond \\
	bcop1		copper symbol. Cf copper
	bcop2		copper symbol. Cf copper
	benchm		Cf benchmark
	btc1		single chem bond above
	btc2		double chem bond above
	btl1		chem bond like \
	btl2		chem bond like \\
	btr1		chem bond like /
	btr2		chem bond like //
	burman		Cf Burman
	devph		sanskrit letter. Cf ph
	devrfls		sanskrit letter. Cf cerebral
	duplong[12]	musical note
	egchi		early form of chi
	eggamma[12]	early form of gamma
	egiota		early form of iota
	egkappa		early form of kappa
	eglambda	early form of lambda
	egmu[12]	early form of mu
	egnu[12]	early form of nu
	egpi[123]	early form of pi
	egrho[12]	early form of rho
	egsampi		early form of sampi
	egsan		early form of san
	egsigma[12]	early form of sigma
	egxi[123]	early form of xi
	elatS		early form of S
	elatc[12]	early form of C
	elatg[12]	early form of G
	glagjeri	Slavonic Glagolitic jeri
	glagjeru	Slavonic Glagolitic jeru
	hypolem		hypolemisk (line with underdot)
	lhrbr		lower half }
	longmord	long mordent
	mbwvow		backwards scretched C. Cf retract.
	mord		music symbol.  Cf mordent
	mostra		Cf direct
	ohgcirc		old form of circumflex
	oldbeta		old form of β. Cf perturbate
	oldsemibr[12]	old forms of semibreve. Cf prolation
	ormg		old form of g. Cf G
	para[12345]	form of ¶
	pauseo		musical pause sign
	pauseu		musical pause sign
	pharyng		Cf pharyngal
	ragr		Black letter ragged r
	repetn		musical repeat. Cf retort
	segno		musical segno sign
	semain[12]	semitic ain
	semhe		semitic he
	semheth		semitic heth
	semkaph		semitic kaph
	semlamed[12]	semitic lamed
	semmem		semitic mem
	semnum		semitic nun
	sempe		semitic pe
	semqoph[123]	semitic qoph
	semresh		semitic resh
	semtav[1234]	semitic tav
	semyod		semitic yod
	semzayin[123]	semitic zayin
	shtlong[12]	U with underbar. Cf glyconic
	sigmatau	σ,τ combination
	squaver		sixteenth note
	sqbreve		square musical breve note
	swast		swastika
	uhrbr		upper half of big }
	versicle1		Cf versicle
 */


/*
 * Default translation table, indexed by an input byte masked
 * with 0x7F (see oedprintentry).  Each slot is either the rune
 * to print or a marker: NONE (print nothing), SPCS at '&'
 * (special character name follows), TAGS at '<' (tag follows),
 * TAGE at '>'.  All other printable ASCII maps to itself.
 */
static Rune normtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
};
/*
 * Translation table handed to changett() for <ph> (pronunciation)
 * tags.  Same layout as normtab, but many ASCII codes stand for
 * phonetic runes (e.g. '@' gives 0x259, 'S' gives 0x283).
 * Note that '&' (0x26) gives 0xe6 here rather than SPCS, so
 * special-character names are not recognized while this table
 * is in effect.
 */
static Rune phtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x2c8,	0x23,	0x24,	0x2cc,	0xe6,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x25c,	0x34,	0x35,	0x36,	0x37,
	0x38,	0xf8,	0x2d0,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
/*40*/  0x259,	0x251,	0x42,	0x43,	0xf0,	0x25b,	0x46,	0x47,
	0x48,	0x26a,	0x4a,	0x4b,	'L',	0x4d,	0x14b,	0x254,
/*50*/	0x50,	0x252,	0x52,	0x283,	0x3b8,	0x28a,	0x28c,	0x57,
	0x58,	0x59,	0x292,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
};
/*
 * Translation table handed to changett() for <gk> (Greek
 * transliteration) tags: ASCII letters give Greek runes in the
 * 0x391-0x3da range, other slots are as in normtab.
 * oedprintentry also indexes it directly to print the label of
 * an <sgk> sense.
 */
static Rune grtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
	0x38,	0x39,	0x3a,	0x3b,	TAGS,	0x3d,	TAGE,	0x3f,
/*40*/  0x40,	0x391,	0x392,	0x39e,	0x394,	0x395,	0x3a6,	0x393,
	0x397,	0x399,	0x3da,	0x39a,	0x39b,	0x39c,	0x39d,	0x39f,
/*50*/	0x3a0,	0x398,	0x3a1,	0x3a3,	0x3a4,	0x3a5,	0x56,	0x3a9,
	0x3a7,	0x3a8,	0x396,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x3b1,	0x3b2,	0x3be,	0x3b4,	0x3b5,	0x3c6,	0x3b3,
	0x3b7,	0x3b9,	0x3c2,	0x3ba,	0x3bb,	0x3bc,	0x3bd,	0x3bf,
/*70*/	0x3c0,	0x3b8,	0x3c1,	0x3c3,	0x3c4,	0x3c5,	0x76,	0x3c9,
	0x3c7,	0x3c8,	0x3b6,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
};
/*
 * Translation table handed to changett() for <in> (inferior)
 * tags: digits give 0x2080-0x2089 and + - = ( ) give
 * 0x208a-0x208e; every other slot is as in normtab.
 */
static Rune subtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x208d,	0x208e,	0x2a,	0x208a,	0x2c,	0x208b,	0x2e,	0x2f,
/*30*/  0x2080,	0x2081,	0x2082,	0x2083,	0x2084,	0x2085,	0x2086,	0x2087,
	0x2088,	0x2089,	0x3a,	0x3b,	TAGS,	0x208c,	TAGE,	0x3f,
/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
};
/*
 * Translation table handed to changett() for superscript tags
 * (<hm>, <su>, <fq>): digits and + - = ( ) give superscript
 * runes; every other slot is as in normtab.
 *
 * Superscript 1, 2 and 3 live in Latin-1 (U+00B9, U+00B2,
 * U+00B3), not in the U+2070 block: U+2071 is superscript 'i'
 * and U+2072/U+2073 are unassigned.
 */
static Rune suptab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	SPCS,	'\'',
	0x207d,	0x207e,	0x2a,	0x207a,	0x2c,	0x207b,	0x2e,	0x2f,
/*30*/	0x2070,	0xb9,	0xb2,	0xb3,	0x2074,	0x2075,	0x2076,	0x2077,
	0x2078,	0x2079,	0x3a,	0x3b,	TAGS,	0x207c,	TAGE,	0x3f,
/*40*/	0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
};

/* Results of the most recent gettag()/getspec() call, plus printing state */
static int	tagstarts;	/* 1 if the last tag had no leading '/', else 0 */
static char	tag[Buflen];	/* name of the last tag */
static int	naux;	/* number of aux pairs gettag() found */
static char	auxname[Maxaux][Buflen];	/* aux names of the last tag */
static char	auxval[Maxaux][Buflen];	/* aux values, parallel to auxname */
static char	spec[Buflen];	/* special character name from getspec() */
static char	*auxstate[Naux];	/* vals for most recent tag */
static Entry	curentry;	/* entry being printed; set by oedprintentry() */
#define cursize (curentry.end-curentry.start)	/* length of curentry */

static char	*getspec(char *, char *);
static char	*gettag(char *, char *);
static void	dostatus(void);

/*
 * Print the entry e, translating its markup.
 * cmd is one of:
 *    'p': normal print
 *    'h': just print headwords
 *    'r': print raw (bytes are copied to the output untranslated)
 *
 * Each input byte is mapped through the current translation
 * table (normtab, or the table selected by an enclosing tag).
 * Ordinary runes are held back one position in rprev so that a
 * following accent name (&acu. etc.) can be merged with them by
 * liglookup().  '&' introduces a special character name and '<'
 * introduces a tag.
 */
void
oedprintentry(Entry e, int cmd)
{
	char *p, *pe;
	int t, a, i;
	long r, rprev, rlig;
	Rune *transtab;

	p = e.start;
	pe = e.end;
	transtab = normtab;
	rprev = NONE;
	changett(0, 0, 0);
	curentry = e;
	if(cmd == 'h')
		outinhibit = 1;
	while(p < pe) {
		if(cmd == 'r') {
			outchar(*p++);
			continue;
		}
		r = transtab[(*p++)&0x7F];
		if(r < NONE) {
			/* Emit the rune, but buffer in case of ligature */
			if(rprev != NONE)
				outrune(rprev);
			rprev = r;
		} else if(r == SPCS) {
			/* Start of special character name */
			p = getspec(p, pe);
			r = lookassoc(spectab, asize(spectab), spec);
			if(r == -1) {
				if(debug)
					err("spec %ld %ld %s",
						e.doff, (long)cursize, spec);
				r = 0xfffd;	/* unknown name: replacement character */
			}
			if(r >= LIGS && r < LIGE) {
				/* handle possible ligature */
				rlig = liglookup(r, rprev);
				if(rlig != NONE)
					rprev = rlig;	/* overwrite rprev */
				else {
					/* could print accent, but let's not */
					if(rprev != NONE)
						outrune(rprev);
					rprev = NONE;
				}
			} else if(r >= MULTI && r < MULTIE) {
				if(rprev != NONE) {
					outrune(rprev);
					rprev = NONE;
				}
				outrunes(multitab[r-MULTI]);
			} else if(r == PAR) {
				if(rprev != NONE) {
					outrune(rprev);
					rprev = NONE;
				}
				outnl(1);
			} else {
				if(rprev != NONE)
					outrune(rprev);
				rprev = r;
			}
		} else if(r == TAGS) {
			/* Start of tag name */
			if(rprev != NONE) {
				outrune(rprev);
				rprev = NONE;
			}
			p = gettag(p, pe);
			t = lookassoc(tagtab, asize(tagtab), tag);
			if(t == -1) {
				if(debug)
					err("tag %ld %ld %s",
						e.doff, (long)cursize, tag);
				continue;
			}
			/* auxstate[] describes this tag only */
			for(i = 0; i < Naux; i++)
				auxstate[i] = 0;
			for(i = 0; i < naux; i++) {
				a = lookassoc(auxtab, asize(auxtab), auxname[i]);
				if(a == -1) {
					if(debug)
						err("aux %ld %ld %s",
							e.doff, (long)cursize, auxname[i]);
				} else
					auxstate[a] = auxval[i];
			}
			/* tags not listed below produce no output of their own */
			switch(t){
			case E:
			case Ve:
				outnl(0);
				if(tagstarts)
					dostatus();
				break;
			case Ed:
			case Etym:
				outchar(tagstarts? '[' : ']');
				break;
			case Pr:
				outchar(tagstarts? '(' : ')');
				break;
			case In:
				transtab = changett(transtab, subtab, tagstarts);
				break;
			case Hm:
			case Su:
			case Fq:
				transtab = changett(transtab, suptab, tagstarts);
				break;
			case Gk:
				transtab = changett(transtab, grtab, tagstarts);
				break;
			case Ph:
				transtab = changett(transtab, phtab, tagstarts);
				break;
			case Hw:
				/* in headword mode only <hw>...</hw> text is let through */
				if(cmd == 'h') {
					if(!tagstarts)
						outchar(' ');
					outinhibit = !tagstarts;
				}
				break;
			case S0:
			case S1:
			case S2:
			case S3:
			case S4:
			case S5:
			case S6:
			case S7a:
			case S7n:
			case Sn:
			case Sgk:
				if(tagstarts) {
					outnl(2);
					dostatus();
					if(auxstate[Num]) {
						if(t == S3 || t == S5) {
							/* num is a count of asterisks; ignore junk <= 0 */
							i = atoi(auxstate[Num]);
							while(i-- > 0)
								outchar('*');
							outchars("  ");
						} else if(t == S7a || t == S7n || t == Sn) {
							outchar('(');
							outchars(auxstate[Num]);
							outchars(") ");
						} else if(t == Sgk) {
							/*
							 * grtab has 128 slots: mask as the main
							 * loop does, and print only real runes
							 * (not NONE or the SPCS/TAGS/TAGE markers).
							 */
							i = grtab[auxstate[Num][0]&0x7F];
							if(i < NONE)
								outrune(i);
							outchars(".  ");
						} else {
							outchars(auxstate[Num]);
							outchars(".  ");
						}
					}
				}
				break;
			case Cb:
			case Db:
			case Qp:
			case P:
				if(tagstarts)
					outnl(1);
				break;
			case Table:
				/*
				 * Todo: gather columns, justify them, etc.
				 * For now, just let colums come out as rows
				 */
				if(!tagstarts)
					outnl(0);
				break;
			case Col:
				if(tagstarts)
					outnl(0);
				break;
			case Dn:
				if(tagstarts)
					outchar('/');
				break;
			}
		}
	}
	/* don't lose a rune still held back for a possible ligature */
	if(rprev != NONE)
		outrune(rprev);
	if(cmd == 'h') {
		outinhibit = 0;
		outnl(0);
	}
}

/*
 * Scan bdict forward from fromoff and return the offset at which
 * the next oed entry begins.  An entry begins with <e>, <ve>,
 * <e st=...>, or <ve st=...>, i.e. "<e" or "<ve" followed by
 * '>' or a space.  If no such tag is found before end of input,
 * the offset reached is returned; -1 means the seek failed.
 */
long
oednextoff(long fromoff)
{
	int ch;
	long backup;	/* bytes consumed since the '<' of the match; 0 = no match yet */

	if(Bseek(bdict, fromoff, 0) < 0)
		return -1;
	backup = 0;
	while(backup == 0 && (ch = Bgetc(bdict)) >= 0) {
		if(ch != '<')
			continue;
		ch = Bgetc(bdict);
		switch(ch) {
		case 'e':
			ch = Bgetc(bdict);
			if(ch == '>' || ch == ' ')
				backup = 3;
			break;
		case 'v':
			if(Bgetc(bdict) != 'e')
				break;
			ch = Bgetc(bdict);
			if(ch == '>' || ch == ' ')
				backup = 4;
			break;
		}
	}
	return Boffset(bdict) - backup;
}

/* Text printed by oedprintkey(): the key to the pronunciation symbols */
static char *prkey =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h  ...  ho! (həʊ)\n"
"r  ...  run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)...  her (hɜː(r))\n"
"s  ...  see (siː), success (səkˈsɜs)\n"
"w  ...  wear (wɛə(r))\n"
"hw ...  when (hwɛn)\n"
"j  ...  yes (jɛs)\n"
"θ  ...  thin (θin), bath (bɑːθ)\n"
"ð  ...  then (ðɛn), bathe (beɪð)\n"
"ʃ  ...  shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ...  chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ  ...  vision (ˈvɪʒən), déjeuner (deʒøne)\n"
"dʒ ...  judge (dʒʌdʒ)\n"
"ŋ  ...  singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ...  finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ  ...  Fr. cognac (kɔɲak)\n"
"x  ...  Ger. ach (ax), Sc. loch (lɒx)\n"
"ç  ...  Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ  ...  North Ger. sagen (ˈzaːɣən)\n"
"c  ...  Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ  ...  Fr. cuisine (kɥizin)\n"
"\n"
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ  ...  pet (pɛt), Fr. sept (sɛt)\n"
"æ  ...  pat (pæt)\n"
"ʌ  ...  putt (pʌt)\n"
"ɒ  ...  pot (pɒt)\n"
"ʊ  ...  put (pʊt)\n"
"ə  ...  another (əˈnʌðə(r))\n"
"(ə)...  beaten (ˈbiːt(ə)n)\n"
"i  ...  Fr. si (si)\n"
"e  ...  Fr. bébé (bebe)\n"
"a  ...  Fr. mari (mari)\n"
"ɑ  ...  Fr. bâtiment (bɑtimã)\n"
"ɔ  ...  Fr. homme (ɔm)\n"
"o  ...  Fr. eau (o)\n"
"ø  ...  Fr. peu (pø)\n"
"œ  ...  Fr. boeuf (bœf), coeur (kœr)\n"
"u  ...  Fr. douce (dus)\n"
"ʏ  ...  Ger. Müller (ˈmʏlər)\n"
"y  ...  Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ...  barn (bɑːn)\n"
"ɔː ...  born (bɔːn)\n"
"uː ...  boon (buːn)\n"
"ɜː ...  burn (bɜːn)\n"
"eː ...  Ger. Schnee (ʃneː)\n"
"ɛː ...  Ger. Fähre (ˈfɛːrə)\n"
"aː ...  Ger. Tag (taːk)\n"
"oː ...  Ger. Sohn (zoːn)\n"
"øː ...  Ger. Goethe (gøːtə)\n"
"yː ...  Ger. grün (gryːn)\n"
"\n"
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã  ...  Fr. franc (frã)\n"
"ɔ˜ ...  Fr. bon (bɔ˜n)\n"
"œ˜ ...  Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ...  buy (baɪ)\n"
"ɔɪ ...  boy (bɔɪ)\n"
"əʊ ...  no (nəʊ)\n"
"aʊ ...  now (naʊ)\n"
"ɪə ...  peer (pɪə(r))\n"
"ɛə ...  pair (pɛə(r))\n"
"ʊə ...  tour (tʊə(r))\n"
"ɔə ...  boar (bɔə(r))\n"
"\n"
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */

/* Write the pronunciation key (prkey) to bout. */
void
oedprintkey(void)
{
	Bprint(bout, "%s", prkey);
}

/*
 * f points just after a '&', fe points at end of entry.
 * Accumulate the special name, starting after the &
 * and continuing until the next '.', in spec[].
 * Return pointer to char after '.'.
 *
 * Nothing at or beyond fe is ever read: if f is already at fe,
 * spec[] is left empty and f is returned unchanged.  If the
 * entry ends before a '.' is seen, the final byte of the entry
 * is consumed but not stored, and fe is returned.  Names longer
 * than spec[] are truncated and the pointer returned is to the
 * first byte not consumed.
 */
static char *
getspec(char *f, char *fe)
{
	char *t;
	int c, i;

	t = spec;
	i = sizeof spec;
	while(f < fe && --i > 0) {
		c = *f++;
		if(c == '.' || f == fe)
			break;
		*t++ = c;
	}
	*t = 0;
	return f;
}

/*
 * f points just after '<'; fe points at end of entry.
 * Expect next characters from bin to match:
 *  [/][^ >]+( [^>=]+=[^ >]+)*>
 *      tag   auxname auxval
 * Accumulate the tag and its auxiliary information in
 * tag[], auxname[][] and auxval[][].
 * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
 * Set naux to the number of aux pairs found.
 * Return pointer to after final '>'.
 *
 * Nothing at or beyond fe is read.  At most Maxaux-1 aux pairs
 * are reported; any further ones are parsed into the spare last
 * slot of auxname/auxval and otherwise ignored, so they cannot
 * disturb the pairs already recorded.  An aux name given without
 * '=' gets an empty value.
 */
static char *
gettag(char *f, char *fe)
{
	char *t;
	int c, i, spill;

	t = tag;
	i = Buflen;
	naux = 0;
	spill = 0;	/* set once aux pairs overflow into the spare slot */
	tagstarts = 1;
	if(f < fe) {
		c = *f++;
		if(c == '/')
			tagstarts = 0;
		else {
			*t++ = c;
			i--;	/* tag[] already holds one byte */
		}
	}
	while(f < fe && --i > 0) {
		c = *f++;
		if(c == '>' || f == fe)
			break;
		if(c == ' ') {
			*t = 0;
			if(naux < Maxaux-1) {
				t = auxname[naux];
				/* no stale value from an earlier tag */
				auxval[naux][0] = 0;
				naux++;
			} else {
				spill = 1;
				t = auxname[Maxaux-1];
			}
			i = Buflen;
		} else if(naux && c == '=') {
			*t = 0;
			t = spill ? auxval[Maxaux-1] : auxval[naux-1];
			i = Buflen;
		} else
			*t++ = c;
	}
	*t = 0;
	return f;
}

/*
 * Print the marker for the current tag's st= (status) value,
 * if it has one: "obs" gives a dagger (U+2020), "ali" a double
 * vertical line (U+2016), "err" and "spu" a pilcrow (U+00B6).
 * "xref" prints nothing.  Any other value is reported through
 * err() when debug is set.
 */
static void
dostatus(void)
{
	char *s;

	s = auxstate[St];
	if(s == 0)
		return;
	if(strcmp(s, "obs") == 0)
		outrune(0x2020);
	else if(strcmp(s, "ali") == 0)
		outrune(0x2016);
	else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
		outrune(0xb6);
	else if(strcmp(s, "xref") == 0)
		{/* nothing */}
	else if(debug)
		/* cursize is a pointer difference; pass it as a long for %ld */
		err("status %ld %ld %s", curentry.doff, (long)cursize, s);
}