#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Routines for handling dictionaries in the "Paperback Collins"
 * format (with tags surrounded by >....<)
 */
/*
 * Buflen is the size in bytes of the tag[] buffer that gettag fills,
 * including the terminating NUL; it therefore bounds the length of a
 * tag name.
 */
enum {
	Buflen=1000
};

/*
 * More special runes: codes private to this file that tagtab returns
 * for formatting tags.  They start at MULTIE+1; pcollprintentry
 * handles them in its final else branch (presumably this keeps them
 * clear of the NONE, LIGS..LIGE and MULTI..MULTIE codes from dict.h;
 * confirm there).  Only H, X, Ps and Pe have a case in that switch;
 * B, I and R are recognized but currently have no effect.
 */
enum {
	B = MULTIE+1,	/* bold */
	H,		/* headword start */
	I,		/* italics */
	Ps,		/* pronunciation start */
	Pe,		/* pronunciation end */
	R,		/* roman */
	X		/* headword end */
};

/*
 * Assoc tables must be sorted on first field.
 *
 * tagtab maps a tag name (the text gettag collects between '>' and
 * '<') to the value pcollprintentry acts on: a literal code point
 * (e.g. 0xc5), one of the L* accent codes or MTT from dict.h, or one
 * of the special runes defined in this file (B, H, I, Ps, Pe, R, X).
 * It is searched with lookassoc(); the entries below are in byte
 * (ASCII) order of the key.
 */

static Assoc tagtab[] = {
	{"AA",		0xc5},
	{"AC",		LACU},
	{"B",		B},
	{"CE",		LCED},
	{"CI",		LFRN},
	{"Di",		0x131},
	{"EL",		0x2d},
	{"GR",		LGRV},
	{"H",		H},
	{"I",		I},
	{"OE",		0x152},
	{"R",		R},
	{"TI",		LTIL},
	{"UM",		LUML},
	{"X",		X},
	{"[",		Ps},
	{"]",		Pe},
	{"ac",		LACU},
	{"ce",		LCED},
	{"ci",		LFRN},
	{"gr",		LGRV},
	{"oe",		0x153},
	{"supe",	0x65},		/* should be raised */
	{"supo",	0x6f},		/* should be raised */
	{"ti",		LTIL},
	{"um",		LUML},
	{"{",		Ps},
	{"~",		0x7e},
	{"~~",		MTT}
};

/*
 * Translation table for ordinary text, indexed by the low 7 bits of
 * each input byte (pcollprintentry masks with 0x7F).  Printable ASCII
 * maps to itself, with these exceptions:
 *	0x0a (newline) maps to a space;
 *	0x3c ('<') maps to TAGE and 0x3e ('>') maps to TAGS, the
 *	tag delimiters of this format (tags look like >....<);
 *	the other control characters and 0x7f map to NONE.
 * In pcollprintentry a NONE result produces no output.
 */
static Rune normtab[128] = {
	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,	NONE,
/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
	0x38,	0x39,	0x3a,	0x3b,	TAGE,	0x3d,	TAGS,	0x3f,
/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
	0x48,	0x49,	0x4a,	0x4b,	'L',	0x4d,	0x4e,	0x4f,
/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
	0x58,	0x59,	0x5a,	0x5b,	'\\',	0x5d,	0x5e,	0x5f,
/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE
};

/* Collects a tag name into tag[]; defined below. */
static char *gettag(char *, char *);

static Entry	curentry;	/* entry being printed; set by pcollprintentry */
static char	tag[Buflen];	/* NUL-terminated name of the last tag read by gettag */
/* Length in bytes of the current entry (used in the debug message). */
#define cursize (curentry.end-curentry.start)

/*
 * Print entry e, translating Paperback Collins markup.
 *
 * cmd selects what is produced:
 *	'r'	every byte of the entry is passed to outchar untranslated;
 *	'h'	outinhibit is set on entry, cleared at an H tag and set
 *		again at an X tag, so only the headword text is let
 *		through;
 *	other	the whole entry is translated: H starts a new line,
 *		X prints ".  ".
 * In the translated modes outinhibit is also set between Ps and Pe
 * tags (pronunciation), and restored to its previous value at Pe.
 *
 * Ordinary bytes are mapped through normtab.  Each resulting rune is
 * held back one step in rprev so that a following accent tag (a tag
 * value in [LIGS, LIGE)) can be combined with it via liglookup.  A rune
 * still held in rprev when the entry ends is not emitted.
 *
 * Unknown tags are skipped; when debug is set and output is not
 * inhibited they are reported through err().
 *
 * A final outnl(0) is always issued.
 */
void
pcollprintentry(Entry e, int cmd)
{
	char *p, *pe;
	long r, rprev, t, rlig;
	int saveoi;
	Rune *transtab;

	p = e.start;
	pe = e.end;
	transtab = normtab;
	rprev = NONE;
	changett(0, 0, 0);
	curentry = e;
	saveoi = 0;
	if(cmd == 'h')
		outinhibit = 1;
	while(p < pe) {
		if(cmd == 'r') {
			outchar(*p++);
			continue;
		}
		r = transtab[(*p++)&0x7F];
		if(r < NONE) {
			/* Emit the rune, but buffer in case of ligature */
			if(rprev != NONE)
				outrune(rprev);
			rprev = r;
		} else if(r == TAGS) {
			/* p is just past '>'; gettag leaves the name in tag[] */
			p = gettag(p, pe);
			t = lookassoc(tagtab, asize(tagtab), tag);
			if(t == -1) {
				/*
				 * cursize is a pointer difference; cast it so
				 * the argument matches the %ld verb everywhere.
				 */
				if(debug && !outinhibit)
					err("tag %ld %ld %s",
						e.doff, (long)cursize, tag);
				continue;
			}
			if(t < NONE) {
				/* tag stands for a single rune: treat like text */
				if(rprev != NONE)
					outrune(rprev);
				rprev = t;
			} else if(t >= LIGS && t < LIGE) {
				/* handle possible ligature */
				rlig = liglookup(t, rprev);
				if(rlig != NONE)
					rprev = rlig;	/* overwrite rprev */
				else {
					/* could print accent, but let's not */
					if(rprev != NONE) outrune(rprev);
					rprev = NONE;
				}
			} else if(t >= MULTI && t < MULTIE) {
				/* tag expands to a rune string from multitab */
				if(rprev != NONE) {
					outrune(rprev);
					rprev = NONE;
				}
				outrunes(multitab[t-MULTI]);
			} else {
				/*
				 * Formatting tag.  Flush the pending rune first
				 * so it is emitted under the old outinhibit
				 * setting.
				 */
				if(rprev != NONE) {
					outrune(rprev);
					rprev = NONE;
				}
				switch(t){
				case H:
					if(cmd == 'h')
						outinhibit = 0;
					else
						outnl(0);
					break;
				case X:
					if(cmd == 'h')
						outinhibit = 1;
					else
						outchars(".  ");
					break;
				case Ps:
					/* don't know enough of pron. key yet */
					saveoi = outinhibit;
					outinhibit = 1;
					break;
				case Pe:
					outinhibit = saveoi;
					break;
				}
			}
		}
	}
	if(cmd == 'h')
		outinhibit = 0;
	outnl(0);
}

/*
 * Seek bdict to fromoff and read lines from there until one begins
 * with ">H<" (a headword-start tag).  Returns the file offset of the
 * start of that line, or -1 if the seek fails or Brdline returns 0
 * before such a line is found.
 */
long
pcollnextoff(long fromoff)
{
	long pos;
	char *line;

	pos = Bseek(bdict, fromoff, 0);
	if(pos < 0)
		return -1;
	while((line = Brdline(bdict, '\n')) != 0) {
		/* skip anything that is not the start of an entry */
		if(line[0] != '>' || line[1] != 'H' || line[2] != '<')
			continue;
		/* Boffset is just past the line; back up by its length */
		return (Boffset(bdict)-Blinelen(bdict));
	}
	return -1;
}

/*
 * Print the pronunciation key for this dictionary.  None is available
 * yet, so this only writes a one-line notice to bout.
 */
void
pcollprintkey(void)
{
	Bprint(bout, "No pronunciation key yet\n");
}

/*
 * Collect a tag name into tag[].
 *
 * f points just after the '>' that opened the tag; fe points just
 * past the last byte of the entry.  Bytes are copied from f into
 * tag[] up to, but not including, the closing '<'.  tag[] is always
 * NUL-terminated and receives at most Buflen-1 bytes.
 *
 * Returns a pointer to just after the closing '<'.  If the entry ends
 * before a '<' is seen, returns fe; the last byte of the entry is
 * consumed but not stored in that case.  If the name does not fit in
 * tag[], returns a pointer to the first byte not consumed.
 *
 * No byte at or beyond fe is ever read, so a '>' that is the very
 * last byte of an entry yields an empty tag.
 */
static char *
gettag(char *f, char *fe)
{
	char *t;
	int c, i;

	t = tag;
	i = Buflen;
	/* the f < fe test keeps a truncated entry from being overrun */
	while(--i > 0 && f < fe) {
		c = *f++;
		if(c == '<' || f == fe)
			break;
		*t++ = c;
	}
	*t = 0;
	return f;
}