#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Table of the dictionaries known to the program, terminated by an
 * all-zero entry.  Each initializer supplies, in order: a short name,
 * a descriptive title, two file paths (judging by their names, the
 * dictionary data and its index), and three handler functions
 * (next-offset, print-entry, print-key).  The actual field names are
 * in the Dict declaration, which is not in this file -- presumably
 * dict.h; confirm there.
 * Several entries share one data file and differ only in the index
 * file (e.g. "ce", "ceh" and "ec").
 */
Dict dicts[] = {
	{"oed",		"Oxford English Dictionary, 2nd Ed.",
	 "oed2",	"oed2index",
	 oednextoff,	oedprintentry,		oedprintkey},
	{"ahd",		"American Heritage Dictionary, 2nd College Ed.",
	 "ahd/DICT.DB",	"ahd/index",
	 ahdnextoff,	ahdprintentry,		ahdprintkey},
	{"pgw",		"Project Gutenberg Webster Dictionary",
	 "pgw",	"pgwindex",
	 pgwnextoff,	pgwprintentry,		pgwprintkey},
	{"thesaurus",	"Collins Thesaurus",
	 "thesaurus",	"thesindex",
	 thesnextoff,	thesprintentry,	thesprintkey},
	{"roget",		"Project Gutenberg Roget's Thesaurus",
	 "roget", "rogetindex",
	 rogetnextoff,	rogetprintentry,	rogetprintkey},

	{"ce",		"Gendai Chinese->English",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/ceindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ceh",		"Gendai Chinese->English (Hanzi index)",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/cehindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ec",		"Gendai English->Chinese",
	 "world/sansdata/sandic24.dat",
	 "world/sansdata/ecindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"dae",		"Gyldendal Danish->English",
	 "world/gylddata/sandic30.dat",
	 "world/gylddata/daeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"eda",		"Gyldendal English->Danish",
	 "world/gylddata/sandic29.dat",
	 "world/gylddata/edaindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"due",		"Wolters-Noordhoff Dutch->English",
	 "world/woltdata/sandic07.dat",
	 "world/woltdata/deindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"edu",		"Wolters-Noordhoff English->Dutch",
	 "world/woltdata/sandic06.dat",
	 "world/woltdata/edindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"fie",		"WSOY Finnish->English",
	 "world/werndata/sandic32.dat",
	 "world/werndata/fieindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"efi",		"WSOY English->Finnish",
	 "world/werndata/sandic31.dat",
	 "world/werndata/efiindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"fe",		"Collins French->English",
	 "fe",	"feindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"ef",		"Collins English->French",
	 "ef",	"efindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"ge",		"Collins German->English",
	 "ge",	"geindex",
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
	{"eg",		"Collins English->German",
	 "eg",	"egindex",
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},

	{"ie",		"Collins Italian->English",
	 "ie",	"ieindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"ei",		"Collins English->Italian",
	 "ei",	"eiindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"je",		"Sanshusha Japanese->English",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/jeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"jek",		"Sanshusha Japanese->English (Kanji index)",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/jekindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"ej",		"Sanshusha English->Japanese",
	 "world/sansdata/sandic18.dat",
	 "world/sansdata/ejindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"tjeg",	"Sanshusha technical Japanese->English,German",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tjegindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tjegk",	"Sanshusha technical Japanese->English,German (Kanji index)",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tjegkindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tegj",	"Sanshusha technical English->German,Japanese",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tegjindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"tgje",	"Sanshusha technical German->Japanese,English",
	 "world/sansdata/sandic16.dat",
	 "world/sansdata/tgjeindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"ne",		"Kunnskapforlaget Norwegian->English",
	 "world/kunndata/sandic28.dat",
	 "world/kunndata/neindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"en",		"Kunnskapforlaget English->Norwegian",
	 "world/kunndata/sandic27.dat",
	 "world/kunndata/enindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"re",		"Leon Ungier Russian->English",
	 "re",	"reindex",
	 simplenextoff,	simpleprintentry,	simpleprintkey},
	{"er",		"Leon Ungier English->Russian",
	 "re",	"erindex",
	 simplenextoff,	simpleprintentry,	simpleprintkey},

	{"se",		"Collins Spanish->English",
	 "se",	"seindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
	{"es",		"Collins English->Spanish",
	 "es",	"esindex",
	 pcollnextoff,	pcollprintentry,	pcollprintkey},

	{"swe",		"Esselte Studium Swedish->English",
	 "world/essedata/sandic34.dat",
	 "world/essedata/sweindex",
	 worldnextoff,	worldprintentry,	worldprintkey},
	{"esw",		"Esselte Studium English->Swedish",
	 "world/essedata/sandic33.dat",
	 "world/essedata/eswindex",
	 worldnextoff,	worldprintentry,	worldprintkey},

	{"movie",	"Movies -- by title",
	 "movie/data",	"movtindex",
	 movienextoff,	movieprintentry,	movieprintkey},
	{"moviea",	"Movies -- by actor",
	 "movie/data",	"movaindex",
	 movienextoff,	movieprintentry,	movieprintkey},
	{"movied",	"Movies -- by director",
	 "movie/data",	"movdindex",
	 movienextoff,	movieprintentry,	movieprintkey},

	{"slang",	"English Slang",
	 "slang",	"slangindex",
	 slangnextoff,	slangprintentry,	slangprintkey},

	{"robert",	"Robert Électronique",
	 "robert/_pointers",	"robert/_index",
	 robertnextoff,	robertindexentry,	robertprintkey},
	{"robertv",	"Robert Électronique - formes des verbes",
	 "robert/flex.rob",	"robert/_flexindex",
	 robertnextflex,	robertflexentry,	robertprintkey},

	/* all-zero sentinel marking the end of the table */
	{0, 0, 0, 0, 0}
};

/*
 * One row of the accent table (ligtab): an accent rune together
 * with the letters it combines with.  pairs is a flat list
 * base, accented, base, accented, ... and liglookup() walks it two
 * runes at a time until it meets a 0 in a base position.
 */
typedef struct Lig Lig;
struct Lig {
	Rune	start;		/* accent rune */
	Rune	pairs[100];		/* <char,accented version> pairs */
};

/*
 * Accent composition table, one row per accent; keep in sync with dict.h.
 * liglookup() selects a row as ligtab[acc-LIGS], so the order of the rows
 * has to follow the accent codes starting at LIGS.  Within a row the
 * pairs are <base letter, accented letter>, closed by a 0; for example
 * the first row (accent 0xb4) maps 0x41 to 0xc1 and 0x61 to 0xe1.
 * Rows whose list is just {0} offer no accented forms.
 */
static Lig ligtab[Nligs] = {
	{0xb4,	{0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107, 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd, 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a, 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3, 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b, 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd, 0x5a, 0x179, 0x7a, 0x17a, 0}},
	{0x2cb,	{0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8, 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2, 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
	{0xa8,	{0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb, 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6, 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
	{0xb8,	{0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136, 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145, 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e, 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
	{0x2dc,	{0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129, 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5, 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
	{0x2d8,	{0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115, 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d, 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c, 0x75, 0x16d, 0}},
	{0x2da,	{0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
	{0x2d9,	{0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117, 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f, 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
	{0x2e,	{0}},
	{0x2322,	{0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109, 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d, 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee, 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4, 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb, 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176, 0x79, 0x177, 0}},
	{0x32f,	{0}},
	{0x2db,	{0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119, 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172, 0x75, 0x173, 0}},
	{0xaf,	{0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113, 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c, 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
	{0x2c7,	{0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f, 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e, 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159, 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165, 0x5a, 0x17d, 0x7a, 0x17e, 0}},
	{0x2bd,	{0}},
	{0x2bc,	{0}},
	{0x32e,	{0}}
};

/*
 * Multi-rune expansions: each row is a 0-terminated string of at most
 * four runes (for example 0x66 0x66 0x69 spells "ffi" and
 * 0x2e 0x2e 0x2e spells "...").
 * Nmulti is not defined in this file; presumably it and the codes that
 * index these rows come from dict.h, so the row order likely has to
 * match the definitions there -- confirm against dict.h.
 */
Rune multitab[Nmulti][5] = {
	{0x2bd, 0x3b1, 0},
	{0x2bc, 0x3b1, 0},
	{0x61, 0x6e, 0x64, 0},
	{0x61, 0x2f, 0x71, 0},
	{0x3c, 0x7c, 0},
	{0x2e, 0x2e, 0},
	{0x2e, 0x2e, 0x2e, 0},
	{0x2bd, 0x3b5, 0},
	{0x2bc, 0x3b5, 0},
	{0x2014, 0x2014, 0},
	{0x2bd, 0x3b7, 0},
	{0x2bc, 0x3b7, 0},
	{0x2bd, 0x3b9, 0},
	{0x2bc, 0x3b9, 0},
	{0x63, 0x74, 0},
	{0x66, 0x66, 0},
	{0x66, 0x66, 0x69, 0},
	{0x66, 0x66, 0x6c, 0},
	{0x66, 0x6c, 0},
	{0x66, 0x69, 0},
	{0x26b, 0x26b, 0},
	{0x73, 0x74, 0},
	{0x2bd, 0x3bf, 0},
	{0x2bc, 0x3bf, 0},
	{0x6f, 0x72, 0},
	{0x2bd, 0x3c1, 0},
	{0x2bc, 0x3c1, 0},
	{0x7e, 0x7e, 0},
	{0x2bd, 0x3c5, 0},
	{0x2bc, 0x3c5, 0},
	{0x2bd, 0x3c9, 0},
	{0x2bc, 0x3c9, 0},
	{0x6f, 0x65, 0},
	{0x20, 0x20, 0}
};

/* true when r is an ASCII capital letter, 0x41 ('A') through 0x5a ('Z') */
#define	risupper(r)	((r) >= 0x41 && (r) <= 0x5a)
/* true when r is in 0xC0..0xFF, the range covered by latin_fold_tab */
#define	rislatin1(r)	((r) >= 0xC0 && (r) <= 0xFF)
/* lower-case counterpart of an ASCII capital; apply only if risupper(r) */
#define	rtolower(r)	((r)+('a'-'A'))

/*
 * Folding table consulted by fold() and foldre(): the entry at
 * index r-0xc0 is the lowercase ASCII letter that replaces rune r,
 * or 0 when r has no replacement and is left as it is.
 */
static Rune latin_fold_tab[] =
{
/*	Table to fold latin 1 characters to ASCII equivalents
			based at Rune value 0xc0

	 À    Á    Â    Ã    Ä    Å    Æ    Ç
	 È    É    Ê    Ë    Ì    Í    Î    Ï
	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
	 à    á    â    ã    ä    å    æ    ç
	 è    é    ê    ë    ì    í    î    ï
	 ð    ñ    ò    ó    ô    õ    ö    ÷
	 ø    ù    ú    û    ü    ý    þ    ÿ
*/
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y'
};

/* stack of saved translation tables, pushed and popped by changett() */
static Rune 	*ttabstack[20];
/* number of tables currently held in ttabstack */
static int	ntt;

/*
 * Binary search in tab, an array of n Assoc entries kept
 * sorted by key (strcmp order).
 * Returns the val stored with key, or -1 if key is absent.
 */
long
lookassoc(Assoc *tab, int n, char *key)
{
	long lo, hi, mid;
	int cmp;

	/* any match lies strictly between lo and hi */
	lo = -1;
	hi = n;
	while(hi - lo > 1){
		mid = (hi+lo)/2;
		cmp = strcmp(key, tab[mid].key);
		if(cmp == 0)
			return tab[mid].val;
		if(cmp < 0)
			hi = mid;
		else
			lo = mid;
	}
	return -1;
}

/*
 * Numeric-key counterpart of lookassoc: binary search in tab,
 * an array of n Nassoc entries sorted by ascending key.
 * Returns the val stored with key, or -1 if key is absent.
 */
long
looknassoc(Nassoc *tab, int n, long key)
{
	long lo, hi, mid;

	/* any match lies strictly between lo and hi */
	lo = -1;
	hi = n;
	while(hi - lo > 1){
		mid = (hi+lo)/2;
		if(key == tab[mid].key)
			return tab[mid].val;
		if(key < tab[mid].key)
			hi = mid;
		else
			lo = mid;
	}
	return -1;
}

/*
 * Format a message (print-style fmt plus arguments) and write it
 * to file descriptor 2 as "argv0: message" followed by a newline.
 * The formatted message is built in a 1000-byte buffer.
 */
void
err(char *fmt, ...)
{
	va_list args;
	char msg[1000];

	va_start(args, fmt);
	vsnprint(msg, sizeof msg, fmt, args);
	va_end(args);
	fprint(2, "%s: %s\n", argv0, msg);
}

/*
 * Send rune r to bout unless output is inhibited.
 * Every call counts one position in linelen; a blank (0x20)
 * arriving once the count has passed breaklen is replaced by a
 * newline and the count starts over.
 */
void
outrune(long r)
{
	if(outinhibit)
		return;
	linelen++;
	if(r == 0x20 && linelen > breaklen){
		Bputc(bout, '\n');
		linelen = 0;
		return;
	}
	Bputrune(bout, r);
}

/*
 * Pass each rune of the 0-terminated string rp to outrune.
 */
void
outrunes(Rune *rp)
{
	for(; *rp != 0; rp++)
		outrune(*rp);
}

/*
 * Like outrune, but for an argument known to be a single char:
 * a blank arriving after linelen has passed breaklen is written
 * as a newline and the count is reset.
 */
void
outchar(int c)
{
	if(outinhibit)
		return;
	linelen++;
	if(c == ' ' && linelen > breaklen){
		linelen = 0;
		Bputc(bout, '\n');
	}else
		Bputc(bout, c);
}

/*
 * Pass each char of the 0-terminated string s to outchar.
 */
void
outchars(char *s)
{
	for(; *s != 0; s++)
		outchar(*s);
}

/*
 * Format print-style into a 1000-byte buffer and emit the
 * result through outchars, so the usual line breaking applies.
 */
void
outprint(char *fmt, ...)
{
	va_list args;
	char text[1000];

	va_start(args, fmt);
	vsnprint(text, sizeof text, fmt, args);
	va_end(args);
	outchars(text);
}

/*
 * Emit the chars from b up to (not including) e through outchar.
 * Newlines are turned into blanks, and a blank that directly
 * follows another blank is dropped.
 */
void
outpiece(char *b, char *e)
{
	int ch, prev;

	for(prev = 0; b < e; b++){
		ch = *b;
		if(ch == '\n')
			ch = ' ';
		if(ch != ' ' || prev != ' ')
			outchar(ch);
		prev = ch;
	}
}

/*
 * Move to the start of a line unless already there.
 * ind == 0: just end the current line if it has anything on it.
 * ind != 0: additionally leave a two-blank indent;
 * ind > 1: leave an empty line before the indent as well.
 * Small hack: a current line of only one or two characters is
 * assumed to consist of blanks and is padded rather than ended.
 */
void
outnl(int ind)
{
	if(outinhibit)
		return;
	if(ind == 0){
		if(linelen != 0){
			Bputc(bout, '\n');
			linelen = 0;
		}
		return;
	}
	if(ind > 1){
		if(linelen > 2)
			Bputc(bout, '\n');
		Bprint(bout, "\n  ");
	}else{
		switch(linelen){
		case 0:
			Bprint(bout, "  ");
			break;
		case 1:
			Bputc(bout, ' ');
			break;
		case 2:
			/* already sitting at the indent */
			break;
		default:
			Bprint(bout, "\n  ");
			break;
		}
	}
	linelen = 2;
}

/*
 * Fold the 0-terminated rune string rp in place, following the
 * sort(1) notion of folding: accented latin1 letters become the
 * unaccented letter from latin_fold_tab, and ASCII capitals
 * become lower case.
 */
void
fold(Rune *rp)
{
	Rune r, plain;

	for(; *rp != 0; rp++){
		r = *rp;
		if(rislatin1(r)){
			/* a 0 entry means this rune has no unaccented form */
			plain = latin_fold_tab[r-0xc0];
			if(plain != 0)
				r = plain;
		}
		if(risupper(r))
			r = rtolower(r);
		*rp = r;
	}
}

/*
 * Same folding as fold, but reading the UTF string old and
 * writing the folded UTF string to new, which the caller must
 * have made large enough.
 * old is a regular expression; its metacharacters are known
 * not to be changed by folding.
 */
void
foldre(char *new, char *old)
{
	Rune r, plain;

	while(*old != 0){
		old += chartorune(&r, old);
		if(rislatin1(r)){
			/* a 0 entry means this rune has no unaccented form */
			plain = latin_fold_tab[r-0xc0];
			if(plain != 0)
				r = plain;
		}
		if(risupper(r))
			r = rtolower(r);
		new += runetochar(new, &r);
	}
	*new = 0;
}

/*
 * Compare the 0-terminated rune strings s and t.  Result:
 *	-2	s sorts strictly before t
 *	-1	s is a proper prefix of t
 *	 0	s and t are identical
 *	 1	t is a proper prefix of s
 *	 2	t sorts strictly before s
 */
int
acomp(Rune *s, Rune *t)
{
	int cs, ct;

	/* walk past the common prefix */
	while((cs = *s) == (ct = *t)){
		if(cs == 0)
			return 0;
		s++;
		t++;
	}
	if(cs == 0)
		return -1;
	if(ct == 0)
		return 1;
	return cs < ct ? -2 : 2;
}

/*
 * Copy the rune string 'from', including its terminating 0,
 * to 'to'.
 */
void
runescpy(Rune *to, Rune *from)
{
	Rune r;

	do{
		r = *from++;
		*to++ = r;
	}while(r != 0);
}

/*
 * Convert the leading run of decimal digits (0x30..0x39) in r
 * to a long; the result is 0 if r does not start with a digit.
 * Overflow is not detected.
 */
long
runetol(Rune *r)
{
	int c;
	long n;

	n = 0;
	while((c = *r++) >= 0x30 && c <= 0x39)
		n = n*10 + (c - '0');
	return n;
}

/*
 * Find the rune for letter r carrying accent acc.
 * acc must lie in [LIGS..LIGE-1]; the row ligtab[acc-LIGS] is
 * searched for r.  Returns the accented rune, or NONE when acc
 * is out of range or the row has no entry for r.
 */
Rune
liglookup(Rune acc, Rune r)
{
	Rune *pair;

	if(LIGS <= acc && acc < LIGE){
		/* each pair is <base, accented>; a 0 base ends the list */
		pair = ligtab[acc-LIGS].pairs;
		while(pair[0] != 0){
			if(pair[0] == r)
				return pair[1];
			pair += 2;
		}
	}
	return NONE;
}

/*
 * Translation table stack (a translation table is an array of
 * Runes indexed by bytes or 7-bit bytes).
 *	curtab == 0:	empty the stack and return 0.
 *	starting != 0:	push curtab and return newtab.
 *	starting == 0:	pop the most recently pushed table and return it.
 * When the stack is full on a push or empty on a pop, curtab is
 * returned unchanged; the problem is reported only if debug is set.
 */
Rune *
changett(Rune *curtab, Rune *newtab, int starting)
{
	if(curtab == 0){
		ntt = 0;
		return 0;
	}
	if(starting){
		if(ntt < asize(ttabstack)){
			ttabstack[ntt++] = curtab;
			return newtab;
		}
		if(debug)
			err("translation stack overflow");
		return curtab;
	}
	if(ntt != 0)
		return ttabstack[--ntt];
	if(debug)
		err("translation stack underflow");
	return curtab;
}