aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cmd/htmlfmt/dat.h50
-rw-r--r--src/cmd/htmlfmt/html.c331
-rw-r--r--src/cmd/htmlfmt/main.c71
-rw-r--r--src/cmd/htmlfmt/mkfile30
-rw-r--r--src/cmd/htmlfmt/util.c120
-rw-r--r--src/libhtml/build.c4238
-rw-r--r--src/libhtml/impl.h163
-rw-r--r--src/libhtml/lex.c1384
-rw-r--r--src/libhtml/mkfile22
-rw-r--r--src/libhtml/runetab.c83
-rw-r--r--src/libhtml/runetab.h59
-rw-r--r--src/libhtml/strinttab.c64
-rw-r--r--src/libhtml/utils.c591
13 files changed, 7206 insertions, 0 deletions
diff --git a/src/cmd/htmlfmt/dat.h b/src/cmd/htmlfmt/dat.h
new file mode 100644
index 00000000..f3b05605
--- /dev/null
+++ b/src/cmd/htmlfmt/dat.h
@@ -0,0 +1,50 @@
+typedef struct Bytes Bytes;
+typedef struct URLwin URLwin;
+
+enum
+{
+ STACK = 8192,
+ EVENTSIZE = 256,
+};
+
+struct Bytes	/* growable byte buffer; filled by growbytes */
+{
+ uchar *b;	/* data; growbytes keeps a NUL after the last byte */
+ long n;	/* bytes in use, not counting the trailing NUL */
+ long nalloc;	/* bytes currently allocated for b */
+};
+
+struct URLwin	/* state for one document being formatted */
+{
+ int infd;	/* loadhtml stores the descriptor it reads from here */
+ int outfd;	/* rerender writes the formatted text to this descriptor */
+ int type;	/* media type passed to parsehtml; loadhtml sets TextHtml */
+
+ char *url;	/* loadhtml stores an estrdup'd copy of the global url */
+ Item *items;	/* item list returned by parsehtml */
+ Docinfo *docinfo;	/* document info filled in by parsehtml */
+};
+
+extern char* url;
+extern int aflag;
+extern int width;
+extern int defcharset;
+
+extern char* loadhtml(int);
+
+extern char* readfile(char*, char*, int*);
+extern int charset(char*);
+extern void* emalloc(ulong);
+extern char* estrdup(char*);
+extern char* estrstrdup(char*, char*);
+extern char* egrow(char*, char*, char*);
+extern char* eappend(char*, char*, char*);
+extern void error(char*, ...);
+
+extern void growbytes(Bytes*, char*, long);
+
+extern void rendertext(URLwin*, Bytes*);
+extern void rerender(URLwin*);
+extern void freeurlwin(URLwin*);
+
+#pragma varargck argpos error 1
diff --git a/src/cmd/htmlfmt/html.c b/src/cmd/htmlfmt/html.c
new file mode 100644
index 00000000..4f2e436f
--- /dev/null
+++ b/src/cmd/htmlfmt/html.c
@@ -0,0 +1,331 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <regexp.h>
+#include <html.h>
+#include <ctype.h>
+#include "dat.h"
+
+char urlexpr[] = "^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
+Reprog *urlprog;
+
+int inword = 0;
+int col = 0;
+int wordi = 0;
+
+/*
+ * Read all of fd, parse it as HTML and write the formatted text
+ * (rendertext writes to descriptor 1 via u->outfd).
+ * Returns nil on success, including for an empty input, or a
+ * static error string if reading fd fails.
+ */
+char*
+loadhtml(int fd)
+{
+	URLwin *u;
+	Bytes *b;
+	int n;
+	char *err;
+	char buf[4096];
+
+	u = emalloc(sizeof(URLwin));
+	u->infd = fd;
+	u->outfd = 1;
+	u->url = estrdup(url);
+	u->type = TextHtml;
+
+	b = emalloc(sizeof(Bytes));
+	while((n = read(fd, buf, sizeof buf)) > 0)
+		growbytes(b, buf, n);
+
+	err = nil;
+	if(n < 0)
+		err = "read error";
+	else if(b->b != nil){
+		rendertext(u, b);
+		freeurlwin(u);
+		u = nil;
+	}
+	if(u != nil){
+		/* nothing was parsed, so there are no items or docinfo to release */
+		free(u->url);
+		free(u);
+	}
+	/* the input buffer is no longer needed once rendering is done */
+	free(b->b);
+	free(b);
+	return err;
+}
+
+/*
+ * Format the first n runes of r with smprint's %S verb and return
+ * the newly allocated string.  For n == 0 the result is an empty
+ * string (one zeroed byte from emalloc).  Allocation failure is
+ * fatal, so the result is never nil.
+ */
+char*
+runetobyte(Rune *r, int n)
+{
+	char *out;
+
+	if(n != 0){
+		out = smprint("%.*S", n, r);
+		if(out == nil)
+			error("malloc failed");
+		return out;
+	}
+	return emalloc(1);
+}
+
+/*
+ * Report whether c is an ASCII closing punctuation character,
+ * i.e. one that emitword should not separate from the preceding
+ * word with a space.
+ *
+ * c may be any rune value.  strchr narrows its argument to char,
+ * so NUL and runes outside ASCII are rejected first; otherwise NUL
+ * would match the string terminator and a rune such as U+012E
+ * would alias '.'.
+ */
+int
+closingpunct(int c)
+{
+	if(c <= 0 || c >= 0x80)
+		return 0;
+	if(strchr(".,:;'\")]}>!?", c))
+		return 1;
+	return 0;
+}
+
+/*
+ * Append the nr-rune word starting at r to b, breaking the line
+ * first if the word would run past the global width.  A single
+ * space separates the word from preceding text unless that text
+ * ends in white space or the word starts with closing punctuation.
+ * Updates the global column counter and clears inword.
+ */
+void
+emitword(Bytes *b, Rune *r, int nr)
+{
+	char *s;
+	int space;
+
+	if(nr == 0)
+		return;
+	/* runetobyte treats allocation failure as fatal, so s is never nil */
+	s = runetobyte(r, nr);
+	space = (b->n>0) && !isspace(b->b[b->n-1]) && !closingpunct(r[0]);
+	if(col>0 && col+space+nr > width){
+		growbytes(b, "\n", 1);
+		space = 0;
+		col = 0;
+	}
+	if(space && col>0){
+		growbytes(b, " ", 1);
+		col++;
+	}
+	growbytes(b, s, strlen(s));
+	col += nr;	/* columns are counted in runes, not bytes */
+	free(s);
+	inword = 0;
+}
+
+/*
+ * Split the NUL-terminated rune string r into words at spaces and
+ * newlines and append each word to b with emitword.  A newline
+ * also resets the column and adds a line break, except at the very
+ * start of the output or when b already ends in a blank line.
+ */
+void
+renderrunes(Bytes *b, Rune *r)
+{
+	int i, len;
+	Rune c;
+
+	len = runestrlen(r);
+	for(i=0; i<len; i++){
+		c = r[i];
+		if(c != '\n' && c != ' '){
+			/* ordinary rune: remember where a new word starts */
+			if(!inword)
+				wordi = i;
+			inword = 1;
+			continue;
+		}
+		/* white space ends any word in progress */
+		if(inword)
+			emitword(b, r+wordi, i-wordi);
+		if(c == ' ')
+			continue;
+		col = 0;
+		if(b->n == 0)
+			continue;	/* don't start with blank lines */
+		if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
+			growbytes(b, "\n", 1);
+	}
+	if(inword)
+		emitword(b, r+wordi, i-wordi);
+}
+
+/*
+ * Format the arguments print-style into a rune string and feed the
+ * result through renderrunes so it is word-wrapped like any other
+ * text.  Allocation failure is fatal.
+ */
+void
+renderbytes(Bytes *b, char *fmt, ...)
+{
+	Rune *r;
+	va_list arg;
+
+	va_start(arg, fmt);
+	r = runevsmprint(fmt, arg);
+	va_end(arg);
+	/* renderrunes calls runestrlen on r, so nil must not reach it */
+	if(r == nil)
+		error("malloc failed");
+	renderrunes(b, r);
+	free(r);
+}
+
+/*
+ * Return a newly allocated copy of url cut back to its base: the
+ * text before the last '/' when that slash lies at or beyond the
+ * end of the part matched by urlexpr, otherwise just the matched
+ * part.  Returns nil if url is nil or does not match urlexpr.
+ * The compiled expression is cached in urlprog.
+ */
+char*
+baseurl(char *url)
+{
+	char *copy, *last;
+	long matchlen;
+	Resub match[10];
+
+	if(url == nil)
+		return nil;
+	if(urlprog == nil && (urlprog = regcomp(urlexpr)) == nil)
+		error("can't compile URL regexp");
+	memset(match, 0, sizeof match);
+	if(regexec(urlprog, url, match, nelem(match)) == 0)
+		return nil;
+	matchlen = match[0].e.p - match[0].s.p;
+	copy = estrdup(url);
+	last = strrchr(copy, '/');
+	if(last != nil && last >= copy+matchlen)
+		*last = '\0';
+	else
+		copy[matchlen] = '\0';
+	return copy;
+}
+
+/*
+ * Return a newly allocated URL string for rhref.  If rhref does not
+ * itself match urlexpr (baseurl returns nil for it) and the
+ * document URL u->url has a base, the result is that base joined
+ * to rhref with exactly one '/' between them; otherwise it is
+ * rhref unchanged.  A nil rhref yields the string "NULL URL".
+ * The caller frees the result.
+ */
+char*
+fullurl(URLwin *u, Rune *rhref)
+{
+	char *base, *href, *hrefbase;
+	char *result;
+
+	if(rhref == nil)
+		return estrdup("NULL URL");
+	/* runetobyte never returns nil */
+	href = runetobyte(rhref, runestrlen(rhref));
+	hrefbase = baseurl(href);
+	result = nil;
+	if(hrefbase==nil && (base = baseurl(u->url))!=nil){
+		result = estrdup(base);
+		if(base[strlen(base)-1]!='/' && href[0]!='/')
+			result = eappend(result, "/", "");
+		free(base);
+	}
+	free(hrefbase);
+	if(result == nil)
+		return href;	/* hand our copy straight to the caller */
+	result = eappend(result, "", href);
+	free(href);
+	return result;
+}
+
+/*
+ * Append a plain-text rendering of the item list to t.
+ * Table cells and floats are rendered by recursion.  With aflag
+ * set, images, form fields and anchors are shown as bracketed
+ * notes.  curanchor is the anchor id already reported, so that an
+ * anchor spanning several items is printed only once.
+ * The output always ends with a newline if anything was written.
+ */
+void
+render(URLwin *u, Bytes *t, Item *items, int curanchor)
+{
+	Item *it;
+	Iimage *img;
+	Anchor *a;
+	Tablecell *cell;
+	char *href;
+
+	/* every list, nested ones included, restarts the word/column state */
+	wordi = 0;
+	col = 0;
+	inword = 0;
+
+	for(it=items; it!=nil; it=it->next){
+		if(it->state & IFbrk)
+			renderbytes(t, "\n");
+		if(it->state & IFbrksp)
+			renderbytes(t, "\n");
+
+		switch(it->tag){
+		case Itexttag:
+			renderrunes(t, ((Itext*)it)->s);
+			break;
+		case Iruletag:
+			if(t->n>0 && t->b[t->n-1]!='\n')
+				renderbytes(t, "\n");
+			renderbytes(t, "=======\n");
+			break;
+		case Iimagetag:
+			if(!aflag)
+				break;
+			img = (Iimage*)it;
+			if(img->imsrc){
+				href = fullurl(u, img->imsrc);
+				renderbytes(t, "[image %s]", href);
+				free(href);
+			}
+			break;
+		case Iformfieldtag:
+			if(aflag)
+				renderbytes(t, "[formfield]");
+			break;
+		case Itabletag:
+			for(cell=((Itable*)it)->table->cells; cell!=nil; cell=cell->next)
+				render(u, t, cell->content, curanchor);
+			if(t->n>0 && t->b[t->n-1]!='\n')
+				renderbytes(t, "\n");
+			break;
+		case Ifloattag:
+			render(u, t, ((Ifloat*)it)->item, curanchor);
+			break;
+		case Ispacertag:
+			if(((Ispacer*)it)->spkind != ISPnull)
+				renderbytes(t, " ");
+			break;
+		default:
+			error("unknown item tag %d\n", it->tag);
+		}
+
+		/* entering a different anchor: print its target once */
+		if(it->anchorid != 0 && it->anchorid != curanchor){
+			for(a=u->docinfo->anchors; a!=nil; a=a->next){
+				if(aflag && a->index == it->anchorid){
+					href = fullurl(u, a->href);
+					renderbytes(t, "[%s]", href);
+					free(href);
+					break;
+				}
+			}
+			curanchor = it->anchorid;
+		}
+	}
+	if(t->n>0 && t->b[t->n-1]!='\n')
+		renderbytes(t, "\n");
+}
+
+/*
+ * Render u->items into a temporary buffer and write the result to
+ * u->outfd in a single write.  Nothing is written when the
+ * rendering is empty.
+ */
+void
+rerender(URLwin *u)
+{
+	Bytes *out;
+
+	out = emalloc(sizeof(Bytes));
+	render(u, out, u->items, 0);
+	if(out->n != 0)
+		write(u->outfd, (char*)out->b, out->n);
+	free(out->b);
+	free(out);
+}
+
+/*
+ * Somewhat of a hack.  Not a full parse: finds the first "<meta"
+ * in s and then the first "charset=" at or after it, both matched
+ * case-insensitively.
+ *
+ * Returns UTF_8 if the declared value starts with "utf-8" or
+ * "utf8" (optionally preceded by a quote), otherwise defcharset.
+ * As a side effect, an unset (zero) defcharset is initialized to
+ * ISO_8859_1.
+ */
+int
+charset(char *s)
+{
+	char *meta, *cs;
+
+	if(defcharset == 0)
+		defcharset = ISO_8859_1;
+	meta = cistrstr(s, "<meta");
+	if(meta == nil)
+		return defcharset;
+	cs = cistrstr(meta, "charset=");
+	if(cs == nil)
+		return defcharset;
+	cs += 8;	/* strlen("charset=") */
+	if(*cs == '"' || *cs == '\'')
+		cs++;
+	/* cistrncmp returns 0 when the strings match */
+	if(cistrncmp(cs, "utf-8", 5) == 0 || cistrncmp(cs, "utf8", 4) == 0)
+		return UTF_8;
+	return defcharset;
+}
+
+/*
+ * Parse the HTML held in b, store the resulting item list and
+ * document info in u, then render and write it with rerender.
+ * The character set is guessed from the document by charset.
+ */
+void
+rendertext(URLwin *u, Bytes *b)
+{
+	Rune *pagesrc;
+	int chset;
+
+	chset = charset((char*)b->b);
+	pagesrc = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
+	u->items = parsehtml(b->b, b->n, pagesrc, u->type, chset, &u->docinfo);
+	/*
+	 * NOTE(review): pagesrc is not freed; the original free() here
+	 * was disabled.  Confirm who owns it before re-enabling.
+	 */
+
+	rerender(u);
+}
+
+
+/*
+ * Release a URLwin and everything it owns: the parsed items, the
+ * document info and the copy of the URL made by loadhtml.
+ * A nil u is ignored.
+ */
+void
+freeurlwin(URLwin *u)
+{
+	if(u == nil)
+		return;
+	freeitems(u->items);
+	u->items = nil;
+	freedocinfo(u->docinfo);
+	u->docinfo = nil;
+	/* url was allocated with estrdup in loadhtml */
+	free(u->url);
+	u->url = nil;
+	free(u);
+}
diff --git a/src/cmd/htmlfmt/main.c b/src/cmd/htmlfmt/main.c
new file mode 100644
index 00000000..f85bbb48
--- /dev/null
+++ b/src/cmd/htmlfmt/main.c
@@ -0,0 +1,71 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <html.h>
+#include "dat.h"
+
+char *url = "";
+int aflag;
+int width = 70;
+int defcharset;
+
+/* Print the command synopsis on descriptor 2 and exit with status "usage". */
+void
+usage(void)
+{
+	static char synopsis[] =
+		"usage: htmlfmt [-c charset] [-u URL] [-a] [-l length] [file ...]\n";
+
+	fprint(2, "%s", synopsis);
+	exits("usage");
+}
+
+/*
+ * htmlfmt: format each named HTML file (or standard input when no
+ * file is given) as plain text.  Processing stops at the first
+ * file that cannot be opened or for which loadhtml reports an
+ * error; that error becomes the exit status.
+ */
+void
+main(int argc, char *argv[])
+{
+	int i, fd;
+	char *meta, *arg, *err, *file;
+	char errbuf[ERRMAX];
+
+	ARGBEGIN{
+	case 'u':
+		url = EARGF(usage());
+		aflag++;
+		break;
+	case 'l':
+	case 'w':
+		arg = EARGF(usage());
+		width = atoi(arg);
+		if(width <= 0)
+			usage();
+		break;
+	case 'c':
+		/* let charset() interpret the name by wrapping it in a meta tag */
+		meta = smprint("<meta charset=\"%s\">", EARGF(usage()));
+		defcharset = charset(meta);
+		free(meta);
+		break;
+	case 'a':
+		aflag++;
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	err = nil;
+	file = "<stdin>";
+	if(argc == 0)
+		err = loadhtml(0);
+	for(i=0; err==nil && i<argc; i++){
+		file = argv[i];
+		fd = open(file, OREAD);
+		if(fd < 0){
+			errstr(errbuf, sizeof errbuf);
+			err = errbuf;
+		}else{
+			err = loadhtml(fd);
+			close(fd);
+		}
+	}
+	if(err)
+		fprint(2, "htmlfmt: processing %s: %s\n", file, err);
+	exits(err);
+}
diff --git a/src/cmd/htmlfmt/mkfile b/src/cmd/htmlfmt/mkfile
new file mode 100644
index 00000000..5b263532
--- /dev/null
+++ b/src/cmd/htmlfmt/mkfile
@@ -0,0 +1,30 @@
+<$SYS9/$systype/$objtype/mkfile
+
+TARG=htmlfmt
+OFILES=\
+ main.$O\
+ html.$O\
+ util.$O\
+
+HFILES=\
+ dat.h\
+ $SYS9/sys/include/html.h\
+
+LIB=$SYS9/$systype/$objtype/lib/libbio.a\
+ $SYS9/$systype/$objtype/lib/libregexp.a\
+ $SYS9/$systype/$objtype/lib/libhtml.a\
+ $SYS9/$systype/$objtype/lib/lib9c.a
+
+BIN=$SYS9/$systype/$objtype/bin
+
+UPDATE=\
+ mkfile\
+ $HFILES\
+ ${OFILES:%.$O=%.c}
+
+<$SYS9/sys/src/cmd/mkone
+
+CFLAGS=$CFLAGS
+
+#$O.out: $OFILES
+# $LD -o $target $LDFLAGS $OFILES
diff --git a/src/cmd/htmlfmt/util.c b/src/cmd/htmlfmt/util.c
new file mode 100644
index 00000000..b22b0ab5
--- /dev/null
+++ b/src/cmd/htmlfmt/util.c
@@ -0,0 +1,120 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <draw.h>
+#include <html.h>
+#include "dat.h"
+
+/*
+ * Allocate n zeroed bytes.  Never returns nil: allocation failure
+ * is reported through error().
+ */
+void*
+emalloc(ulong n)
+{
+	void *mem;
+
+	if((mem = malloc(n)) == nil)
+		error("can't malloc: %r");
+	memset(mem, 0, n);
+	return mem;
+}
+
+/*
+ * Resize the allocation at p to n bytes.  Failure is reported
+ * through error() rather than returned.
+ */
+void*
+erealloc(void *p, ulong n)
+{
+	void *mem;
+
+	mem = realloc(p, n);
+	if(mem == nil)
+		error("can't malloc: %r");
+	return mem;
+}
+
+/* Return a copy of the string s in memory obtained from emalloc. */
+char*
+estrdup(char *s)
+{
+	char *copy;
+	ulong size;
+
+	size = strlen(s)+1;	/* include the terminating NUL */
+	copy = emalloc(size);
+	memmove(copy, s, size);
+	return copy;
+}
+
+/*
+ * Return a newly allocated string holding s followed by t.
+ * Neither argument is freed.  Allocation failure is reported
+ * through error().
+ */
+char*
+estrstrdup(char *s, char *t)
+{
+	long slen, tlen;
+	char *joined, *p;
+
+	slen = strlen(s);
+	tlen = strlen(t);
+	/* plain malloc: every byte is written below, so zeroing is wasted */
+	joined = malloc(slen+tlen+1);
+	if(joined == nil)
+		error("can't malloc: %r");
+	p = joined;
+	memmove(p, s, slen);
+	p += slen;
+	memmove(p, t, tlen);
+	p += tlen;
+	*p = '\0';
+	return joined;
+}
+
+/*
+ * Return a newly allocated string holding s, then sep, then t
+ * (t may be nil, meaning nothing is appended after sep).
+ * s is freed; sep and t are left alone.
+ */
+char*
+eappend(char *s, char *sep, char *t)
+{
+	long slen, seplen, tlen;
+	char *joined;
+
+	if(t == nil)
+		t = "";
+	slen = strlen(s);
+	seplen = strlen(sep);
+	tlen = strlen(t);
+	/* plain malloc: every byte is written below, so zeroing is wasted */
+	joined = malloc(slen+seplen+tlen+1);
+	if(joined == nil)
+		error("can't malloc: %r");
+	memmove(joined, s, slen);
+	memmove(joined+slen, sep, seplen);
+	memmove(joined+slen+seplen, t, tlen);
+	joined[slen+seplen+tlen] = '\0';
+	free(s);
+	return joined;
+}
+
+/* Like eappend, but also frees t: both s and t are consumed. */
+char*
+egrow(char *s, char *sep, char *t)
+{
+	char *joined;
+
+	joined = eappend(s, sep, t);
+	free(t);
+	return joined;
+}
+
+/*
+ * Print a formatted diagnostic on descriptor 2, prefixed with the
+ * program name and followed by a newline, then exit.  The exit
+ * status is the unexpanded format string.  Does not return.
+ */
+void
+error(char *fmt, ...)
+{
+	va_list arg;
+	char buf[256];
+	Fmt f;
+
+	fmtfdinit(&f, 2, buf, sizeof buf);
+	/* same prefix main uses for its own diagnostics */
+	fmtprint(&f, "htmlfmt: ");
+	va_start(arg, fmt);
+	fmtvprint(&f, fmt, arg);
+	va_end(arg);
+	fmtprint(&f, "\n");
+	fmtfdflush(&f);
+	exits(fmt);
+}
+
+/*
+ * Append the ns bytes at s to b, enlarging the buffer as needed,
+ * and keep a NUL after the data so b->b can be used as a string.
+ * The NUL is not counted in b->n.
+ */
+void
+growbytes(Bytes *b, char *s, long ns)
+{
+	long need;
+
+	need = b->n + ns + 1;	/* data plus the trailing NUL */
+	if(need > b->nalloc){
+		/* over-allocate so that small appends rarely reallocate */
+		b->nalloc = b->n + ns + 8000;
+		/* realloc rather than emalloc: no need to zero the new space */
+		b->b = realloc(b->b, b->nalloc);
+		if(b->b == nil)
+			error("growbytes: can't realloc: %r");
+	}
+	memmove(b->b + b->n, s, ns);
+	b->n += ns;
+	b->b[b->n] = '\0';
+}
diff --git a/src/libhtml/build.c b/src/libhtml/build.c
new file mode 100644
index 00000000..32e64015
--- /dev/null
+++ b/src/libhtml/build.c
@@ -0,0 +1,4238 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <ctype.h>
+#include <html.h>
+#include "impl.h"
+
+// A stack for holding integer values
+enum {
+ Nestmax = 40 // max nesting level of lists, font styles, etc.
+};
+
+struct Stack {
+ int n; // next available slot (top of stack is stack[n-1])
+ int slots[Nestmax]; // stack entries
+};
+
+// Parsing state
+struct Pstate
+{
+ Pstate* next; // in stack of Pstates
+ int skipping; // true when we shouldn't add items
+ int skipwhite; // true when we should strip leading space
+ int curfont; // font index for current font
+ int curfg; // current foreground color
+ Background curbg; // current background
+ int curvoff; // current baseline offset
+ uchar curul; // current underline/strike state
+ uchar curjust; // current justify state
+ int curanchor; // current (href) anchor id (if in one), or 0
+ int curstate; // current value of item state
+ int literal; // current literal state
+ int inpar; // true when in a paragraph-like construct
+ int adjsize; // current font size adjustment
+ Item* items; // dummy head of item list we're building
+ Item* lastit; // tail of item list we're building
+ Item* prelastit; // item before lastit
+ Stack fntstylestk; // style stack
+ Stack fntsizestk; // size stack
+ Stack fgstk; // text color stack
+ Stack ulstk; // underline stack
+ Stack voffstk; // vertical offset stack
+ Stack listtypestk; // list type stack
+ Stack listcntstk; // list counter stack
+ Stack juststk; // justification stack
+ Stack hangstk; // hanging stack
+};
+
+struct ItemSource
+{
+ Docinfo* doc;
+ Pstate* psstk;
+ int nforms;
+ int ntables;
+ int nanchors;
+ int nframes;
+ Form* curform;
+ Map* curmap;
+ Table* tabstk;
+ Kidinfo* kidstk;
+};
+
+// Some layout parameters
+enum {
+ FRKIDMARGIN = 6, // default margin around kid frames
+ IMGHSPACE = 0, // default hspace for images (0 matches IE, Netscape)
+ IMGVSPACE = 0, // default vspace for images
+ FLTIMGHSPACE = 2, // default hspace for float images
+ TABSP = 5, // default cellspacing for tables
+ TABPAD = 1, // default cell padding for tables
+ LISTTAB = 1, // number of tabs to indent lists
+ BQTAB = 1, // number of tabs to indent blockquotes
+ HRSZ = 2, // thickness of horizontal rules
+ SUBOFF = 4, // vertical offset for subscripts
+ SUPOFF = 6, // vertical offset for superscripts
+ NBSP = 160 // non-breaking space character
+};
+
+// These tables must be sorted
+// Alignment keywords.  align_tab is the rune version of _align_tab,
+// built by buildinit.
+static StringInt *align_tab;
+static AsciiInt _align_tab[] = {
+	{"baseline",	ALbaseline},
+	{"bottom",	ALbottom},
+	{"center",	ALcenter},
+	{"char",	ALchar},
+	{"justify",	ALjustify},
+	{"left",	ALleft},
+	{"middle",	ALmiddle},
+	{"right",	ALright},
+	{"top",	ALtop}
+};
+// align_tab is only a pointer, so count the backing array instead:
+// sizeof(align_tab) would be the size of the pointer, not the table.
+#define NALIGNTAB nelem(_align_tab)
+
+// <INPUT type=...> keywords.  input_tab is the rune version of
+// _input_tab, built by buildinit.
+static StringInt *input_tab;
+static AsciiInt _input_tab[] = {
+	{"button",	Fbutton},
+	{"checkbox",	Fcheckbox},
+	{"file",	Ffile},
+	{"hidden",	Fhidden},
+	{"image",	Fimage},
+	{"password",	Fpassword},
+	{"radio",	Fradio},
+	{"reset",	Freset},
+	{"submit",	Fsubmit},
+	{"text",	Ftext}
+};
+// input_tab is only a pointer, so count the backing array instead:
+// sizeof(input_tab) would be the size of the pointer, not the table.
+#define NINPUTTAB nelem(_input_tab)
+
+// <BR clear=...> keywords.  clear_tab is the rune version of
+// _clear_tab, built by buildinit.
+static StringInt *clear_tab;
+static AsciiInt _clear_tab[] = {
+	{"all",	IFcleft|IFcright},
+	{"left",	IFcleft},
+	{"right",	IFcright}
+};
+// clear_tab is only a pointer, so count the backing array instead:
+// sizeof(clear_tab) would be the size of the pointer, not the table.
+#define NCLEARTAB nelem(_clear_tab)
+
+// Frame scrolling keywords.  fscroll_tab is the rune version of
+// _fscroll_tab, built by buildinit.
+static StringInt *fscroll_tab;
+static AsciiInt _fscroll_tab[] = {
+	{"auto",	FRhscrollauto|FRvscrollauto},
+	{"no",	FRnoscroll},
+	{"yes",	FRhscroll|FRvscroll}
+};
+// fscroll_tab is only a pointer, so count the backing array instead:
+// sizeof(fscroll_tab) would be the size of the pointer, not the table.
+#define NFSCROLLTAB nelem(_fscroll_tab)
+
+// <AREA shape=...> keywords.  shape_tab is the rune version of
+// _shape_tab, built by buildinit.
+static StringInt *shape_tab;
+static AsciiInt _shape_tab[] = {
+	{"circ",	SHcircle},
+	{"circle",	SHcircle},
+	{"poly",	SHpoly},
+	{"polygon",	SHpoly},
+	{"rect",	SHrect},
+	{"rectangle",	SHrect}
+};
+// shape_tab is only a pointer, so count the backing array instead:
+// sizeof(shape_tab) would be the size of the pointer, not the table.
+#define NSHAPETAB nelem(_shape_tab)
+
+// Form method keywords.  method_tab is the rune version of
+// _method_tab, built by buildinit.
+static StringInt *method_tab;
+static AsciiInt _method_tab[] = {
+	{"get",	HGet},
+	{"post",	HPost}
+};
+// method_tab is only a pointer, so count the backing array instead:
+// sizeof(method_tab) would be the size of the pointer, not the table.
+#define NMETHODTAB nelem(_method_tab)
+
+static Rune** roman;
+static char* _roman[15]= {
+ "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
+ "XI", "XII", "XIII", "XIV", "XV"
+};
+#define NROMAN 15
+
+// List number types
+enum {
+ LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI
+};
+
+enum {
+ SPBefore = 2,
+ SPAfter = 4,
+ BL = 1,
+ BLBA = (BL|SPBefore|SPAfter)
+};
+
+// blockbrk[tag] is break info for a block level element, or one
+// of a few others that get the same treatment re ending open paragraphs
+// and requiring a line break / vertical space before them.
+// If we want a line of space before the given element, SPBefore is OR'd in.
+// If we want a line of space after the given element, SPAfter is OR'd in.
+
+static uchar blockbrk[Numtags]= {
+ [Taddress] BLBA, [Tblockquote] BLBA, [Tcenter] BL,
+ [Tdir] BLBA, [Tdiv] BL, [Tdd] BL, [Tdl] BLBA,
+ [Tdt] BL, [Tform] BLBA,
+ // headings and tables get breaks added manually
+ [Th1] BL, [Th2] BL, [Th3] BL,
+ [Th4] BL, [Th5] BL, [Th6] BL,
+ [Thr] BL, [Tisindex] BLBA, [Tli] BL, [Tmenu] BLBA,
+ [Tol] BLBA, [Tp] BLBA, [Tpre] BLBA,
+ [Tul] BLBA
+};
+
+enum {
+ AGEN = 1
+};
+
+// attrinfo is information about attributes.
+// The AGEN value means that the attribute is generic (applies to almost all elements)
+static uchar attrinfo[Numattrs]= {
+ [Aid] AGEN, [Aclass] AGEN, [Astyle] AGEN, [Atitle] AGEN,
+ [Aonblur] AGEN, [Aonchange] AGEN, [Aonclick] AGEN,
+ [Aondblclick] AGEN, [Aonfocus] AGEN, [Aonkeypress] AGEN,
+ [Aonkeyup] AGEN, [Aonload] AGEN, [Aonmousedown] AGEN,
+ [Aonmousemove] AGEN, [Aonmouseout] AGEN, [Aonmouseover] AGEN,
+ [Aonmouseup] AGEN, [Aonreset] AGEN, [Aonselect] AGEN,
+ [Aonsubmit] AGEN, [Aonunload] AGEN
+};
+
+static uchar scriptev[Numattrs]= {
+ [Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick,
+ [Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress,
+ [Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown,
+ [Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover,
+ [Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect,
+ [Aonsubmit] SEonsubmit, [Aonunload] SEonunload
+};
+
+// Color lookup table: color names mapped to RGB values.
+// Must be kept sorted by name.  color_tab is the rune version of
+// _color_tab, built by buildinit.
+static StringInt *color_tab;
+static AsciiInt _color_tab[] = {
+	{"aqua",	0x00FFFF},
+	{"black",	0x000000},
+	{"blue",	0x0000CC},
+	{"fuchsia",	0xFF00FF},
+	{"gray",	0x808080},
+	{"green",	0x008000},
+	{"lime",	0x00FF00},
+	{"maroon",	0x800000},
+	{"navy",	0x000080},
+	{"olive",	0x808000},
+	{"purple",	0x800080},
+	{"red",	0xFF0000},
+	{"silver",	0xC0C0C0},
+	{"teal",	0x008080},
+	{"white",	0xFFFFFF},
+	{"yellow",	0xFFFF00}
+};
+// color_tab is only a pointer, so count the backing array instead:
+// sizeof(color_tab) would be the size of the pointer, not the table.
+#define NCOLORS nelem(_color_tab)
+
+static StringInt *targetmap;
+static int targetmapsize;
+static int ntargets;
+
+static int buildinited = 0;
+
+#define SMALLBUFSIZE 240
+#define BIGBUFSIZE 2000
+
+int dbgbuild = 0;
+int warn = 0;
+
+static Align aalign(Token* tok);
+static int acolorval(Token* tok, int attid, int dflt);
+static void addbrk(Pstate* ps, int sp, int clr);
+static void additem(Pstate* ps, Item* it, Token* tok);
+static void addlinebrk(Pstate* ps, int clr);
+static void addnbsp(Pstate* ps);
+static void addtext(Pstate* ps, Rune* s);
+static Dimen adimen(Token* tok, int attid);
+static int aflagval(Token* tok, int attid);
+static int aintval(Token* tok, int attid, int dflt);
+static Rune* astrval(Token* tok, int attid, Rune* dflt);
+static int atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt);
+static int atargval(Token* tok, int dflt);
+static int auintval(Token* tok, int attid, int dflt);
+static Rune* aurlval(Token* tok, int attid, Rune* dflt, Rune* base);
+static Rune* aval(Token* tok, int attid);
+static void buildinit(void);
+static Pstate* cell_pstate(Pstate* oldps, int ishead);
+static void changehang(Pstate* ps, int delta);
+static void changeindent(Pstate* ps, int delta);
+static int color(Rune* s, int dflt);
+static void copystack(Stack* tostk, Stack* fromstk);
+static int dimprint(char* buf, int nbuf, Dimen d);
+static Pstate* finishcell(Table* curtab, Pstate* psstk);
+static void finish_table(Table* t);
+static void freeanchor(Anchor* a);
+static void freedestanchor(DestAnchor* da);
+static void freeform(Form* f);
+static void freeformfield(Formfield* ff);
+static void freeitem(Item* it);
+static void freepstate(Pstate* p);
+static void freepstatestack(Pstate* pshead);
+static void freescriptevents(SEvent* ehead);
+static void freetable(Table* t);
+static Map* getmap(Docinfo* di, Rune* name);
+static Rune* getpcdata(Token* toks, int tokslen, int* ptoki);
+static Pstate* lastps(Pstate* psl);
+static Rune* listmark(uchar ty, int n);
+static int listtyval(Token* tok, int dflt);
+static Align makealign(int halign, int valign);
+static Background makebackground(Rune* imgurl, int color);
+static Dimen makedimen(int kind, int spec);
+static Anchor* newanchor(int index, Rune* name, Rune* href, int target, Anchor* link);
+static Area* newarea(int shape, Rune* href, int target, Area* link);
+static DestAnchor* newdestanchor(int index, Rune* name, Item* item, DestAnchor* link);
+static Docinfo* newdocinfo(void);
+static Genattr* newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events);
+static Form* newform(int formid, Rune* name, Rune* action,
+ int target, int method, Form* link);
+static Formfield* newformfield(int ftype, int fieldid, Form* form, Rune* name,
+ Rune* value, int size, int maxlength, Formfield* link);
+static Item* newifloat(Item* it, int side);
+static Item* newiformfield(Formfield* ff);
+static Item* newiimage(Rune* src, Rune* altrep, int align, int width, int height,
+ int hspace, int vspace, int border, int ismap, Map* map);
+static Item* newirule(int align, int size, int noshade, Dimen wspec);
+static Item* newispacer(int spkind);
+static Item* newitable(Table* t);
+static ItemSource* newitemsource(Docinfo* di);
+static Item* newitext(Rune* s, int fnt, int fg, int voff, int ul);
+static Kidinfo* newkidinfo(int isframeset, Kidinfo* link);
+static Option* newoption(int selected, Rune* value, Rune* display, Option* link);
+static Pstate* newpstate(Pstate* link);
+static SEvent* newscriptevent(int type, Rune* script, SEvent* link);
+static Table* newtable(int tableid, Align align, Dimen width, int border,
+ int cellspacing, int cellpadding, Background bg, Token* tok, Table* link);
+static Tablecell* newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec,
+ int hspec, Background bg, int flags, Tablecell* link);
+static Tablerow* newtablerow(Align align, Background bg, int flags, Tablerow* link);
+static Dimen parsedim(Rune* s, int ns);
+static void pop(Stack* stk);
+static void popfontsize(Pstate* ps);
+static void popfontstyle(Pstate* ps);
+static void popjust(Pstate* ps);
+static int popretnewtop(Stack* stk, int dflt);
+static int push(Stack* stk, int val);
+static void pushfontsize(Pstate* ps, int sz);
+static void pushfontstyle(Pstate* ps, int sty);
+static void pushjust(Pstate* ps, int j);
+static Item* textit(Pstate* ps, Rune* s);
+static Rune* removeallwhite(Rune* s);
+static void resetdocinfo(Docinfo* d);
+static void setcurfont(Pstate* ps);
+static void setcurjust(Pstate* ps);
+static void setdimarray(Token* tok, int attid, Dimen** pans, int* panslen);
+static Rune* stringalign(int a);
+static void targetmapinit(void);
+static int toint(Rune* s);
+static int top(Stack* stk, int dflt);
+static void trim_cell(Tablecell* c);
+static int validalign(Align a);
+static int validdimen(Dimen d);
+static int validformfield(Formfield* f);
+static int validhalign(int a);
+static int validptr(void* p);
+static int validStr(Rune* s);
+static int validtable(Table* t);
+static int validtablerow(Tablerow* r);
+static int validtablecol(Tablecol* c);
+static int validtablecell(Tablecell* c);
+static int validvalign(int a);
+static int Iconv(Fmt *f);
+
+static void	// one-time setup; getitems calls this while buildinited is 0
+buildinit(void)
+{
+ runetabinit();
+ roman = cvtstringtab(_roman, nelem(_roman));	// fill each table pointer from its _xxx backing array
+ color_tab = cvtstringinttab(_color_tab, nelem(_color_tab));
+ method_tab = cvtstringinttab(_method_tab, nelem(_method_tab));
+ shape_tab = cvtstringinttab(_shape_tab, nelem(_shape_tab));
+ fscroll_tab = cvtstringinttab(_fscroll_tab, nelem(_fscroll_tab));
+ clear_tab = cvtstringinttab(_clear_tab, nelem(_clear_tab));
+ input_tab = cvtstringinttab(_input_tab, nelem(_input_tab));
+ align_tab = cvtstringinttab(_align_tab, nelem(_align_tab));
+
+ fmtinstall('I', Iconv);	// register Iconv as the handler for the 'I' print verb
+ targetmapinit();
+ buildinited = 1;	// getitems tests this flag so the setup runs only once
+}
+
+// Allocate the parsing context for document di: a one-entry Pstate
+// stack with all counters zero and no current form, map, table or
+// frame.  For anything other than text/html the initial state is
+// literal, unwrapped text in the FntT font style.
+static ItemSource*
+newitemsource(Docinfo* di)
+{
+	ItemSource* src;
+	Pstate* base;
+
+	base = newpstate(nil);
+	if(di->mediatype != TextHtml) {
+		base->curstate &= ~IFwrap;
+		base->literal = 1;
+		pushfontstyle(base, FntT);
+	}
+
+	src = (ItemSource*)emalloc(sizeof(ItemSource));
+	src->doc = di;
+	src->psstk = base;
+	src->nforms = src->ntables = src->nanchors = src->nframes = 0;
+	src->curform = nil;
+	src->curmap = nil;
+	src->tabstk = nil;
+	src->kidstk = nil;
+	return src;
+}
+
+static Item *getitems(ItemSource* is, uchar* data, int datalen);
+
+// Parse an html document and create a list of layout items.
+// A new Docinfo is allocated and returned in *pdi; it holds its
+// own copies of pagesrc as both the source and the base URL.
+// When the caller is done with the items, it should call
+// freeitems on the returned result, and then
+// freedocinfo(*pdi).
+Item*
+parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi)
+{
+	Item *items;
+	Docinfo* doc;
+	ItemSource* src;
+
+	doc = newdocinfo();
+	doc->src = _Strdup(pagesrc);
+	doc->base = _Strdup(pagesrc);
+	doc->mediatype = mtype;
+	doc->chset = chset;
+	*pdi = doc;
+
+	// the item source is only scaffolding for this one parse
+	src = newitemsource(doc);
+	items = getitems(src, data, datalen);
+	freepstatestack(src->psstk);
+	free(src);
+	return items;
+}
+
+// Get a group of tokens for lexer, parse them, and create
+// a list of layout items.
+// When caller is done with the items, it should call
+// freeitems on the returned result.
+static Item*
+getitems(ItemSource* is, uchar* data, int datalen)
+{
+ int i;
+ int j;
+ int nt;
+ int pt;
+ int doscripts;
+ int tokslen;
+ int toki;
+ int h;
+ int sz;
+ int method;
+ int n;
+ int nblank;
+ int norsz;
+ int bramt;
+ int sty;
+ int nosh;
+ int oldcuranchor;
+ int dfltbd;
+ int v;
+ int hang;
+ int isempty;
+ int tag;
+ int brksp;
+ int target;
+ uchar brk;
+ uchar flags;
+ uchar align;
+ uchar al;
+ uchar ty;
+ uchar ty2;
+ Pstate* ps;
+ Pstate* nextps;
+ Pstate* outerps;
+ Table* curtab;
+ Token* tok;
+ Token* toks;
+ Docinfo* di;
+ Item* ans;
+ Item* img;
+ Item* ffit;
+ Item* tabitem;
+ Rune* s;
+ Rune* t;
+ Rune* name;
+ Rune* enctype;
+ Rune* usemap;
+ Rune* prompt;
+ Rune* equiv;
+ Rune* val;
+ Rune* nsz;
+ Rune* script;
+ Map* map;
+ Form* frm;
+ Iimage* ii;
+ Kidinfo* kd;
+ Kidinfo* ks;
+ Kidinfo* pks;
+ Dimen wd;
+ Option* option;
+ Table* tab;
+ Tablecell* c;
+ Tablerow* tr;
+ Formfield* field;
+ Formfield* ff;
+ Rune* href;
+ Rune* src;
+ Rune* scriptsrc;
+ Rune* bgurl;
+ Rune* action;
+ Background bg;
+
+ if(!buildinited)
+ buildinit();
+ doscripts = 0; // for now
+ ps = is->psstk;
+ curtab = is->tabstk;
+ di = is->doc;
+ toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen);
+ toki = 0;
+ for(; toki < tokslen; toki++) {
+ tok = &toks[toki];
+ if(dbgbuild > 1)
+ fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok);
+ tag = tok->tag;
+ brk = 0;
+ brksp = 0;
+ if(tag < Numtags) {
+ brk = blockbrk[tag];
+ if(brk&SPBefore)
+ brksp = 1;
+ }
+ else if(tag < Numtags + RBRA) {
+ brk = blockbrk[tag - RBRA];
+ if(brk&SPAfter)
+ brksp = 1;
+ }
+ if(brk) {
+ addbrk(ps, brksp, 0);
+ if(ps->inpar) {
+ popjust(ps);
+ ps->inpar = 0;
+ }
+ }
+ // check common case first (Data), then switch statement on tag
+ if(tag == Data) {
+ // Lexing didn't pay attention to SGML record boundary rules:
+ // \n after start tag or before end tag to be discarded.
+ // (Lex has already discarded all \r's).
+ // Some pages assume this doesn't happen in <PRE> text,
+ // so we won't do it if literal is true.
+ // BUG: won't discard \n before a start tag that begins
+ // the next bufferful of tokens.
+ s = tok->text;
+ n = _Strlen(s);
+ if(!ps->literal) {
+ i = 0;
+ j = n;
+ if(toki > 0) {
+ pt = toks[toki - 1].tag;
+ // IE and Netscape both ignore this rule (contrary to spec)
+ // if previous tag was img
+ if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
+ i++;
+ }
+ if(toki < tokslen - 1) {
+ nt = toks[toki + 1].tag;
+ if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
+ j--;
+ }
+ if(i > 0 || j < n) {
+ t = s;
+ s = _Strsubstr(s, i, j);
+ free(t);
+ n = j-i;
+ }
+ }
+ if(ps->skipwhite) {
+ _trimwhite(s, n, &t, &nt);
+ if(t == nil) {
+ free(s);
+ s = nil;
+ }
+ else if(t != s) {
+ t = _Strndup(t, nt);
+ free(s);
+ s = t;
+ }
+ if(s != nil)
+ ps->skipwhite = 0;
+ }
+ tok->text = nil; // token doesn't own string anymore
+ if(s != nil)
+ addtext(ps, s);
+ }
+ else
+ switch(tag) {
+ // Some abbrevs used in following DTD comments
+ // %text = #PCDATA
+ // | TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
+ // | EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
+ // | A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
+ // | INPUT | SELECT | TEXTAREA
+ // %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
+ // | BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
+ // %flow = (%text | %block)*
+ // %body.content = (%heading | %text | %block | ADDRESS)*
+
+ // <!ELEMENT A - - (%text) -(A)>
+ // Anchors are not supposed to be nested, but you sometimes see
+ // href anchors inside destination anchors.
+ case Ta:
+ if(ps->curanchor != 0) {
+ if(warn)
+ fprint(2, "warning: nested <A> or missing </A>\n");
+ ps->curanchor = 0;
+ }
+ name = aval(tok, Aname);
+ href = aurlval(tok, Ahref, nil, di->base);
+ // ignore rel, rev, and title attrs
+ if(href != nil) {
+ target = atargval(tok, di->target);
+ di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
+ if(name != nil)
+ name = _Strdup(name); // for DestAnchor construction, below
+ ps->curanchor = is->nanchors;
+ ps->curfg = push(&ps->fgstk, di->link);
+ ps->curul = push(&ps->ulstk, ULunder);
+ }
+ if(name != nil) {
+ // add a null item to be destination
+ additem(ps, newispacer(ISPnull), tok);
+ di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
+ }
+ break;
+
+ case Ta+RBRA :
+ if(ps->curanchor != 0) {
+ ps->curfg = popretnewtop(&ps->fgstk, di->text);
+ ps->curul = popretnewtop(&ps->ulstk, ULnone);
+ ps->curanchor = 0;
+ }
+ break;
+
+ // <!ELEMENT APPLET - - (PARAM | %text)* >
+ // We can't do applets, so ignore PARAMS, and let
+ // the %text contents appear for the alternative rep
+ case Tapplet:
+ case Tapplet+RBRA:
+ if(warn && tag == Tapplet)
+ fprint(2, "warning: <APPLET> ignored\n");
+ break;
+
+ // <!ELEMENT AREA - O EMPTY>
+ case Tarea:
+ map = di->maps;
+ if(map == nil) {
+ if(warn)
+ fprint(2, "warning: <AREA> not inside <MAP>\n");
+ continue;
+ }
+ map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
+ aurlval(tok, Ahref, nil, di->base),
+ atargval(tok, di->target),
+ map->areas);
+ setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
+ break;
+
+ // <!ELEMENT (B|STRONG) - - (%text)*>
+ case Tb:
+ case Tstrong:
+ pushfontstyle(ps, FntB);
+ break;
+
+ case Tb+RBRA:
+ case Tcite+RBRA:
+ case Tcode+RBRA:
+ case Tdfn+RBRA:
+ case Tem+RBRA:
+ case Tkbd+RBRA:
+ case Ti+RBRA:
+ case Tsamp+RBRA:
+ case Tstrong+RBRA:
+ case Ttt+RBRA:
+ case Tvar+RBRA :
+ case Taddress+RBRA:
+ popfontstyle(ps);
+ break;
+
+ // <!ELEMENT BASE - O EMPTY>
+ case Tbase:
+ t = di->base;
+ di->base = aurlval(tok, Ahref, di->base, di->base);
+ if(t != nil)
+ free(t);
+ di->target = atargval(tok, di->target);
+ break;
+
+ // <!ELEMENT BASEFONT - O EMPTY>
+ case Tbasefont:
+ ps->adjsize = aintval(tok, Asize, 3) - 3;
+ break;
+
+ // <!ELEMENT (BIG|SMALL) - - (%text)*>
+ case Tbig:
+ case Tsmall:
+ sz = ps->adjsize;
+ if(tag == Tbig)
+ sz += Large;
+ else
+ sz += Small;
+ pushfontsize(ps, sz);
+ break;
+
+ case Tbig+RBRA:
+ case Tsmall+RBRA:
+ popfontsize(ps);
+ break;
+
+ // <!ELEMENT BLOCKQUOTE - - %body.content>
+ case Tblockquote:
+ changeindent(ps, BQTAB);
+ break;
+
+ case Tblockquote+RBRA:
+ changeindent(ps, -BQTAB);
+ break;
+
+ // <!ELEMENT BODY O O %body.content>
+ case Tbody:
+ ps->skipping = 0;
+ bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
+ bgurl = aurlval(tok, Abackground, nil, di->base);
+ if(bgurl != nil) {
+ if(di->backgrounditem != nil)
+ freeitem((Item*)di->backgrounditem);
+ // really should remove old item from di->images list,
+ // but there should only be one BODY element ...
+ di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
+ di->backgrounditem->nextimage = di->images;
+ di->images = di->backgrounditem;
+ }
+ ps->curbg = bg;
+ di->background = bg;
+ di->text = acolorval(tok, Atext, di->text);
+ di->link = acolorval(tok, Alink, di->link);
+ di->vlink = acolorval(tok, Avlink, di->vlink);
+ di->alink = acolorval(tok, Aalink, di->alink);
+ if(di->text != ps->curfg) {
+ ps->curfg = di->text;
+ ps->fgstk.n = 0;
+ }
+ break;
+
+ case Tbody+RBRA:
+ // HTML spec says ignore things after </body>,
+ // but IE and Netscape don't
+ // ps.skipping = 1;
+ break;
+
+ // <!ELEMENT BR - O EMPTY>
+ case Tbr:
+ addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
+ break;
+
+ // <!ELEMENT CAPTION - - (%text;)*>
+ case Tcaption:
+ if(curtab == nil) {
+ if(warn)
+ fprint(2, "warning: <CAPTION> outside <TABLE>\n");
+ continue;
+ }
+ if(curtab->caption != nil) {
+ if(warn)
+ fprint(2, "warning: more than one <CAPTION> in <TABLE>\n");
+ continue;
+ }
+ ps = newpstate(ps);
+ curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop);
+ break;
+
+ case Tcaption+RBRA:
+ nextps = ps->next;
+ if(curtab == nil || nextps == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </CAPTION>\n");
+ continue;
+ }
+ curtab->caption = ps->items->next;
+ free(ps);
+ ps = nextps;
+ break;
+
+ case Tcenter:
+ case Tdiv:
+ if(tag == Tcenter)
+ al = ALcenter;
+ else
+ al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust);
+ pushjust(ps, al);
+ break;
+
+ case Tcenter+RBRA:
+ case Tdiv+RBRA:
+ popjust(ps);
+ break;
+
+ // <!ELEMENT DD - O %flow >
+ case Tdd:
+ if(ps->hangstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: <DD> not inside <DL\n");
+ continue;
+ }
+ h = top(&ps->hangstk, 0);
+ if(h != 0)
+ changehang(ps, -10*LISTTAB);
+ else
+ addbrk(ps, 0, 0);
+ push(&ps->hangstk, 0);
+ break;
+
+ //<!ELEMENT (DIR|MENU) - - (LI)+ -(%block) >
+ //<!ELEMENT (OL|UL) - - (LI)+>
+ case Tdir:
+ case Tmenu:
+ case Tol:
+ case Tul:
+ changeindent(ps, LISTTAB);
+ push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc));
+ push(&ps->listcntstk, aintval(tok, Astart, 1));
+ break;
+
+ case Tdir+RBRA:
+ case Tmenu+RBRA:
+ case Tol+RBRA:
+ case Tul+RBRA:
+ if(ps->listtypestk.n == 0) {
+ if(warn)
+ fprint(2, "warning: %T ended no list\n", tok);
+ continue;
+ }
+ addbrk(ps, 0, 0);
+ pop(&ps->listtypestk);
+ pop(&ps->listcntstk);
+ changeindent(ps, -LISTTAB);
+ break;
+
+ // <!ELEMENT DL - - (DT|DD)+ >
+ case Tdl:
+ changeindent(ps, LISTTAB);
+ push(&ps->hangstk, 0);
+ break;
+
+ case Tdl+RBRA:
+ if(ps->hangstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: unexpected </DL>\n");
+ continue;
+ }
+ changeindent(ps, -LISTTAB);
+ if(top(&ps->hangstk, 0) != 0)
+ changehang(ps, -10*LISTTAB);
+ pop(&ps->hangstk);
+ break;
+
+ // <!ELEMENT DT - O (%text)* >
+ case Tdt:
+ if(ps->hangstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: <DT> not inside <DL>\n");
+ continue;
+ }
+ h = top(&ps->hangstk, 0);
+ pop(&ps->hangstk);
+ if(h != 0)
+ changehang(ps, -10*LISTTAB);
+ changehang(ps, 10*LISTTAB);
+ push(&ps->hangstk, 1);
+ break;
+
+ // <!ELEMENT FONT - - (%text)*>
+ case Tfont:
+ sz = top(&ps->fntsizestk, Normal);
+ if(_tokaval(tok, Asize, &nsz, 0)) {
+ if(_prefix(L(Lplus), nsz))
+ sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize;
+ else if(_prefix(L(Lminus), nsz))
+ sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize;
+ else if(nsz != nil)
+ sz = Normal + (_Strtol(nsz, nil, 10) - 3);
+ }
+ ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg));
+ pushfontsize(ps, sz);
+ break;
+
+ case Tfont+RBRA:
+ if(ps->fgstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: unexpected </FONT>\n");
+ continue;
+ }
+ ps->curfg = popretnewtop(&ps->fgstk, di->text);
+ popfontsize(ps);
+ break;
+
+ // <!ELEMENT FORM - - %body.content -(FORM) >
+ case Tform:
+ if(is->curform != nil) {
+ if(warn)
+ fprint(2, "warning: <FORM> nested inside another\n");
+ continue;
+ }
+ action = aurlval(tok, Aaction, di->base, di->base);
+ s = aval(tok, Aid);
+ name = astrval(tok, Aname, s);
+ if(s)
+ free(s);
+ target = atargval(tok, di->target);
+ method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet);
+ if(warn && _tokaval(tok, Aenctype, &enctype, 0) &&
+ _Strcmp(enctype, L(Lappl_form)))
+ fprint(2, "form enctype %S not handled\n", enctype);
+ frm = newform(++is->nforms, name, action, target, method, di->forms);
+ di->forms = frm;
+ is->curform = frm;
+ break;
+
+ case Tform+RBRA:
+ if(is->curform == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </FORM>\n");
+ continue;
+ }
+ // put fields back in input order
+ is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields);
+ is->curform = nil;
+ break;
+
+ // <!ELEMENT FRAME - O EMPTY>
+ case Tframe:
+ ks = is->kidstk;
+ if(ks == nil) {
+ if(warn)
+ fprint(2, "warning: <FRAME> not in <FRAMESET>\n");
+ continue;
+ }
+ ks->kidinfos = kd = newkidinfo(0, ks->kidinfos);
+ kd->src = aurlval(tok, Asrc, nil, di->base);
+ kd->name = aval(tok, Aname);
+ if(kd->name == nil) {
+ s = _ltoStr(++is->nframes);
+ kd->name = _Strdup2(L(Lfr), s);
+ free(s);
+ }
+ kd->marginw = auintval(tok, Amarginwidth, 0);
+ kd->marginh = auintval(tok, Amarginheight, 0);
+ kd->framebd = auintval(tok, Aframeborder, 1);
+ kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags);
+ norsz = aflagval(tok, Anoresize);
+ if(norsz)
+ kd->flags |= FRnoresize;
+ break;
+
+ // <!ELEMENT FRAMESET - - (FRAME|FRAMESET)+>
+ case Tframeset:
+ ks = newkidinfo(1, nil);
+ pks = is->kidstk;
+ if(pks == nil)
+ di->kidinfo = ks;
+ else {
+ ks->next = pks->kidinfos;
+ pks->kidinfos = ks;
+ }
+ ks->nextframeset = pks;
+ is->kidstk = ks;
+ setdimarray(tok, Arows, &ks->rows, &ks->nrows);
+ if(ks->nrows == 0) {
+ ks->rows = (Dimen*)emalloc(sizeof(Dimen));
+ ks->nrows = 1;
+ ks->rows[0] = makedimen(Dpercent, 100);
+ }
+ setdimarray(tok, Acols, &ks->cols, &ks->ncols);
+ if(ks->ncols == 0) {
+ ks->cols = (Dimen*)emalloc(sizeof(Dimen));
+ ks->ncols = 1;
+ ks->cols[0] = makedimen(Dpercent, 100);
+ }
+ break;
+
+ case Tframeset+RBRA:
+ if(is->kidstk == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </FRAMESET>\n");
+ continue;
+ }
+ ks = is->kidstk;
+ // put kids back in original order
+ // and add blank frames to fill out cells
+ n = ks->nrows*ks->ncols;
+ nblank = n - _listlen((List*)ks->kidinfos);
+ while(nblank-- > 0)
+ ks->kidinfos = newkidinfo(0, ks->kidinfos);
+ ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos);
+ is->kidstk = is->kidstk->nextframeset;
+ if(is->kidstk == nil) {
+ // end input
+ ans = nil;
+ goto return_ans;
+ }
+ break;
+
+ // <!ELEMENT H1 - - (%text;)*>, etc.
+ case Th1:
+ case Th2:
+ case Th3:
+ case Th4:
+ case Th5:
+ case Th6:
+ bramt = 1;
+ if(ps->items == ps->lastit)
+ bramt = 0;
+ addbrk(ps, bramt, IFcleft|IFcright);
+ sz = Verylarge - (tag - Th1);
+ if(sz < Tiny)
+ sz = Tiny;
+ pushfontsize(ps, sz);
+ sty = top(&ps->fntstylestk, FntR);
+ if(tag == Th1)
+ sty = FntB;
+ pushfontstyle(ps, sty);
+ pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
+ ps->skipwhite = 1;
+ break;
+
+ case Th1+RBRA:
+ case Th2+RBRA:
+ case Th3+RBRA:
+ case Th4+RBRA:
+ case Th5+RBRA:
+ case Th6+RBRA:
+ addbrk(ps, 1, IFcleft|IFcright);
+ popfontsize(ps);
+ popfontstyle(ps);
+ popjust(ps);
+ break;
+
+ case Thead:
+ // HTML spec says ignore regular markup in head,
+ // but Netscape and IE don't
+ // ps.skipping = 1;
+ break;
+
+ case Thead+RBRA:
+ ps->skipping = 0;
+ break;
+
+ // <!ELEMENT HR - O EMPTY>
+ case Thr:
+ al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter);
+ sz = auintval(tok, Asize, HRSZ);
+ wd = adimen(tok, Awidth);
+ if(dimenkind(wd) == Dnone)
+ wd = makedimen(Dpercent, 100);
+ nosh = aflagval(tok, Anoshade);
+ additem(ps, newirule(al, sz, nosh, wd), tok);
+ addbrk(ps, 0, 0);
+ break;
+
+ case Ti:
+ case Tcite:
+ case Tdfn:
+ case Tem:
+ case Tvar:
+ case Taddress:
+ pushfontstyle(ps, FntI);
+ break;
+
+ // <!ELEMENT IMG - O EMPTY>
+ case Timg:
+ map = nil;
+ oldcuranchor = ps->curanchor;
+ if(_tokaval(tok, Ausemap, &usemap, 0)) {
+ if(!_prefix(L(Lhash), usemap)) {
+ if(warn)
+ fprint(2, "warning: can't handle non-local map %S\n", usemap);
+ }
+ else {
+ map = getmap(di, usemap+1);
+ if(ps->curanchor == 0) {
+ di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors);
+ ps->curanchor = is->nanchors;
+ }
+ }
+ }
+ align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom);
+ dfltbd = 0;
+ if(ps->curanchor != 0)
+ dfltbd = 2;
+ src = aurlval(tok, Asrc, nil, di->base);
+ if(src == nil) {
+ if(warn)
+ fprint(2, "warning: <img> has no src attribute\n");
+ ps->curanchor = oldcuranchor;
+ continue;
+ }
+ img = newiimage(src,
+ aval(tok, Aalt),
+ align,
+ auintval(tok, Awidth, 0),
+ auintval(tok, Aheight, 0),
+ auintval(tok, Ahspace, IMGHSPACE),
+ auintval(tok, Avspace, IMGVSPACE),
+ auintval(tok, Aborder, dfltbd),
+ aflagval(tok, Aismap),
+ map);
+ if(align == ALleft || align == ALright) {
+ additem(ps, newifloat(img, align), tok);
+ // if no hspace specified, use FLTIMGHSPACE
+ if(!_tokaval(tok, Ahspace, &val, 0))
+ ((Iimage*)img)->hspace = FLTIMGHSPACE;
+ }
+ else {
+ ps->skipwhite = 0;
+ additem(ps, img, tok);
+ }
+ if(!ps->skipping) {
+ ((Iimage*)img)->nextimage = di->images;
+ di->images = (Iimage*)img;
+ }
+ ps->curanchor = oldcuranchor;
+ break;
+
+ // <!ELEMENT INPUT - O EMPTY>
+ case Tinput:
+ ps->skipwhite = 0;
+ if(is->curform == nil) {
+ if(warn)
+ fprint(2, "<INPUT> not inside <FORM>\n");
+ continue;
+ }
+ is->curform->fields = field = newformfield(
+ atabval(tok, Atype, input_tab, NINPUTTAB, Ftext),
+ ++is->curform->nfields,
+ is->curform,
+ aval(tok, Aname),
+ aval(tok, Avalue),
+ auintval(tok, Asize, 0),
+ auintval(tok, Amaxlength, 1000),
+ is->curform->fields);
+ if(aflagval(tok, Achecked))
+ field->flags = FFchecked;
+
+ switch(field->ftype) {
+ case Ftext:
+ case Fpassword:
+ case Ffile:
+ if(field->size == 0)
+ field->size = 20;
+ break;
+
+ case Fcheckbox:
+ if(field->name == nil) {
+ if(warn)
+ fprint(2, "warning: checkbox form field missing name\n");
+ continue;
+ }
+ if(field->value == nil)
+ field->value = _Strdup(L(Lone));
+ break;
+
+ case Fradio:
+ if(field->name == nil || field->value == nil) {
+ if(warn)
+ fprint(2, "warning: radio form field missing name or value\n");
+ continue;
+ }
+ break;
+
+ case Fsubmit:
+ if(field->value == nil)
+ field->value = _Strdup(L(Lsubmit));
+ if(field->name == nil)
+ field->name = _Strdup(L(Lnoname));
+ break;
+
+ case Fimage:
+ src = aurlval(tok, Asrc, nil, di->base);
+ if(src == nil) {
+ if(warn)
+ fprint(2, "warning: image form field missing src\n");
+ continue;
+ }
+ // width and height attrs aren't specified in HTML 3.2,
+ // but some people provide them and they help avoid
+ // a relayout
+ field->image = newiimage(src,
+ astrval(tok, Aalt, L(Lsubmit)),
+ atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom),
+ auintval(tok, Awidth, 0), auintval(tok, Aheight, 0),
+ 0, 0, 0, 0, nil);
+ ii = (Iimage*)field->image;
+ ii->nextimage = di->images;
+ di->images = ii;
+ break;
+
+ case Freset:
+ if(field->value == nil)
+ field->value = _Strdup(L(Lreset));
+ break;
+
+ case Fbutton:
+ if(field->value == nil)
+ field->value = _Strdup(L(Lspace));
+ break;
+ }
+ ffit = newiformfield(field);
+ additem(ps, ffit, tok);
+ if(ffit->genattr != nil)
+ field->events = ffit->genattr->events;
+ break;
+
+ // <!ENTITY ISINDEX - O EMPTY>
+ case Tisindex:
+ ps->skipwhite = 0;
+ prompt = astrval(tok, Aprompt, L(Lindex));
+ target = atargval(tok, di->target);
+ additem(ps, textit(ps, prompt), tok);
+ frm = newform(++is->nforms,
+ nil,
+ di->base,
+ target,
+ HGet,
+ di->forms);
+ di->forms = frm;
+ ff = newformfield(Ftext,
+ 1,
+ frm,
+ _Strdup(L(Lisindex)),
+ nil,
+ 50,
+ 1000,
+ nil);
+ frm->fields = ff;
+ frm->nfields = 1;
+ additem(ps, newiformfield(ff), tok);
+ addbrk(ps, 1, 0);
+ break;
+
+ // <!ELEMENT LI - O %flow>
+ case Tli:
+ if(ps->listtypestk.n == 0) {
+ if(warn)
+ fprint(2, "<LI> not in list\n");
+ continue;
+ }
+ ty = top(&ps->listtypestk, 0);
+ ty2 = listtyval(tok, ty);
+ if(ty != ty2) {
+ ty = ty2;
+ push(&ps->listtypestk, ty2);
+ }
+ v = aintval(tok, Avalue, top(&ps->listcntstk, 1));
+ if(ty == LTdisc || ty == LTsquare || ty == LTcircle)
+ hang = 10*LISTTAB - 3;
+ else
+ hang = 10*LISTTAB - 1;
+ changehang(ps, hang);
+ addtext(ps, listmark(ty, v));
+ push(&ps->listcntstk, v + 1);
+ changehang(ps, -hang);
+ ps->skipwhite = 1;
+ break;
+
+ // <!ELEMENT MAP - - (AREA)+>
+ case Tmap:
+ if(_tokaval(tok, Aname, &name, 0))
+ is->curmap = getmap(di, name);
+ break;
+
+ case Tmap+RBRA:
+ map = is->curmap;
+ if(map == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </MAP>\n");
+ continue;
+ }
+ map->areas = (Area*)_revlist((List*)map->areas);
+ break;
+
+ case Tmeta:
+ if(ps->skipping)
+ continue;
+ if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) {
+ val = aval(tok, Acontent);
+ n = _Strlen(equiv);
+ if(!_Strncmpci(equiv, n, L(Lrefresh)))
+ di->refresh = val;
+ else if(!_Strncmpci(equiv, n, L(Lcontent))) {
+ n = _Strlen(val);
+ if(!_Strncmpci(val, n, L(Ljavascript))
+ || !_Strncmpci(val, n, L(Ljscript1))
+ || !_Strncmpci(val, n, L(Ljscript)))
+ di->scripttype = TextJavascript;
+ else {
+ if(warn)
+ fprint(2, "unimplemented script type %S\n", val);
+ di->scripttype = UnknownType;
+ }
+ }
+ }
+ break;
+
+ // Nobr is NOT in HTML 4.0, but it is ubiquitous on the web
+ case Tnobr:
+ ps->skipwhite = 0;
+ ps->curstate &= ~IFwrap;
+ break;
+
+ case Tnobr+RBRA:
+ ps->curstate |= IFwrap;
+ break;
+
+ // We do frames, so skip stuff in noframes
+ case Tnoframes:
+ ps->skipping = 1;
+ break;
+
+ case Tnoframes+RBRA:
+ ps->skipping = 0;
+ break;
+
+ // We do scripts (if enabled), so skip stuff in noscripts
+ case Tnoscript:
+ if(doscripts)
+ ps->skipping = 1;
+ break;
+
+ case Tnoscript+RBRA:
+ if(doscripts)
+ ps->skipping = 0;
+ break;
+
+ // <!ELEMENT OPTION - O ( //PCDATA)>
+ case Toption:
+ if(is->curform == nil || is->curform->fields == nil) {
+ if(warn)
+ fprint(2, "warning: <OPTION> not in <SELECT>\n");
+ continue;
+ }
+ field = is->curform->fields;
+ if(field->ftype != Fselect) {
+ if(warn)
+ fprint(2, "warning: <OPTION> not in <SELECT>\n");
+ continue;
+ }
+ val = aval(tok, Avalue);
+ option = newoption(aflagval(tok, Aselected), val, nil, field->options);
+ field->options = option;
+ option->display = getpcdata(toks, tokslen, &toki);
+ if(val == nil)
+ option->value = _Strdup(option->display);
+ break;
+
+ // <!ELEMENT P - O (%text)* >
+ case Tp:
+ pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust));
+ ps->inpar = 1;
+ ps->skipwhite = 1;
+ break;
+
+ case Tp+RBRA:
+ break;
+
+ // <!ELEMENT PARAM - O EMPTY>
+ // Do something when we do applets...
+ case Tparam:
+ break;
+
+ // <!ELEMENT PRE - - (%text)* -(IMG|BIG|SMALL|SUB|SUP|FONT) >
+ case Tpre:
+ ps->curstate &= ~IFwrap;
+ ps->literal = 1;
+ ps->skipwhite = 0;
+ pushfontstyle(ps, FntT);
+ break;
+
+ case Tpre+RBRA:
+ ps->curstate |= IFwrap;
+ if(ps->literal) {
+ popfontstyle(ps);
+ ps->literal = 0;
+ }
+ break;
+
+ // <!ELEMENT SCRIPT - - CDATA>
+ case Tscript:
+ if(doscripts) {
+ if(!di->hasscripts) {
+ if(di->scripttype == TextJavascript) {
+ // TODO: initialize script if nec.
+ // initjscript(di);
+ di->hasscripts = 1;
+ }
+ }
+ }
+ if(!di->hasscripts) {
+ if(warn)
+ fprint(2, "warning: <SCRIPT> ignored\n");
+ ps->skipping = 1;
+ }
+ else {
+ scriptsrc = aurlval(tok, Asrc, nil, di->base);
+ script = nil;
+ if(scriptsrc != nil) {
+ if(warn)
+ fprint(2, "warning: non-local <SCRIPT> ignored\n");
+ free(scriptsrc);
+ }
+ else {
+ script = getpcdata(toks, tokslen, &toki);
+ }
+ if(script != nil) {
+ if(warn)
+ fprint(2, "script ignored\n");
+ free(script);
+ }
+ }
+ break;
+
+ case Tscript+RBRA:
+ ps->skipping = 0;
+ break;
+
+ // <!ELEMENT SELECT - - (OPTION+)>
+ case Tselect:
+ if(is->curform == nil) {
+ if(warn)
+ fprint(2, "<SELECT> not inside <FORM>\n");
+ continue;
+ }
+ field = newformfield(Fselect,
+ ++is->curform->nfields,
+ is->curform,
+ aval(tok, Aname),
+ nil,
+ auintval(tok, Asize, 0),
+ 0,
+ is->curform->fields);
+ is->curform->fields = field;
+ if(aflagval(tok, Amultiple))
+ field->flags = FFmultiple;
+ ffit = newiformfield(field);
+ additem(ps, ffit, tok);
+ if(ffit->genattr != nil)
+ field->events = ffit->genattr->events;
+ // throw away stuff until next tag (should be <OPTION>)
+ s = getpcdata(toks, tokslen, &toki);
+ if(s != nil)
+ free(s);
+ break;
+
+ case Tselect+RBRA:
+ if(is->curform == nil || is->curform->fields == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </SELECT>\n");
+ continue;
+ }
+ field = is->curform->fields;
+ if(field->ftype != Fselect)
+ continue;
+ // put options back in input order
+ field->options = (Option*)_revlist((List*)field->options);
+ break;
+
+ // <!ELEMENT (STRIKE|U) - - (%text)*>
+ case Tstrike:
+ case Tu:
+ ps->curul = push(&ps->ulstk, (tag==Tstrike)? ULmid : ULunder);
+ break;
+
+ case Tstrike+RBRA:
+ case Tu+RBRA:
+ if(ps->ulstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: unexpected %T\n", tok);
+ continue;
+ }
+ ps->curul = popretnewtop(&ps->ulstk, ULnone);
+ break;
+
+ // <!ELEMENT STYLE - - CDATA>
+ case Tstyle:
+ if(warn)
+ fprint(2, "warning: unimplemented <STYLE>\n");
+ ps->skipping = 1;
+ break;
+
+ case Tstyle+RBRA:
+ ps->skipping = 0;
+ break;
+
+ // <!ELEMENT (SUB|SUP) - - (%text)*>
+ case Tsub:
+ case Tsup:
+ if(tag == Tsub)
+ ps->curvoff += SUBOFF;
+ else
+ ps->curvoff -= SUPOFF;
+ push(&ps->voffstk, ps->curvoff);
+ sz = top(&ps->fntsizestk, Normal);
+ pushfontsize(ps, sz - 1);
+ break;
+
+ case Tsub+RBRA:
+ case Tsup+RBRA:
+ if(ps->voffstk.n == 0) {
+ if(warn)
+ fprint(2, "warning: unexpected %T\n", tok);
+ continue;
+ }
+ ps->curvoff = popretnewtop(&ps->voffstk, 0);
+ popfontsize(ps);
+ break;
+
+ // <!ELEMENT TABLE - - (CAPTION?, TR+)>
+ case Ttable:
+ ps->skipwhite = 0;
+ tab = newtable(++is->ntables,
+ aalign(tok),
+ adimen(tok, Awidth),
+ aflagval(tok, Aborder),
+ auintval(tok, Acellspacing, TABSP),
+ auintval(tok, Acellpadding, TABPAD),
+ makebackground(nil, acolorval(tok, Abgcolor, ps->curbg.color)),
+ tok,
+ is->tabstk);
+ is->tabstk = tab;
+ curtab = tab;
+ break;
+
+ case Ttable+RBRA:
+ if(curtab == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </TABLE>\n");
+ continue;
+ }
+ isempty = (curtab->cells == nil);
+ if(isempty) {
+ if(warn)
+ fprint(2, "warning: <TABLE> has no cells\n");
+ }
+ else {
+ ps = finishcell(curtab, ps);
+ if(curtab->rows != nil)
+ curtab->rows->flags = 0;
+ finish_table(curtab);
+ }
+ ps->skipping = 0;
+ if(!isempty) {
+ tabitem = newitable(curtab);
+ al = curtab->align.halign;
+ switch(al) {
+ case ALleft:
+ case ALright:
+ additem(ps, newifloat(tabitem, al), tok);
+ break;
+ default:
+ if(al == ALcenter)
+ pushjust(ps, ALcenter);
+ addbrk(ps, 0, 0);
+ if(ps->inpar) {
+ popjust(ps);
+ ps->inpar = 0;
+ }
+ additem(ps, tabitem, curtab->tabletok);
+ if(al == ALcenter)
+ popjust(ps);
+ break;
+ }
+ }
+ if(is->tabstk == nil) {
+ if(warn)
+ fprint(2, "warning: table stack is wrong\n");
+ }
+ else
+ is->tabstk = is->tabstk->next;
+ curtab->next = di->tables;
+ di->tables = curtab;
+ curtab = is->tabstk;
+ if(!isempty)
+ addbrk(ps, 0, 0);
+ break;
+
+ // <!ELEMENT (TH|TD) - O %body.content>
+ // Cells for a row are accumulated in reverse order.
+ // We push ps on a stack, and use a new one to accumulate
+ // the contents of the cell.
+ case Ttd:
+ case Tth:
+ if(curtab == nil) {
+ if(warn)
+ fprint(2, "%T outside <TABLE>\n", tok);
+ continue;
+ }
+ if(ps->inpar) {
+ popjust(ps);
+ ps->inpar = 0;
+ }
+ ps = finishcell(curtab, ps);
+ tr = nil;
+ if(curtab->rows != nil)
+ tr = curtab->rows;
+ if(tr == nil || !tr->flags) {
+ if(warn)
+ fprint(2, "%T outside row\n", tok);
+ tr = newtablerow(makealign(ALnone, ALnone),
+ makebackground(nil, curtab->background.color),
+ TFparsing,
+ curtab->rows);
+ curtab->rows = tr;
+ }
+ ps = cell_pstate(ps, tag == Tth);
+ flags = TFparsing;
+ if(aflagval(tok, Anowrap)) {
+ flags |= TFnowrap;
+ ps->curstate &= ~IFwrap;
+ }
+ if(tag == Tth)
+ flags |= TFisth;
+ c = newtablecell(curtab->cells==nil? 1 : curtab->cells->cellid+1,
+ auintval(tok, Arowspan, 1),
+ auintval(tok, Acolspan, 1),
+ aalign(tok),
+ adimen(tok, Awidth),
+ auintval(tok, Aheight, 0),
+ makebackground(nil, acolorval(tok, Abgcolor, tr->background.color)),
+ flags,
+ curtab->cells);
+ curtab->cells = c;
+ ps->curbg = c->background;
+ if(c->align.halign == ALnone) {
+ if(tr->align.halign != ALnone)
+ c->align.halign = tr->align.halign;
+ else if(tag == Tth)
+ c->align.halign = ALcenter;
+ else
+ c->align.halign = ALleft;
+ }
+ if(c->align.valign == ALnone) {
+ if(tr->align.valign != ALnone)
+ c->align.valign = tr->align.valign;
+ else
+ c->align.valign = ALmiddle;
+ }
+ c->nextinrow = tr->cells;
+ tr->cells = c;
+ break;
+
+ case Ttd+RBRA:
+ case Tth+RBRA:
+ if(curtab == nil || curtab->cells == nil) {
+ if(warn)
+ fprint(2, "unexpected %T\n", tok);
+ continue;
+ }
+ ps = finishcell(curtab, ps);
+ break;
+
+ // <!ELEMENT TEXTAREA - - ( //PCDATA)>
+ case Ttextarea:
+ if(is->curform == nil) {
+ if(warn)
+ fprint(2, "<TEXTAREA> not inside <FORM>\n");
+ continue;
+ }
+ field = newformfield(Ftextarea,
+ ++is->curform->nfields,
+ is->curform,
+ aval(tok, Aname),
+ nil,
+ 0,
+ 0,
+ is->curform->fields);
+ is->curform->fields = field;
+ field->rows = auintval(tok, Arows, 3);
+ field->cols = auintval(tok, Acols, 50);
+ field->value = getpcdata(toks, tokslen, &toki);
+ if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttextarea + RBRA)
+ fprint(2, "warning: <TEXTAREA> data ended by %T\n", &toks[toki + 1]);
+ ffit = newiformfield(field);
+ additem(ps, ffit, tok);
+ if(ffit->genattr != nil)
+ field->events = ffit->genattr->events;
+ break;
+
+ // <!ELEMENT TITLE - - ( //PCDATA)* -(%head.misc)>
+ case Ttitle:
+ di->doctitle = getpcdata(toks, tokslen, &toki);
+ if(warn && toki < tokslen - 1 && toks[toki + 1].tag != Ttitle + RBRA)
+ fprint(2, "warning: <TITLE> data ended by %T\n", &toks[toki + 1]);
+ break;
+
+ // <!ELEMENT TR - O (TH|TD)+>
+ // rows are accumulated in reverse order in curtab->rows
+ case Ttr:
+ if(curtab == nil) {
+ if(warn)
+ fprint(2, "warning: <TR> outside <TABLE>\n");
+ continue;
+ }
+ if(ps->inpar) {
+ popjust(ps);
+ ps->inpar = 0;
+ }
+ ps = finishcell(curtab, ps);
+ if(curtab->rows != nil)
+ curtab->rows->flags = 0;
+ curtab->rows = newtablerow(aalign(tok),
+ makebackground(nil, acolorval(tok, Abgcolor, curtab->background.color)),
+ TFparsing,
+ curtab->rows);
+ break;
+
+ case Ttr+RBRA:
+ if(curtab == nil || curtab->rows == nil) {
+ if(warn)
+ fprint(2, "warning: unexpected </TR>\n");
+ continue;
+ }
+ ps = finishcell(curtab, ps);
+ tr = curtab->rows;
+ if(tr->cells == nil) {
+ if(warn)
+ fprint(2, "warning: empty row\n");
+ curtab->rows = tr->next;
+ tr->next = nil;
+ }
+ else
+ tr->flags = 0;
+ break;
+
+ // <!ELEMENT (TT|CODE|KBD|SAMP) - - (%text)*>
+ case Ttt:
+ case Tcode:
+ case Tkbd:
+ case Tsamp:
+ pushfontstyle(ps, FntT);
+ break;
+
+ // Tags that have empty action
+ case Tabbr:
+ case Tabbr+RBRA:
+ case Tacronym:
+ case Tacronym+RBRA:
+ case Tarea+RBRA:
+ case Tbase+RBRA:
+ case Tbasefont+RBRA:
+ case Tbr+RBRA:
+ case Tdd+RBRA:
+ case Tdt+RBRA:
+ case Tframe+RBRA:
+ case Thr+RBRA:
+ case Thtml:
+ case Thtml+RBRA:
+ case Timg+RBRA:
+ case Tinput+RBRA:
+ case Tisindex+RBRA:
+ case Tli+RBRA:
+ case Tlink:
+ case Tlink+RBRA:
+ case Tmeta+RBRA:
+ case Toption+RBRA:
+ case Tparam+RBRA:
+ case Ttextarea+RBRA:
+ case Ttitle+RBRA:
+ break;
+
+
+ // Tags not implemented
+ case Tbdo:
+ case Tbdo+RBRA:
+ case Tbutton:
+ case Tbutton+RBRA:
+ case Tdel:
+ case Tdel+RBRA:
+ case Tfieldset:
+ case Tfieldset+RBRA:
+ case Tiframe:
+ case Tiframe+RBRA:
+ case Tins:
+ case Tins+RBRA:
+ case Tlabel:
+ case Tlabel+RBRA:
+ case Tlegend:
+ case Tlegend+RBRA:
+ case Tobject:
+ case Tobject+RBRA:
+ case Toptgroup:
+ case Toptgroup+RBRA:
+ case Tspan:
+ case Tspan+RBRA:
+ if(warn) {
+ if(tag > RBRA)
+ tag -= RBRA;
+ fprint(2, "warning: unimplemented HTML tag: %S\n", tagnames[tag]);
+ }
+ break;
+
+ default:
+ if(warn)
+ fprint(2, "warning: unknown HTML tag: %S\n", tok->text);
+ break;
+ }
+ }
+ // some pages omit trailing </table>
+ while(curtab != nil) {
+ if(warn)
+ fprint(2, "warning: <TABLE> not closed\n");
+ if(curtab->cells != nil) {
+ ps = finishcell(curtab, ps);
+ if(curtab->cells == nil) {
+ if(warn)
+ fprint(2, "warning: empty table\n");
+ }
+ else {
+ if(curtab->rows != nil)
+ curtab->rows->flags = 0;
+ finish_table(curtab);
+ ps->skipping = 0;
+ additem(ps, newitable(curtab), curtab->tabletok);
+ addbrk(ps, 0, 0);
+ }
+ }
+ if(is->tabstk != nil)
+ is->tabstk = is->tabstk->next;
+ curtab->next = di->tables;
+ di->tables = curtab;
+ curtab = is->tabstk;
+ }
+ outerps = lastps(ps);
+ ans = outerps->items->next;
+ // note: ans may be nil and di->kids not nil, if there's a frameset!
+ outerps->items = newispacer(ISPnull);
+ outerps->lastit = outerps->items;
+ is->psstk = ps;
+ if(ans != nil && di->hasscripts) {
+ // TODO evalscript(nil);
+ ;
+ }
+
+return_ans:
+ if(dbgbuild) {
+ assert(validitems(ans));
+ if(ans == nil)
+ fprint(2, "getitems returning nil\n");
+ else
+ printitems(ans, "getitems returning:");
+ }
+ return ans;
+}
+
+// Gather the maximal run of consecutive Data tokens that begins at
+// toks[*ptoki + 1] into a single string.  (The lexer has ensured that
+// the run is followed either by a non-data token or by eof.)
+// Returns the emalloc'd concatenation with whitespace trimmed by
+// _trimwhite, or nil if the run contributes no characters at all.
+// On return *ptoki indexes the last token consumed; it is left
+// unchanged when there was no Data token to consume.
+static Rune*
+getpcdata(Token* toks, int tokslen, int* ptoki)
+{
+	Rune* buf;
+	Rune* w;
+	Rune* trimmed;
+	int total;
+	int ntrimmed;
+	int first;
+	int end;
+	int k;
+
+	first = (*ptoki) + 1;
+
+	// pass 1: find where the Data run stops and how long the text is
+	total = 0;
+	for(end = first; end < tokslen && toks[end].tag == Data; end++)
+		total += _Strlen(toks[end].text);
+
+	buf = nil;
+	if(total > 0) {
+		// pass 2: concatenate the pieces into one buffer
+		buf = _newstr(total);
+		w = buf;
+		for(k = first; k < end; k++)
+			w = _Stradd(w, toks[k].text, _Strlen(toks[k].text));
+		*w = 0;
+
+		// trimmed points into buf, so copy before releasing buf
+		_trimwhite(buf, total, &trimmed, &ntrimmed);
+		if(ntrimmed != total) {
+			w = _Strndup(trimmed, ntrimmed);
+			free(buf);
+			buf = w;
+		}
+	}
+	*ptoki = end - 1;
+	return buf;
+}
+
+// If the cell at the head of curtab->cells is still being parsed
+// (TFparsing set), give it the items accumulated on the top Pstate of
+// psstk, clear TFparsing, free that Pstate and return the one below it.
+// Otherwise psstk is returned unchanged.  If the cell is being parsed
+// but psstk has no lower entry, nothing is popped and (when warn is
+// set) a warning is printed.
+static Pstate*
+finishcell(Table* curtab, Pstate* psstk)
+{
+	Tablecell* cell;
+	Pstate* below;
+
+	cell = curtab->cells;
+	if(cell == nil || !(cell->flags&TFparsing))
+		return psstk;
+
+	below = psstk->next;
+	if(below == nil) {
+		if(warn)
+			fprint(2, "warning: parse state stack is wrong\n");
+		return psstk;
+	}
+
+	// the first item on the chain is skipped (newpstate starts every
+	// chain with a null spacer)
+	cell->content = psstk->items->next;
+	cell->flags &= ~TFparsing;
+	freepstate(psstk);
+	return below;
+}
+
+// Build the Pstate used to accumulate the contents of a table cell.
+// The new state is linked on top of oldps and inherits oldps's current
+// anchor, font, foreground, background and size adjustment, together
+// with copies of its font-style, font-size and foreground stacks.
+// skipwhite is set on the new state.  If ishead is true the font is
+// switched to FntB while keeping curfont%NumSize (presumably the size
+// component of the font index).
+static Pstate*
+cell_pstate(Pstate* oldps, int ishead)
+{
+	Pstate* cellps;
+
+	cellps = newpstate(oldps);
+	cellps->skipwhite = 1;
+
+	// scalar state taken over from the enclosing context
+	cellps->curanchor = oldps->curanchor;
+	cellps->curfont = oldps->curfont;
+	cellps->curfg = oldps->curfg;
+	cellps->curbg = oldps->curbg;
+	cellps->adjsize = oldps->adjsize;
+
+	// stacks taken over from the enclosing context
+	copystack(&cellps->fntstylestk, &oldps->fntstylestk);
+	copystack(&cellps->fntsizestk, &oldps->fntsizestk);
+	copystack(&cellps->fgstk, &oldps->fgstk);
+
+	if(ishead)
+		cellps->curfont = FntB*NumSize + cellps->curfont%NumSize;
+	return cellps;
+}
+
+// Allocate a Pstate in its default starting state: default font, black
+// on white with no background image, no underline, left justified,
+// wrapping enabled.  Its item chain starts out holding only a null
+// spacer, which is both the first and the last item.
+// link becomes the new state's next pointer, so passing the current
+// head of a list pushes the new state onto that list.
+// Fields not assigned here keep whatever emalloc left in them
+// (presumably zero -- confirm against emalloc).
+static Pstate*
+newpstate(Pstate* link)
+{
+	Pstate* ps;
+	Item* sentinel;
+
+	ps = (Pstate*)emalloc(sizeof(Pstate));
+	ps->next = link;
+
+	// rendering defaults
+	ps->curfont = DefFnt;
+	ps->curfg = Black;
+	ps->curbg.color = White;
+	ps->curbg.image = nil;
+	ps->curul = ULnone;
+	ps->curjust = ALleft;
+	ps->curstate = IFwrap;
+
+	// item chain containing just the null spacer
+	sentinel = newispacer(ISPnull);
+	ps->items = sentinel;
+	ps->lastit = sentinel;
+	ps->prelastit = nil;
+	return ps;
+}
+
+// Follow the next links from psl and return the final Pstate of the
+// list (the one whose next is nil).  psl must not be nil.
+static Pstate*
+lastps(Pstate* psl)
+{
+	Pstate* p;
+
+	assert(psl != nil);
+	for(p = psl; p->next != nil; p = p->next)
+		;
+	return p;
+}
+
+// Append it to the end of ps's item chain, stamping it with the
+// current anchor id and or-ing in the current state flags of ps.
+// While ps->skipping is set the item is not added (a warning is
+// printed if warn is set).
+// If tok is not nil, its attributes are scanned for the generic ones
+// (those with a nonzero attrinfo entry): id, class, style, title and
+// the script event attributes.  If any is present the item's genattr
+// field is set from them.  The attribute values are taken over from
+// the token: a->value is set to nil for each generic attribute seen.
+// Afterwards the pending break/clear flags are removed from
+// ps->curstate and prelastit/lastit are advanced.
+static void
+additem(Pstate* ps, Item* it, Token* tok)
+{
+	Rune* idval;
+	Rune* classval;
+	Rune* styleval;
+	Rune* titleval;
+	SEvent* events;
+	Attr* at;
+	int found;
+	int id;
+
+	if(ps->skipping) {
+		if(warn)
+			fprint(2, "warning: skipping item: %I\n", it);
+		return;
+	}
+
+	it->anchorid = ps->curanchor;
+	it->state |= ps->curstate;
+
+	if(tok != nil) {
+		idval = nil;
+		classval = nil;
+		styleval = nil;
+		titleval = nil;
+		events = nil;
+		found = 0;
+		for(at = tok->attr; at != nil; at = at->next) {
+			id = at->attid;
+			if(attrinfo[id]) {
+				if(id == Aid)
+					idval = at->value;
+				else if(id == Aclass)
+					classval = at->value;
+				else if(id == Astyle)
+					styleval = at->value;
+				else if(id == Atitle)
+					titleval = at->value;
+				else {
+					// any remaining generic attribute must be an event handler
+					assert(id >= Aonblur && id <= Aonunload);
+					events = newscriptevent(scriptev[id], at->value, events);
+				}
+				// the value now belongs to the item's generic attributes
+				at->value = nil;
+				found = 1;
+			}
+		}
+		if(found)
+			it->genattr = newgenattr(idval, classval, styleval, titleval, events);
+	}
+
+	// the pending break has now been attached to an item
+	ps->curstate &= ~(IFbrk|IFbrksp|IFnobrk|IFcleft|IFcright);
+
+	ps->prelastit = ps->lastit;
+	ps->lastit->next = it;
+	ps->lastit = it;
+}
+
+// Wrap s (which must not be nil) in a new text item that carries the
+// current font, foreground color and underline state of ps, with the
+// vertical offset ps->curvoff biased by Voffbias.
+static Item*
+textit(Pstate* ps, Rune* s)
+{
+	int voff;
+
+	assert(s != nil);
+	voff = ps->curvoff + Voffbias;
+	return newitext(s, ps->curfont, ps->curfg, voff, ps->curul);
+}
+
+// Add text item or items for s, paying attention to
+// current font, foreground, baseline offset, underline state,
+// and literal mode.  Unless we're in literal mode, compress
+// whitespace to a single blank, and, if curstate has a break
+// (or no item has been added yet), trim any leading whitespace.
+// Whether in literal mode or not, turn nonbreaking spaces into
+// spacer items with IFnobrk set.
+//
+// In literal mode, break up s at newlines and add line breaks instead.
+// Also replace each tab with the appropriate number of spaces.
+// In nonliteral mode, break up the items every 100 or so characters
+// just to make the layout algorithm not go quadratic.
+//
+// addtext assumes ownership of s: on every path s is either handed
+// over to a text item or freed.
+static void
+addtext(Pstate* ps, Rune* s)
+{
+	int n;
+	int i;
+	int j;
+	int k;
+	int col;
+	int c;
+	int nsp;
+	Item* it;
+	Rune* ss;
+	Rune* p;
+	Rune buf[SMALLBUFSIZE];
+
+	assert(s != nil);
+	n = runestrlen(s);
+	i = 0;
+	j = 0;
+	if(ps->literal) {
+		// s[j:i] is the part of the current line not yet emitted;
+		// col counts the columns already emitted on this line and is
+		// used to find the next tab stop
+		col = 0;
+		while(i < n) {
+			if(s[i] == '\n') {
+				if(i > j) {
+					// trim trailing blanks from line
+					for(k = i; k > j; k--)
+						if(s[k - 1] != ' ')
+							break;
+					if(k > j)
+						additem(ps, textit(ps, _Strndup(s+j, k-j)), nil);
+				}
+				addlinebrk(ps, 0);
+				j = i + 1;
+				col = 0;
+			}
+			else {
+				if(s[i] == '\t') {
+					col += i - j;
+					nsp = 8 - (col%8);
+					// make ss = s[j:i] + nsp spaces
+					ss = _newstr(i-j+nsp);
+					p = _Stradd(ss, s+j, i-j);
+					p = _Stradd(p, L(Ltab2space), nsp);
+					*p = 0;
+					additem(ps, textit(ps, ss), nil);
+					col += nsp;
+					j = i + 1;
+				}
+				else if(s[i] == NBSP) {
+					if(i > j)
+						additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
+					addnbsp(ps);
+					col += (i - j) + 1;
+					j = i + 1;
+				}
+			}
+			i++;
+		}
+		if(i > j) {
+			if(j == 0 && i == n) {
+				// just transfer s over
+				additem(ps, textit(ps, s), nil);
+			}
+			else {
+				additem(ps, textit(ps, _Strndup(s+j, i-j)), nil);
+				free(s);
+			}
+		}
+		else {
+			// nothing left over (s was empty or ended in a newline,
+			// tab or NBSP): we still own s, so release it
+			free(s);
+		}
+	}
+	else {	// not literal mode
+		// skip leading whitespace after a break or at the very start
+		if((ps->curstate&IFbrk) || ps->lastit == ps->items)
+			while(i < n) {
+				c = s[i];
+				if(c >= 256 || !isspace(c))
+					break;
+				i++;
+			}
+		// buf[0:p-buf] holds normalized text not yet emitted;
+		// s[j:i] is scanned input not yet copied into buf
+		p = buf;
+		for(j = i; i < n; i++) {
+			assert(p+i-j < buf+SMALLBUFSIZE-1);
+			c = s[i];
+			if(c == NBSP) {
+				if(i > j)
+					p = _Stradd(p, s+j, i-j);
+				if(p > buf)
+					additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+				p = buf;
+				addnbsp(ps);
+				j = i + 1;
+				continue;
+			}
+			if(c < 256 && isspace(c)) {
+				// copy pending text, then collapse the whole run of
+				// whitespace into one blank
+				if(i > j)
+					p = _Stradd(p, s+j, i-j);
+				*p++ = ' ';
+				while(i < n - 1) {
+					c = s[i + 1];
+					if(c >= 256 || !isspace(c))
+						break;
+					i++;
+				}
+				j = i + 1;
+			}
+			if(i - j >= 100) {
+				p = _Stradd(p, s+j, i+1-j);
+				j = i + 1;
+			}
+			if(p-buf >= 100) {
+				additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+				p = buf;
+			}
+		}
+		if(i > j && j < n) {
+			assert(p+i-j < buf+SMALLBUFSIZE-1);
+			p = _Stradd(p, s+j, i-j);
+		}
+		// don't add a space if previous item ended in a space
+		if(p-buf == 1 && buf[0] == ' ' && ps->lastit != nil) {
+			it = ps->lastit;
+			if(it->tag == Itexttag) {
+				ss = ((Itext*)it)->s;
+				k = _Strlen(ss);
+				// last character is ss[k-1]; ss[k] is the terminator
+				if(k > 0 && ss[k - 1] == ' ')
+					p = buf;
+			}
+		}
+		if(p > buf)
+			additem(ps, textit(ps, _Strndup(buf, p-buf)), nil);
+		free(s);
+	}
+}
+
+ // Record a break in ps->curstate, with extra space if sp is true
+ // (the extra space is suppressed while the last item is still the
+ // head of the item list).  Any clear-left/clear-right bits already
+ // in curstate are merged with clr.
+ // If the last item is a text item and we are not in literal mode,
+ // strip the whitespace from its end; when nothing remains and there
+ // is a remembered previous item, unlink the text item from the list
+ // instead of leaving an empty one behind.
+ static void
+ addbrk(Pstate* ps, int sp, int clr)
+ {
+ 	int prev;
+ 	int spbit;
+ 	Itext* txt;
+ 	Rune* old;
+ 	Rune* keep;
+ 	int nkeep;
+ 	Rune* rest;
+ 	int nrest;
+
+ 	prev = ps->curstate;
+ 	clr |= prev&(IFcleft|IFcright);
+ 	spbit = 0;
+ 	if(sp && ps->lastit != ps->items)
+ 		spbit = IFbrksp;
+ 	ps->curstate = IFbrk|spbit|(prev&~(IFcleft|IFcright))|clr;
+
+ 	// nothing to trim at the very start of the list
+ 	if(ps->lastit == ps->items)
+ 		return;
+ 	if(ps->literal || ps->lastit->tag != Itexttag)
+ 		return;
+
+ 	txt = (Itext*)ps->lastit;
+ 	_splitr(txt->s, _Strlen(txt->s), notwhitespace, &keep, &nkeep, &rest, &nrest);
+ 	// try to avoid making empty items,
+ 	// but it is not crucial if the occasional one gets through
+ 	if(nkeep == 0 && ps->prelastit != nil) {
+ 		// NOTE(review): the unlinked text item is not freed here
+ 		ps->lastit = ps->prelastit;
+ 		ps->lastit->next = nil;
+ 		ps->prelastit = nil;
+ 		return;
+ 	}
+
+ 	old = txt->s;
+ 	if(nkeep != 0)
+ 		txt->s = _Strndup(keep, nkeep);
+ 	else {
+ 		// need a non-nil pointer to empty string
+ 		// (_Strdup(L(Lempty)) returns nil)
+ 		txt->s = emalloc(sizeof(Rune));
+ 		txt->s[0] = 0;
+ 	}
+ 	if(old != nil)
+ 		free(old);
+ }
+
+ // Add break due to a <br> or a newline within a preformatted section.
+ // When the last item is a spacer or a float, a vline spacer item is
+ // added first so that the line being ended still gets vertical space;
+ // this is what lets repeated <br>s produce repeated blank lines.
+ // That spacer is marked IFbrk if the previous item was itself a vline
+ // spacer and IFnobrk otherwise; the break/break-space bits that were
+ // pending on entry are restored afterwards, and then the real break
+ // is recorded with addbrk.
+ static void
+ addlinebrk(Pstate* ps, int clr)
+ {
+ 	int saved;
+ 	int brkbit;
+ 	Item* last;
+
+ 	saved = ps->curstate&(IFbrk|IFbrksp);
+ 	last = ps->lastit;
+ 	if(last != nil && (last->tag == Ispacertag || last->tag == Ifloattag)) {
+ 		// don't want a break before our spacer unless the previous
+ 		// item was also a vline spacer
+ 		brkbit = IFnobrk;
+ 		if(last->tag == Ispacertag && ((Ispacer*)last)->spkind == ISPvline)
+ 			brkbit = IFbrk;
+ 		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|brkbit;
+ 		additem(ps, newispacer(ISPvline), nil);
+ 		ps->curstate = (ps->curstate&~(IFbrk|IFbrksp))|saved;
+ 	}
+ 	addbrk(ps, 0, clr);
+ }
+
+ // Add a nonbreakable space as an hspace spacer item.
+ // If a break is already pending in curstate the break is kept
+ // (the nbsp is then taken as a request for undiscardable space
+ // rather than as a request to prevent a break); otherwise the
+ // spacer itself is marked IFnobrk.  Either way IFnobrk is left
+ // set in curstate for the item that follows.
+ static void
+ addnbsp(Pstate* ps)
+ {
+ 	if(!(ps->curstate&IFbrk))
+ 		ps->curstate = ps->curstate|IFnobrk;
+ 	additem(ps, newispacer(ISPhspace), nil);
+ 	// definitely no break on the next item
+ 	ps->curstate = ps->curstate|IFnobrk;
+ }
+
+ // Adjust the hang field of ps->curstate by delta.
+ // The amount is in 1/10ths of tabs, and is the amount that
+ // the current contiguous set of items with a hang value set
+ // is to be shifted left from its normal (indented) place.
+ // A result below zero is clamped to zero (with a warning if enabled).
+ static void
+ changehang(Pstate* ps, int delta)
+ {
+ 	int hang;
+
+ 	hang = (ps->curstate&IFhangmask) + delta;
+ 	if(hang < 0) {
+ 		if(warn)
+ 			fprint(2, "warning: hang went negative\n");
+ 		hang = 0;
+ 	}
+ 	ps->curstate &= ~IFhangmask;
+ 	ps->curstate |= hang;
+ }
+
+ // Adjust the indent field of ps->curstate by delta.
+ // A result below zero is clamped to zero (with a warning if enabled).
+ static void
+ changeindent(Pstate* ps, int delta)
+ {
+ 	int indent;
+
+ 	indent = (ps->curstate&IFindentmask) >> IFindentshift;
+ 	indent += delta;
+ 	if(indent < 0) {
+ 		if(warn)
+ 			fprint(2, "warning: indent went negative\n");
+ 		indent = 0;
+ 	}
+ 	ps->curstate &= ~IFindentmask;
+ 	ps->curstate |= indent << IFindentshift;
+ }
+
+ // Push val on top of stk and return val.
+ // When the stack already holds Nestmax entries the value is not
+ // stored (a warning is printed if enabled) but is still returned.
+ static int
+ push(Stack* stk, int val)
+ {
+ 	if(stk->n != Nestmax) {
+ 		stk->slots[stk->n] = val;
+ 		stk->n++;
+ 	}
+ 	else if(warn)
+ 		fprint(2, "warning: build stack overflow\n");
+ 	return val;
+ }
+
+ // Discard the top entry of stk; an empty stack is left untouched.
+ static void
+ pop(Stack* stk)
+ {
+ 	if(stk->n <= 0)
+ 		return;
+ 	stk->n--;
+ }
+
+ // Return the top entry of stk without removing it,
+ // or dflt if the stack is empty.
+ static int
+ top(Stack* stk, int dflt)
+ {
+ 	return (stk->n == 0)? dflt : stk->slots[stk->n-1];
+ }
+
+ // Pop one entry (if there is one), then return the new top,
+ // or dflt if the stack is empty afterwards.
+ static int
+ popretnewtop(Stack* stk, int dflt)
+ {
+ 	if(stk->n != 0)
+ 		stk->n--;
+ 	if(stk->n == 0)
+ 		return dflt;
+ 	return stk->slots[stk->n-1];
+ }
+
+ // Make tostk a copy of fromstk: same depth, same entries.
+ static void
+ copystack(Stack* tostk, Stack* fromstk)
+ {
+ 	int depth;
+
+ 	depth = fromstk->n;
+ 	memmove(tostk->slots, fromstk->slots, depth*sizeof(int));
+ 	tostk->n = depth;
+ }
+
+ // Drop the innermost font style and recompute ps->curfont.
+ static void
+ popfontstyle(Pstate* ps)
+ {
+ 	Stack* styles;
+
+ 	styles = &ps->fntstylestk;
+ 	pop(styles);
+ 	setcurfont(ps);
+ }
+
+ // Make sty the innermost font style and recompute ps->curfont.
+ static void
+ pushfontstyle(Pstate* ps, int sty)
+ {
+ 	Stack* styles;
+
+ 	styles = &ps->fntstylestk;
+ 	push(styles, sty);
+ 	setcurfont(ps);
+ }
+
+ // Drop the innermost font size and recompute ps->curfont.
+ static void
+ popfontsize(Pstate* ps)
+ {
+ 	Stack* sizes;
+
+ 	sizes = &ps->fntsizestk;
+ 	pop(sizes);
+ 	setcurfont(ps);
+ }
+
+ // Make sz the innermost font size and recompute ps->curfont.
+ static void
+ pushfontsize(Pstate* ps, int sz)
+ {
+ 	Stack* sizes;
+
+ 	sizes = &ps->fntsizestk;
+ 	push(sizes, sz);
+ 	setcurfont(ps);
+ }
+
+ // Recompute ps->curfont from the tops of the style and size stacks
+ // (FntR and Normal when the respective stack is empty).
+ // The size is forced into the range Tiny..Verylarge, and the font
+ // number is style*NumSize + size.
+ static void
+ setcurfont(Pstate* ps)
+ {
+ 	int style;
+ 	int size;
+
+ 	style = top(&ps->fntstylestk, FntR);
+ 	size = top(&ps->fntsizestk, Normal);
+ 	if(size < Tiny)
+ 		size = Tiny;
+ 	if(size > Verylarge)
+ 		size = Verylarge;
+ 	ps->curfont = size + style*NumSize;
+ }
+
+ // Drop the innermost justification and update ps->curjust/curstate.
+ static void
+ popjust(Pstate* ps)
+ {
+ 	Stack* justs;
+
+ 	justs = &ps->juststk;
+ 	pop(justs);
+ 	setcurjust(ps);
+ }
+
+ // Make j the innermost justification and update ps->curjust/curstate.
+ static void
+ pushjust(Pstate* ps, int j)
+ {
+ 	Stack* justs;
+
+ 	justs = &ps->juststk;
+ 	push(justs, j);
+ 	setcurjust(ps);
+ }
+
+ // Bring ps->curjust in line with the top of the justification stack
+ // (ALleft when the stack is empty).  When the value changes, the
+ // IFrjust/IFcjust bits of curstate are rewritten to match:
+ // ALcenter sets IFcjust, ALright sets IFrjust, anything else neither.
+ static void
+ setcurjust(Pstate* ps)
+ {
+ 	int want;
+ 	int bits;
+
+ 	want = top(&ps->juststk, ALleft);
+ 	if(want == ps->curjust)
+ 		return;
+
+ 	ps->curjust = want;
+ 	bits = 0;
+ 	if(want == ALcenter)
+ 		bits = IFcjust;
+ 	else if(want == ALright)
+ 		bits = IFrjust;
+ 	ps->curstate = (ps->curstate&~(IFrjust|IFcjust))|bits;
+ }
+
+ // Do final rearrangement after table parsing is finished
+ // and assign cells to grid points.
+ //
+ // On entry t->rows is a linked list; it is copied into a freshly
+ // allocated array, with the list head landing in the last slot, so
+ // the list is evidently kept in reverse source order.  Each row's
+ // cell list is reversed along nextinrow, and t->cells is reversed
+ // and trimmed.  Then t->cols and the nrow x ncol t->grid are
+ // allocated and every cell is placed at its upper-left grid point
+ // (c->row, c->col), taking colspan and rowspan into account.
+ //
+ // NOTE(review): the row list nodes are copied by value and not freed
+ // here; confirm nothing else still refers to them before freeing.
+ // NOTE(review): this relies on emalloc returning zeroed memory
+ // (rowspancnt is read before being written) -- confirm.
+ static void
+ finish_table(Table* t)
+ {
+ 	int ncol;
+ 	int nrow;
+ 	int r;
+ 	Tablerow* rl;
+ 	Tablecell* cl;
+ 	int* rowspancnt;
+ 	Tablecell** rowspancell;
+ 	int ri;
+ 	int ci;
+ 	Tablecell* c;
+ 	Tablecell* cnext;
+ 	Tablerow* row;
+ 	Tablerow* rownext;
+ 	int rcols;
+ 	int newncol;
+ 	int k;
+ 	int j;
+ 	int cspan;
+ 	int rspan;
+ 	int i;
+
+ 	rl = t->rows;
+ 	t->nrow = nrow = _listlen((List*)rl);
+ 	t->rows = (Tablerow*)emalloc(nrow * sizeof(Tablerow));
+ 	ncol = 0;
+ 	r = nrow - 1;
+ 	for(row = rl; row != nil; row = rownext) {
+ 		// copy the data from the allocated Tablerow into the array slot
+ 		t->rows[r] = *row;
+ 		rownext = row->next;
+ 		row = &t->rows[r];
+ 		r--;
+ 		rcols = 0;
+ 		c = row->cells;
+
+ 		// If rowspan is > 1 but this is the last row,
+ 		// reset the rowspan
+ 		// (r was just decremented, so r == nrow-2 means slot nrow-1)
+ 		if(c != nil && c->rowspan > 1 && r == nrow-2)
+ 			c->rowspan = 1;
+
+ 		// reverse row->cells list (along nextinrow pointers)
+ 		row->cells = nil;
+ 		while(c != nil) {
+ 			cnext = c->nextinrow;
+ 			c->nextinrow = row->cells;
+ 			row->cells = c;
+ 			rcols += c->colspan;
+ 			c = cnext;
+ 		}
+ 		if(rcols > ncol)
+ 			ncol = rcols;
+ 	}
+ 	t->ncol = ncol;
+ 	t->cols = (Tablecol*)emalloc(ncol * sizeof(Tablecol));
+
+ 	// Reverse cells just so they are drawn in source order.
+ 	// Also, trim their contents so they don't end in whitespace.
+ 	t->cells = (Tablecell*)_revlist((List*)t->cells);
+ 	for(c = t->cells; c != nil; c= c->next)
+ 		trim_cell(c);
+ 	t->grid = (Tablecell***)emalloc(nrow * sizeof(Tablecell**));
+ 	for(i = 0; i < nrow; i++)
+ 		t->grid[i] = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
+
+ 	// The following arrays keep track of cells that are spanning
+ 	// multiple rows; rowspancnt[i] is the number of rows left
+ 	// to be spanned in column i.
+ 	// When done, cell's (row,col) is upper left grid point.
+ 	rowspancnt = (int*)emalloc(ncol * sizeof(int));
+ 	rowspancell = (Tablecell**)emalloc(ncol * sizeof(Tablecell*));
+ 	for(ri = 0; ri < nrow; ri++) {
+ 		row = &t->rows[ri];
+ 		cl = row->cells;
+ 		ci = 0;
+ 		while(ci < ncol || cl != nil) {
+ 			if(ci < ncol && rowspancnt[ci] > 0) {
+ 				// column still occupied by a cell from an earlier row
+ 				t->grid[ri][ci] = rowspancell[ci];
+ 				rowspancnt[ci]--;
+ 				ci++;
+ 			}
+ 			else {
+ 				if(cl == nil) {
+ 					ci++;
+ 					continue;
+ 				}
+ 				c = cl;
+ 				cl = cl->nextinrow;
+ 				cspan = c->colspan;
+ 				rspan = c->rowspan;
+ 				if(ci + cspan > ncol) {
+ 					// because of row spanning, we calculated
+ 					// ncol incorrectly; adjust it
+ 					newncol = ci + cspan;
+ 					t->cols = (Tablecol*)erealloc(t->cols, newncol * sizeof(Tablecol));
+ 					rowspancnt = (int*)erealloc(rowspancnt, newncol * sizeof(int));
+ 					rowspancell = (Tablecell**)erealloc(rowspancell, newncol * sizeof(Tablecell*));
+ 					k = newncol-ncol;
+ 					memset(t->cols+ncol, 0, k*sizeof(Tablecol));
+ 					memset(rowspancnt+ncol, 0, k*sizeof(int));
+ 					memset(rowspancell+ncol, 0, k*sizeof(Tablecell*));
+ 					for(j = 0; j < nrow; j++) {
+ 						t->grid[j] = (Tablecell**)erealloc(t->grid[j], newncol * sizeof(Tablecell*));
+ 						// clear only the k new columns; clearing from
+ 						// the start would wipe cells already placed
+ 						memset(t->grid[j]+ncol, 0, k*sizeof(Tablecell*));
+ 					}
+ 					t->ncol = ncol = newncol;
+ 				}
+ 				c->row = ri;
+ 				c->col = ci;
+ 				for(i = 0; i < cspan; i++) {
+ 					t->grid[ri][ci] = c;
+ 					if(rspan > 1) {
+ 						rowspancnt[ci] = rspan - 1;
+ 						rowspancell[ci] = c;
+ 					}
+ 					ci++;
+ 				}
+ 			}
+ 		}
+ 	}
+ 	// scratch arrays are local to this function
+ 	free(rowspancnt);
+ 	free(rowspancell);
+ }
+
+ // Remove tail of cell content until it isn't whitespace.
+ //
+ // As written, this finds the last item of c->content and, if it is a
+ // text item without IFnobrk, replaces its string by the x/nx part that
+ // _splitr reports (only when both nx and ny are nonzero).
+ // NOTE(review): presumably x/nx is the text up to the last
+ // non-whitespace rune and y/ny the trailing whitespace -- confirm
+ // against _splitr.
+ // NOTE(review): dropping is cleared on every pass and never set again,
+ // so the item-removal code at the bottom of the loop is unreachable and
+ // the loop body runs at most once; a text item that is entirely
+ // whitespace is therefore left in place.  Confirm whether that is
+ // intended.
+ static void
+ trim_cell(Tablecell* c)
+ {
+ int dropping;
+ Rune* s;
+ Rune* x;
+ Rune* y;
+ int nx;
+ int ny;
+ Item* p;
+ Itext* q;
+ Item* pprev;
+
+ dropping = 1;
+ while(c->content != nil && dropping) {
+ // walk to the last item, remembering its predecessor
+ p = c->content;
+ pprev = nil;
+ while(p->next != nil) {
+ pprev = p;
+ p = p->next;
+ }
+ dropping = 0;
+ if(!(p->state&IFnobrk)) {
+ if(p->tag == Itexttag) {
+ q = (Itext*)p;
+ s = q->s;
+ _splitr(s, _Strlen(s), notwhitespace, &x, &nx, &y, &ny);
+ if(nx != 0 && ny != 0) {
+ // keep only the x part; the old string is released
+ q->s = _Strndup(x, nx);
+ free(s);
+ }
+ // leaves the outer while loop
+ break;
+ }
+ }
+ // unlink and free the last item (not reached; see note above)
+ if(dropping) {
+ if(pprev == nil)
+ c->content = nil;
+ else
+ pprev->next = nil;
+ freeitem(p);
+ }
+ }
+ }
+
+ // Build the marker string for item number n of a list of type ty.
+ //	LTdisc, LTsquare, LTcircle: a single bullet-like rune
+ //	LT1: the decimal number followed by '.'
+ //	LTa, LTA: a., b., ... z., aa., ab., ... (lower or upper case)
+ //	LTi, LTI: roman numeral from the roman table followed by '.'
+ // Returns nil for any other ty.
+ // Caller must free answer (eventually).
+ static Rune*
+ listmark(uchar ty, int n)
+ {
+ 	Rune* s;
+ 	Rune* t;
+ 	int n2;
+ 	int i;
+ 	int base;
+
+ 	s = nil;
+ 	switch(ty) {
+ 	case LTdisc:
+ 	case LTsquare:
+ 	case LTcircle:
+ 		s = _newstr(1);
+ 		s[0] = (ty == LTdisc)? 0x2022		// bullet
+ 			: ((ty == LTsquare)? 0x220e	// filled square
+ 			: 0x2218);			// degree
+ 		s[1] = 0;
+ 		break;
+
+ 	case LT1:
+ 		// NOTE(review): the string returned by _ltoStr is not
+ 		// released here; if it is allocated, it leaks -- confirm
+ 		t = _ltoStr(n);
+ 		n2 = _Strlen(t);
+ 		s = _newstr(n2+1);
+ 		t = _Stradd(s, t, n2);
+ 		*t++ = '.';
+ 		*t = 0;
+ 		break;
+
+ 	case LTa:
+ 	case LTA:
+ 		// Parenthesize the case choice: the original
+ 		// "n + (ty == LTa)? 'a' : 'A'" parsed as
+ 		// "(n + (ty == LTa)) ? 'a' : 'A'" and lost the offset.
+ 		base = (ty == LTa)? 'a' : 'A';
+ 		n--;
+ 		i = 0;
+ 		if(n < 0)
+ 			n = 0;
+ 		s = _newstr((n <= 25)? 2 : 3);
+ 		if(n > 25) {
+ 			// two letters: leading letter first, so that
+ 			// item 27 is "aa", 28 is "ab", 53 is "ba", ...
+ 			n2 = n/26 - 1;
+ 			if(n2 > 25)
+ 				n2 = 25;	// past "zz": stay at 'z'
+ 			s[i++] = base + n2;
+ 			n %= 26;
+ 		}
+ 		s[i++] = base + n;
+ 		s[i++] = '.';
+ 		s[i] = 0;
+ 		break;
+
+ 	case LTi:
+ 	case LTI:
+ 		if(n > NROMAN) {
+ 			if(warn)
+ 				fprint(2, "warning: unimplemented roman number > %d\n", NROMAN);
+ 			n = NROMAN;
+ 		}
+ 		// roman[] is indexed from 1; keep n-1 inside the table
+ 		if(n < 1)
+ 			n = 1;
+ 		t = roman[n - 1];
+ 		n2 = _Strlen(t);
+ 		s = _newstr(n2+1);
+ 		for(i = 0; i < n2; i++)
+ 			s[i] = (ty == LTi)? tolower(t[i]) : t[i];
+ 		s[i++] = '.';
+ 		s[i] = 0;
+ 		break;
+ 	}
+ 	return s;
+ }
+
+ // Look up the map called name in di->maps.
+ // If there is none, create one (with a copy of name and no areas),
+ // put it at the front of di->maps, and return that.
+ // Ownership of the map remains with the di->maps list.
+ static Map*
+ getmap(Docinfo* di, Rune* name)
+ {
+ 	Map* m;
+
+ 	m = di->maps;
+ 	while(m != nil) {
+ 		if(_Strcmp(name, m->name) == 0)
+ 			return m;
+ 		m = m->next;
+ 	}
+
+ 	m = (Map*)emalloc(sizeof(Map));
+ 	m->areas = nil;
+ 	m->name = _Strdup(name);
+ 	m->next = di->maps;
+ 	di->maps = m;
+ 	return m;
+ }
+
+ // Allocate an Area with the given shape, href and target,
+ // linked in front of link.
+ // Ownership of href passes to the Area.
+ static Area*
+ newarea(int shape, Rune* href, int target, Area* link)
+ {
+ 	Area* area;
+
+ 	area = (Area*)emalloc(sizeof(Area));
+ 	area->next = link;
+ 	area->shape = shape;
+ 	area->target = target;
+ 	area->href = href;
+ 	return area;
+ }
+
+ // Return string value associated with attid in tok, nil if none.
+ // The string is taken out of the token (last argument 1 to
+ // _tokaval), so the caller must free the result (eventually).
+ static Rune*
+ aval(Token* tok, int attid)
+ {
+ 	Rune* val;
+
+ 	// the found/not-found result is not needed here
+ 	_tokaval(tok, attid, &val, 1);
+ 	return val;
+ }
+
+ // Like aval, but return a copy of dflt if there was no such
+ // attribute in tok.
+ // Caller must free the result (eventually).
+ static Rune*
+ astrval(Token* tok, int attid, Rune* dflt)
+ {
+ 	Rune* val;
+
+ 	if(!_tokaval(tok, attid, &val, 1))
+ 		return _Strdup(dflt);
+ 	// string ownership has moved from the token to us
+ 	return val;
+ }
+
+ // Integer value of attribute attid in tok;
+ // dflt if the attribute is absent or has no value.
+ static int
+ aintval(Token* tok, int attid, int dflt)
+ {
+ 	Rune* str;
+
+ 	if(_tokaval(tok, attid, &str, 0) && str != nil)
+ 		return toint(str);
+ 	return dflt;
+ }
+
+ // Like aintval, but a negative converted value is returned as 0.
+ // (dflt itself is returned unchanged.)
+ static int
+ auintval(Token* tok, int attid, int dflt)
+ {
+ 	Rune* str;
+ 	int v;
+
+ 	if(!_tokaval(tok, attid, &str, 0) || str == nil)
+ 		return dflt;
+ 	v = toint(str);
+ 	if(v < 0)
+ 		v = 0;
+ 	return v;
+ }
+
+ // Convert s to an int (base 10).
+ // If warnings are enabled and something other than whitespace
+ // follows the number, print a warning; the converted value is
+ // returned regardless.
+ static int
+ toint(Rune* s)
+ {
+ 	int val;
+ 	Rune* rest;
+
+ 	val = _Strtol(s, &rest, 10);
+ 	if(warn && *rest != 0) {
+ 		rest = _Strclass(rest, notwhitespace);
+ 		if(rest != nil)
+ 			fprint(2, "warning: expected integer, got %S\n", s);
+ 	}
+ 	return val;
+ }
+
+ // Attribute value when a table is needed to convert strings to ints:
+ // look the value of attid up in tab (ntab entries).
+ // Returns dflt if the attribute is absent or its value is not in
+ // the table (the latter with a warning, if enabled).
+ static int
+ atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt)
+ {
+ 	Rune* name;
+ 	int val;
+
+ 	if(!_tokaval(tok, attid, &name, 0))
+ 		return dflt;
+ 	val = dflt;
+ 	if(_lookup(tab, ntab, name, _Strlen(name), &val))
+ 		return val;
+ 	if(warn)
+ 		fprint(2, "warning: name not found in table lookup: %S\n", name);
+ 	return dflt;
+ }
+
+ // Attribute value when supposed to be a color:
+ // dflt if the attribute is absent, otherwise color(value, dflt).
+ static int
+ acolorval(Token* tok, int attid, int dflt)
+ {
+ 	Rune* spec;
+
+ 	if(!_tokaval(tok, attid, &spec, 0))
+ 		return dflt;
+ 	return color(spec, dflt);
+ }
+
+ // Attribute value when supposed to be a target frame name:
+ // dflt if tok has no target attribute, otherwise targetid(value).
+ static int
+ atargval(Token* tok, int dflt)
+ {
+ 	Rune* frame;
+
+ 	if(!_tokaval(tok, Atarget, &frame, 0))
+ 		return dflt;
+ 	return targetid(frame);
+ }
+
+ // List type from tok's type attribute; dflt if there is none or the
+ // value is not recognized (the latter with a warning, if enabled).
+ // Special for list types: the one-letter forms are case sensitive
+ // ("i" and "I" are different), but the word forms are not
+ // ("square" and "SQUARE" are the same).
+ static int
+ listtyval(Token* tok, int dflt)
+ {
+ 	Rune* aval;
+ 	int ans;
+ 	int n;
+
+ 	ans = dflt;
+ 	if(_tokaval(tok, Atype, &aval, 0)) {
+ 		n = _Strlen(aval);
+ 		if(n == 1) {
+ 			switch(aval[0]) {
+ 			case '1':
+ 				ans = LT1;
+ 				break;
+ 			case 'A':
+ 				ans = LTA;
+ 				break;
+ 			case 'I':
+ 				ans = LTI;
+ 				break;
+ 			case 'a':
+ 				ans = LTa;
+ 				break;
+ 			case 'i':
+ 				ans = LTi;
+ 				// without this break a valid "i" fell into
+ 				// default and was reported as unknown
+ 				break;
+ 			default:
+ 				if(warn)
+ 					fprint(2, "warning: unknown list element type %c\n", aval[0]);
+ 				break;
+ 			}
+ 		}
+ 		else {
+ 			if(!_Strncmpci(aval, n, L(Lcircle)))
+ 				ans = LTcircle;
+ 			else if(!_Strncmpci(aval, n, L(Ldisc)))
+ 				ans = LTdisc;
+ 			else if(!_Strncmpci(aval, n, L(Lsquare)))
+ 				ans = LTsquare;
+ 			else {
+ 				if(warn)
+ 					fprint(2, "warning: unknown list element type %S\n", aval);
+ 			}
+ 		}
+ 	}
+ 	return ans;
+ }
+
+ // Attribute value when value is a URL, possibly relative to base.
+ // FOR NOW: leave the url relative (base is unused).
+ // The value has all whitespace removed; if the attribute is absent,
+ // has no value, or removeallwhite yields nil, a copy of dflt is
+ // returned instead.
+ // Caller must free the result (eventually).
+ static Rune*
+ aurlval(Token* tok, int attid, Rune* dflt, Rune* base)
+ {
+ 	Rune* raw;
+ 	Rune* cleaned;
+
+ 	USED(base);
+ 	cleaned = nil;
+ 	if(_tokaval(tok, attid, &raw, 0) && raw != nil)
+ 		cleaned = removeallwhite(raw);
+ 	if(cleaned != nil)
+ 		return cleaned;
+ 	return _Strdup(dflt);
+ }
+
+ // Return copy of s but with all whitespace (even internal) removed.
+ // This fixes some buggy URL specification strings.
+ // Only runes below 256 for which isspace is true count as whitespace.
+ static Rune*
+ removeallwhite(Rune* s)
+ {
+ 	int len;
+ 	int kept;
+ 	int src;
+ 	int dst;
+ 	int c;
+ 	Rune* out;
+
+ 	// first pass: how many runes survive?
+ 	len = _Strlen(s);
+ 	kept = 0;
+ 	for(src = 0; src < len; src++) {
+ 		c = s[src];
+ 		if(c >= 256 || !isspace(c))
+ 			kept++;
+ 	}
+ 	if(kept >= len)
+ 		return _Strdup(s);	// nothing to remove
+
+ 	// second pass: copy the survivors
+ 	out = _newstr(kept);
+ 	dst = 0;
+ 	for(src = 0; src < len; src++) {
+ 		c = s[src];
+ 		if(c >= 256 || !isspace(c))
+ 			out[dst++] = c;
+ 	}
+ 	out[dst] = 0;
+ 	return out;
+ }
+
+ // Attribute value when mere presence of attr implies value of 1,
+ // but if there is an integer there, return it as the value.
+ // Returns 0 when the attribute is absent.
+ static int
+ aflagval(Token* tok, int attid)
+ {
+ 	Rune* text;
+
+ 	if(!_tokaval(tok, attid, &text, 0))
+ 		return 0;
+ 	if(text == nil)
+ 		return 1;
+ 	return toint(text);
+ }
+
+ // Pack a horizontal and a vertical alignment into an Align.
+ static Align
+ makealign(int halign, int valign)
+ {
+ 	Align a;
+
+ 	a.valign = valign;
+ 	a.halign = halign;
+ 	return a;
+ }
+
+ // Make an Align (two alignments, horizontal and vertical) from
+ // tok's align and valign attributes; each is ALnone when absent
+ // or not found in align_tab.
+ static Align
+ aalign(Token* tok)
+ {
+ 	int h;
+ 	int v;
+
+ 	h = atabval(tok, Aalign, align_tab, NALIGNTAB, ALnone);
+ 	v = atabval(tok, Avalign, align_tab, NALIGNTAB, ALnone);
+ 	return makealign(h, v);
+ }
+
+ // Make a Dimen from the value of the attid attribute of tok;
+ // a Dnone dimension with spec 0 if the attribute is absent.
+ static Dimen
+ adimen(Token* tok, int attid)
+ {
+ 	Rune* text;
+
+ 	if(!_tokaval(tok, attid, &text, 0))
+ 		return makedimen(Dnone, 0);
+ 	return parsedim(text, _Strlen(text));
+ }
+
+ // Parse s[0:n] as num[.[num]][unit][%|*]
+ //
+ // The number is handled in fixed point, scaled by 1000, and divided
+ // back down just before returning.  With a leading number the kind
+ // is Dpixels unless a trailing '%' (Dpercent) or '*' (Drelative)
+ // changes it.  A lone "*" gives Drelative with spec 1.  Anything
+ // else gives Dnone with spec 0.
+ // NOTE(review): the two runes after the number are always taken as
+ // the unit when at least two remain, so a '%' or '*' that is followed
+ // by one more rune is consumed as an (unknown) unit and the kind
+ // stays Dpixels -- confirm whether that is intended.
+ static Dimen
+ parsedim(Rune* s, int ns)
+ {
+ int kind;
+ int spec;
+ Rune* l;
+ int nl;
+ Rune* r;
+ int nr;
+ int mul;
+ int i;
+ Rune* f;
+ int nf;
+ int Tkdpi;
+ Rune* units;
+
+ kind = Dnone;
+ spec = 0;
+ // l/nl: leading digits; r/nr: whatever follows them
+ _splitl(s, ns, L(Lnot0to9), &l, &nl, &r, &nr);
+ if(nl != 0) {
+ spec = 1000*_Strtol(l, nil, 10);
+ if(nr > 0 && r[0] == '.') {
+ // f/nf: fraction digits after the '.'
+ _splitl(r+1, nr-1, L(Lnot0to9), &f, &nf, &r, &nr);
+ if(nf != 0) {
+ // mul goes 100, 10, 1, 0, ...: only the first three
+ // fraction digits contribute
+ mul = 100;
+ for(i = 0; i < nf; i++) {
+ spec = spec + mul*(f[i]-'0');
+ mul = mul/10;
+ }
+ }
+ }
+ kind = Dpixels;
+ if(nr != 0) {
+ if(nr >= 2) {
+ // two-rune unit, compared case-insensitively;
+ // Tkdpi is the pixels-per-inch figure used for conversion
+ Tkdpi = 100;
+ units = r;
+ r = r+2;
+ nr -= 2;
+ if(!_Strncmpci(units, 2, L(Lpt)))
+ spec = (spec*Tkdpi)/72;
+ else if(!_Strncmpci(units, 2, L(Lpi)))
+ spec = (spec*12*Tkdpi)/72;
+ else if(!_Strncmpci(units, 2, L(Lin)))
+ spec = spec*Tkdpi;
+ else if(!_Strncmpci(units, 2, L(Lcm)))
+ spec = (spec*100*Tkdpi)/254;
+ else if(!_Strncmpci(units, 2, L(Lmm)))
+ spec = (spec*10*Tkdpi)/254;
+ else if(!_Strncmpci(units, 2, L(Lem)))
+ spec = spec*15;
+ else {
+ if(warn)
+ fprint(2, "warning: unknown units %C%Cs\n", units[0], units[1]);
+ }
+ }
+ if(nr >= 1) {
+ if(r[0] == '%')
+ kind = Dpercent;
+ else if(r[0] == '*')
+ kind = Drelative;
+ }
+ }
+ // drop the fixed-point scaling
+ spec = spec/1000;
+ }
+ else if(nr == 1 && r[0] == '*') {
+ spec = 1;
+ kind = Drelative;
+ }
+ return makedimen(kind, spec);
+ }
+
+ // Split the value of attribute attid at the separators in
+ // L(Lcommaspace) and parse each piece as a dimension.
+ // On success *pans is a newly allocated array and *panslen its
+ // length; if the attribute is absent or yields no pieces,
+ // *pans is nil and *panslen is 0.
+ // At most SMALLBUFSIZE pieces are considered.
+ static void
+ setdimarray(Token* tok, int attid, Dimen** pans, int* panslen)
+ {
+ 	Rune* text;
+ 	Dimen* dims;
+ 	int idx;
+ 	int count;
+ 	Rune* piece[SMALLBUFSIZE];
+ 	int piecelen[SMALLBUFSIZE];
+
+ 	dims = nil;
+ 	count = 0;
+ 	if(_tokaval(tok, attid, &text, 0)) {
+ 		count = _splitall(text, _Strlen(text), L(Lcommaspace), piece, piecelen, SMALLBUFSIZE);
+ 		if(count > 0) {
+ 			dims = (Dimen*)emalloc(count * sizeof(Dimen));
+ 			for(idx = 0; idx < count; idx++)
+ 				dims[idx] = parsedim(piece[idx], piecelen[idx]);
+ 		}
+ 		else
+ 			count = 0;
+ 	}
+ 	*pans = dims;
+ 	*panslen = count;
+ }
+
+ // Pack an image url and a color into a Background.
+ // The url pointer is stored as is, not copied.
+ static Background
+ makebackground(Rune* imageurl, int color)
+ {
+ 	Background b;
+
+ 	b.color = color;
+ 	b.image = imageurl;
+ 	return b;
+ }
+
+ // Allocate a text item holding s (which must not be nil) with the
+ // given font, foreground, vertical offset and underline setting.
+ // The pointer s is stored as is, not copied.
+ static Item*
+ newitext(Rune* s, int fnt, int fg, int voff, int ul)
+ {
+ 	Itext* txt;
+
+ 	assert(s != nil);
+ 	txt = (Itext*)emalloc(sizeof(Itext));
+ 	txt->item.tag = Itexttag;
+ 	txt->ul = ul;
+ 	txt->voff = voff;
+ 	txt->fg = fg;
+ 	txt->fnt = fnt;
+ 	txt->s = s;
+ 	return (Item*)txt;
+ }
+
+ // Allocate a rule item with the given alignment, size,
+ // noshade flag and width specification.
+ static Item*
+ newirule(int align, int size, int noshade, Dimen wspec)
+ {
+ 	Irule* rule;
+
+ 	rule = (Irule*)emalloc(sizeof(Irule));
+ 	rule->item.tag = Iruletag;
+ 	rule->wspec = wspec;
+ 	rule->noshade = noshade;
+ 	rule->size = size;
+ 	rule->align = align;
+ 	return (Item*)rule;
+ }
+
+ // Allocate an image item.  The item state is IFsmap when ismap is
+ // true and 0 otherwise; ctlid starts out as -1.
+ // src and altrep are stored as is (freeitem frees them);
+ // the Map is owned elsewhere.
+ static Item*
+ newiimage(Rune* src, Rune* altrep, int align, int width, int height,
+ 	int hspace, int vspace, int border, int ismap, Map* map)
+ {
+ 	Iimage* img;
+
+ 	img = (Iimage*)emalloc(sizeof(Iimage));
+ 	img->item.tag = Iimagetag;
+ 	img->item.state = ismap? IFsmap : 0;
+ 	img->imsrc = src;
+ 	img->altrep = altrep;
+ 	img->map = map;
+ 	img->align = align;
+ 	img->imwidth = width;
+ 	img->imheight = height;
+ 	img->hspace = hspace;
+ 	img->vspace = vspace;
+ 	img->border = border;
+ 	img->ctlid = -1;
+ 	return (Item*)img;
+ }
+
+ // Allocate a form field item referring to ff.
+ static Item*
+ newiformfield(Formfield* ff)
+ {
+ 	Iformfield* fld;
+
+ 	fld = (Iformfield*)emalloc(sizeof(Iformfield));
+ 	fld->formfield = ff;
+ 	fld->item.tag = Iformfieldtag;
+ 	return (Item*)fld;
+ }
+
+ // Allocate a table item referring to tab.
+ static Item*
+ newitable(Table* tab)
+ {
+ 	Itable* ti;
+
+ 	ti = (Itable*)emalloc(sizeof(Itable));
+ 	ti->table = tab;
+ 	ti->item.tag = Itabletag;
+ 	return (Item*)ti;
+ }
+
+ // Allocate a float item wrapping it, floated to the given side.
+ // The float's own state starts as IFwrap.
+ static Item*
+ newifloat(Item* it, int side)
+ {
+ 	Ifloat* fl;
+
+ 	fl = (Ifloat*)emalloc(sizeof(Ifloat));
+ 	fl->_item.tag = Ifloattag;
+ 	fl->_item.state = IFwrap;
+ 	fl->side = side;
+ 	fl->item = it;
+ 	return (Item*)fl;
+ }
+
+ // Allocate a spacer item of kind spkind.
+ static Item*
+ newispacer(int spkind)
+ {
+ 	Ispacer* sp;
+
+ 	sp = (Ispacer*)emalloc(sizeof(Ispacer));
+ 	sp->spkind = spkind;
+ 	sp->item.tag = Ispacertag;
+ 	return (Item*)sp;
+ }
+
+ // Free one item (caller must deal with next pointer).
+ // Releases what the item owns according to its tag: the string of
+ // a text item, the src and alt strings of an image, the form field,
+ // the table, or the wrapped item of a float.  Also releases the
+ // item's generic attributes, if any.  A nil item is ignored.
+ static void
+ freeitem(Item* it)
+ {
+ 	Iimage* ii;
+ 	Genattr* ga;
+
+ 	if(it == nil)
+ 		return;
+
+ 	switch(it->tag) {
+ 	case Itexttag:
+ 		free(((Itext*)it)->s);
+ 		break;
+ 	case Iimagetag:
+ 		ii = (Iimage*)it;
+ 		free(ii->imsrc);
+ 		free(ii->altrep);
+ 		break;
+ 	case Iformfieldtag:
+ 		freeformfield(((Iformfield*)it)->formfield);
+ 		break;
+ 	case Itabletag:
+ 		freetable(((Itable*)it)->table);
+ 		break;
+ 	case Ifloattag:
+ 		freeitem(((Ifloat*)it)->item);
+ 		break;
+ 	}
+ 	ga = it->genattr;
+ 	if(ga != nil) {
+ 		free(ga->id);
+ 		free(ga->class);
+ 		free(ga->style);
+ 		free(ga->title);
+ 		freescriptevents(ga->events);
+ 		// the Genattr struct itself was previously leaked
+ 		free(ga);
+ 	}
+ 	free(it);
+ }
+
+ // Free every item of the list that starts at ithead
+ // and is chained through the next pointers.
+ void
+ freeitems(Item* ithead)
+ {
+ 	Item* cur;
+ 	Item* following;
+
+ 	for(cur = ithead; cur != nil; cur = following) {
+ 		// grab the link before the item goes away
+ 		following = cur->next;
+ 		freeitem(cur);
+ 	}
+ }
+
+ // Free a form field: its name, its value, and its list of options
+ // (each option's value and display strings and the option itself).
+ // A nil field is ignored.
+ static void
+ freeformfield(Formfield* ff)
+ {
+ 	Option* o;
+ 	Option* onext;
+
+ 	if(ff == nil)
+ 		return;
+
+ 	free(ff->name);
+ 	free(ff->value);
+ 	for(o = ff->options; o != nil; o = onext) {
+ 		onext = o->next;
+ 		free(o->value);
+ 		free(o->display);
+ 		// the Option node itself was previously leaked
+ 		free(o);
+ 	}
+ 	free(ff);
+ }
+
+ // Free a table: every cell (with its content items), the grid,
+ // the row and column arrays, the caption items, and the table
+ // itself.  A nil table is ignored.
+ static void
+ freetable(Table* t)
+ {
+ 	int i;
+ 	Tablecell* c;
+ 	Tablecell* cnext;
+
+ 	if(t == nil)
+ 		return;
+
+ 	// We'll find all the unique cells via t->cells and next pointers.
+ 	// (Other pointers to cells in the table are duplicates of these)
+ 	for(c = t->cells; c != nil; c = cnext) {
+ 		cnext = c->next;
+ 		freeitems(c->content);
+ 		// the cell itself was previously leaked
+ 		free(c);
+ 	}
+ 	if(t->grid != nil) {
+ 		for(i = 0; i < t->nrow; i++)
+ 			free(t->grid[i]);
+ 		free(t->grid);
+ 	}
+ 	free(t->rows);
+ 	free(t->cols);
+ 	freeitems(t->caption);
+ 	free(t);
+ }
+
+ // Free a form together with its name and action strings.
+ // The fields are not freed here: a Form doesn't own its fields
+ // (Iformfield items do).  A nil form is ignored.
+ static void
+ freeform(Form* f)
+ {
+ 	if(f != nil) {
+ 		free(f->action);
+ 		free(f->name);
+ 		free(f);
+ 	}
+ }
+
+ // Free every form of the list starting at fhead.
+ static void
+ freeforms(Form* fhead)
+ {
+ 	Form* cur;
+ 	Form* following;
+
+ 	cur = fhead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freeform(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Free an anchor together with its name and href strings.
+ // A nil anchor is ignored.
+ static void
+ freeanchor(Anchor* a)
+ {
+ 	if(a != nil) {
+ 		free(a->href);
+ 		free(a->name);
+ 		free(a);
+ 	}
+ }
+
+ // Free every anchor of the list starting at ahead.
+ static void
+ freeanchors(Anchor* ahead)
+ {
+ 	Anchor* cur;
+ 	Anchor* following;
+
+ 	cur = ahead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freeanchor(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Free a destination anchor together with its name.
+ // The item it points at is not freed here.
+ // A nil anchor is ignored.
+ static void
+ freedestanchor(DestAnchor* da)
+ {
+ 	if(da != nil) {
+ 		free(da->name);
+ 		free(da);
+ 	}
+ }
+
+ // Free every destination anchor of the list starting at dahead.
+ static void
+ freedestanchors(DestAnchor* dahead)
+ {
+ 	DestAnchor* cur;
+ 	DestAnchor* following;
+
+ 	cur = dahead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freedestanchor(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Free an area together with its href and coords.
+ // A nil area is ignored.
+ static void
+ freearea(Area* a)
+ {
+ 	if(a == nil)
+ 		return;
+ 	free(a->href);
+ 	free(a->coords);
+ 	// the Area struct itself was previously leaked
+ 	free(a);
+ }
+
+ static void freekidinfos(Kidinfo* khead);
+
+ // Free one Kidinfo.  For a frameset that means its rows, cols and
+ // (recursively) its kids; for a plain frame, its src and name.
+ // k must not be nil.
+ static void
+ freekidinfo(Kidinfo* k)
+ {
+ 	if(!k->isframeset) {
+ 		free(k->src);
+ 		free(k->name);
+ 	}
+ 	else {
+ 		free(k->rows);
+ 		free(k->cols);
+ 		freekidinfos(k->kidinfos);
+ 	}
+ 	free(k);
+ }
+
+ // Free every Kidinfo of the list starting at khead.
+ static void
+ freekidinfos(Kidinfo* khead)
+ {
+ 	Kidinfo* cur;
+ 	Kidinfo* following;
+
+ 	cur = khead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freekidinfo(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Free a map: its name, each of its areas (via freearea),
+ // and the map itself.  A nil map is ignored.
+ static void
+ freemap(Map* m)
+ {
+ 	Area* cur;
+ 	Area* following;
+
+ 	if(m == nil)
+ 		return;
+
+ 	free(m->name);
+ 	cur = m->areas;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freearea(cur);
+ 		cur = following;
+ 	}
+ 	free(m);
+ }
+
+ // Free every map of the list starting at mhead.
+ static void
+ freemaps(Map* mhead)
+ {
+ 	Map* cur;
+ 	Map* following;
+
+ 	cur = mhead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freemap(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Free a Docinfo and what it owns: src, base and refresh strings,
+ // the background item, and the kidinfo, anchor, destination anchor,
+ // form and map lists.  A nil Docinfo is ignored.
+ void
+ freedocinfo(Docinfo* d)
+ {
+ 	if(d != nil) {
+ 		free(d->src);
+ 		free(d->base);
+ 		freeitem((Item*)d->backgrounditem);
+ 		free(d->refresh);
+ 		freekidinfos(d->kidinfo);
+ 		freeanchors(d->anchors);
+ 		freedestanchors(d->dests);
+ 		freeforms(d->forms);
+ 		freemaps(d->maps);
+ 		// tables, images, and formfields are freed when
+ 		// the items pointing at them are freed
+ 		free(d);
+ 	}
+ }
+
+ // Free a Pstate.  Only the struct itself is released:
+ // currently, someone else owns all the memory
+ // pointed to by things in a Pstate.
+ static void
+ freepstate(Pstate* p)
+ {
+ 	free(p);
+ }
+
+ // Free every Pstate of the stack starting at pshead
+ // (chained through the next pointers).
+ static void
+ freepstatestack(Pstate* pshead)
+ {
+ 	Pstate* cur;
+ 	Pstate* following;
+
+ 	cur = pshead;
+ 	while(cur != nil) {
+ 		following = cur->next;
+ 		freepstate(cur);
+ 		cur = following;
+ 	}
+ }
+
+ // Format routine for an Item* argument (the %I verb used by
+ // printitems and, recursively, for the item inside a float).
+ // Builds a one-line description in a local buffer: first the state
+ // flags (break, nobrk, nowrap, justification, smap, indent, hang),
+ // then tag-specific details, then width/height/ascent/anchor,
+ // and hands the result to fmtstrcpy.
+ // The item pointer is dereferenced without a nil check.
+ static int
+ Iconv(Fmt *f)
+ {
+ Item* it;
+ Itext* t;
+ Irule* r;
+ Iimage* i;
+ Ifloat* fl;
+ int state;
+ Formfield* ff;
+ Rune* ty;
+ Tablecell* c;
+ Table* tab;
+ char* p;
+ int cl;
+ int hang;
+ int indent;
+ int bi;
+ int nbuf;
+ char buf[BIGBUFSIZE];
+
+ it = va_arg(f->args, Item*);
+ // bi is the number of bytes placed in buf so far
+ bi = 0;
+ nbuf = sizeof(buf);
+ state = it->state;
+ // keep one byte back for the terminator stored at the end
+ nbuf = nbuf-1;
+ if(state&IFbrk) {
+ cl = state&(IFcleft|IFcright);
+ p = "";
+ if(cl) {
+ if(cl == (IFcleft|IFcright))
+ p = " both";
+ else if(cl == IFcleft)
+ p = " left";
+ else
+ p = " right";
+ }
+ bi = snprint(buf, nbuf, "brk(%d%s)", (state&IFbrksp)? 1 : 0, p);
+ }
+ if(state&IFnobrk)
+ bi += snprint(buf+bi, nbuf-bi, " nobrk");
+ if(!(state&IFwrap))
+ bi += snprint(buf+bi, nbuf-bi, " nowrap");
+ if(state&IFrjust)
+ bi += snprint(buf+bi, nbuf-bi, " rjust");
+ if(state&IFcjust)
+ bi += snprint(buf+bi, nbuf-bi, " cjust");
+ if(state&IFsmap)
+ bi += snprint(buf+bi, nbuf-bi, " smap");
+ indent = (state&IFindentmask) >> IFindentshift;
+ if(indent > 0)
+ bi += snprint(buf+bi, nbuf-bi, " indent=%d", indent);
+ hang = state&IFhangmask;
+ if(hang > 0)
+ bi += snprint(buf+bi, nbuf-bi, " hang=%d", hang);
+
+ // tag-specific part
+ switch(it->tag) {
+ case Itexttag:
+ t = (Itext*)it;
+ bi += snprint(buf+bi, nbuf-bi, " Text '%S', fnt=%d, fg=%x", t->s, t->fnt, t->fg);
+ break;
+
+ case Iruletag:
+ r = (Irule*)it;
+ bi += snprint(buf+bi, nbuf-bi, "Rule size=%d, al=%S, wspec=", r->size, stringalign(r->align));
+ bi += dimprint(buf+bi, nbuf-bi, r->wspec);
+ break;
+
+ case Iimagetag:
+ i = (Iimage*)it;
+ bi += snprint(buf+bi, nbuf-bi,
+ "Image src=%S, alt=%S, al=%S, w=%d, h=%d hsp=%d, vsp=%d, bd=%d, map=%S",
+ i->imsrc, i->altrep? i->altrep : L(Lempty), stringalign(i->align), i->imwidth, i->imheight,
+ i->hspace, i->vspace, i->border, i->map?i->map->name : L(Lempty));
+ break;
+
+ case Iformfieldtag:
+ ff = ((Iformfield*)it)->formfield;
+ // textarea and select are named directly; other field types
+ // are looked up by value in input_tab
+ if(ff->ftype == Ftextarea)
+ ty = L(Ltextarea);
+ else if(ff->ftype == Fselect)
+ ty = L(Lselect);
+ else {
+ ty = _revlookup(input_tab, NINPUTTAB, ff->ftype);
+ if(ty == nil)
+ ty = L(Lnone);
+ }
+ // NOTE(review): ff->form is dereferenced without a nil check
+ bi += snprint(buf+bi, nbuf-bi, "Formfield %S, fieldid=%d, formid=%d, name=%S, value=%S",
+ ty, ff->fieldid, ff->form->formid, ff->name? ff->name : L(Lempty),
+ ff->value? ff->value : L(Lempty));
+ break;
+
+ case Itabletag:
+ tab = ((Itable*)it)->table;
+ bi += snprint(buf+bi, nbuf-bi, "Table tableid=%d, width=", tab->tableid);
+ bi += dimprint(buf+bi, nbuf-bi, tab->width);
+ bi += snprint(buf+bi, nbuf-bi, ", nrow=%d, ncol=%d, ncell=%d, totw=%d, toth=%d\n",
+ tab->nrow, tab->ncol, tab->ncell, tab->totw, tab->toth);
+ // one entry per cell, then an end marker
+ for(c = tab->cells; c != nil; c = c->next)
+ bi += snprint(buf+bi, nbuf-bi, "Cell %d.%d, at (%d,%d) ",
+ tab->tableid, c->cellid, c->row, c->col);
+ bi += snprint(buf+bi, nbuf-bi, "End of Table %d", tab->tableid);
+ break;
+
+ case Ifloattag:
+ fl = (Ifloat*)it;
+ // %I formats the floated item with this same routine
+ bi += snprint(buf+bi, nbuf-bi, "Float, x=%d y=%d, side=%S, it=%I",
+ fl->x, fl->y, stringalign(fl->side), fl->item);
+ bi += snprint(buf+bi, nbuf-bi, "\n\t");
+ break;
+
+ case Ispacertag:
+ p = "";
+ switch(((Ispacer*)it)->spkind) {
+ case ISPnull:
+ p = "null";
+ break;
+ case ISPvline:
+ p = "vline";
+ break;
+ case ISPhspace:
+ p = "hspace";
+ break;
+ }
+ bi += snprint(buf+bi, nbuf-bi, "Spacer %s ", p);
+ break;
+ }
+ // common trailer for every kind of item
+ bi += snprint(buf+bi, nbuf-bi, " w=%d, h=%d, a=%d, anchor=%d\n",
+ it->width, it->height, it->ascent, it->anchorid);
+ buf[bi] = 0;
+ return fmtstrcpy(f, buf);
+ }
+
+ // String version of alignment 'a': its name in align_tab,
+ // or L(Lnone) if the table has no entry for it.
+ static Rune*
+ stringalign(int a)
+ {
+ 	Rune* name;
+
+ 	name = _revlookup(align_tab, NALIGNTAB, a);
+ 	return (name != nil)? name : L(Lnone);
+ }
+
+ // Put a representation of d into buf (at most nbuf chars are
+ // intended) and return the number of characters put: the spec in
+ // decimal, followed by '%' for a Dpercent or '*' for a Drelative
+ // dimension.  The suffix character is stored without checking nbuf
+ // and without a terminator.
+ static int
+ dimprint(char* buf, int nbuf, Dimen d)
+ {
+ 	int len;
+ 	int kind;
+
+ 	len = snprint(buf, nbuf, "%d", dimenspec(d));
+ 	kind = dimenkind(d);
+ 	if(kind == Dpercent)
+ 		buf[len++] = '%';
+ 	else if(kind == Drelative)
+ 		buf[len++] = '*';
+ 	return len;
+ }
+
+ // Debugging aid: print msg on a line of its own, then every item
+ // of the list (using the %I format), to file descriptor 2.
+ void
+ printitems(Item* items, char* msg)
+ {
+ 	Item* cur;
+
+ 	fprint(2, "%s\n", msg);
+ 	for(cur = items; cur != nil; cur = cur->next)
+ 		fprint(2, "%I", cur);
+ }
+
+ // Allocate a Genattr holding the given id, class, style, title
+ // and script events.  The pointers are stored as is, not copied.
+ static Genattr*
+ newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events)
+ {
+ 	Genattr* ga;
+
+ 	ga = (Genattr*)emalloc(sizeof(Genattr));
+ 	ga->events = events;
+ 	ga->title = title;
+ 	ga->style = style;
+ 	ga->class = class;
+ 	ga->id = id;
+ 	return ga;
+ }
+
+ // Allocate a form field of type ftype belonging to form, linked in
+ // front of link.  name and value are stored as is (freeformfield
+ // frees them); ctlid starts out as -1.
+ static Formfield*
+ newformfield(int ftype, int fieldid, Form* form, Rune* name,
+ 	Rune* value, int size, int maxlength, Formfield* link)
+ {
+ 	Formfield* fld;
+
+ 	fld = (Formfield*)emalloc(sizeof(Formfield));
+ 	fld->next = link;
+ 	fld->form = form;
+ 	fld->ftype = ftype;
+ 	fld->fieldid = fieldid;
+ 	fld->name = name;
+ 	fld->value = value;
+ 	fld->size = size;
+ 	fld->maxlength = maxlength;
+ 	fld->ctlid = -1;
+ 	return fld;
+ }
+
+ // Allocate an Option, linked in front of link.
+ // Ownership of value and display passes to the Option.
+ static Option*
+ newoption(int selected, Rune* value, Rune* display, Option* link)
+ {
+ 	Option* opt;
+
+ 	opt = (Option*)emalloc(sizeof(Option));
+ 	opt->next = link;
+ 	opt->display = display;
+ 	opt->value = value;
+ 	opt->selected = selected;
+ 	return opt;
+ }
+
+ // Allocate a Form with no fields yet, linked in front of link.
+ // name and action are stored as is (freeform frees them).
+ static Form*
+ newform(int formid, Rune* name, Rune* action, int target, int method, Form* link)
+ {
+ 	Form* frm;
+
+ 	frm = (Form*)emalloc(sizeof(Form));
+ 	frm->next = link;
+ 	frm->formid = formid;
+ 	frm->name = name;
+ 	frm->action = action;
+ 	frm->target = target;
+ 	frm->method = method;
+ 	// fields get attached later
+ 	frm->fields = nil;
+ 	frm->nfields = 0;
+ 	return frm;
+ }
+
+ // Allocate a Table with the given attributes, linked in front of
+ // link.  The caption placement starts as ALbottom with no caption
+ // layout.
+ // NOTE(review): tok is stored in tabletok and then immediately
+ // overwritten with nil, so the table ends up holding no token;
+ // confirm which of the two was intended.
+ static Table*
+ newtable(int tableid, Align align, Dimen width, int border,
+ 	int cellspacing, int cellpadding, Background bg, Token* tok, Table* link)
+ {
+ 	Table* tbl;
+
+ 	tbl = (Table*)emalloc(sizeof(Table));
+ 	tbl->next = link;
+ 	tbl->tableid = tableid;
+ 	tbl->align = align;
+ 	tbl->width = width;
+ 	tbl->border = border;
+ 	tbl->cellspacing = cellspacing;
+ 	tbl->cellpadding = cellpadding;
+ 	tbl->background = bg;
+ 	tbl->caption_lay = nil;
+ 	tbl->caption_place = ALbottom;
+ 	tbl->tabletok = tok;
+ 	tbl->tabletok = nil;
+ 	return tbl;
+ }
+
+ // Allocate a Tablerow with the given alignment, background and
+ // flags, linked in front of link.
+ static Tablerow*
+ newtablerow(Align align, Background bg, int flags, Tablerow* link)
+ {
+ 	Tablerow* row;
+
+ 	row = (Tablerow*)emalloc(sizeof(Tablerow));
+ 	row->next = link;
+ 	row->flags = flags;
+ 	row->background = bg;
+ 	row->align = align;
+ 	return row;
+ }
+
+ // Allocate a Tablecell with the given spans, alignment, size
+ // specifications, background and flags, linked in front of link.
+ // The cell starts with no layout.
+ static Tablecell*
+ newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec,
+ 	Background bg, int flags, Tablecell* link)
+ {
+ 	Tablecell* cell;
+
+ 	cell = (Tablecell*)emalloc(sizeof(Tablecell));
+ 	cell->next = link;
+ 	cell->cellid = cellid;
+ 	cell->rowspan = rowspan;
+ 	cell->colspan = colspan;
+ 	cell->align = align;
+ 	cell->wspec = wspec;
+ 	cell->hspec = hspec;
+ 	cell->background = bg;
+ 	cell->flags = flags;
+ 	cell->lay = nil;
+ 	return cell;
+ }
+
+// Allocate an Anchor (index, name, href, target) and prepend it to
+// the list headed by link.
+static Anchor*
+newanchor(int index, Rune* name, Rune* href, int target, Anchor* link)
+{
+	Anchor* anchor;
+
+	anchor = (Anchor*)emalloc(sizeof(*anchor));
+	anchor->next = link;
+	anchor->target = target;
+	anchor->href = href;
+	anchor->name = name;
+	anchor->index = index;
+	return anchor;
+}
+
+// Allocate a DestAnchor tying name to item and prepend it to the
+// list headed by link.
+static DestAnchor*
+newdestanchor(int index, Rune* name, Item* item, DestAnchor* link)
+{
+	DestAnchor* dest;
+
+	dest = (DestAnchor*)emalloc(sizeof(*dest));
+	dest->next = link;
+	dest->item = item;
+	dest->name = name;
+	dest->index = index;
+	return dest;
+}
+
+// Allocate an SEvent of the given type holding script, and prepend
+// it to the list headed by link.
+static SEvent*
+newscriptevent(int type, Rune* script, SEvent* link)
+{
+	SEvent* ev;
+
+	ev = (SEvent*)emalloc(sizeof(*ev));
+	ev->next = link;
+	ev->script = script;
+	ev->type = type;
+	return ev;
+}
+
+// Free every SEvent on the list starting at ehead, together with
+// the script string each one holds.  A nil list is fine.
+static void
+freescriptevents(SEvent* ehead)
+{
+	SEvent* cur;
+	SEvent* following;
+
+	for(cur = ehead; cur != nil; cur = following) {
+		// grab the link before the node is released
+		following = cur->next;
+		free(cur->script);
+		free(cur);
+	}
+}
+
+// Pack kind and spec into a Dimen.
+// If spec has any bits that overlap Dkindmask it cannot be stored;
+// it is replaced by 0 (with a warning when warn is set).
+static Dimen
+makedimen(int kind, int spec)
+{
+	Dimen result;
+	int value;
+
+	value = spec;
+	if((value & Dkindmask) != 0) {
+		if(warn)
+			fprint(2, "warning: dimension spec too big: %d\n", spec);
+		value = 0;
+	}
+	result.kindspec = kind | value;
+	return result;
+}
+
+// Return the kind bits of d (the part selected by Dkindmask).
+int
+dimenkind(Dimen d)
+{
+	int kind;
+
+	kind = d.kindspec & Dkindmask;
+	return kind;
+}
+
+// Return the spec bits of d (the part selected by Dspecmask).
+int
+dimenspec(Dimen d)
+{
+	int spec;
+
+	spec = d.kindspec & Dspecmask;
+	return spec;
+}
+
+// Allocate a Kidinfo and prepend it to the list headed by link.
+// A plain frame (isframeset == 0) gets default settings: automatic
+// scrolling in both directions, FRKIDMARGIN margins and a frame border.
+// Those fields are not set here for a frameset.
+static Kidinfo*
+newkidinfo(int isframeset, Kidinfo* link)
+{
+	Kidinfo* kid;
+
+	kid = (Kidinfo*)emalloc(sizeof(*kid));
+	kid->next = link;
+	kid->isframeset = isframeset;
+	if(isframeset == 0) {
+		kid->framebd = 1;
+		kid->marginh = FRKIDMARGIN;
+		kid->marginw = FRKIDMARGIN;
+		kid->flags = FRhscrollauto|FRvscrollauto;
+	}
+	return kid;
+}
+
+// Allocate a Docinfo and set it to the defaults chosen by resetdocinfo.
+static Docinfo*
+newdocinfo(void)
+{
+	Docinfo* info;
+
+	info = (Docinfo*)emalloc(sizeof(*info));
+	resetdocinfo(info);
+	return info;
+}
+
+// Put *d back into its initial state: everything zeroed, then the
+// non-zero defaults filled in (white background, black text, blue
+// links, FTself target, ISO_8859_1 charset, TextJavascript scripts,
+// frameid -1).
+static void
+resetdocinfo(Docinfo* d)
+{
+	memset(d, 0, sizeof(*d));
+
+	// colors
+	d->background = makebackground(nil, White);
+	d->text = Black;
+	d->alink = Blue;
+	d->vlink = Blue;
+	d->link = Blue;
+
+	// everything else
+	d->frameid = -1;
+	d->scripttype = TextJavascript;
+	d->chset = ISO_8859_1;
+	d->target = FTself;
+}
+
+// Use targetmap array to keep track of name <-> targetid mapping.
+// The array is allocated with emalloc() and is meant to live for the
+// rest of the program (it is never freed).
+// The first four slots are the predefined targets; targetid() appends
+// any other names after them.
+static void
+targetmapinit(void)
+{
+ targetmapsize = 10;
+ targetmap = (StringInt*)emalloc(targetmapsize*sizeof(StringInt));
+ memset(targetmap, 0, targetmapsize*sizeof(StringInt));
+ targetmap[0].key = _Strdup(L(L_top));
+ targetmap[0].val = FTtop;
+ targetmap[1].key = _Strdup(L(L_self));
+ targetmap[1].val = FTself;
+ targetmap[2].key = _Strdup(L(L_parent));
+ targetmap[2].val = FTparent;
+ targetmap[3].key = _Strdup(L(L_blank));
+ targetmap[3].val = FTblank;
+ ntargets = 4;
+}
+
+// Map a target name to its id.
+// An empty name means FTself.  A name already in targetmap yields
+// the stored value; otherwise the name is copied into the next free
+// slot (growing the map ten entries at a time) and that slot's index
+// becomes the new id.
+int
+targetid(Rune* s)
+{
+	int len;
+	int slot;
+	Rune* copy;
+
+	len = _Strlen(s);
+	if(len == 0)
+		return FTself;
+
+	slot = 0;
+	while(slot < ntargets) {
+		if(_Strcmp(s, targetmap[slot].key) == 0)
+			return targetmap[slot].val;
+		slot++;
+	}
+
+	// not found: slot is now the first unused index
+	if(slot >= targetmapsize) {
+		targetmapsize += 10;
+		targetmap = (StringInt*)erealloc(targetmap, targetmapsize*sizeof(StringInt));
+	}
+	copy = (Rune*)emalloc((len+1)*sizeof(Rune));
+	memmove(copy, s, (len+1)*sizeof(Rune));
+	targetmap[slot].key = copy;
+	targetmap[slot].val = slot;
+	ntargets++;
+	return slot;
+}
+
+// Inverse of targetid: return the name recorded for targid, or the
+// "?" string (L(Lquestion)) when no entry has that id.
+// The result points into targetmap; it is not a copy.
+Rune*
+targetname(int targid)
+{
+	int slot;
+
+	slot = 0;
+	while(slot < ntargets) {
+		if(targetmap[slot].val == targid)
+			return targetmap[slot].key;
+		slot++;
+	}
+	return L(Lquestion);
+}
+
+// Convert HTML color spec to RGB value, returning dflt if can't.
+// Argument is supposed to be a valid HTML color, or "".
+// Named colors are looked up in color_tab; anything else is read as
+// hexadecimal digits with an optional leading '#'.
+// Returns dflt if s is nil, has no hex digits at all ("" or "#"),
+// or has trailing characters that are not hex digits.
+static int
+color(Rune* s, int dflt)
+{
+	int v;
+	Rune* digits;
+	Rune* rest;
+
+	if(s == nil)
+		return dflt;
+	if(_lookup(color_tab, NCOLORS, s, _Strlen(s), &v))
+		return v;
+	digits = s;
+	if(digits[0] == '#')
+		digits++;
+	v = _Strtol(digits, &rest, 16);
+	// rest == digits means nothing was converted; without this check an
+	// empty spec would come back as 0 (black) rather than dflt.
+	// (assumes _Strtol leaves *eptr at the start when it converts
+	// nothing, as strtol does -- TODO confirm)
+	if(rest != digits && *rest == 0)
+		return v;
+	return dflt;
+}
+
+// Debugging
+
+#define HUGEPIX 10000
+
+// A "shallow" validitem, that doesn't follow next links
+// or descend into tables.
+// Returns 1 if the item looks sane, 0 otherwise.
+// First the fields common to every Item are checked, then the
+// fields specific to the item's tag.
+// Pointer checks go through validptr, which dereferences the pointer,
+// so a bad pointer faults rather than producing a 0 result.
+static int
+validitem(Item* i)
+{
+ int ok;
+ Itext* ti;
+ Irule* ri;
+ Iimage* ii;
+ Ifloat* fi;
+ int a;
+
+ // checks common to all item kinds
+ ok = (i->tag >= Itexttag && i->tag <= Ispacertag) &&
+ (i->next == nil || validptr(i->next)) &&
+ (i->width >= 0 && i->width < HUGEPIX) &&
+ (i->height >= 0 && i->height < HUGEPIX) &&
+ (i->ascent > -HUGEPIX && i->ascent < HUGEPIX) &&
+ (i->anchorid >= 0) &&
+ (i->genattr == nil || validptr(i->genattr));
+ // also, could check state for ridiculous combinations
+ // also, could check anchorid for within-doc-range
+ if(ok)
+ switch(i->tag) {
+ case Itexttag:
+ ti = (Itext*)i;
+ ok = validStr(ti->s) &&
+ (ti->fnt >= 0 && ti->fnt < NumStyle*NumSize) &&
+ (ti->ul == ULnone || ti->ul == ULunder || ti->ul == ULmid);
+ break;
+ case Iruletag:
+ ri = (Irule*)i;
+ // align may be either a vertical or a horizontal alignment value
+ ok = (validvalign(ri->align) || validhalign(ri->align)) &&
+ (ri->size >=0 && ri->size < HUGEPIX);
+ break;
+ case Iimagetag:
+ ii = (Iimage*)i;
+ ok = (ii->imsrc == nil || validptr(ii->imsrc)) &&
+ (ii->item.width >= 0 && ii->item.width < HUGEPIX) &&
+ (ii->item.height >= 0 && ii->item.height < HUGEPIX) &&
+ (ii->imwidth >= 0 && ii->imwidth < HUGEPIX) &&
+ (ii->imheight >= 0 && ii->imheight < HUGEPIX) &&
+ (ii->altrep == nil || validStr(ii->altrep)) &&
+ (ii->map == nil || validptr(ii->map)) &&
+ (validvalign(ii->align) || validhalign(ii->align)) &&
+ (ii->nextimage == nil || validptr(ii->nextimage));
+ break;
+ case Iformfieldtag:
+ ok = validformfield(((Iformfield*)i)->formfield);
+ break;
+ case Itabletag:
+ // shallow: the table itself is checked by validitems/validtable
+ ok = validptr((Itable*)i);
+ break;
+ case Ifloattag:
+ fi = (Ifloat*)i;
+ // only images and tables may be floated
+ ok = (fi->side == ALleft || fi->side == ALright) &&
+ validitem(fi->item) &&
+ (fi->item->tag == Iimagetag || fi->item->tag == Itabletag);
+ break;
+ case Ispacertag:
+ a = ((Ispacer*)i)->spkind;
+ ok = a==ISPnull || a==ISPvline || a==ISPhspace || a==ISPgeneral;
+ break;
+ default:
+ ok = 0;
+ }
+ return ok;
+}
+
+// "Deep" validation: walk the whole item list, checking each item
+// with validitem and descending into tables, including a table
+// wrapped in a float.  Stops at the first bad item, reports it on
+// fd 2, and returns 0; returns 1 if every item passed.
+// A nil list is valid.
+int
+validitems(Item* i)
+{
+	int ok;
+	Item* inner;
+
+	for(ok = 1; i != nil && ok; i = i->next) {
+		ok = validitem(i);
+		if(ok && i->tag == Itabletag)
+			ok = validtable(((Itable*)i)->table);
+		else if(ok && i->tag == Ifloattag) {
+			inner = ((Ifloat*)i)->item;
+			if(inner->tag == Itabletag)
+				ok = validtable(((Itable*)inner)->table);
+		}
+		if(!ok)
+			fprint(2, "invalid item: %I\n", i);
+	}
+	return ok;
+}
+
+// Sanity-check a single Formfield: its type and id are in range and
+// every non-nil pointer it holds can be dereferenced.
+// An attached image is checked with (shallow) validitem.
+// Returns 1 if the field looks sane, 0 otherwise.
+static int
+validformfield(Formfield* f)
+{
+ int ok;
+
+ ok = (f->next == nil || validptr(f->next)) &&
+ (f->ftype >= 0 && f->ftype <= Ftextarea) &&
+ f->fieldid >= 0 &&
+ (f->form == nil || validptr(f->form)) &&
+ (f->name == nil || validStr(f->name)) &&
+ (f->value == nil || validStr(f->value)) &&
+ (f->options == nil || validptr(f->options)) &&
+ (f->image == nil || validitem(f->image)) &&
+ (f->events == nil || validptr(f->events));
+ // when all built, should have f->fieldid < f->form->nfields,
+ // but this may be called during build...
+ return ok;
+}
+
+// "deep" validation -- checks cell contents too
+// Returns 1 if the table looks sane, 0 otherwise.
+// The table's own fields (and its caption items) are always checked;
+// rows, columns, cells and the grid are checked only once the table
+// has non-zero row and column counts.
+static int
+validtable(Table* t)
+{
+ int ok;
+ int i, j;
+ Tablecell* c;
+
+ ok = (t->next == nil || validptr(t->next)) &&
+ t->nrow >= 0 &&
+ t->ncol >= 0 &&
+ t->ncell >= 0 &&
+ validalign(t->align) &&
+ validdimen(t->width) &&
+ (t->border >= 0 && t->border < HUGEPIX) &&
+ (t->cellspacing >= 0 && t->cellspacing < HUGEPIX) &&
+ (t->cellpadding >= 0 && t->cellpadding < HUGEPIX) &&
+ validitems(t->caption) &&
+ (t->caption_place == ALtop || t->caption_place == ALbottom) &&
+ (t->totw >= 0 && t->totw < HUGEPIX) &&
+ (t->toth >= 0 && t->toth < HUGEPIX) &&
+ (t->tabletok == nil || validptr(t->tabletok));
+ // during parsing, t->rows has list;
+ // only when parsing is done is t->nrow set > 0
+ if(ok && t->nrow > 0 && t->ncol > 0) {
+ // table is "finished"
+ // here rows and cols are indexed as arrays of nrow and ncol entries
+ for(i = 0; i < t->nrow && ok; i++)
+ ok = validtablerow(t->rows+i);
+ for(j = 0; j < t->ncol && ok; j++)
+ ok = validtablecol(t->cols+j);
+ for(c = t->cells; c != nil && ok; c = c->next)
+ ok = validtablecell(c);
+ // every grid slot must hold a dereferenceable pointer
+ for(i = 0; i < t->nrow && ok; i++)
+ for(j = 0; j < t->ncol && ok; j++)
+ ok = validptr(t->grid[i][j]);
+ }
+ return ok;
+}
+
+// Return 1 if a is ALnone or one of the vertical alignment values
+// (middle, bottom, top, baseline), else 0.
+static int
+validvalign(int a)
+{
+	if(a == ALnone || a == ALmiddle)
+		return 1;
+	if(a == ALbottom || a == ALtop)
+		return 1;
+	return a == ALbaseline;
+}
+
+// Return 1 if a is ALnone or one of the horizontal alignment values
+// (left, center, right, justify, char), else 0.
+static int
+validhalign(int a)
+{
+	if(a == ALnone || a == ALleft)
+		return 1;
+	if(a == ALcenter || a == ALright)
+		return 1;
+	return a == ALjustify || a == ALchar;
+}
+
+// An Align is valid when its horizontal and its vertical component
+// are both valid.  The vertical part is only looked at if the
+// horizontal part passed.
+static int
+validalign(Align a)
+{
+	if(!validhalign(a.halign))
+		return 0;
+	return validvalign(a.valign) != 0;
+}
+
+// Check that the spec part of d makes sense for its kind:
+// Dnone must carry a zero spec, Dpixels a spec below HUGEPIX,
+// Dpercent and Drelative accept any spec.  Any other kind is invalid.
+static int
+validdimen(Dimen d)
+{
+	int spec;
+
+	spec = d.kindspec & Dspecmask;
+	switch(d.kindspec & Dkindmask) {
+	case Dnone:
+		return spec == 0;
+	case Dpixels:
+		return spec < HUGEPIX;
+	case Dpercent:
+	case Drelative:
+		return 1;
+	}
+	return 0;
+}
+
+// Sanity-check one Tablerow: its cell list pointer (if any) can be
+// dereferenced, height and ascent are within HUGEPIX bounds, and
+// the alignment is valid.  Returns 1 or 0.
+static int
+validtablerow(Tablerow* r)
+{
+	if(r->cells != nil && !validptr(r->cells))
+		return 0;
+	if(r->height < 0 || r->height >= HUGEPIX)
+		return 0;
+	if(r->ascent <= -HUGEPIX || r->ascent >= HUGEPIX)
+		return 0;
+	return validalign(r->align) != 0;
+}
+
+// Sanity-check one Tablecol: width within [0, HUGEPIX) and a valid
+// alignment.  Returns 1 or 0.
+static int
+validtablecol(Tablecol* c)
+{
+	if(c->width < 0 || c->width >= HUGEPIX)
+		return 0;
+	return validalign(c->align) != 0;
+}
+
+// Sanity-check one Tablecell, then (deeply) validate its content.
+// Non-nil pointers must be dereferenceable, spans and grid position
+// must be non-negative, and alignment and width spec must be valid.
+// Returns 1 if the cell and its content look sane, 0 otherwise.
+static int
+validtablecell(Tablecell* c)
+{
+ int ok;
+
+ ok = (c->next == nil || validptr(c->next)) &&
+ (c->nextinrow == nil || validptr(c->nextinrow)) &&
+ (c->content == nil || validptr(c->content)) &&
+ (c->lay == nil || validptr(c->lay)) &&
+ c->rowspan >= 0 &&
+ c->colspan >= 0 &&
+ validalign(c->align) &&
+ validdimen(c->wspec) &&
+ c->row >= 0 &&
+ c->col >= 0;
+ if(ok) {
+ // the cell's items get the full list validation
+ if(c->content != nil)
+ ok = validitems(c->content);
+ }
+ return ok;
+}
+
+// Crude pointer check: read one byte through p and return 1.
+// There is no failure return; an invalid pointer is expected to
+// fault on the read.  Callers must not pass nil.
+static int
+validptr(void* p)
+{
+ // TODO: a better job of this.
+ // For now, just dereference, which cause a bomb
+ // if not valid
+ // The byte is stored in a static so the read has a visible effect.
+ static char c;
+
+ c = *((char*)p);
+ return 1;
+}
+
+// A string is valid when it is non-nil and its first byte can be
+// read (see validptr).  Returns 1 or 0.
+static int
+validStr(Rune* s)
+{
+	if(s == nil)
+		return 0;
+	return validptr(s) != 0;
+}
diff --git a/src/libhtml/impl.h b/src/libhtml/impl.h
new file mode 100644
index 00000000..f8c79ea3
--- /dev/null
+++ b/src/libhtml/impl.h
@@ -0,0 +1,163 @@
+
+// UTILS
+typedef struct List List;
+typedef struct Strlist Strlist;
+
+// List of integers (and also generic list with next pointer at beginning)
+struct List
+{
+ List* next;
+ int val;
+};
+
+struct Strlist
+{
+ Strlist* next;
+ Rune* val;
+};
+
+extern int _inclass(Rune c, Rune* cl);
+extern int _listlen(List* l);
+extern Rune* _ltoStr(int n);
+extern List* _newlist(int val, List* rest);
+extern Rune* _newstr(int n);
+extern int _prefix(Rune* pre, Rune* s);
+extern List* _revlist(List* l);
+extern void _splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
+extern void _splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2);
+extern int _splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen);
+extern Rune* _Stradd(Rune*s1, Rune* s2, int n);
+extern Rune* _Strclass(Rune* s, Rune* cl);
+extern int _Strcmp(Rune* s1, Rune* s2);
+extern Rune* _Strdup(Rune* s);
+extern Rune* _Strdup2(Rune* s, Rune* t);
+extern int _Streqn(Rune* s1, int n1, Rune* s2);
+extern int _Strlen(Rune* s);
+extern Rune* _Strnclass(Rune* s, Rune* cl, int n);
+extern int _Strncmpci(Rune* s1, int n1, Rune* s2);
+extern Rune* _Strndup(Rune* s, int n);
+extern Rune* _Strnrclass(Rune* s, Rune* cl, int n);
+extern Rune* _Strrclass(Rune* s, Rune* cl);
+extern Rune* _Strsubstr(Rune* s, int start, int stop);
+extern long _Strtol(Rune* s, Rune** eptr, int base);
+extern void _trimwhite(Rune* s, int n, Rune** pans, int* panslen);
+
+extern Rune notwhitespace[];
+extern Rune whitespace[];
+
+// STRINTTAB
+typedef struct StringInt StringInt;
+
+// Element of String-Int table (used for keyword lookup)
+struct StringInt
+{
+ Rune* key;
+ int val;
+};
+
+extern int _lookup(StringInt* t, int n, Rune* key, int keylen, int* pans);
+extern StringInt* _makestrinttab(Rune** a, int n);
+extern Rune* _revlookup(StringInt* t, int n, int val);
+
+// Colors, in html format, not Plan 9 format. (RGB values in bottom 3 bytes)
+enum {
+ White = 0xFFFFFF,
+ Black = 0x000000,
+ Blue = 0x0000CC,
+};
+
+// LEX
+
+// HTML 4.0 tags (plus blink, nobr)
+// sorted in lexical order; used as array indices
+// Notfound and Comment come first, ahead of the real tags.
+// The lexer marks an end tag </x> by adding RBRA to the start tag's
+// value (for example Tscript+RBRA), so token tags span twice Numtags;
+// Data, the tag used for text tokens, sits just past that range.
+enum {
+ Notfound,
+ Comment,
+ Ta, Tabbr, Tacronym, Taddress, Tapplet, Tarea,
+ Tb, Tbase, Tbasefont, Tbdo, Tbig, Tblink,
+ Tblockquote, Tbody, Tbq, Tbr, Tbutton,
+ Tcaption, Tcenter, Tcite, Tcode, Tcol, Tcolgroup,
+ Tdd, Tdel, Tdfn, Tdir, Tdiv, Tdl, Tdt,
+ Tem,
+ Tfieldset, Tfont, Tform, Tframe, Tframeset,
+ Th1, Th2, Th3, Th4, Th5, Th6,
+ Thead, Thr, Thtml,
+ Ti, Tiframe, Timg, Tinput, Tins, Tisindex,
+ Tkbd,
+ Tlabel, Tlegend, Tli, Tlink,
+ Tmap, Tmenu, Tmeta,
+ Tnobr, Tnoframes, Tnoscript,
+ Tobject, Tol, Toptgroup, Toption,
+ Tp, Tparam, Tpre,
+ Tq,
+ Ts, Tsamp, Tscript, Tselect, Tsmall,
+ Tspan, Tstrike, Tstrong, Tstyle, Tsub, Tsup,
+ Ttable, Ttbody, Ttd, Ttextarea, Ttfoot,
+ Tth, Tthead, Ttitle, Ttr, Ttt,
+ Tu, Tul,
+ Tvar,
+ Numtags,
+ RBRA = Numtags,
+ Data = Numtags+RBRA
+};
+
+// HTML 4.0 tag attributes
+// Keep sorted in lexical order
+enum {
+ Aabbr, Aaccept_charset, Aaccess_key, Aaction,
+ Aalign, Aalink, Aalt, Aarchive, Aaxis,
+ Abackground, Abgcolor, Aborder,
+ Acellpadding, Acellspacing, Achar, Acharoff,
+ Acharset, Achecked, Acite, Aclass, Aclassid,
+ Aclear, Acode, Acodebase, Acodetype, Acolor,
+ Acols, Acolspan, Acompact, Acontent, Acoords,
+ Adata, Adatetime, Adeclare, Adefer, Adir, Adisabled,
+ Aenctype,
+ Aface, Afor, Aframe, Aframeborder,
+ Aheaders, Aheight, Ahref, Ahreflang, Ahspace, Ahttp_equiv,
+ Aid, Aismap,
+ Alabel, Alang, Alink, Alongdesc,
+ Amarginheight, Amarginwidth, Amaxlength,
+ Amedia, Amethod, Amultiple,
+ Aname, Anohref, Anoresize, Anoshade, Anowrap,
+ Aobject, Aonblur, Aonchange, Aonclick, Aondblclick,
+ Aonfocus, Aonkeypress, Aonkeyup, Aonload,
+ Aonmousedown, Aonmousemove, Aonmouseout,
+ Aonmouseover, Aonmouseup, Aonreset, Aonselect,
+ Aonsubmit, Aonunload,
+ Aprofile, Aprompt,
+ Areadonly, Arel, Arev, Arows, Arowspan, Arules,
+ Ascheme, Ascope, Ascrolling, Aselected, Ashape,
+ Asize, Aspan, Asrc, Astandby, Astart, Astyle, Asummary,
+ Atabindex, Atarget, Atext, Atitle, Atype,
+ Ausemap,
+ Avalign, Avalue, Avaluetype, Aversion, Avlink, Avspace,
+ Awidth,
+ Numattrs
+};
+
+struct Attr
+{
+ Attr* next; // in list of attrs for a token
+ int attid; // Aabbr, etc.
+ Rune* value;
+};
+
+struct Token
+{
+ int tag; // Ta, etc
+ Rune* text; // text in Data, attribute text in tag
+ Attr* attr; // list of Attrs
+ int starti; // index into source buffer of token start
+};
+
+extern Rune** tagnames;
+extern Rune** attrnames;
+
+extern void _freetokens(Token* tarray, int n);
+extern Token* _gettoks(uchar* data, int datalen, int chset, int mtype, int* plen);
+extern int _tokaval(Token* t, int attid, Rune** pans, int xfer);
+
+#pragma varargck type "T" Token*
+
+#include "runetab.h"
diff --git a/src/libhtml/lex.c b/src/libhtml/lex.c
new file mode 100644
index 00000000..99c5fc12
--- /dev/null
+++ b/src/libhtml/lex.c
@@ -0,0 +1,1384 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <ctype.h>
+#include <html.h>
+#include "impl.h"
+
+typedef struct TokenSource TokenSource;
+struct TokenSource
+{
+ int i; // index of next byte to use
+ uchar* data; // all the data
+ int edata; // data[0:edata] is valid
+ int chset; // one of US_Ascii, etc.
+ int mtype; // TextHtml or TextPlain
+};
+
+enum {
+ EOF = -2,
+ EOB = -1
+};
+
+#define ISNAMCHAR(c) ((c)<256 && (isalpha(c) || isdigit(c) || (c) == '-' || (c) == '.'))
+
+#define SMALLBUFSIZE 240
+#define BIGBUFSIZE 2000
+
+// HTML 4.0 tag names.
+// Keep sorted, and in correspondence with the tag enum in impl.h.
+Rune **tagnames;
+char *_tagnames[] = {
+ " ",
+ "!",
+ "a",
+ "abbr",
+ "acronym",
+ "address",
+ "applet",
+ "area",
+ "b",
+ "base",
+ "basefont",
+ "bdo",
+ "big",
+ "blink",
+ "blockquote",
+ "body",
+ "bq",
+ "br",
+ "button",
+ "caption",
+ "center",
+ "cite",
+ "code",
+ "col",
+ "colgroup",
+ "dd",
+ "del",
+ "dfn",
+ "dir",
+ "div",
+ "dl",
+ "dt",
+ "em",
+ "fieldset",
+ "font",
+ "form",
+ "frame",
+ "frameset",
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "head",
+ "hr",
+ "html",
+ "i",
+ "iframe",
+ "img",
+ "input",
+ "ins",
+ "isindex",
+ "kbd",
+ "label",
+ "legend",
+ "li",
+ "link",
+ "map",
+ "menu",
+ "meta",
+ "nobr",
+ "noframes",
+ "noscript",
+ "object",
+ "ol",
+ "optgroup",
+ "option",
+ "p",
+ "param",
+ "pre",
+ "q",
+ "s",
+ "samp",
+ "script",
+ "select",
+ "small",
+ "span",
+ "strike",
+ "strong",
+ "style",
+ "sub",
+ "sup",
+ "table",
+ "tbody",
+ "td",
+ "textarea",
+ "tfoot",
+ "th",
+ "thead",
+ "title",
+ "tr",
+ "tt",
+ "u",
+ "ul",
+ "var"
+};
+
+// HTML 4.0 attribute names.
+// Keep sorted, and in correspondence with the attribute enum in impl.h.
+Rune **attrnames;
+char* _attrnames[] = {
+ "abbr",
+ "accept-charset",
+ "access-key",
+ "action",
+ "align",
+ "alink",
+ "alt",
+ "archive",
+ "axis",
+ "background",
+ "bgcolor",
+ "border",
+ "cellpadding",
+ "cellspacing",
+ "char",
+ "charoff",
+ "charset",
+ "checked",
+ "cite",
+ "class",
+ "classid",
+ "clear",
+ "code",
+ "codebase",
+ "codetype",
+ "color",
+ "cols",
+ "colspan",
+ "compact",
+ "content",
+ "coords",
+ "data",
+ "datetime",
+ "declare",
+ "defer",
+ "dir",
+ "disabled",
+ "enctype",
+ "face",
+ "for",
+ "frame",
+ "frameborder",
+ "headers",
+ "height",
+ "href",
+ "hreflang",
+ "hspace",
+ "http-equiv",
+ "id",
+ "ismap",
+ "label",
+ "lang",
+ "link",
+ "longdesc",
+ "marginheight",
+ "marginwidth",
+ "maxlength",
+ "media",
+ "method",
+ "multiple",
+ "name",
+ "nohref",
+ "noresize",
+ "noshade",
+ "nowrap",
+ "object",
+ "onblur",
+ "onchange",
+ "onclick",
+ "ondblclick",
+ "onfocus",
+ "onkeypress",
+ "onkeyup",
+ "onload",
+ "onmousedown",
+ "onmousemove",
+ "onmouseout",
+ "onmouseover",
+ "onmouseup",
+ "onreset",
+ "onselect",
+ "onsubmit",
+ "onunload",
+ "profile",
+ "prompt",
+ "readonly",
+ "rel",
+ "rev",
+ "rows",
+ "rowspan",
+ "rules",
+ "scheme",
+ "scope",
+ "scrolling",
+ "selected",
+ "shape",
+ "size",
+ "span",
+ "src",
+ "standby",
+ "start",
+ "style",
+ "summary",
+ "tabindex",
+ "target",
+ "text",
+ "title",
+ "type",
+ "usemap",
+ "valign",
+ "value",
+ "valuetype",
+ "version",
+ "vlink",
+ "vspace",
+ "width"
+};
+
+
+// Character entity to unicode character number map.
+// Keep sorted by name.
+StringInt *chartab;
+AsciiInt _chartab[142] = {
+ {"AElig", 198},
+ {"Aacute", 193},
+ {"Acirc", 194},
+ {"Agrave", 192},
+ {"Aring", 197},
+ {"Atilde", 195},
+ {"Auml", 196},
+ {"Ccedil", 199},
+ {"ETH", 208},
+ {"Eacute", 201},
+ {"Ecirc", 202},
+ {"Egrave", 200},
+ {"Euml", 203},
+ {"Iacute", 205},
+ {"Icirc", 206},
+ {"Igrave", 204},
+ {"Iuml", 207},
+ {"Ntilde", 209},
+ {"Oacute", 211},
+ {"Ocirc", 212},
+ {"Ograve", 210},
+ {"Oslash", 216},
+ {"Otilde", 213},
+ {"Ouml", 214},
+ {"THORN", 222},
+ {"Uacute", 218},
+ {"Ucirc", 219},
+ {"Ugrave", 217},
+ {"Uuml", 220},
+ {"Yacute", 221},
+ {"aacute", 225},
+ {"acirc", 226},
+ {"acute", 180},
+ {"aelig", 230},
+ {"agrave", 224},
+ {"alpha", 945},
+ {"amp", 38},
+ {"aring", 229},
+ {"atilde", 227},
+ {"auml", 228},
+ {"beta", 946},
+ {"brvbar", 166},
+ {"ccedil", 231},
+ {"cdots", 8943},
+ {"cedil", 184},
+ {"cent", 162},
+ {"chi", 967},
+ {"copy", 169},
+ {"curren", 164},
+ {"ddots", 8945},
+ {"deg", 176},
+ {"delta", 948},
+ {"divide", 247},
+ {"eacute", 233},
+ {"ecirc", 234},
+ {"egrave", 232},
+ {"emdash", 8212},
+ {"emsp", 8195},
+ {"endash", 8211},
+ {"ensp", 8194},
+ {"epsilon", 949},
+ {"eta", 951},
+ {"eth", 240},
+ {"euml", 235},
+ {"frac12", 189},
+ {"frac14", 188},
+ {"frac34", 190},
+ {"gamma", 947},
+ {"gt", 62},
+ {"iacute", 237},
+ {"icirc", 238},
+ {"iexcl", 161},
+ {"igrave", 236},
+ {"iota", 953},
+ {"iquest", 191},
+ {"iuml", 239},
+ {"kappa", 954},
+ {"lambda", 955},
+ {"laquo", 171},
+ {"ldots", 8230},
+ {"lt", 60},
+ {"macr", 175},
+ {"micro", 181},
+ {"middot", 183},
+ {"mu", 956},
+ {"nbsp", 160},
+ {"not", 172},
+ {"ntilde", 241},
+ {"nu", 957},
+ {"oacute", 243},
+ {"ocirc", 244},
+ {"ograve", 242},
+ {"omega", 969},
+ {"omicron", 959},
+ {"ordf", 170},
+ {"ordm", 186},
+ {"oslash", 248},
+ {"otilde", 245},
+ {"ouml", 246},
+ {"para", 182},
+ {"phi", 966},
+ {"pi", 960},
+ {"plusmn", 177},
+ {"pound", 163},
+ {"psi", 968},
+ {"quad", 8193},
+ {"quot", 34},
+ {"raquo", 187},
+ {"reg", 174},
+ {"rho", 961},
+ {"sect", 167},
+ {"shy", 173},
+ {"sigma", 963},
+ {"sp", 8194},
+ {"sup1", 185},
+ {"sup2", 178},
+ {"sup3", 179},
+ {"szlig", 223},
+ {"tau", 964},
+ {"theta", 952},
+ {"thinsp", 8201},
+ {"thorn", 254},
+ {"times", 215},
+ {"trade", 8482},
+ {"uacute", 250},
+ {"ucirc", 251},
+ {"ugrave", 249},
+ {"uml", 168},
+ {"upsilon", 965},
+ {"uuml", 252},
+ {"varepsilon", 8712},
+ {"varphi", 981},
+ {"varpi", 982},
+ {"varrho", 1009},
+ {"vdots", 8942},
+ {"vsigma", 962},
+ {"vtheta", 977},
+ {"xi", 958},
+ {"yacute", 253},
+ {"yen", 165},
+ {"yuml", 255},
+ {"zeta", 950}
+};
+// Number of entries in the character entity table.
+// This has to be computed from the array _chartab: chartab is only a
+// pointer, so sizeof(chartab)/sizeof(chartab[0]) divides a pointer size
+// by a struct size and comes out as 0.
+#define NCHARTAB (sizeof(_chartab)/sizeof(_chartab[0]))
+
+// Characters Winstart..Winend are those that Windows
+// uses interpolated into the Latin1 set.
+// They aren't supposed to appear in HTML, but they do....
+enum {
+ Winstart = 127,
+ Winend = 159
+};
+
+static int winchars[]= { 8226, // 8226 is a bullet
+ 8226, 8226, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 352, 8249, 338, 8226, 8226, 8226,
+ 8226, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 353, 8250, 339, 8226, 8226, 376};
+
+static StringInt* tagtable; // initialized from tagnames
+static StringInt* attrtable; // initialized from attrnames
+
+static void lexinit();
+static int getplaindata(TokenSource* ts, Token* a, int* pai);
+static int getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
+static int getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai);
+static int gettag(TokenSource* ts, int starti, Token* a, int* pai);
+static Rune* buftostr(Rune* s, Rune* buf, int j);
+static int comment(TokenSource* ts);
+static int findstr(TokenSource* ts, Rune* s);
+static int ampersand(TokenSource* ts);
+//static int lowerc(int c);
+static int getchar(TokenSource* ts);
+static void ungetchar(TokenSource* ts, int c);
+static void backup(TokenSource* ts, int savei);
+//static void freeinsidetoken(Token* t);
+static void freeattrs(Attr* ahead);
+static Attr* newattr(int attid, Rune* value, Attr* link);
+static int Tconv(Fmt* f);
+
+int dbglex = 0;
+static int lexinited = 0;
+
+// One-time lexer setup, run from _gettoks while lexinited is still 0.
+// Builds the global chartab, tagnames and attrnames tables from their
+// char* counterparts (_chartab, _tagnames, _attrnames), builds the
+// lookup tables tagtable and attrtable from the name tables, and
+// installs Tconv as the 'T' print format.
+static void
+lexinit(void)
+{
+ chartab = cvtstringinttab(_chartab, nelem(_chartab));
+ // each name table must exist before the lookup table made from it
+ tagnames = cvtstringtab(_tagnames, nelem(_tagnames));
+ tagtable = _makestrinttab(tagnames, Numtags);
+ attrnames = cvtstringtab(_attrnames, nelem(_attrnames));
+ attrtable = _makestrinttab(attrnames, Numattrs);
+ fmtinstall('T', Tconv);
+ lexinited = 1;
+}
+
+// Allocate a TokenSource positioned at the start of data[0:edata].
+// chset must be one of US_Ascii, ISO_8859_1, UTF_8 or Unicode
+// (asserted).  The data buffer is referenced, not copied.
+static TokenSource*
+newtokensource(uchar* data, int edata, int chset, int mtype)
+{
+	TokenSource* src;
+
+	assert(chset == US_Ascii || chset == ISO_8859_1 ||
+		chset == UTF_8 || chset == Unicode);
+	src = (TokenSource*)emalloc(sizeof(*src));
+	src->data = data;
+	src->edata = edata;
+	src->i = 0;
+	src->mtype = mtype;
+	src->chset = chset;
+	return src;
+}
+
+enum {
+ ToksChunk = 500
+};
+
+// Call this to get the tokens.
+// data[0:datalen] is lexed as HTML when mtype is TextHtml, otherwise
+// as plain text (one Data token per line).
+// The number of returned tokens is returned in *plen.
+// Returns the token array, or nil when no tokens were produced.
+// The array grows in steps of ToksChunk as needed.
+Token*
+_gettoks(uchar* data, int datalen, int chset, int mtype, int* plen)
+{
+	TokenSource* ts;
+	Token* a;
+	int alen;
+	int ai;
+	int starti;
+	int c;
+	int tag;
+
+	if(!lexinited)
+		lexinit();
+	ts = newtokensource(data, datalen, chset, mtype);
+	alen = ToksChunk;
+	a = (Token*)emalloc(alen * sizeof(Token));
+	ai = 0;
+	if(dbglex)
+		fprint(2, "_gettoks starts, ts.i=%d, ts.edata=%d\n", ts->i, ts->edata);
+	if(ts->mtype == TextHtml) {
+		for(;;) {
+			// One pass can use two slots: gettag stores the <script>
+			// tag, then getscriptdata writes into the slot after it.
+			// Make sure both exist before starting the pass.
+			if(alen - ai < 2) {
+				alen += ToksChunk;
+				a = (Token*)erealloc(a, alen*sizeof(Token));
+			}
+			starti = ts->i;
+			c = getchar(ts);
+			if(c < 0)
+				break;
+			if(c == '<') {
+				tag = gettag(ts, starti, a, &ai);
+				if(tag == Tscript) {
+					// special rules for getting Data after....
+					starti = ts->i;
+					c = getchar(ts);
+					tag = getscriptdata(ts, c, starti, a, &ai);
+				}
+			}
+			else
+				tag = getdata(ts, c, starti, a, &ai);
+			if(tag == -1)
+				break;
+			else if(dbglex > 1 && tag != Comment)
+				fprint(2, "lex: got token %T\n", &a[ai-1]);
+		}
+	}
+	else {
+		// plain text (non-html) tokens
+		for(;;) {
+			if(ai == alen) {
+				alen += ToksChunk;
+				a = (Token*)erealloc(a, alen*sizeof(Token));
+			}
+			tag = getplaindata(ts, a, &ai);
+			if(tag == -1)
+				break;
+			// getplaindata already bumped ai; the new token is a[ai-1]
+			if(dbglex > 1)
+				fprint(2, "lex: got token %T\n", &a[ai-1]);
+		}
+	}
+	if(dbglex)
+		fprint(2, "lex: returning %d tokens\n", ai);
+	// the token source is private to this call; tokens do not point at it
+	free(ts);
+	*plen = ai;
+	if(ai == 0) {
+		// nothing to hand back, so don't leak the empty array
+		free(a);
+		return nil;
+	}
+	return a;
+}
+
+// For case where source isn't HTML.
+// Just make data tokens, one per line (or partial line,
+// at end of buffer), ignoring non-whitespace control
+// characters and dumping \r's.
+// A \r not followed by \n is treated as a \n.
+// If find non-empty token, fill in a[*pai], bump *pai, and return Data.
+// Otherwise return -1;
+static int
+getplaindata(TokenSource* ts, Token* a, int* pai)
+{
+	Rune* s;
+	int j;
+	int starti;
+	int c;
+	Token* tok;
+	Rune buf[BIGBUFSIZE];
+
+	s = nil;
+	j = 0;
+	starti = ts->i;
+	for(c = getchar(ts); c >= 0; c = getchar(ts)) {
+		if(c < ' ') {
+			if(isspace(c)) {
+				if(c == '\r') {
+					// ignore it unless no following '\n',
+					// in which case treat it like '\n'
+					c = getchar(ts);
+					if(c != '\n') {
+						if(c >= 0)
+							ungetchar(ts, c);
+						c = '\n';
+					}
+				}
+			}
+			else
+				c = 0;
+		}
+		if(c != 0) {
+			buf[j++] = c;
+			// Flush while one element is still free for the terminator
+			// buftostr stores.  The limit is in elements: sizeof(buf)
+			// counts bytes and would let j run past the end of buf.
+			if(j == BIGBUFSIZE-1) {
+				s = buftostr(s, buf, j);
+				j = 0;
+			}
+		}
+		if(c == '\n')
+			break;
+	}
+	s = buftostr(s, buf, j);
+	if(s == nil)
+		return -1;
+	tok = &a[(*pai)++];
+	tok->tag = Data;
+	tok->text = s;
+	tok->attr = nil;
+	tok->starti = starti;
+	return Data;
+}
+
+// Terminate buf at index j, then return s followed by buf[0:j].
+// With s == nil the result is just a copy of buf[0:j].
+// buf must have room for the terminator at buf[j].
+static Rune*
+buftostr(Rune* s, Rune* buf, int j)
+{
+	buf[j] = 0;
+	if(s != nil)
+		return _Strdup2(s, buf);
+	return _Strndup(buf, j);
+}
+
+// Gather data up to next start-of-tag or end-of-buffer.
+// Translate entity references (&amp;).
+// Ignore non-whitespace control characters and get rid of \r's.
+// If find non-empty token, fill in a[*pai], bump *pai, and return Data.
+// Otherwise return -1;
+// firstc is the character already read by the caller, and starti is
+// the source index recorded in the token.
+static int
+getdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
+{
+ Rune* s;
+ int j;
+ int c;
+ Token* tok;
+ Rune buf[BIGBUFSIZE];
+
+ s = nil;
+ j = 0;
+ c = firstc;
+ while(c >= 0) {
+ if(c == '&') {
+ c = ampersand(ts);
+ if(c < 0)
+ break;
+ }
+ else if(c < ' ') {
+ if(isspace(c)) {
+ if(c == '\r') {
+ // ignore it unless no following '\n',
+ // in which case treat it like '\n'
+ c = getchar(ts);
+ if(c != '\n') {
+ if(c >= 0)
+ ungetchar(ts, c);
+ c = '\n';
+ }
+ }
+ }
+ else {
+ if(warn)
+ fprint(2, "warning: non-whitespace control character %d ignored\n", c);
+ c = 0;
+ }
+ }
+ else if(c == '<') {
+ // leave the '<' for the caller to lex as a tag
+ ungetchar(ts, c);
+ break;
+ }
+ if(c != 0) {
+ buf[j++] = c;
+ // flush while one element is still free for buftostr's terminator
+ if(j == BIGBUFSIZE-1) {
+ s = buftostr(s, buf, j);
+ j = 0;
+ }
+ }
+ c = getchar(ts);
+ }
+ s = buftostr(s, buf, j);
+ if(s == nil)
+ return -1;
+ tok = &a[(*pai)++];
+ tok->tag = Data;
+ tok->text = s;
+ tok->attr = nil;
+ tok->starti = starti;
+ return Data;
+}
+
+// The rules for lexing scripts are different (ugh).
+// Gather up everything until see a </SCRIPT>.
+// firstc is the first character after the <script> tag and starti its
+// source index.
+// On success the script text becomes a Data token in a[*pai], *pai is
+// bumped, and Data is returned; the source is left positioned at the
+// closing tag so the caller lexes it next.
+// If the loop ended without a closing tag and the source index is not
+// at the end of the data, the source is backed up to starti and -1
+// is returned.
+static int
+getscriptdata(TokenSource* ts, int firstc, int starti, Token* a, int* pai)
+{
+ Rune* s;
+ int j;
+ int tstarti;
+ int savei;
+ int c;
+ int tag;
+ int done;
+ Token* tok;
+ Rune buf[BIGBUFSIZE];
+
+ s = nil;
+ j = 0;
+ tstarti = starti;
+ c = firstc;
+ done = 0;
+ while(c >= 0) {
+ if(c == '<') {
+ // other browsers ignore stuff to end of line after <!
+ savei = ts->i;
+ c = getchar(ts);
+ if(c == '!') {
+ while(c >= 0 && c != '\n' && c != '\r')
+ c = getchar(ts);
+ if(c == '\r')
+ c = getchar(ts);
+ if(c == '\n')
+ c = getchar(ts);
+ }
+ else if(c >= 0) {
+ // trial-lex the tag into a[*pai], then discard it: all we
+ // need to know is whether it is </script>
+ backup(ts, savei);
+ tag = gettag(ts, tstarti, a, pai);
+ if(tag == -1)
+ break;
+ if(tag != Comment)
+ (*pai)--;
+ // rewind to the '<' so it is re-read, either by the caller
+ // (closing tag) or below as ordinary script text
+ backup(ts, tstarti);
+ if(tag == Tscript + RBRA) {
+ done = 1;
+ break;
+ }
+ // here tag was not </SCRIPT>, so take as regular data
+ c = getchar(ts);
+ }
+ }
+ if(c < 0)
+ break;
+ if(c != 0) {
+ buf[j++] = c;
+ if(j == BIGBUFSIZE-1) {
+ s = buftostr(s, buf, j);
+ j = 0;
+ }
+ }
+ // remember where the next character starts, in case it is a '<'
+ tstarti = ts->i;
+ c = getchar(ts);
+ }
+ if(done || ts->i == ts->edata) {
+ s = buftostr(s, buf, j);
+ tok = &a[(*pai)++];
+ tok->tag = Data;
+ tok->text = s;
+ tok->attr = nil;
+ tok->starti = starti;
+ return Data;
+ }
+ backup(ts, starti);
+ return -1;
+}
+
+// We've just seen a '<'. Gather up stuff to closing '>'.
+// If it's a tag, look up the name, gather the attributes, and return
+// the appropriate token.
+// Else it's either just plain data or some kind of ignorable stuff:
+// return Data or Comment as appropriate.
+// If it's not a Comment, put it in a[*pai] and bump *pai.
+// An end tag </x> is returned as the start tag's value plus RBRA.
+// An unknown tag name is returned as Notfound, with the name in
+// tok->text.
+// NOTE(review): when the buffer ends before the closing '>', the code
+// (label eob_done) backs up to just after the '<', stores a "<" Data
+// token in a[*pai] and returns Data, but does NOT bump *pai.  That
+// differs from the "not a tag" path, which does bump it -- confirm
+// which is intended.
+static int
+gettag(TokenSource* ts, int starti, Token* a, int* pai)
+{
+ int rbra;
+ int ans;
+ Attr* al;
+ int nexti;
+ int c;
+ int ti;
+ int afnd;
+ int attid;
+ int quote;
+ Rune* val;
+ int nv;
+ int i;
+ int tag;
+ Token* tok;
+ Rune buf[BIGBUFSIZE];
+
+ rbra = 0;
+ nexti = ts->i;
+ tok = &a[*pai];
+ tok->tag = Notfound;
+ tok->text = nil;
+ tok->attr = nil;
+ tok->starti = starti;
+ c = getchar(ts);
+ if(c == '/') {
+ rbra = RBRA;
+ c = getchar(ts);
+ }
+ if(c < 0)
+ goto eob_done;
+ if(c >= 256 || !isalpha(c)) {
+ // not a tag
+ if(c == '!') {
+ ans = comment(ts);
+ if(ans != -1)
+ return ans;
+ goto eob_done;
+ }
+ else {
+ backup(ts, nexti);
+ tok->tag = Data;
+ tok->text = _Strdup(L(Llt));
+ (*pai)++;
+ return Data;
+ }
+ }
+ // c starts a tagname
+ buf[0] = c;
+ i = 1;
+ while(1) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ if(!ISNAMCHAR(c))
+ break;
+ // if name is bigger than buf it won't be found anyway...
+ if(i < BIGBUFSIZE)
+ buf[i++] = c;
+ }
+ if(_lookup(tagtable, Numtags, buf, i, &tag))
+ tok->tag = tag + rbra;
+ else
+ tok->text = _Strndup(buf, i); // for warning print, in build
+
+ // attribute gathering loop
+ al = nil;
+ while(1) {
+ // look for "ws name" or "ws name ws = ws val" (ws=whitespace)
+ // skip whitespace
+attrloop_continue:
+ while(c < 256 && isspace(c)) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ }
+ if(c == '>')
+ goto attrloop_done;
+ if(c == '<') {
+ if(warn)
+ fprint(2, "warning: unclosed tag\n");
+ ungetchar(ts, c);
+ goto attrloop_done;
+ }
+ if(c >= 256 || !isalpha(c)) {
+ if(warn)
+ fprint(2, "warning: expected attribute name\n");
+ // skip to next attribute name
+ while(1) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ if(c < 256 && isalpha(c))
+ goto attrloop_continue;
+ if(c == '<') {
+ if(warn)
+ fprint(2, "warning: unclosed tag\n");
+ // 60 is '<'
+ ungetchar(ts, 60);
+ goto attrloop_done;
+ }
+ if(c == '>')
+ goto attrloop_done;
+ }
+ }
+ // gather attribute name
+ buf[0] = c;
+ i = 1;
+ while(1) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ if(!ISNAMCHAR(c))
+ break;
+ // leave room for the terminator stored before the warning print
+ if(i < BIGBUFSIZE-1)
+ buf[i++] = c;
+ }
+ afnd = _lookup(attrtable, Numattrs, buf, i, &attid);
+ if(warn && !afnd) {
+ buf[i] = 0;
+ fprint(2, "warning: unknown attribute name %S\n", buf);
+ }
+ // skip whitespace
+ while(c < 256 && isspace(c)) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ }
+ if(c != '=') {
+ // attribute with no value
+ if(afnd)
+ al = newattr(attid, nil, al);
+ goto attrloop_continue;
+ }
+ //# c is '=' here; skip whitespace
+ while(1) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ if(c >= 256 || !isspace(c))
+ break;
+ }
+ quote = 0;
+ if(c == '\'' || c == '"') {
+ quote = c;
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ }
+ val = nil;
+ nv = 0;
+ while(1) {
+valloop_continue:
+ if(c < 0)
+ goto eob_done;
+ if(c == '>') {
+ if(quote) {
+ // c might be part of string (though not good style)
+ // but if line ends before close quote, assume
+ // there was an unmatched quote
+ ti = ts->i;
+ while(1) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ if(c == quote) {
+ // close quote found on this line: the '>' belongs to the value
+ backup(ts, ti);
+ buf[nv++] = '>';
+ if(nv == BIGBUFSIZE-1) {
+ val = buftostr(val, buf, nv);
+ nv = 0;
+ }
+ c = getchar(ts);
+ goto valloop_continue;
+ }
+ if(c == '\n') {
+ if(warn)
+ fprint(2, "warning: apparent unmatched quote\n");
+ backup(ts, ti);
+ c = '>';
+ goto valloop_done;
+ }
+ }
+ }
+ else
+ goto valloop_done;
+ }
+ if(quote) {
+ if(c == quote) {
+ c = getchar(ts);
+ if(c < 0)
+ goto eob_done;
+ goto valloop_done;
+ }
+ // inside quotes: drop \r, turn tab and newline into a space
+ if(c == '\r') {
+ c = getchar(ts);
+ goto valloop_continue;
+ }
+ if(c == '\t' || c == '\n')
+ c = ' ';
+ }
+ else {
+ // an unquoted value ends at the first whitespace
+ if(c < 256 && isspace(c))
+ goto valloop_done;
+ }
+ if(c == '&') {
+ c = ampersand(ts);
+ if(c == -1)
+ goto eob_done;
+ }
+ buf[nv++] = c;
+ if(nv == BIGBUFSIZE-1) {
+ val = buftostr(val, buf, nv);
+ nv = 0;
+ }
+ c = getchar(ts);
+ }
+valloop_done:
+ // values of unknown attributes are dropped
+ if(afnd) {
+ val = buftostr(val, buf, nv);
+ al = newattr(attid, val, al);
+ }
+ }
+
+attrloop_done:
+ tok->attr = al;
+ (*pai)++;
+ return tok->tag;
+
+eob_done:
+ // NOTE(review): attributes already collected in al, and any text
+ // stored in tok->text for an unknown tag name, are not released on
+ // this path -- confirm whether that is acceptable.
+ if(warn)
+ fprint(2, "warning: incomplete tag at end of page\n");
+ backup(ts, nexti);
+ tok->tag = Data;
+ tok->text = _Strdup(L(Llt));
+ return Data;
+}
+
+// Called just after a '<!' has been read from ts.
+// Decides whether what follows is an ignorable section (a comment or
+// some other declaration).
+// The accepted practice seems to be (note: contrary to SGML spec!):
+//	after '<!--', skip through '-->'; if there is none, skip through '>'.
+//	after '<!' not followed by '--', skip through '>'.
+//
+// Returns Comment, with ts positioned just past the closer, when a
+// closer is found.
+// Returns -1 when the data runs out first; the caller should back up
+// and try again when more data arrives, or, at end of file, just make
+// the '<' a data token.
+static int
+comment(TokenSource* ts)
+{
+	int start;
+	int c;
+
+	start = ts->i;
+	c = getchar(ts);
+	if(c == '-') {
+		c = getchar(ts);
+		if(c == '-') {
+			if(findstr(ts, L(Larrow)))
+				return Comment;
+			// no '-->': rescan from the start for a plain '>'
+			backup(ts, start);
+		}
+	}
+	if(c == '>')
+		return Comment;
+	if(c >= 0 && findstr(ts, L(Lgt)))
+		return Comment;
+	return -1;
+}
+
+// Scan forward in the token source for the rune string s.
+// Returns 1 with ts positioned at the char just after s if it is found,
+// else 0 once the data runs out (the caller should back up).
+static int
+findstr(TokenSource* ts, Rune* s)
+{
+	int first;
+	int len;
+	int resume;
+	int k;
+	int c;
+
+	first = s[0];
+	len = runestrlen(s);
+	for(;;) {
+		c = getchar(ts);
+		if(c < 0)
+			return 0;
+		if(c != first)
+			continue;
+		if(len == 1)
+			return 1;
+		// remember where to resume if the rest of s does not follow
+		resume = ts->i;
+		k = 1;
+		while(k < len) {
+			c = getchar(ts);
+			if(c < 0)
+				return 0;
+			if(c != s[k])
+				break;
+			k++;
+		}
+		if(k == len)
+			return 1;
+		backup(ts, resume);
+	}
+}
+
+// We've just read an '&'; look for an entity reference
+// and, if one is found, return the translated char.
+//
+// Numeric form: '&#' followed by one or more decimal digits. Values in
+// [Winstart, Winend] are remapped through winchars. '&#' with no digits
+// is not treated as a reference.
+// Named form: a letter followed by name chars, looked up in chartab.
+// If the complete name isn't known, successively shorter prefixes are
+// tried (gets around some buggy HTML out there), and the unused tail
+// of the name is pushed back.
+// A terminating ';', '\n' or '\r' is consumed; any other terminator is
+// pushed back.
+//
+// If no entity is recognized -- including when the data runs out before
+// the reference is complete -- back up to just past the '&' and return '&'.
+static int
+ampersand(TokenSource* ts)
+{
+	int savei;
+	int c;
+	int fnd;
+	int ans;
+	int v;
+	int ndig;
+	int i;
+	int k;
+	Rune buf[SMALLBUFSIZE];
+
+	savei = ts->i;
+	c = getchar(ts);
+	fnd = 0;
+	ans = -1;
+	if(c == '#') {
+		c = getchar(ts);
+		v = 0;
+		ndig = 0;
+		while(c >= 0) {
+			if(!(c < 256 && isdigit(c)))
+				break;
+			v = v*10 + c - '0';
+			ndig++;
+			c = getchar(ts);
+		}
+		// Require at least one digit; otherwise "&#x" would yield rune 0.
+		if(c >= 0 && ndig > 0) {
+			if(!(c == ';' || c == '\n' || c == '\r'))
+				ungetchar(ts, c);
+			c = v;
+			if(c >= Winstart && c <= Winend) {
+				c = winchars[c - Winstart];
+			}
+			ans = c;
+			fnd = 1;
+		}
+	}
+	else if(c < 256 && isalpha(c)) {
+		buf[0] = c;
+		k = 1;
+		while(1) {
+			c = getchar(ts);
+			if(c < 0)
+				break;
+			if(ISNAMCHAR(c)) {
+				// over-long names are silently truncated
+				if(k < SMALLBUFSIZE-1)
+					buf[k++] = c;
+			}
+			else {
+				if(!(c == ';' || c == '\n' || c == '\r'))
+					ungetchar(ts, c);
+				break;
+			}
+		}
+		if(c >= 0) {
+			fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
+			if(!fnd) {
+				// Try prefixes of the name.
+				// The terminator was consumed above; give it back first.
+				if(c == ';' || c == '\n' || c == '\r')
+					ungetchar(ts, c);
+				i = k;
+				while(--k > 0) {
+					fnd = _lookup(chartab, NCHARTAB, buf, k, &ans);
+					if(fnd) {
+						// push back the part of the name not used
+						while(i > k) {
+							i--;
+							ungetchar(ts, buf[i]);
+						}
+						break;
+					}
+				}
+			}
+		}
+	}
+	if(!fnd) {
+		backup(ts, savei);
+		ans = '&';
+	}
+	return ans;
+}
+
+// Get next char, obeying ts.chset, and advance ts->i past its bytes.
+// Returns -1 if no complete character is left before the current end
+// of data. For the multi-byte charsets (UTF_8, Unicode) a trailing
+// partial character marks all data as used (ts->i = ts->edata).
+static int
+getchar(TokenSource* ts)
+{
+	uchar* buf;
+	int c;
+	int n;
+	Rune r;
+
+	if(ts->i >= ts->edata)
+		return -1;
+	buf = ts->data;
+	c = buf[ts->i];
+	switch(ts->chset) {
+	case ISO_8859_1:
+		// bytes in [Winstart, Winend] are remapped through winchars
+		if(c >= Winstart && c <= Winend)
+			c = winchars[c - Winstart];
+		ts->i++;
+		break;
+	case US_Ascii:
+		if(c > 127) {
+			if(warn)
+				fprint(2, "non-ascii char (%x) when US-ASCII specified\n", c);
+		}
+		ts->i++;
+		break;
+	case UTF_8:
+		// Decode only when a whole rune is available: chartorune takes
+		// no length and would otherwise read past edata.
+		if(fullrune((char*)(buf+ts->i), ts->edata-ts->i)) {
+			n = chartorune(&r, (char*)(buf+ts->i));
+			if(warn && c == 0x80)
+				fprint(2, "warning: invalid utf-8 sequence (starts with %x)\n", ts->data[ts->i]);
+			ts->i += n;
+			c = r;
+		}
+		else {
+			// not enough bytes in buf to complete utf-8 char
+			ts->i = ts->edata;	// mark "all used"
+			c = -1;
+		}
+		break;
+	case Unicode:
+		if(ts->i < ts->edata - 1) {
+			//standards say most-significant byte first
+			c = (c << 8)|(buf[ts->i + 1]);
+			ts->i += 2;
+		}
+		else {
+			ts->i = ts->edata;	// mark "all used"
+			c = -1;
+		}
+		break;
+	}
+	return c;
+}
+
+// Undo the most recent getchar, which must have returned c: step ts->i
+// back by the number of bytes c occupies in the current character set,
+// so that the next getchar returns c again.
+static void
+ungetchar(TokenSource* ts, int c)
+{
+	Rune r;
+	char enc[UTFmax];
+	int width;
+
+	if(ts->chset == Unicode)
+		width = 2;
+	else if(ts->chset == UTF_8 && c >= 128) {
+		// encode c only to learn how many bytes it takes
+		r = c;
+		width = runetochar(enc, &r);
+	}
+	else
+		width = 1;
+	ts->i -= width;
+}
+
+// Rewind ts to the read index savei (a value of ts->i saved earlier).
+// Logs the move to fd 2 when dbglex is set.
+static void
+backup(TokenSource* ts, int savei)
+{
+	if(dbglex != 0) {
+		fprint(2, "lex: backup; i=%d, savei=%d\n", ts->i, savei);
+	}
+	ts->i = savei;
+}
+
+
+// Search token t's attribute list for attribute attid.
+// If present, return 1 and store its value in *pans; otherwise return 0
+// and store nil in *pans.
+// With xfer set, ownership of the value string moves to the caller (the
+// Attr's pointer is nil'd out); without it the caller must duplicate
+// the answer if it wants to keep it.
+// pans may be nil, in which case this only tests for presence.
+int
+_tokaval(Token* t, int attid, Rune** pans, int xfer)
+{
+	Attr* a;
+
+	for(a = t->attr; a != nil; a = a->next) {
+		if(a->attid != attid)
+			continue;
+		if(pans != nil)
+			*pans = a->value;
+		if(xfer)
+			a->value = nil;
+		return 1;
+	}
+	if(pans != nil)
+		*pans = nil;
+	return 0;
+}
+
+// Format routine for a Token*, taken from f->args.
+// A nil token prints as "<null>", a Data token as its quoted text, and
+// any other token as <tag attr=value ...> (with a leading '/' for tags
+// at or above RBRA). When dbglex > 1 the token's starti is prefixed
+// in brackets.
+static int
+Tconv(Fmt *f)
+{
+	Token* tok;
+	Attr* at;
+	Rune* tagstr;
+	char* slash;
+	int n;
+	int tg;
+	char out[BIGBUFSIZE];
+
+	tok = va_arg(f->args, Token*);
+	if(tok == nil) {
+		sprint(out, "<null>");
+		return fmtstrcpy(f, out);
+	}
+
+	n = 0;
+	if(dbglex > 1)
+		n = snprint(out, sizeof(out), "[%d]", tok->starti);
+	tg = tok->tag;
+	if(tg == Data)
+		n += snprint(out+n, sizeof(out)-n-1, "'%S'", tok->text);
+	else {
+		slash = "";
+		if(tg >= RBRA) {
+			tg -= RBRA;
+			slash = "/";
+		}
+		tagstr = tagnames[tg];
+		if(tg == Notfound)
+			tagstr = L(Lquestion);
+		n += snprint(out+n, sizeof(out)-n-1, "<%s%S", slash, tagstr);
+		at = tok->attr;
+		while(at != nil) {
+			n += snprint(out+n, sizeof(out)-n-1, " %S", attrnames[at->attid]);
+			if(at->value != nil)
+				n += snprint(out+n, sizeof(out)-n-1, "=%S", at->value);
+			at = at->next;
+		}
+		n += snprint(out+n, sizeof(out)-n-1, ">");
+	}
+	out[n] = 0;
+	return fmtstrcpy(f, out);
+}
+
+// Allocate an Attr for (attid, value) and push it on the front of link.
+// Attrs own their value strings, although build may later take some
+// values for its items and nil them out in the Attr.
+static Attr*
+newattr(int attid, Rune* value, Attr* link)
+{
+	Attr* a;
+
+	a = emalloc(sizeof *a);
+	a->next = link;
+	a->value = value;
+	a->attid = attid;
+	return a;
+}
+
+// Release every Attr on the next-linked list starting at ahead,
+// together with each Attr's value string.
+static void
+freeattrs(Attr* ahead)
+{
+	Attr* cur;
+	Attr* following;
+
+	for(cur = ahead; cur != nil; cur = following) {
+		// read the link before the node is freed
+		following = cur->next;
+		free(cur->value);
+		free(cur);
+	}
+}
+
+// Free an array of Tokens along with each token's text and attributes.
+// The allocation may have room for more than n tokens, but only the
+// first n are initialized and visited.
+// A caller that has taken ownership of a constituent string or of the
+// attributes must have nil'd out the matching pointer in the Token.
+void
+_freetokens(Token* tarray, int n)
+{
+	Token* tok;
+	Token* end;
+
+	if(tarray == nil)
+		return;
+	end = tarray + n;
+	for(tok = tarray; tok < end; tok++) {
+		free(tok->text);
+		freeattrs(tok->attr);
+	}
+	free(tarray);
+}
diff --git a/src/libhtml/mkfile b/src/libhtml/mkfile
new file mode 100644
index 00000000..0952c451
--- /dev/null
+++ b/src/libhtml/mkfile
@@ -0,0 +1,22 @@
+<$SYS9/$systype/$objtype/mkfile
+
+# Library archive for this directory.
+LIB=$LIB9/libhtml.a
+
+# Object files; UPDATE below derives the matching .c names from this list.
+OFILES=\
+ build.$O\
+ lex.$O\
+ strinttab.$O\
+ utils.$O\
+ runetab.$O\
+
+# Header files (presumably dependencies of every object -- confirm in mksyslib).
+HFILES=\
+ $SYS9/sys/include/html.h\
+ impl.h\
+
+# Files listed for update: this mkfile, the headers, the C sources
+# (OFILES with .$O rewritten to .c) and the 386 copy of the library.
+UPDATE=\
+ mkfile\
+ $HFILES\
+ ${OFILES:%.$O=%.c}\
+ ${LIB:$SYS9/$systype/$objtype/%=$SYS9/$systype/386/%}\
+
+<$SYS9/sys/src/cmd/mksyslib
diff --git a/src/libhtml/runetab.c b/src/libhtml/runetab.c
new file mode 100644
index 00000000..abd0a50f
--- /dev/null
+++ b/src/libhtml/runetab.c
@@ -0,0 +1,83 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+// Rune-string versions of the constants below; filled in by runetabinit().
+Rune **runeconsttab;
+// ASCII source for runeconsttab.
+// Entries are looked up by position through the L() macro, so the order
+// here must match the enum of L* indices in runetab.h.
+char *_runeconsttab[] = {
+ " ",
+ " ",
+ "",
+ "#",
+ "+",
+ ", ",
+ "-",
+ "-->",
+ "1",
+ "<",
+ ">",
+ "?",
+ "Index search terms:",
+ "Reset",
+ "Submit",
+ "^0-9",
+ "_ISINDEX_",
+ "_blank",
+ "_fr",
+ "_no_name_submit_",
+ "_parent",
+ "_self",
+ "_top",
+ "application/x-www-form-urlencoded",
+ "circle",
+ "cm",
+ "content-script-type",
+ "disc",
+ "em",
+ "in",
+ "javascript",
+ "jscript",
+ "jscript1.1",
+ "mm",
+ "none",
+ "pi",
+ "pt",
+ "refresh",
+ "select",
+ "square",
+ "textarea",
+};
+
+// Convert a table of n ASCII C strings into a newly allocated table of
+// n Rune strings (each produced by toStr with US_Ascii).
+Rune**
+cvtstringtab(char **tab, int n)
+{
+	Rune **out;
+	int k;
+
+	out = emalloc(n*sizeof(out[0]));
+	k = 0;
+	while(k < n) {
+		out[k] = toStr(tab[k], strlen(tab[k]), US_Ascii);
+		k++;
+	}
+	return out;
+}
+
+// Convert a table of n (ASCII key, int) pairs into a newly allocated
+// StringInt table whose keys are Rune strings; vals are copied as is.
+StringInt*
+cvtstringinttab(AsciiInt *tab, int n)
+{
+	StringInt *out;
+	AsciiInt *src;
+	int k;
+
+	out = emalloc(n*sizeof(out[0]));
+	for(k = 0; k < n; k++) {
+		src = &tab[k];
+		out[k].val = src->val;
+		out[k].key = toStr(src->key, strlen(src->key), US_Ascii);
+	}
+	return out;
+}
+
+// Build runeconsttab, the Rune-string form of _runeconsttab that the
+// L() macro indexes.
+void
+runetabinit(void)
+{
+	int count;
+
+	count = nelem(_runeconsttab);
+	runeconsttab = cvtstringtab(_runeconsttab, count);
+}
diff --git a/src/libhtml/runetab.h b/src/libhtml/runetab.h
new file mode 100644
index 00000000..edde98c8
--- /dev/null
+++ b/src/libhtml/runetab.h
@@ -0,0 +1,59 @@
+// A (C string, int) pair: the ASCII-keyed counterpart of StringInt.
+// cvtstringinttab() turns a table of these into a StringInt table.
+typedef struct AsciiInt AsciiInt;
+
+struct AsciiInt {
+ char* key;
+ int val;
+};
+
+// Indices into runeconsttab, used through the L() macro.
+// Each enumerator's position must match the position of its string in
+// _runeconsttab (runetab.c); add or remove entries in both places.
+enum {
+ Ltab2space,
+ Lspace,
+ Lempty,
+ Lhash,
+ Lplus,
+ Lcommaspace,
+ Lminus,
+ Larrow,
+ Lone,
+ Llt,
+ Lgt,
+ Lquestion,
+ Lindex,
+ Lreset,
+ Lsubmit,
+ Lnot0to9,
+ Lisindex,
+ L_blank,
+ Lfr,
+ Lnoname,
+ L_parent,
+ L_self,
+ L_top,
+ Lappl_form,
+ Lcircle,
+ Lcm,
+ Lcontent,
+ Ldisc,
+ Lem,
+ Lin,
+ Ljavascript,
+ Ljscript,
+ Ljscript1,
+ Lmm,
+ Lnone,
+ Lpi,
+ Lpt,
+ Lrefresh,
+ Lselect,
+ Lsquare,
+ Ltextarea,
+};
+
+// L(x) is the Rune-string constant at index x of runeconsttab.
+// runeconsttab is only assigned in runetabinit(), so that must run first.
+#define L(x) runeconsttab[(x)]
+
+extern Rune **runeconsttab;
+
+/* XXX: for unix port only */
+// Converters from ASCII tables to Rune tables, and the initializer
+// that builds runeconsttab.
+Rune **cvtstringtab(char**, int);
+StringInt *cvtstringinttab(AsciiInt*, int);
+void runetabinit(void);
diff --git a/src/libhtml/strinttab.c b/src/libhtml/strinttab.c
new file mode 100644
index 00000000..7883c044
--- /dev/null
+++ b/src/libhtml/strinttab.c
@@ -0,0 +1,64 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+// Binary-search the keys of t[0:n] for key[0:keylen], ignoring the case
+// of key (see _Strncmpci).
+// t must be sorted in increasing lexicographic order of key.
+// Returns 1 and stores the matching val in *pans if found, else 0
+// (leaving *pans untouched).
+int
+_lookup(StringInt* t, int n, Rune* key, int keylen, int* pans)
+{
+	int lo;
+	int hi;
+	int mid;
+	int cmp;
+
+	lo = 0;
+	hi = n - 1;
+	while(lo <= hi) {
+		mid = (lo + hi)/2;
+		cmp = _Strncmpci(key, keylen, t[mid].key);
+		if(cmp == 0) {
+			*pans = t[mid].val;
+			return 1;
+		}
+		if(cmp > 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+	return 0;
+}
+
+// Linear reverse lookup: return the key of the first entry in t[0:n]
+// whose val equals val, or nil if there is none.
+Rune*
+_revlookup(StringInt* t, int n, int val)
+{
+	StringInt* e;
+	StringInt* end;
+
+	end = t + n;
+	for(e = t; e < end; e++) {
+		if(e->val == val)
+			return e->key;
+	}
+	return nil;
+}
+
+// Build a StringInt table from a[0:n] in which each string maps to its
+// own index. The keys point at the strings in a (they are not copied).
+// Asserts that a is in nondecreasing runestrcmp order.
+StringInt*
+_makestrinttab(Rune** a, int n)
+{
+	StringInt* tab;
+	int k;
+
+	tab = emalloc(n * sizeof(StringInt));
+	k = 0;
+	while(k < n) {
+		tab[k].key = a[k];
+		tab[k].val = k;
+		if(k != 0)
+			assert(runestrcmp(a[k], a[k - 1]) >= 0);
+		k++;
+	}
+	return tab;
+}
diff --git a/src/libhtml/utils.c b/src/libhtml/utils.c
new file mode 100644
index 00000000..db22bba7
--- /dev/null
+++ b/src/libhtml/utils.c
@@ -0,0 +1,591 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+// Character classes in the form _inclass() understands:
+// whitespace matches space, tab, newline and carriage return;
+// notwhitespace is the same set negated by its leading '^'.
+Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
+Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
+
+// All lists start out like the List structure, and List itself can be
+// used as a list of int.
+// _listlen returns the number of nodes on l (0 for nil).
+int
+_listlen(List* l)
+{
+	int count;
+
+	for(count = 0; l != nil; l = l->next)
+		count++;
+	return count;
+}
+
+// Cons: allocate a node holding val and link it in front of rest.
+List*
+_newlist(int val, List* rest)
+{
+	List* node;
+
+	node = emalloc(sizeof *node);
+	node->next = rest;
+	node->val = val;
+	return node;
+}
+
+// Reverse a list in place by relinking its nodes.
+// Returns the new head (the old last node), or nil for an empty list.
+List*
+_revlist(List* l)
+{
+	List* done;
+	List* rest;
+
+	for(done = nil; l != nil; l = rest) {
+		rest = l->next;
+		l->next = done;
+		done = l;
+	}
+	return done;
+}
+
+// The next few routines take a "character class" as argument,
+// e.g., "a-zA-Z", or "^ \t\n".
+// (Ranges are indicated by '-', except in the first or last position;
+// '^' in the first position means "not in" the following class.)
+
+// Split s[0:n] just before its first character of class cl.
+// The left part goes in (*p1, *n1) and the right part, which begins
+// with the class character, in (*p2, *n2).
+// If there is no such character, the whole thing goes in the first
+// component and the second is (nil, 0).
+// Note: the answers point into the original string.
+void
+_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+	Rune* hit;
+
+	hit = _Strnclass(s, cl, n);
+	*p1 = s;
+	if(hit != nil) {
+		*p2 = hit;
+		*n1 = hit-s;
+		*n2 = n-*n1;
+		return;
+	}
+	*n1 = n;
+	*p2 = nil;
+	*n2 = 0;
+}
+
+// Split s[0:n] just after its last character of class cl.
+// The left part, which ends with the class character, goes in
+// (*p1, *n1) and the right part in (*p2, *n2).
+// If there is no such character, the whole thing goes in the last
+// component and the first is (nil, 0).
+// Note: the answers point into the original string.
+void
+_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+	Rune* hit;
+
+	hit = _Strnrclass(s, cl, n);
+	if(hit != nil) {
+		*p1 = s;
+		*p2 = hit+1;
+		*n1 = *p2-s;
+		*n2 = n-*n1;
+		return;
+	}
+	*p1 = nil;
+	*n1 = 0;
+	*p2 = s;
+	*n2 = n;
+}
+
+// Split s[0:n] into the pieces that lie between characters of class cl.
+// Every piece has nonzero length. At most alen pieces are recorded:
+// strarr[k] points at the start of piece k (inside s) and lenarr[k]
+// holds its length.
+// Returns the number of pieces recorded.
+int
+_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
+{
+	int count;
+	Rune* cur;
+	Rune* stop;
+	Rune* end;
+
+	if(s == nil || n == 0)
+		return 0;
+	end = s+n;
+	cur = s;
+	for(count = 0; cur < end && count < alen; count++) {
+		// skip the separators in front of the next piece
+		while(cur < end && _inclass(*cur, cl))
+			cur++;
+		if(cur == end)
+			break;
+		stop = _Strnclass(cur, cl, end-cur);
+		if(stop == nil)
+			stop = end;
+		assert(stop > cur && stop <= end);
+		strarr[count] = cur;
+		lenarr[count] = stop-cur;
+		cur = stop;
+	}
+	return count;
+}
+
+// Find the part of s[0:n] that excludes leading and trailing whitespace
+// and return it in *pans, with its length in *panslen.
+// The answer points into s; nothing is copied.
+// If s[0:n] is empty or all whitespace, the answer is (nil, 0).
+void
+_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
+{
+	Rune* p;
+	Rune* q;
+
+	p = nil;
+	if(n > 0) {
+		p = _Strnclass(s, notwhitespace, n);
+		if(p != nil) {
+			q = _Strnrclass(s, notwhitespace, n);
+			assert(q != nil);
+			n = q+1-p;
+		}
+	}
+	// Never pair a nil pointer with a nonzero length.
+	if(p == nil)
+		n = 0;
+	*pans = p;
+	*panslen = n;
+}
+
+// Return a pointer to the first rune of the null-terminated string s
+// that belongs to class cl, or nil if none does.
+Rune*
+_Strclass(Rune* s, Rune* cl)
+{
+	while(*s != 0) {
+		if(_inclass(*s, cl))
+			return s;
+		s++;
+	}
+	return nil;
+}
+
+// Return a pointer to the first rune of s[0:n] that belongs to class
+// cl, or nil if none does. The scan also stops at a 0 rune.
+Rune*
+_Strnclass(Rune* s, Rune* cl, int n)
+{
+	Rune* p;
+
+	p = s;
+	while(n-- != 0 && *p != 0) {
+		if(_inclass(*p, cl))
+			return p;
+		p++;
+	}
+	return nil;
+}
+
+// Return a pointer to the last rune of the null-terminated string s
+// that belongs to class cl, or nil if none does (or s is nil or empty).
+Rune*
+_Strrclass(Rune* s, Rune* cl)
+{
+	int k;
+
+	if(s == nil || *s == 0)
+		return nil;
+	// Count down with an index so no pointer before s is ever formed.
+	for(k = runestrlen(s); k > 0; k--)
+		if(_inclass(s[k-1], cl))
+			return s + (k-1);
+	return nil;
+}
+
+// Return a pointer to the last rune of s[0:n] that belongs to class cl,
+// or nil if none does (or s is nil, starts with a 0 rune, or n <= 0).
+Rune*
+_Strnrclass(Rune* s, Rune* cl, int n)
+{
+	int k;
+
+	if(s == nil || *s == 0 || n == 0)
+		return nil;
+	// Count down with an index so no pointer before s is ever formed.
+	for(k = n; k > 0; k--)
+		if(_inclass(s[k-1], cl))
+			return s + (k-1);
+	return nil;
+}
+
+// Report whether c belongs to the character class cl.
+// A leading '^' inverts the class. A '-' that is neither first nor last
+// (after any '^') denotes the range from the previous to the following
+// character. An empty or nil class contains nothing.
+int
+_inclass(Rune c, Rune* cl)
+{
+	int len;
+	int k;
+	int hit;
+	int inverted;
+
+	len = _Strlen(cl);
+	if(len == 0)
+		return 0;
+	inverted = (cl[0] == '^');
+	if(inverted) {
+		cl++;
+		len--;
+	}
+	hit = 0;
+	for(k = 0; k < len && !hit; k++) {
+		if(cl[k] == '-' && k > 0 && k < len - 1) {
+			if(c >= cl[k - 1] && c <= cl[k + 1])
+				hit = 1;
+			else
+				k++;	// step over the range's upper bound
+		}
+		else if(c == cl[k])
+			hit = 1;
+	}
+	return inverted ? !hit : hit;
+}
+
+// Return 1 if s begins with pre, else 0.
+// A nil or empty pre is a prefix of everything.
+int
+_prefix(Rune* pre, Rune* s)
+{
+	int plen;
+	int k;
+
+	plen = _Strlen(pre);
+	if(_Strlen(s) < plen)
+		return 0;
+	k = 0;
+	while(k < plen) {
+		if(pre[k] != s[k])
+			return 0;
+		k++;
+	}
+	return 1;
+}
+
+// Length in runes of the null-terminated string s; a nil s counts as 0.
+int
+_Strlen(Rune* s)
+{
+	return (s != nil) ? runestrlen(s) : 0;
+}
+
+// Compare s1 with s2 lexicographically, treating nil as the empty
+// string: 0 when both are empty, -1 or 1 when only one is. When both
+// are non-nil the result is whatever runestrcmp returns.
+int
+_Strcmp(Rune *s1, Rune *s2)
+{
+	if(s1 != nil && s2 != nil)
+		return runestrcmp(s1, s2);
+	if(s1 == nil) {
+		if(s2 == nil || *s2 == 0)
+			return 0;
+		return -1;
+	}
+	// s2 is nil, s1 is not
+	if(*s1 == 0)
+		return 0;
+	return 1;
+}
+
+// Like Strcmp, but use exactly n1 chars of s1 (assume s1 has at least
+// n1 chars) against the null-terminated s2.
+// The match is case-insensitive on the assumption that s2 has no chars
+// in [A-Z], only their lowercase versions: only s1's chars are folded.
+// (This routine is used for in-place keyword lookup, where s2 is in a
+// keyword list and s1 is some substring, possibly mixed-case, in a buffer.)
+// Returns -1, 0, 1 as s1[0:n1] is less than, equal to, greater than s2.
+int
+_Strncmpci(Rune *s1, int n1, Rune *s2)
+{
+	Rune c1, c2;
+
+	for(;;) {
+		if(n1-- == 0) {
+			if(*s2 == 0)
+				return 0;
+			return -1;
+		}
+		c1 = *s1++;
+		c2 = *s2++;
+		if(c1 >= 'A' && c1 <= 'Z')
+			c1 = c1 - 'A' + 'a';
+		if(c1 != c2) {
+			if(c1 > c2)
+				return 1;
+			return -1;
+		}
+		// s2 ended on a 0 rune inside s1[0:n1]: s1 is the longer string.
+		// Stop here rather than read past s2's terminator.
+		if(c2 == 0)
+			return 1;
+	}
+}
+
+// Return an emalloc'd copy of the null-terminated string s.
+// A nil s gives nil, and so does an empty s (see _Strndup).
+Rune*
+_Strdup(Rune* s)
+{
+	int len;
+
+	if(s == nil)
+		return nil;
+	len = runestrlen(s);
+	return _Strndup(s, len);
+}
+
+// Return an emalloc'd, 0-terminated copy of the first n chars of s
+// (s is assumed to be at least that long).
+// Returns nil if n <= 0.
+Rune*
+_Strndup(Rune* s, int n)
+{
+	Rune* copy;
+
+	if(n <= 0)
+		return nil;
+	copy = _newstr(n);
+	memmove(copy, s, n*sizeof(Rune));
+	copy[n] = 0;
+	return copy;
+}
+// emalloc room for n Runes plus one more for a null terminator.
+// (Nothing is written into the space here.)
+Rune*
+_newstr(int n)
+{
+	Rune* space;
+
+	space = emalloc((n+1)*sizeof(Rune));
+	return space;
+}
+
+// Return an emalloc'd string holding s followed by t.
+// nil counts as empty; if both are empty the result is nil.
+Rune*
+_Strdup2(Rune* s, Rune* t)
+{
+	int ls;
+	int lt;
+	Rune* out;
+	Rune* end;
+
+	ls = _Strlen(s);
+	lt = _Strlen(t);
+	if(ls+lt == 0)
+		return nil;
+	out = _newstr(ls+lt);
+	end = _Stradd(_Stradd(out, s, ls), t, lt);
+	*end = 0;
+	return out;
+}
+
+// Return an emalloc'd copy of s[start:stop]; nil when the range is empty.
+Rune*
+_Strsubstr(Rune* s, int start, int stop)
+{
+	if(start != stop)
+		return _Strndup(s+start, stop-start);
+	return nil;
+}
+
+// Copy n chars from s2 to s1 and return s1+n, the place to continue
+// appending. Nothing is copied when n is 0.
+Rune*
+_Stradd(Rune* s1, Rune* s2, int n)
+{
+	if(n != 0)
+		memmove(s1, s2, n*sizeof(Rune));
+	return s1+n;
+}
+
+// Like strtol, but converting from a Rune* string.
+// Skips leading white space, accepts an optional sign, then converts
+// digits in the given base. Base 0 picks the base from the prefix
+// ("0x"/"0X" is 16, a leading "0" is 8, otherwise 10); base 16 also
+// accepts a "0x"/"0X" prefix. A base below 0 or above 36 converts nothing.
+// If endptr is non-nil, *endptr is set to the first char not converted,
+// or to nptr when no digits were found.
+// On overflow the result is LONG_MAX, or LONG_MIN for a negative number.
+
+//#define LONG_MAX 2147483647L
+//#define LONG_MIN -2147483648L
+
+long
+_Strtol(Rune* nptr, Rune** endptr, int base)
+{
+	Rune* p;
+	long n;
+	int c, ovfl, v, neg, ndig;
+
+	p = nptr;
+	neg = 0;
+	n = 0;
+	ndig = 0;
+	ovfl = 0;
+
+	/*
+	 * White space
+	 */
+	for(;;p++){
+		switch(*p){
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			continue;
+		}
+		break;
+	}
+
+	/*
+	 * Sign
+	 */
+	if(*p=='-' || *p=='+')
+		if(*p++ == '-')
+			neg = 1;
+
+	/*
+	 * Base
+	 */
+	if(base==0){
+		if(*p != '0')
+			base = 10;
+		else{
+			base = 8;
+			if(p[1]=='x' || p[1]=='X'){
+				p += 2;
+				base = 16;
+			}
+		}
+	}else if(base==16 && *p=='0'){
+		if(p[1]=='x' || p[1]=='X')
+			p += 2;
+	}else if(base<0 || 36<base)
+		goto Return;
+
+	/*
+	 * Non-empty sequence of digits
+	 */
+	for(;; p++,ndig++){
+		c = *p;
+		v = base;
+		if('0'<=c && c<='9')
+			v = c - '0';
+		else if('a'<=c && c<='z')
+			v = c - 'a' + 10;
+		else if('A'<=c && c<='Z')
+			v = c - 'A' + 10;
+		if(v >= base)
+			break;
+		/*
+		 * Once overflow is seen, keep consuming digits (so that
+		 * endptr is right) but leave n alone.
+		 */
+		if(ovfl)
+			continue;
+		/*
+		 * Test before multiplying: signed overflow is undefined,
+		 * so it cannot be detected after the fact.
+		 */
+		if(n > (LONG_MAX - v)/base)
+			ovfl = 1;
+		else
+			n = n*base + v;
+	}
+
+ Return:
+	if(ndig == 0)
+		p = nptr;
+	if(endptr)
+		*endptr = p;
+	if(ovfl){
+		if(neg)
+			return LONG_MIN;
+		return LONG_MAX;
+	}
+	if(neg)
+		return -n;
+	return n;
+}
+
+// Convert the n bytes at buf, encoded in character set chset, into an
+// emalloc'd null-terminated Unicode (Rune) string.
+// US_Ascii and ISO_8859_1 bytes map one-to-one to runes; UTF_8 is
+// decoded with chartorune. Any other chset trips an assert.
+Rune*
+toStr(uchar* buf, int n, int chset)
+{
+	Rune* out;
+	Rune r;
+	int pos;
+	int count;
+
+	if(chset == US_Ascii || chset == ISO_8859_1) {
+		out = (Rune*)emalloc((n+1)*sizeof(Rune));
+		for(pos = 0; pos < n; pos++)
+			out[pos] = buf[pos];
+		out[n] = 0;
+		return out;
+	}
+	if(chset == UTF_8) {
+		// first pass: count the runes so the result can be sized
+		count = 0;
+		pos = 0;
+		while(pos < n) {
+			pos += chartorune(&r, (char*)(buf+pos));
+			count++;
+		}
+		out = (Rune*)emalloc((count+1)*sizeof(Rune));
+		// second pass: decode into the result
+		count = 0;
+		pos = 0;
+		while(pos < n) {
+			pos += chartorune(&r, (char*)(buf+pos));
+			out[count++] = r;
+		}
+		out[count] = 0;
+		return out;
+	}
+	assert(0);
+	return nil;
+}
+
+// Convert the n Unicode characters at buf into an emalloc'd
+// null-terminated byte string in character set chset.
+// For US_Ascii (limit 127) and ISO_8859_1 (limit 255), characters above
+// the limit become 0x80. UTF_8 is encoded with runetochar.
+// Any other chset trips an assert (and nil is returned).
+uchar*
+fromStr(Rune* buf, int n, int chset)
+{
+	uchar* out;
+	uchar* w;
+	uchar scratch[UTFmax];
+	Rune r;
+	int k;
+	int limit;
+	int nbytes;
+
+	if(chset == US_Ascii || chset == ISO_8859_1) {
+		out = (uchar*)emalloc(n+1);
+		limit = 255;
+		if(chset == US_Ascii)
+			limit = 127;
+		for(k = 0; k < n; k++) {
+			r = buf[k];
+			if(r > limit)
+				r = 0x80;
+			out[k] = r;
+		}
+		out[n] = 0;
+		return out;
+	}
+	if(chset == UTF_8) {
+		// first pass: measure the encoded length
+		nbytes = 0;
+		for(k = 0; k < n; k++)
+			nbytes += runetochar((char*)scratch, &buf[k]);
+		out = (uchar*)emalloc(nbytes+1);
+		// second pass: encode into the result
+		w = out;
+		for(k = 0; k < n; k++)
+			w += runetochar((char*)w, &buf[k]);
+		*w = 0;
+		return out;
+	}
+	assert(0);
+	return nil;
+}
+
+// Return the decimal representation of n as an emalloc'd Rune string.
+Rune*
+_ltoStr(int n)
+{
+	uchar digits[20];
+	int len;
+
+	len = snprint((char*)digits, sizeof(digits), "%d", n);
+	return toStr(digits, len, US_Ascii);
+}