From 7cf289ca89a7416999ae02330236042b0d37e3db Mon Sep 17 00:00:00 2001 From: wkj Date: Tue, 6 Apr 2004 19:06:52 +0000 Subject: Import version of libhtml that might actually work with ANSI C. --- src/libhtml/build.c | 4238 +++++++++++++++++++++++++++++++++++++++++++++++ src/libhtml/impl.h | 163 ++ src/libhtml/lex.c | 1384 ++++++++++++++++ src/libhtml/mkfile | 22 + src/libhtml/runetab.c | 83 + src/libhtml/runetab.h | 59 + src/libhtml/strinttab.c | 64 + src/libhtml/utils.c | 591 +++++++ 8 files changed, 6604 insertions(+) create mode 100644 src/libhtml/build.c create mode 100644 src/libhtml/impl.h create mode 100644 src/libhtml/lex.c create mode 100644 src/libhtml/mkfile create mode 100644 src/libhtml/runetab.c create mode 100644 src/libhtml/runetab.h create mode 100644 src/libhtml/strinttab.c create mode 100644 src/libhtml/utils.c (limited to 'src/libhtml') diff --git a/src/libhtml/build.c b/src/libhtml/build.c new file mode 100644 index 00000000..32e64015 --- /dev/null +++ b/src/libhtml/build.c @@ -0,0 +1,4238 @@
+// NOTE(review): the five bare #include lines below appear to have lost their
+// <header> names when this patch was extracted -- confirm against upstream.
+#include
+#include
+#include
+#include
+#include
+#include "impl.h"
+
+// A stack for holding integer values
+enum {
+	Nestmax = 40	// max nesting level of lists, font styles, etc.
+};
+
+// Fixed-capacity integer stack; Pstate keeps one per nestable property.
+struct Stack {
+	int	n;			// next available slot (top of stack is slots[n-1])
+	int	slots[Nestmax];	// stack entries
+};
+
+// Parsing state
+struct Pstate
+{
+	Pstate*	next;		// in stack of Pstates
+	int	skipping;	// true when we shouldn't add items
+	int	skipwhite;	// true when we should strip leading space
+	int	curfont;	// font index for current font
+	int	curfg;		// current foreground color
+	Background	curbg;	// current background
+	int	curvoff;	// current baseline offset
+	uchar	curul;		// current underline/strike state
+	uchar	curjust;	// current justify state
+	int	curanchor;	// current (href) anchor id (if in one), or 0
+	int	curstate;	// current value of item state
+	int	literal;	// current literal state
+	int	inpar;		// true when in a paragraph-like construct
+	int	adjsize;	// current font size adjustment
+	Item*	items;		// dummy head of item list we're building
+	Item*	lastit;		// tail of item list we're building
+	Item*	prelastit;	// item before lastit
+	Stack	fntstylestk;	// style stack
+	Stack	fntsizestk;	// size stack
+	Stack	fgstk;		// text color stack
+	Stack	ulstk;		// underline stack
+	Stack	voffstk;	// vertical offset stack
+	Stack	listtypestk;	// list type stack
+	Stack	listcntstk;	// list counter stack
+	Stack	juststk;	// justification stack
+	Stack	hangstk;	// hanging stack
+};
+
+// Per-document builder state: the document being filled in, the stack of
+// parsing states, running id counters, and the currently open form, map,
+// table stack and frameset stack.
+struct ItemSource
+{
+	Docinfo*	doc;
+	Pstate*	psstk;
+	int	nforms;
+	int	ntables;
+	int	nanchors;
+	int	nframes;
+	Form*	curform;
+	Map*	curmap;
+	Table*	tabstk;
+	Kidinfo*	kidstk;
+};
+
+// Some layout parameters
+enum {
+	FRKIDMARGIN = 6,	// default margin around kid frames
+	IMGHSPACE = 0,	// default hspace for images (0 matches IE, Netscape)
+	IMGVSPACE = 0,	// default vspace for images
+	FLTIMGHSPACE = 2,	// default hspace for float images
+	TABSP = 5,	// default cellspacing for tables
+	TABPAD = 1,	// default cell padding for tables
+	LISTTAB = 1,	// number of tabs to indent lists
+	BQTAB = 1,	// number of tabs to indent blockquotes
+	HRSZ = 2,	// thickness of horizontal rules
+	SUBOFF = 4,	// vertical offset for subscripts
+	SUPOFF = 6,	// vertical offset for superscripts
+	NBSP = 160	// non-breaking space character
+};
+
+// These tables must be sorted.
+//
+// Each table has two parts: a static AsciiInt array (_xxx_tab) holding the
+// keys as C strings, and a StringInt pointer (xxx_tab) that buildinit()
+// fills in from it via cvtstringinttab().  The Nxxx macros must therefore
+// count the entries of the _xxx_tab ARRAY: sizeof applied to the xxx_tab
+// POINTER yields the size of a pointer, not of the table, which made every
+// count come out as 0 and every table lookup fall back to its default.
+static StringInt *align_tab;
+static AsciiInt _align_tab[] = {
+	{"baseline",	ALbaseline},
+	{"bottom",	ALbottom},
+	{"center",	ALcenter},
+	{"char",	ALchar},
+	{"justify",	ALjustify},
+	{"left",	ALleft},
+	{"middle",	ALmiddle},
+	{"right",	ALright},
+	{"top",	ALtop}
+};
+#define NALIGNTAB nelem(_align_tab)
+
+static StringInt *input_tab;
+static AsciiInt _input_tab[] = {
+	{"button",	Fbutton},
+	{"checkbox",	Fcheckbox},
+	{"file",	Ffile},
+	{"hidden",	Fhidden},
+	{"image",	Fimage},
+	{"password",	Fpassword},
+	{"radio",	Fradio},
+	{"reset",	Freset},
+	{"submit",	Fsubmit},
+	{"text",	Ftext}
+};
+#define NINPUTTAB nelem(_input_tab)
+
+static StringInt *clear_tab;
+static AsciiInt _clear_tab[] = {
+	{"all",	IFcleft|IFcright},
+	{"left",	IFcleft},
+	{"right",	IFcright}
+};
+#define NCLEARTAB nelem(_clear_tab)
+
+static StringInt *fscroll_tab;
+static AsciiInt _fscroll_tab[] = {
+	{"auto",	FRhscrollauto|FRvscrollauto},
+	{"no",	FRnoscroll},
+	{"yes",	FRhscroll|FRvscroll},
+};
+#define NFSCROLLTAB nelem(_fscroll_tab)
+
+static StringInt *shape_tab;
+static AsciiInt _shape_tab[] = {
+	{"circ",	SHcircle},
+	{"circle",	SHcircle},
+	{"poly",	SHpoly},
+	{"polygon",	SHpoly},
+	{"rect",	SHrect},
+	{"rectangle",	SHrect}
+};
+#define NSHAPETAB nelem(_shape_tab)
+
+static StringInt *method_tab;
+static AsciiInt _method_tab[] = {
+	{"get",	HGet},
+	{"post",	HPost}
+};
+#define NMETHODTAB nelem(_method_tab)
+
+// Roman numerals for 1..NROMAN; roman is filled in from _roman by buildinit()
+static Rune** roman;
+static char* _roman[15]= {
+	"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
+	"XI", "XII", "XIII", "XIV", "XV"
+};
+#define NROMAN 15
+
+// List number types
+enum {
+	LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI
+};
+
+enum { + SPBefore = 2, + SPAfter = 4, + BL = 1, + BLBA = (BL|SPBefore|SPAfter) +}; + +// blockbrk[tag] is break info for a block level element, or one +// of a few others that get the same treatment re ending open paragraphs +// and requiring a line break / vertical space before them. +// If we want a line of space before the given element, SPBefore is OR'd in. +// If we want a line of space after the given element, SPAfter is OR'd in. + +static uchar blockbrk[Numtags]= { + [Taddress] BLBA, [Tblockquote] BLBA, [Tcenter] BL, + [Tdir] BLBA, [Tdiv] BL, [Tdd] BL, [Tdl] BLBA, + [Tdt] BL, [Tform] BLBA, + // headings and tables get breaks added manually + [Th1] BL, [Th2] BL, [Th3] BL, + [Th4] BL, [Th5] BL, [Th6] BL, + [Thr] BL, [Tisindex] BLBA, [Tli] BL, [Tmenu] BLBA, + [Tol] BLBA, [Tp] BLBA, [Tpre] BLBA, + [Tul] BLBA +}; + +enum { + AGEN = 1 +}; + +// attrinfo is information about attributes. +// The AGEN value means that the attribute is generic (applies to almost all elements) +static uchar attrinfo[Numattrs]= { + [Aid] AGEN, [Aclass] AGEN, [Astyle] AGEN, [Atitle] AGEN, + [Aonblur] AGEN, [Aonchange] AGEN, [Aonclick] AGEN, + [Aondblclick] AGEN, [Aonfocus] AGEN, [Aonkeypress] AGEN, + [Aonkeyup] AGEN, [Aonload] AGEN, [Aonmousedown] AGEN, + [Aonmousemove] AGEN, [Aonmouseout] AGEN, [Aonmouseover] AGEN, + [Aonmouseup] AGEN, [Aonreset] AGEN, [Aonselect] AGEN, + [Aonsubmit] AGEN, [Aonunload] AGEN +}; + +static uchar scriptev[Numattrs]= { + [Aonblur] SEonblur, [Aonchange] SEonchange, [Aonclick] SEonclick, + [Aondblclick] SEondblclick, [Aonfocus] SEonfocus, [Aonkeypress] SEonkeypress, + [Aonkeyup] SEonkeyup, [Aonload] SEonload, [Aonmousedown] SEonmousedown, + [Aonmousemove] SEonmousemove, [Aonmouseout] SEonmouseout, [Aonmouseover] SEonmouseover, + [Aonmouseup] SEonmouseup, [Aonreset] SEonreset, [Aonselect] SEonselect, + [Aonsubmit] SEonsubmit, [Aonunload] SEonunload +}; + +// Color lookup table +static StringInt *color_tab; +static AsciiInt _color_tab[] = { + {"aqua", 
0x00FFFF}, + {"black", 0x000000}, + {"blue", 0x0000CC}, + {"fuchsia", 0xFF00FF}, + {"gray", 0x808080}, + {"green", 0x008000}, + {"lime", 0x00FF00}, + {"maroon", 0x800000}, + {"navy", 0x000080,}, + {"olive", 0x808000}, + {"purple", 0x800080}, + {"red", 0xFF0000}, + {"silver", 0xC0C0C0}, + {"teal", 0x008080}, + {"white", 0xFFFFFF}, + {"yellow", 0xFFFF00} +}; +#define NCOLORS (sizeof(color_tab)/sizeof(StringInt)) + +static StringInt *targetmap; +static int targetmapsize; +static int ntargets; + +static int buildinited = 0; + +#define SMALLBUFSIZE 240 +#define BIGBUFSIZE 2000 + +int dbgbuild = 0; +int warn = 0; + +static Align aalign(Token* tok); +static int acolorval(Token* tok, int attid, int dflt); +static void addbrk(Pstate* ps, int sp, int clr); +static void additem(Pstate* ps, Item* it, Token* tok); +static void addlinebrk(Pstate* ps, int clr); +static void addnbsp(Pstate* ps); +static void addtext(Pstate* ps, Rune* s); +static Dimen adimen(Token* tok, int attid); +static int aflagval(Token* tok, int attid); +static int aintval(Token* tok, int attid, int dflt); +static Rune* astrval(Token* tok, int attid, Rune* dflt); +static int atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt); +static int atargval(Token* tok, int dflt); +static int auintval(Token* tok, int attid, int dflt); +static Rune* aurlval(Token* tok, int attid, Rune* dflt, Rune* base); +static Rune* aval(Token* tok, int attid); +static void buildinit(void); +static Pstate* cell_pstate(Pstate* oldps, int ishead); +static void changehang(Pstate* ps, int delta); +static void changeindent(Pstate* ps, int delta); +static int color(Rune* s, int dflt); +static void copystack(Stack* tostk, Stack* fromstk); +static int dimprint(char* buf, int nbuf, Dimen d); +static Pstate* finishcell(Table* curtab, Pstate* psstk); +static void finish_table(Table* t); +static void freeanchor(Anchor* a); +static void freedestanchor(DestAnchor* da); +static void freeform(Form* f); +static void 
freeformfield(Formfield* ff); +static void freeitem(Item* it); +static void freepstate(Pstate* p); +static void freepstatestack(Pstate* pshead); +static void freescriptevents(SEvent* ehead); +static void freetable(Table* t); +static Map* getmap(Docinfo* di, Rune* name); +static Rune* getpcdata(Token* toks, int tokslen, int* ptoki); +static Pstate* lastps(Pstate* psl); +static Rune* listmark(uchar ty, int n); +static int listtyval(Token* tok, int dflt); +static Align makealign(int halign, int valign); +static Background makebackground(Rune* imgurl, int color); +static Dimen makedimen(int kind, int spec); +static Anchor* newanchor(int index, Rune* name, Rune* href, int target, Anchor* link); +static Area* newarea(int shape, Rune* href, int target, Area* link); +static DestAnchor* newdestanchor(int index, Rune* name, Item* item, DestAnchor* link); +static Docinfo* newdocinfo(void); +static Genattr* newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events); +static Form* newform(int formid, Rune* name, Rune* action, + int target, int method, Form* link); +static Formfield* newformfield(int ftype, int fieldid, Form* form, Rune* name, + Rune* value, int size, int maxlength, Formfield* link); +static Item* newifloat(Item* it, int side); +static Item* newiformfield(Formfield* ff); +static Item* newiimage(Rune* src, Rune* altrep, int align, int width, int height, + int hspace, int vspace, int border, int ismap, Map* map); +static Item* newirule(int align, int size, int noshade, Dimen wspec); +static Item* newispacer(int spkind); +static Item* newitable(Table* t); +static ItemSource* newitemsource(Docinfo* di); +static Item* newitext(Rune* s, int fnt, int fg, int voff, int ul); +static Kidinfo* newkidinfo(int isframeset, Kidinfo* link); +static Option* newoption(int selected, Rune* value, Rune* display, Option* link); +static Pstate* newpstate(Pstate* link); +static SEvent* newscriptevent(int type, Rune* script, SEvent* link); +static Table* newtable(int 
tableid, Align align, Dimen width, int border, + int cellspacing, int cellpadding, Background bg, Token* tok, Table* link); +static Tablecell* newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, + int hspec, Background bg, int flags, Tablecell* link); +static Tablerow* newtablerow(Align align, Background bg, int flags, Tablerow* link); +static Dimen parsedim(Rune* s, int ns); +static void pop(Stack* stk); +static void popfontsize(Pstate* ps); +static void popfontstyle(Pstate* ps); +static void popjust(Pstate* ps); +static int popretnewtop(Stack* stk, int dflt); +static int push(Stack* stk, int val); +static void pushfontsize(Pstate* ps, int sz); +static void pushfontstyle(Pstate* ps, int sty); +static void pushjust(Pstate* ps, int j); +static Item* textit(Pstate* ps, Rune* s); +static Rune* removeallwhite(Rune* s); +static void resetdocinfo(Docinfo* d); +static void setcurfont(Pstate* ps); +static void setcurjust(Pstate* ps); +static void setdimarray(Token* tok, int attid, Dimen** pans, int* panslen); +static Rune* stringalign(int a); +static void targetmapinit(void); +static int toint(Rune* s); +static int top(Stack* stk, int dflt); +static void trim_cell(Tablecell* c); +static int validalign(Align a); +static int validdimen(Dimen d); +static int validformfield(Formfield* f); +static int validhalign(int a); +static int validptr(void* p); +static int validStr(Rune* s); +static int validtable(Table* t); +static int validtablerow(Tablerow* r); +static int validtablecol(Tablecol* c); +static int validtablecell(Tablecell* c); +static int validvalign(int a); +static int Iconv(Fmt *f); + +static void +buildinit(void) +{ + runetabinit(); + roman = cvtstringtab(_roman, nelem(_roman)); + color_tab = cvtstringinttab(_color_tab, nelem(_color_tab)); + method_tab = cvtstringinttab(_method_tab, nelem(_method_tab)); + shape_tab = cvtstringinttab(_shape_tab, nelem(_shape_tab)); + fscroll_tab = cvtstringinttab(_fscroll_tab, nelem(_fscroll_tab)); + clear_tab 
= cvtstringinttab(_clear_tab, nelem(_clear_tab)); + input_tab = cvtstringinttab(_input_tab, nelem(_input_tab)); + align_tab = cvtstringinttab(_align_tab, nelem(_align_tab)); + + fmtinstall('I', Iconv); + targetmapinit(); + buildinited = 1; +} + +static ItemSource* +newitemsource(Docinfo* di) +{ + ItemSource* is; + Pstate* ps; + + ps = newpstate(nil); + if(di->mediatype != TextHtml) { + ps->curstate &= ~IFwrap; + ps->literal = 1; + pushfontstyle(ps, FntT); + } + is = (ItemSource*)emalloc(sizeof(ItemSource)); + is->doc = di; + is->psstk = ps; + is->nforms = 0; + is->ntables = 0; + is->nanchors = 0; + is->nframes = 0; + is->curform = nil; + is->curmap = nil; + is->tabstk = nil; + is->kidstk = nil; + return is; +} + +static Item *getitems(ItemSource* is, uchar* data, int datalen); + +// Parse an html document and create a list of layout items. +// Allocate and return document info in *pdi. +// When caller is done with the items, it should call +// freeitems on the returned result, and then +// freedocinfo(*pdi). +Item* +parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi) +{ + Item *it; + Docinfo* di; + ItemSource* is; + + di = newdocinfo(); + di->src = _Strdup(pagesrc); + di->base = _Strdup(pagesrc); + di->mediatype = mtype; + di->chset = chset; + *pdi = di; + is = newitemsource(di); + it = getitems(is, data, datalen); + freepstatestack(is->psstk); + free(is); + return it; +} + +// Get a group of tokens for lexer, parse them, and create +// a list of layout items. +// When caller is done with the items, it should call +// freeitems on the returned result. 
+static Item* +getitems(ItemSource* is, uchar* data, int datalen) +{ + int i; + int j; + int nt; + int pt; + int doscripts; + int tokslen; + int toki; + int h; + int sz; + int method; + int n; + int nblank; + int norsz; + int bramt; + int sty; + int nosh; + int oldcuranchor; + int dfltbd; + int v; + int hang; + int isempty; + int tag; + int brksp; + int target; + uchar brk; + uchar flags; + uchar align; + uchar al; + uchar ty; + uchar ty2; + Pstate* ps; + Pstate* nextps; + Pstate* outerps; + Table* curtab; + Token* tok; + Token* toks; + Docinfo* di; + Item* ans; + Item* img; + Item* ffit; + Item* tabitem; + Rune* s; + Rune* t; + Rune* name; + Rune* enctype; + Rune* usemap; + Rune* prompt; + Rune* equiv; + Rune* val; + Rune* nsz; + Rune* script; + Map* map; + Form* frm; + Iimage* ii; + Kidinfo* kd; + Kidinfo* ks; + Kidinfo* pks; + Dimen wd; + Option* option; + Table* tab; + Tablecell* c; + Tablerow* tr; + Formfield* field; + Formfield* ff; + Rune* href; + Rune* src; + Rune* scriptsrc; + Rune* bgurl; + Rune* action; + Background bg; + + if(!buildinited) + buildinit(); + doscripts = 0; // for now + ps = is->psstk; + curtab = is->tabstk; + di = is->doc; + toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen); + toki = 0; + for(; toki < tokslen; toki++) { + tok = &toks[toki]; + if(dbgbuild > 1) + fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok); + tag = tok->tag; + brk = 0; + brksp = 0; + if(tag < Numtags) { + brk = blockbrk[tag]; + if(brk&SPBefore) + brksp = 1; + } + else if(tag < Numtags + RBRA) { + brk = blockbrk[tag - RBRA]; + if(brk&SPAfter) + brksp = 1; + } + if(brk) { + addbrk(ps, brksp, 0); + if(ps->inpar) { + popjust(ps); + ps->inpar = 0; + } + } + // check common case first (Data), then switch statement on tag + if(tag == Data) { + // Lexing didn't pay attention to SGML record boundary rules: + // \n after start tag or before end tag to be discarded. + // (Lex has already discarded all \r's). 
+			// Some pages assume this doesn't happen in <PRE> text,
+			// so we won't do it if literal is true.
+			// BUG: won't discard \n before a start tag that begins
+			// the next bufferful of tokens.
+			s = tok->text;
+			n = _Strlen(s);
+			if(!ps->literal) {
+				i = 0;
+				j = n;
+				if(toki > 0) {
+					pt = toks[toki - 1].tag;
+					// IE and Netscape both ignore this rule (contrary to spec)
+					// if previous tag was img
+					if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
+						i++;
+				}
+				if(toki < tokslen - 1) {
+					nt = toks[toki + 1].tag;
+					if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
+						j--;
+				}
+				if(i > 0 || j < n) {
+					t = s;
+					s = _Strsubstr(s, i, j);
+					free(t);
+					n = j-i;
+				}
+			}
+			if(ps->skipwhite) {
+				_trimwhite(s, n, &t, &nt);
+				if(t == nil) {
+					free(s);
+					s = nil;
+				}
+				else if(t != s) {
+					t = _Strndup(t, nt);
+					free(s);
+					s = t;
+				}
+				if(s != nil)
+					ps->skipwhite = 0;
+			}
+			tok->text = nil;		// token doesn't own string anymore
+			if(s != nil)
+				addtext(ps, s);
+		}
+		else
+			switch(tag) {
+			// Some abbrevs used in following DTD comments
+			// %text = 	#PCDATA
+			//		| TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
+			//		| EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
+			//		| A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
+			//		| INPUT | SELECT | TEXTAREA
+			// %block = P | UL | OL | DIR | MENU | DL | PRE | DL | DIV | CENTER
+			//		| BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
+			// %flow = (%text | %block)*
+			// %body.content = (%heading | %text | %block | ADDRESS)*
+
+			// 
+			// Anchors are not supposed to be nested, but you sometimes see
+			// href anchors inside destination anchors.
+			case Ta:
+				if(ps->curanchor != 0) {
+					if(warn)
+						fprint(2, "warning: nested  or missing \n");
+					ps->curanchor = 0;
+				}
+				name = aval(tok, Aname);
+				href = aurlval(tok, Ahref, nil, di->base);
+				// ignore rel, rev, and title attrs
+				if(href != nil) {
+					target = atargval(tok, di->target);
+					di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
+					if(name != nil)
+						name = _Strdup(name);	// for DestAnchor construction, below
+					ps->curanchor = is->nanchors;
+					ps->curfg = push(&ps->fgstk, di->link);
+					ps->curul = push(&ps->ulstk, ULunder);
+				}
+				if(name != nil) {
+					// add a null item to be destination
+					additem(ps, newispacer(ISPnull), tok);
+					di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
+				}
+				break;
+
+			case Ta+RBRA :
+				if(ps->curanchor != 0) {
+					ps->curfg = popretnewtop(&ps->fgstk, di->text);
+					ps->curul = popretnewtop(&ps->ulstk, ULnone);
+					ps->curanchor = 0;
+				}
+				break;
+
+			// 
+			// We can't do applets, so ignore PARAMS, and let
+			// the %text contents appear for the alternative rep
+			case Tapplet:
+			case Tapplet+RBRA:
+				if(warn && tag == Tapplet)
+					fprint(2, "warning:  ignored\n");
+				break;
+
+			// 
+			case Tarea:
+				map = di->maps;
+				if(map == nil) {
+					if(warn)
+						fprint(2, "warning:  not inside \n");
+					continue;
+				}
+				map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
+					aurlval(tok, Ahref, nil, di->base),
+					atargval(tok, di->target),
+					map->areas);
+				setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
+				break;
+
+			// 
+			case Tb:
+			case Tstrong:
+				pushfontstyle(ps, FntB);
+				break;
+
+			case Tb+RBRA:
+			case Tcite+RBRA:
+			case Tcode+RBRA:
+			case Tdfn+RBRA:
+			case Tem+RBRA:
+			case Tkbd+RBRA:
+			case Ti+RBRA:
+			case Tsamp+RBRA:
+			case Tstrong+RBRA:
+			case Ttt+RBRA:
+			case Tvar+RBRA :
+			case Taddress+RBRA:
+				popfontstyle(ps);
+				break;
+
+			// 
+			case Tbase:
+				t = di->base;
+				di->base = aurlval(tok, Ahref, di->base, di->base);
+				if(t != nil)
+					free(t);
+				di->target = atargval(tok, di->target);
+				break;
+
+			// 
+			case Tbasefont:
+				ps->adjsize = aintval(tok, Asize, 3) - 3;
+				break;
+
+			// 
+			case Tbig:
+			case Tsmall:
+				sz = ps->adjsize;
+				if(tag == Tbig)
+					sz += Large;
+				else
+					sz += Small;
+				pushfontsize(ps, sz);
+				break;
+
+			case Tbig+RBRA:
+			case Tsmall+RBRA:
+				popfontsize(ps);
+				break;
+
+			// 
+			case Tblockquote:
+				changeindent(ps, BQTAB);
+				break;
+
+			case Tblockquote+RBRA:
+				changeindent(ps, -BQTAB);
+				break;
+
+			// 
+			case Tbody:
+				ps->skipping = 0;
+				bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
+				bgurl = aurlval(tok, Abackground, nil, di->base);
+				if(bgurl != nil) {
+					if(di->backgrounditem != nil)
+						freeitem((Item*)di->backgrounditem);
+						// really should remove old item from di->images list,
+						// but there should only be one BODY element ...
+					di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
+					di->backgrounditem->nextimage = di->images;
+					di->images = di->backgrounditem;
+				}
+				ps->curbg = bg;
+				di->background = bg;
+				di->text = acolorval(tok, Atext, di->text);
+				di->link = acolorval(tok, Alink, di->link);
+				di->vlink = acolorval(tok, Avlink, di->vlink);
+				di->alink = acolorval(tok, Aalink, di->alink);
+				if(di->text != ps->curfg) {
+					ps->curfg = di->text;
+					ps->fgstk.n = 0;
+				}
+				break;
+
+			case Tbody+RBRA:
+				// HTML spec says ignore things after ,
+				// but IE and Netscape don't
+				// ps.skipping = 1;
+				break;
+
+			// 
+			case Tbr:
+				addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
+				break;
+
+			// 
+			case Tcaption:
+				if(curtab == nil) {
+					if(warn)
+						fprint(2, "warning:  outside \n");
+					continue;
+				}
+				if(curtab->caption != nil) {
+					if(warn)
+						fprint(2, "warning: more than one 
in \n"); + continue; + } + ps = newpstate(ps); + curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop); + break; + + case Tcaption+RBRA: + nextps = ps->next; + if(curtab == nil || nextps == nil) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + curtab->caption = ps->items->next; + free(ps); + ps = nextps; + break; + + case Tcenter: + case Tdiv: + if(tag == Tcenter) + al = ALcenter; + else + al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust); + pushjust(ps, al); + break; + + case Tcenter+RBRA: + case Tdiv+RBRA: + popjust(ps); + break; + + // + case Tdd: + if(ps->hangstk.n == 0) { + if(warn) + fprint(2, "warning:
not inside hangstk, 0); + if(h != 0) + changehang(ps, -10*LISTTAB); + else + addbrk(ps, 0, 0); + push(&ps->hangstk, 0); + break; + + // + // + case Tdir: + case Tmenu: + case Tol: + case Tul: + changeindent(ps, LISTTAB); + push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc)); + push(&ps->listcntstk, aintval(tok, Astart, 1)); + break; + + case Tdir+RBRA: + case Tmenu+RBRA: + case Tol+RBRA: + case Tul+RBRA: + if(ps->listtypestk.n == 0) { + if(warn) + fprint(2, "warning: %T ended no list\n", tok); + continue; + } + addbrk(ps, 0, 0); + pop(&ps->listtypestk); + pop(&ps->listcntstk); + changeindent(ps, -LISTTAB); + break; + + // + case Tdl: + changeindent(ps, LISTTAB); + push(&ps->hangstk, 0); + break; + + case Tdl+RBRA: + if(ps->hangstk.n == 0) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + changeindent(ps, -LISTTAB); + if(top(&ps->hangstk, 0) != 0) + changehang(ps, -10*LISTTAB); + pop(&ps->hangstk); + break; + + // + case Tdt: + if(ps->hangstk.n == 0) { + if(warn) + fprint(2, "warning:
not inside
\n"); + continue; + } + h = top(&ps->hangstk, 0); + pop(&ps->hangstk); + if(h != 0) + changehang(ps, -10*LISTTAB); + changehang(ps, 10*LISTTAB); + push(&ps->hangstk, 1); + break; + + // + case Tfont: + sz = top(&ps->fntsizestk, Normal); + if(_tokaval(tok, Asize, &nsz, 0)) { + if(_prefix(L(Lplus), nsz)) + sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize; + else if(_prefix(L(Lminus), nsz)) + sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize; + else if(nsz != nil) + sz = Normal + (_Strtol(nsz, nil, 10) - 3); + } + ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg)); + pushfontsize(ps, sz); + break; + + case Tfont+RBRA: + if(ps->fgstk.n == 0) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + ps->curfg = popretnewtop(&ps->fgstk, di->text); + popfontsize(ps); + break; + + // + case Tform: + if(is->curform != nil) { + if(warn) + fprint(2, "warning:
nested inside another\n"); + continue; + } + action = aurlval(tok, Aaction, di->base, di->base); + s = aval(tok, Aid); + name = astrval(tok, Aname, s); + if(s) + free(s); + target = atargval(tok, di->target); + method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet); + if(warn && _tokaval(tok, Aenctype, &enctype, 0) && + _Strcmp(enctype, L(Lappl_form))) + fprint(2, "form enctype %S not handled\n", enctype); + frm = newform(++is->nforms, name, action, target, method, di->forms); + di->forms = frm; + is->curform = frm; + break; + + case Tform+RBRA: + if(is->curform == nil) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + // put fields back in input order + is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields); + is->curform = nil; + break; + + // + case Tframe: + ks = is->kidstk; + if(ks == nil) { + if(warn) + fprint(2, "warning: not in \n"); + continue; + } + ks->kidinfos = kd = newkidinfo(0, ks->kidinfos); + kd->src = aurlval(tok, Asrc, nil, di->base); + kd->name = aval(tok, Aname); + if(kd->name == nil) { + s = _ltoStr(++is->nframes); + kd->name = _Strdup2(L(Lfr), s); + free(s); + } + kd->marginw = auintval(tok, Amarginwidth, 0); + kd->marginh = auintval(tok, Amarginheight, 0); + kd->framebd = auintval(tok, Aframeborder, 1); + kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags); + norsz = aflagval(tok, Anoresize); + if(norsz) + kd->flags |= FRnoresize; + break; + + // + case Tframeset: + ks = newkidinfo(1, nil); + pks = is->kidstk; + if(pks == nil) + di->kidinfo = ks; + else { + ks->next = pks->kidinfos; + pks->kidinfos = ks; + } + ks->nextframeset = pks; + is->kidstk = ks; + setdimarray(tok, Arows, &ks->rows, &ks->nrows); + if(ks->nrows == 0) { + ks->rows = (Dimen*)emalloc(sizeof(Dimen)); + ks->nrows = 1; + ks->rows[0] = makedimen(Dpercent, 100); + } + setdimarray(tok, Acols, &ks->cols, &ks->ncols); + if(ks->ncols == 0) { + ks->cols = (Dimen*)emalloc(sizeof(Dimen)); + ks->ncols = 1; + ks->cols[0] = 
makedimen(Dpercent, 100); + } + break; + + case Tframeset+RBRA: + if(is->kidstk == nil) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + ks = is->kidstk; + // put kids back in original order + // and add blank frames to fill out cells + n = ks->nrows*ks->ncols; + nblank = n - _listlen((List*)ks->kidinfos); + while(nblank-- > 0) + ks->kidinfos = newkidinfo(0, ks->kidinfos); + ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos); + is->kidstk = is->kidstk->nextframeset; + if(is->kidstk == nil) { + // end input + ans = nil; + goto return_ans; + } + break; + + // , etc. + case Th1: + case Th2: + case Th3: + case Th4: + case Th5: + case Th6: + bramt = 1; + if(ps->items == ps->lastit) + bramt = 0; + addbrk(ps, bramt, IFcleft|IFcright); + sz = Verylarge - (tag - Th1); + if(sz < Tiny) + sz = Tiny; + pushfontsize(ps, sz); + sty = top(&ps->fntstylestk, FntR); + if(tag == Th1) + sty = FntB; + pushfontstyle(ps, sty); + pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust)); + ps->skipwhite = 1; + break; + + case Th1+RBRA: + case Th2+RBRA: + case Th3+RBRA: + case Th4+RBRA: + case Th5+RBRA: + case Th6+RBRA: + addbrk(ps, 1, IFcleft|IFcright); + popfontsize(ps); + popfontstyle(ps); + popjust(ps); + break; + + case Thead: + // HTML spec says ignore regular markup in head, + // but Netscape and IE don't + // ps.skipping = 1; + break; + + case Thead+RBRA: + ps->skipping = 0; + break; + + // + case Thr: + al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter); + sz = auintval(tok, Asize, HRSZ); + wd = adimen(tok, Awidth); + if(dimenkind(wd) == Dnone) + wd = makedimen(Dpercent, 100); + nosh = aflagval(tok, Anoshade); + additem(ps, newirule(al, sz, nosh, wd), tok); + addbrk(ps, 0, 0); + break; + + case Ti: + case Tcite: + case Tdfn: + case Tem: + case Tvar: + case Taddress: + pushfontstyle(ps, FntI); + break; + + // + case Timg: + map = nil; + oldcuranchor = ps->curanchor; + if(_tokaval(tok, Ausemap, &usemap, 0)) { + if(!_prefix(L(Lhash), 
usemap)) { + if(warn) + fprint(2, "warning: can't handle non-local map %S\n", usemap); + } + else { + map = getmap(di, usemap+1); + if(ps->curanchor == 0) { + di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors); + ps->curanchor = is->nanchors; + } + } + } + align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom); + dfltbd = 0; + if(ps->curanchor != 0) + dfltbd = 2; + src = aurlval(tok, Asrc, nil, di->base); + if(src == nil) { + if(warn) + fprint(2, "warning: has no src attribute\n"); + ps->curanchor = oldcuranchor; + continue; + } + img = newiimage(src, + aval(tok, Aalt), + align, + auintval(tok, Awidth, 0), + auintval(tok, Aheight, 0), + auintval(tok, Ahspace, IMGHSPACE), + auintval(tok, Avspace, IMGVSPACE), + auintval(tok, Aborder, dfltbd), + aflagval(tok, Aismap), + map); + if(align == ALleft || align == ALright) { + additem(ps, newifloat(img, align), tok); + // if no hspace specified, use FLTIMGHSPACE + if(!_tokaval(tok, Ahspace, &val, 0)) + ((Iimage*)img)->hspace = FLTIMGHSPACE; + } + else { + ps->skipwhite = 0; + additem(ps, img, tok); + } + if(!ps->skipping) { + ((Iimage*)img)->nextimage = di->images; + di->images = (Iimage*)img; + } + ps->curanchor = oldcuranchor; + break; + + // + case Tinput: + ps->skipwhite = 0; + if(is->curform == nil) { + if(warn) + fprint(2, " not inside
\n"); + continue; + } + is->curform->fields = field = newformfield( + atabval(tok, Atype, input_tab, NINPUTTAB, Ftext), + ++is->curform->nfields, + is->curform, + aval(tok, Aname), + aval(tok, Avalue), + auintval(tok, Asize, 0), + auintval(tok, Amaxlength, 1000), + is->curform->fields); + if(aflagval(tok, Achecked)) + field->flags = FFchecked; + + switch(field->ftype) { + case Ftext: + case Fpassword: + case Ffile: + if(field->size == 0) + field->size = 20; + break; + + case Fcheckbox: + if(field->name == nil) { + if(warn) + fprint(2, "warning: checkbox form field missing name\n"); + continue; + } + if(field->value == nil) + field->value = _Strdup(L(Lone)); + break; + + case Fradio: + if(field->name == nil || field->value == nil) { + if(warn) + fprint(2, "warning: radio form field missing name or value\n"); + continue; + } + break; + + case Fsubmit: + if(field->value == nil) + field->value = _Strdup(L(Lsubmit)); + if(field->name == nil) + field->name = _Strdup(L(Lnoname)); + break; + + case Fimage: + src = aurlval(tok, Asrc, nil, di->base); + if(src == nil) { + if(warn) + fprint(2, "warning: image form field missing src\n"); + continue; + } + // width and height attrs aren't specified in HTML 3.2, + // but some people provide them and they help avoid + // a relayout + field->image = newiimage(src, + astrval(tok, Aalt, L(Lsubmit)), + atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom), + auintval(tok, Awidth, 0), auintval(tok, Aheight, 0), + 0, 0, 0, 0, nil); + ii = (Iimage*)field->image; + ii->nextimage = di->images; + di->images = ii; + break; + + case Freset: + if(field->value == nil) + field->value = _Strdup(L(Lreset)); + break; + + case Fbutton: + if(field->value == nil) + field->value = _Strdup(L(Lspace)); + break; + } + ffit = newiformfield(field); + additem(ps, ffit, tok); + if(ffit->genattr != nil) + field->events = ffit->genattr->events; + break; + + // + case Tisindex: + ps->skipwhite = 0; + prompt = astrval(tok, Aprompt, L(Lindex)); + target = 
atargval(tok, di->target); + additem(ps, textit(ps, prompt), tok); + frm = newform(++is->nforms, + nil, + di->base, + target, + HGet, + di->forms); + di->forms = frm; + ff = newformfield(Ftext, + 1, + frm, + _Strdup(L(Lisindex)), + nil, + 50, + 1000, + nil); + frm->fields = ff; + frm->nfields = 1; + additem(ps, newiformfield(ff), tok); + addbrk(ps, 1, 0); + break; + + // + case Tli: + if(ps->listtypestk.n == 0) { + if(warn) + fprint(2, "
  • not in list\n"); + continue; + } + ty = top(&ps->listtypestk, 0); + ty2 = listtyval(tok, ty); + if(ty != ty2) { + ty = ty2; + push(&ps->listtypestk, ty2); + } + v = aintval(tok, Avalue, top(&ps->listcntstk, 1)); + if(ty == LTdisc || ty == LTsquare || ty == LTcircle) + hang = 10*LISTTAB - 3; + else + hang = 10*LISTTAB - 1; + changehang(ps, hang); + addtext(ps, listmark(ty, v)); + push(&ps->listcntstk, v + 1); + changehang(ps, -hang); + ps->skipwhite = 1; + break; + + // + case Tmap: + if(_tokaval(tok, Aname, &name, 0)) + is->curmap = getmap(di, name); + break; + + case Tmap+RBRA: + map = is->curmap; + if(map == nil) { + if(warn) + fprint(2, "warning: unexpected \n"); + continue; + } + map->areas = (Area*)_revlist((List*)map->areas); + break; + + case Tmeta: + if(ps->skipping) + continue; + if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) { + val = aval(tok, Acontent); + n = _Strlen(equiv); + if(!_Strncmpci(equiv, n, L(Lrefresh))) + di->refresh = val; + else if(!_Strncmpci(equiv, n, L(Lcontent))) { + n = _Strlen(val); + if(!_Strncmpci(val, n, L(Ljavascript)) + || !_Strncmpci(val, n, L(Ljscript1)) + || !_Strncmpci(val, n, L(Ljscript))) + di->scripttype = TextJavascript; + else { + if(warn) + fprint(2, "unimplemented script type %S\n", val); + di->scripttype = UnknownType; + } + } + } + break; + + // Nobr is NOT in HMTL 4.0, but it is ubiquitous on the web + case Tnobr: + ps->skipwhite = 0; + ps->curstate &= ~IFwrap; + break; + + case Tnobr+RBRA: + ps->curstate |= IFwrap; + break; + + // We do frames, so skip stuff in noframes + case Tnoframes: + ps->skipping = 1; + break; + + case Tnoframes+RBRA: + ps->skipping = 0; + break; + + // We do scripts (if enabled), so skip stuff in noscripts + case Tnoscript: + if(doscripts) + ps->skipping = 1; + break; + + case Tnoscript+RBRA: + if(doscripts) + ps->skipping = 0; + break; + + // + case Toption: + if(is->curform == nil || is->curform->fields == nil) { + if(warn) + fprint(2, "warning: