#include #include #include #include #include #include "impl.h" // A stack for holding integer values enum { Nestmax = 40 // max nesting level of lists, font styles, etc. }; struct Stack { int n; // next available slot (top of stack is stack[n-1]) int slots[Nestmax]; // stack entries }; // Parsing state struct Pstate { Pstate* next; // in stack of Pstates int skipping; // true when we shouldn't add items int skipwhite; // true when we should strip leading space int curfont; // font index for current font int curfg; // current foreground color Background curbg; // current background int curvoff; // current baseline offset uchar curul; // current underline/strike state uchar curjust; // current justify state int curanchor; // current (href) anchor id (if in one), or 0 int curstate; // current value of item state int literal; // current literal state int inpar; // true when in a paragraph-like construct int adjsize; // current font size adjustment Item* items; // dummy head of item list we're building Item* lastit; // tail of item list we're building Item* prelastit; // item before lastit Stack fntstylestk; // style stack Stack fntsizestk; // size stack Stack fgstk; // text color stack Stack ulstk; // underline stack Stack voffstk; // vertical offset stack Stack listtypestk; // list type stack Stack listcntstk; // list counter stack Stack juststk; // justification stack Stack hangstk; // hanging stack }; struct ItemSource { Docinfo* doc; Pstate* psstk; int nforms; int ntables; int nanchors; int nframes; Form* curform; Map* curmap; Table* tabstk; Kidinfo* kidstk; }; // Some layout parameters enum { FRKIDMARGIN = 6, // default margin around kid frames IMGHSPACE = 0, // default hspace for images (0 matches IE, Netscape) IMGVSPACE = 0, // default vspace for images FLTIMGHSPACE = 2, // default hspace for float images TABSP = 5, // default cellspacing for tables TABPAD = 1, // default cell padding for tables LISTTAB = 1, // number of tabs to indent lists BQTAB = 1, // number 
of tabs to indent blockquotes HRSZ = 2, // thickness of horizontal rules SUBOFF = 4, // vertical offset for subscripts SUPOFF = 6, // vertical offset for superscripts NBSP = 160 // non-breaking space character }; // These tables must be sorted static StringInt *align_tab; static AsciiInt _align_tab[] = { {"baseline", ALbaseline}, {"bottom", ALbottom}, {"center", ALcenter}, {"char", ALchar}, {"justify", ALjustify}, {"left", ALleft}, {"middle", ALmiddle}, {"right", ALright}, {"top", ALtop} }; #define NALIGNTAB (sizeof(align_tab)/sizeof(StringInt)) static StringInt *input_tab; static AsciiInt _input_tab[] = { {"button", Fbutton}, {"checkbox", Fcheckbox}, {"file", Ffile}, {"hidden", Fhidden}, {"image", Fimage}, {"password", Fpassword}, {"radio", Fradio}, {"reset", Freset}, {"submit", Fsubmit}, {"text", Ftext} }; #define NINPUTTAB (sizeof(input_tab)/sizeof(StringInt)) static StringInt *clear_tab; static AsciiInt _clear_tab[] = { {"all", IFcleft|IFcright}, {"left", IFcleft}, {"right", IFcright} }; #define NCLEARTAB (sizeof(clear_tab)/sizeof(StringInt)) static StringInt *fscroll_tab; static AsciiInt _fscroll_tab[] = { {"auto", FRhscrollauto|FRvscrollauto}, {"no", FRnoscroll}, {"yes", FRhscroll|FRvscroll}, }; #define NFSCROLLTAB (sizeof(fscroll_tab)/sizeof(StringInt)) static StringInt *shape_tab; static AsciiInt _shape_tab[] = { {"circ", SHcircle}, {"circle", SHcircle}, {"poly", SHpoly}, {"polygon", SHpoly}, {"rect", SHrect}, {"rectangle", SHrect} }; #define NSHAPETAB (sizeof(shape_tab)/sizeof(StringInt)) static StringInt *method_tab; static AsciiInt _method_tab[] = { {"get", HGet}, {"post", HPost} }; #define NMETHODTAB (sizeof(method_tab)/sizeof(StringInt)) static Rune** roman; static char* _roman[15]= { "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV" }; #define NROMAN 15 // List number types enum { LTdisc, LTsquare, LTcircle, LT1, LTa, LTA, LTi, LTI }; enum { SPBefore = 2, SPAfter = 4, BL = 1, BLBA = (BL|SPBefore|SPAfter) }; 
// blockbrk[tag] is break info for a block level element, or one // of a few others that get the same treatment re ending open paragraphs // and requiring a line break / vertical space before them. // If we want a line of space before the given element, SPBefore is OR'd in. // If we want a line of space after the given element, SPAfter is OR'd in. static uchar blockbrk[Numtags]= { /*Notfound*/ 0, /*Comment*/ 0, /*Ta*/ 0, /*Tabbr*/ 0, /*Tacronym*/ 0, /*Taddress*/ BLBA, /*Tapplet*/ 0, /*Tarea*/ 0, /*Tb*/ 0, /*Tbase*/ 0, /*Tbasefont*/ 0, /*Tbdo*/ 0, /*Tbig*/ 0, /*Tblink*/ 0, /*Tblockquote*/ BLBA, /*Tbody*/ 0, /*Tbq*/ 0, /*Tbr*/ 0, /*Tbutton*/ 0, /*Tcaption*/ 0, /*Tcenter*/ BL, /*Tcite*/ 0, /*Tcode*/ 0, /*Tcol*/ 0, /*Tcolgroup*/ 0, /*Tdd*/ BL, /*Tdel*/ 0, /*Tdfn*/ 0, /*Tdir*/ BLBA, /*Tdiv*/ BL, /*Tdl*/ BLBA, /*Tdt*/ BL, /*Tem*/ 0, /*Tfieldset*/ 0, /*Tfont*/ 0, /*Tform*/ BLBA, /*Tframe*/ 0, /*Tframeset*/ 0, /*Th1*/ BL, /*Th2*/ BL, /*Th3*/ BL, /*Th4*/ BL, /*Th5*/ BL, /*Th6*/ BL, /*Thead*/ 0, /*Thr*/ BL, /*Thtml*/ 0, /*Ti*/ 0, /*Tiframe*/ 0, /*Timg*/ 0, /*Tinput*/ 0, /*Tins*/ 0, /*Tisindex*/ BLBA, /*Tkbd*/ 0, /*Tlabel*/ 0, /*Tlegend*/ 0, /*Tli*/ BL, /*Tlink*/ 0, /*Tmap*/ 0, /*Tmenu*/ BLBA, /*Tmeta*/ 0, /*Tnobr*/ 0, /*Tnoframes*/ 0, /*Tnoscript*/ 0, /*Tobject*/ 0, /*Tol*/ BLBA, /*Toptgroup*/ 0, /*Toption*/ 0, /*Tp*/ BLBA, /*Tparam*/ 0, /*Tpre*/ BLBA, /*Tq*/ 0, /*Ts*/ 0, /*Tsamp*/ 0, /*Tscript*/ 0, /*Tselect*/ 0, /*Tsmall*/ 0, /*Tspan*/ 0, /*Tstrike*/ 0, /*Tstrong*/ 0, /*Tstyle*/ 0, /*Tsub*/ 0, /*Tsup*/ 0, /*Ttable*/ 0, /*Ttbody*/ 0, /*Ttd*/ 0, /*Ttextarea*/ 0, /*Ttfoot*/ 0, /*Tth*/ 0, /*Tthead*/ 0, /*Ttitle*/ 0, /*Ttr*/ 0, /*Ttt*/ 0, /*Tu*/ 0, /*Tul*/ BLBA, /*Tvar*/ 0, }; enum { AGEN = 1 }; // attrinfo is information about attributes. 
// The AGEN value means that the attribute is generic (applies to almost all elements)
// There is one entry per attribute id, in attribute-id order.
static uchar attrinfo[Numattrs]= {
	/*Aabbr*/ 0,
	/*Aaccept_charset*/ 0,
	/*Aaccess_key*/ 0,
	/*Aaction*/ 0,
	/*Aalign*/ 0,
	/*Aalink*/ 0,
	/*Aalt*/ 0,
	/*Aarchive*/ 0,
	/*Aaxis*/ 0,
	/*Abackground*/ 0,
	/*Abgcolor*/ 0,
	/*Aborder*/ 0,
	/*Acellpadding*/ 0,
	/*Acellspacing*/ 0,
	/*Achar*/ 0,
	/*Acharoff*/ 0,
	/*Acharset*/ 0,
	/*Achecked*/ 0,
	/*Acite*/ 0,
	/*Aclass*/ AGEN,
	/*Aclassid*/ 0,
	/*Aclear*/ 0,
	/*Acode*/ 0,
	/*Acodebase*/ 0,
	/*Acodetype*/ 0,
	/*Acolor*/ 0,
	/*Acols*/ 0,
	/*Acolspan*/ 0,
	/*Acompact*/ 0,
	/*Acontent*/ 0,
	/*Acoords*/ 0,
	/*Adata*/ 0,
	/*Adatetime*/ 0,
	/*Adeclare*/ 0,
	/*Adefer*/ 0,
	/*Adir*/ 0,
	/*Adisabled*/ 0,
	/*Aenctype*/ 0,
	/*Aface*/ 0,
	/*Afor*/ 0,
	/*Aframe*/ 0,
	/*Aframeborder*/ 0,
	/*Aheaders*/ 0,
	/*Aheight*/ 0,
	/*Ahref*/ 0,
	/*Ahreflang*/ 0,
	/*Ahspace*/ 0,
	/*Ahttp_equiv*/ 0,
	/*Aid*/ AGEN,
	/*Aismap*/ 0,
	/*Alabel*/ 0,
	/*Alang*/ 0,
	/*Alink*/ 0,
	/*Alongdesc*/ 0,
	/*Amarginheight*/ 0,
	/*Amarginwidth*/ 0,
	/*Amaxlength*/ 0,
	/*Amedia*/ 0,
	/*Amethod*/ 0,
	/*Amultiple*/ 0,
	/*Aname*/ 0,
	/*Anohref*/ 0,
	/*Anoresize*/ 0,
	/*Anoshade*/ 0,
	/*Anowrap*/ 0,
	/*Aobject*/ 0,
	/*Aonblur*/ AGEN,
	/*Aonchange*/ AGEN,
	/*Aonclick*/ AGEN,
	/*Aondblclick*/ AGEN,
	/*Aonfocus*/ AGEN,
	/*Aonkeypress*/ AGEN,
	/*Aonkeyup*/ AGEN,
	/*Aonload*/ AGEN,
	/*Aonmousedown*/ AGEN,
	/*Aonmousemove*/ AGEN,
	/*Aonmouseout*/ AGEN,
	/*Aonmouseover*/ AGEN,
	/*Aonmouseup*/ AGEN,
	/*Aonreset*/ AGEN,
	/*Aonselect*/ AGEN,
	/*Aonsubmit*/ AGEN,
	/*Aonunload*/ AGEN,
	/*Aprofile*/ 0,
	/*Aprompt*/ 0,
	/*Areadonly*/ 0,
	/*Arel*/ 0,
	/*Arev*/ 0,
	/*Arows*/ 0,
	/*Arowspan*/ 0,
	/*Arules*/ 0,
	/*Ascheme*/ 0,
	/*Ascope*/ 0,
	/*Ascrolling*/ 0,
	/*Aselected*/ 0,
	/*Ashape*/ 0,
	/*Asize*/ 0,
	/*Aspan*/ 0,
	/*Asrc*/ 0,
	/*Astandby*/ 0,
	/*Astart*/ 0,
	/*Astyle*/ AGEN,
	/*Asummary*/ 0,
	/*Atabindex*/ 0,
	/*Atarget*/ 0,
	/*Atext*/ 0,
	/*Atitle*/ AGEN,
	/*Atype*/ 0,
	/*Ausemap*/ 0,
	/*Avalign*/ 0,
	/*Avalue*/ 0,
	/*Avaluetype*/ 0,
	/*Aversion*/ 0,
	/*Avlink*/ 0,
	/*Avspace*/ 0,
	/*Awidth*/ 0,
};

// scriptev gives, for each attribute id, the SE* script event code of
// an on* event attribute; every other attribute has 0.
// It has one entry per attribute id, in the same order as attrinfo.
// NOTE(review): the code that consults this table is not in the part
// of the file reviewed here.
static uchar scriptev[Numattrs]= {
	/*Aabbr*/ 0,
	/*Aaccept_charset*/ 0,
	/*Aaccess_key*/ 0,
	/*Aaction*/ 0,
	/*Aalign*/ 0,
	/*Aalink*/ 0,
	/*Aalt*/ 0,
	/*Aarchive*/ 0,
	/*Aaxis*/ 0,
	/*Abackground*/ 0,
	/*Abgcolor*/ 0,
	/*Aborder*/ 0,
	/*Acellpadding*/ 0,
	/*Acellspacing*/ 0,
	/*Achar*/ 0,
	/*Acharoff*/ 0,
	/*Acharset*/ 0,
	/*Achecked*/ 0,
	/*Acite*/ 0,
	/*Aclass*/ 0,
	/*Aclassid*/ 0,
	/*Aclear*/ 0,
	/*Acode*/ 0,
	/*Acodebase*/ 0,
	/*Acodetype*/ 0,
	/*Acolor*/ 0,
	/*Acols*/ 0,
	/*Acolspan*/ 0,
	/*Acompact*/ 0,
	/*Acontent*/ 0,
	/*Acoords*/ 0,
	/*Adata*/ 0,
	/*Adatetime*/ 0,
	/*Adeclare*/ 0,
	/*Adefer*/ 0,
	/*Adir*/ 0,
	/*Adisabled*/ 0,
	/*Aenctype*/ 0,
	/*Aface*/ 0,
	/*Afor*/ 0,
	/*Aframe*/ 0,
	/*Aframeborder*/ 0,
	/*Aheaders*/ 0,
	/*Aheight*/ 0,
	/*Ahref*/ 0,
	/*Ahreflang*/ 0,
	/*Ahspace*/ 0,
	/*Ahttp_equiv*/ 0,
	/*Aid*/ 0,
	/*Aismap*/ 0,
	/*Alabel*/ 0,
	/*Alang*/ 0,
	/*Alink*/ 0,
	/*Alongdesc*/ 0,
	/*Amarginheight*/ 0,
	/*Amarginwidth*/ 0,
	/*Amaxlength*/ 0,
	/*Amedia*/ 0,
	/*Amethod*/ 0,
	/*Amultiple*/ 0,
	/*Aname*/ 0,
	/*Anohref*/ 0,
	/*Anoresize*/ 0,
	/*Anoshade*/ 0,
	/*Anowrap*/ 0,
	/*Aobject*/ 0,
	/*Aonblur*/ SEonblur,
	/*Aonchange*/ SEonchange,
	/*Aonclick*/ SEonclick,
	/*Aondblclick*/ SEondblclick,
	/*Aonfocus*/ SEonfocus,
	/*Aonkeypress*/ SEonkeypress,
	/*Aonkeyup*/ SEonkeyup,
	/*Aonload*/ SEonload,
	/*Aonmousedown*/ SEonmousedown,
	/*Aonmousemove*/ SEonmousemove,
	/*Aonmouseout*/ SEonmouseout,
	/*Aonmouseover*/ SEonmouseover,
	/*Aonmouseup*/ SEonmouseup,
	/*Aonreset*/ SEonreset,
	/*Aonselect*/ SEonselect,
	/*Aonsubmit*/ SEonsubmit,
	/*Aonunload*/ SEonunload,
	/*Aprofile*/ 0,
	/*Aprompt*/ 0,
	/*Areadonly*/ 0,
	/*Arel*/ 0,
	/*Arev*/ 0,
	/*Arows*/ 0,
	/*Arowspan*/ 0,
	/*Arules*/ 0,
	/*Ascheme*/ 0,
	/*Ascope*/ 0,
	/*Ascrolling*/ 0,
	/*Aselected*/ 0,
	/*Ashape*/ 0,
	/*Asize*/ 0,
	/*Aspan*/ 0,
	/*Asrc*/ 0,
	/*Astandby*/ 0,
	/*Astart*/ 0,
	/*Astyle*/ 0,
	/*Asummary*/ 0,
	/*Atabindex*/ 0,
	/*Atarget*/ 0,
	/*Atext*/ 0,
	/*Atitle*/ 0,
	/*Atype*/ 0,
	/*Ausemap*/ 0,
	/*Avalign*/ 0,
	/*Avalue*/ 0,
	/*Avaluetype*/ 0,
	/*Aversion*/ 0,
	/*Avlink*/ 0,
	/*Avspace*/ 0,
	/*Awidth*/ 0,
};

//
Color lookup table static StringInt *color_tab; static AsciiInt _color_tab[] = { {"aqua", 0x00FFFF}, {"black", 0x000000}, {"blue", 0x0000CC}, {"fuchsia", 0xFF00FF}, {"gray", 0x808080}, {"green", 0x008000}, {"lime", 0x00FF00}, {"maroon", 0x800000}, {"navy", 0x000080,}, {"olive", 0x808000}, {"purple", 0x800080}, {"red", 0xFF0000}, {"silver", 0xC0C0C0}, {"teal", 0x008080}, {"white", 0xFFFFFF}, {"yellow", 0xFFFF00} }; #define NCOLORS (sizeof(color_tab)/sizeof(StringInt)) static StringInt *targetmap; static int targetmapsize; static int ntargets; static int buildinited = 0; #define SMALLBUFSIZE 240 #define BIGBUFSIZE 2000 int dbgbuild = 0; int warn = 0; static Align aalign(Token* tok); static int acolorval(Token* tok, int attid, int dflt); static void addbrk(Pstate* ps, int sp, int clr); static void additem(Pstate* ps, Item* it, Token* tok); static void addlinebrk(Pstate* ps, int clr); static void addnbsp(Pstate* ps); static void addtext(Pstate* ps, Rune* s); static Dimen adimen(Token* tok, int attid); static int aflagval(Token* tok, int attid); static int aintval(Token* tok, int attid, int dflt); static Rune* astrval(Token* tok, int attid, Rune* dflt); static int atabval(Token* tok, int attid, StringInt* tab, int ntab, int dflt); static int atargval(Token* tok, int dflt); static int auintval(Token* tok, int attid, int dflt); static Rune* aurlval(Token* tok, int attid, Rune* dflt, Rune* base); static Rune* aval(Token* tok, int attid); static void buildinit(void); static Pstate* cell_pstate(Pstate* oldps, int ishead); static void changehang(Pstate* ps, int delta); static void changeindent(Pstate* ps, int delta); static int color(Rune* s, int dflt); static void copystack(Stack* tostk, Stack* fromstk); static int dimprint(char* buf, int nbuf, Dimen d); static Pstate* finishcell(Table* curtab, Pstate* psstk); static void finish_table(Table* t); static void freeanchor(Anchor* a); static void freedestanchor(DestAnchor* da); static void freeform(Form* f); static void 
freeformfield(Formfield* ff); static void freeitem(Item* it); static void freepstate(Pstate* p); static void freepstatestack(Pstate* pshead); static void freescriptevents(SEvent* ehead); static void freetable(Table* t); static Map* getmap(Docinfo* di, Rune* name); static Rune* getpcdata(Token* toks, int tokslen, int* ptoki); static Pstate* lastps(Pstate* psl); static Rune* listmark(uchar ty, int n); static int listtyval(Token* tok, int dflt); static Align makealign(int halign, int valign); static Background makebackground(Rune* imgurl, int color); static Dimen makedimen(int kind, int spec); static Anchor* newanchor(int index, Rune* name, Rune* href, int target, Anchor* link); static Area* newarea(int shape, Rune* href, int target, Area* link); static DestAnchor* newdestanchor(int index, Rune* name, Item* item, DestAnchor* link); static Docinfo* newdocinfo(void); static Genattr* newgenattr(Rune* id, Rune* class, Rune* style, Rune* title, SEvent* events); static Form* newform(int formid, Rune* name, Rune* action, int target, int method, Form* link); static Formfield* newformfield(int ftype, int fieldid, Form* form, Rune* name, Rune* value, int size, int maxlength, Formfield* link); static Item* newifloat(Item* it, int side); static Item* newiformfield(Formfield* ff); static Item* newiimage(Rune* src, Rune* altrep, int align, int width, int height, int hspace, int vspace, int border, int ismap, Map* map); static Item* newirule(int align, int size, int noshade, Dimen wspec); static Item* newispacer(int spkind); static Item* newitable(Table* t); static ItemSource* newitemsource(Docinfo* di); static Item* newitext(Rune* s, int fnt, int fg, int voff, int ul); static Kidinfo* newkidinfo(int isframeset, Kidinfo* link); static Option* newoption(int selected, Rune* value, Rune* display, Option* link); static Pstate* newpstate(Pstate* link); static SEvent* newscriptevent(int type, Rune* script, SEvent* link); static Table* newtable(int tableid, Align align, Dimen width, int 
border, int cellspacing, int cellpadding, Background bg, Token* tok, Table* link); static Tablecell* newtablecell(int cellid, int rowspan, int colspan, Align align, Dimen wspec, int hspec, Background bg, int flags, Tablecell* link); static Tablerow* newtablerow(Align align, Background bg, int flags, Tablerow* link); static Dimen parsedim(Rune* s, int ns); static void pop(Stack* stk); static void popfontsize(Pstate* ps); static void popfontstyle(Pstate* ps); static void popjust(Pstate* ps); static int popretnewtop(Stack* stk, int dflt); static int push(Stack* stk, int val); static void pushfontsize(Pstate* ps, int sz); static void pushfontstyle(Pstate* ps, int sty); static void pushjust(Pstate* ps, int j); static Item* textit(Pstate* ps, Rune* s); static Rune* removeallwhite(Rune* s); static void resetdocinfo(Docinfo* d); static void setcurfont(Pstate* ps); static void setcurjust(Pstate* ps); static void setdimarray(Token* tok, int attid, Dimen** pans, int* panslen); static Rune* stringalign(int a); static void targetmapinit(void); static int toint(Rune* s); static int top(Stack* stk, int dflt); static void trim_cell(Tablecell* c); static int validalign(Align a); static int validdimen(Dimen d); static int validformfield(Formfield* f); static int validhalign(int a); static int validptr(void* p); static int validStr(Rune* s); static int validtable(Table* t); static int validtablerow(Tablerow* r); static int validtablecol(Tablecol* c); static int validtablecell(Tablecell* c); static int validvalign(int a); static int Iconv(Fmt *f); static void buildinit(void) { runetabinit(); roman = cvtstringtab(_roman, nelem(_roman)); color_tab = cvtstringinttab(_color_tab, nelem(_color_tab)); method_tab = cvtstringinttab(_method_tab, nelem(_method_tab)); shape_tab = cvtstringinttab(_shape_tab, nelem(_shape_tab)); fscroll_tab = cvtstringinttab(_fscroll_tab, nelem(_fscroll_tab)); clear_tab = cvtstringinttab(_clear_tab, nelem(_clear_tab)); input_tab = cvtstringinttab(_input_tab, 
nelem(_input_tab)); align_tab = cvtstringinttab(_align_tab, nelem(_align_tab)); fmtinstall('I', Iconv); targetmapinit(); buildinited = 1; } static ItemSource* newitemsource(Docinfo* di) { ItemSource* is; Pstate* ps; ps = newpstate(nil); if(di->mediatype != TextHtml) { ps->curstate &= ~IFwrap; ps->literal = 1; pushfontstyle(ps, FntT); } is = (ItemSource*)emalloc(sizeof(ItemSource)); is->doc = di; is->psstk = ps; is->nforms = 0; is->ntables = 0; is->nanchors = 0; is->nframes = 0; is->curform = nil; is->curmap = nil; is->tabstk = nil; is->kidstk = nil; return is; } static Item *getitems(ItemSource* is, uchar* data, int datalen); // Parse an html document and create a list of layout items. // Allocate and return document info in *pdi. // When caller is done with the items, it should call // freeitems on the returned result, and then // freedocinfo(*pdi). Item* parsehtml(uchar* data, int datalen, Rune* pagesrc, int mtype, int chset, Docinfo** pdi) { Item *it; Docinfo* di; ItemSource* is; di = newdocinfo(); di->src = _Strdup(pagesrc); di->base = _Strdup(pagesrc); di->mediatype = mtype; di->chset = chset; *pdi = di; is = newitemsource(di); it = getitems(is, data, datalen); freepstatestack(is->psstk); free(is); return it; } // Get a group of tokens for lexer, parse them, and create // a list of layout items. // When caller is done with the items, it should call // freeitems on the returned result. 
static Item* getitems(ItemSource* is, uchar* data, int datalen) { int i; int j; int nt; int pt; int doscripts; int tokslen; int toki; int h; int sz; int method; int n; int nblank; int norsz; int bramt; int sty; int nosh; int oldcuranchor; int dfltbd; int v; int hang; int isempty; int tag; int brksp; int target; uchar brk; uchar flags; uchar align; uchar al; uchar ty; uchar ty2; Pstate* ps; Pstate* nextps; Pstate* outerps; Table* curtab; Token* tok; Token* toks; Docinfo* di; Item* ans; Item* img; Item* ffit; Item* tabitem; Rune* s; Rune* t; Rune* name; Rune* enctype; Rune* usemap; Rune* prompt; Rune* equiv; Rune* val; Rune* nsz; Rune* script; Map* map; Form* frm; Iimage* ii; Kidinfo* kd; Kidinfo* ks; Kidinfo* pks; Dimen wd; Option* option; Table* tab; Tablecell* c; Tablerow* tr; Formfield* field; Formfield* ff; Rune* href; Rune* src; Rune* scriptsrc; Rune* bgurl; Rune* action; Background bg; if(!buildinited) buildinit(); doscripts = 0; // for now ps = is->psstk; curtab = is->tabstk; di = is->doc; toks = _gettoks(data, datalen, di->chset, di->mediatype, &tokslen); toki = 0; for(; toki < tokslen; toki++) { tok = &toks[toki]; if(dbgbuild > 1) fprint(2, "build: curstate %ux, token %T\n", ps->curstate, tok); tag = tok->tag; brk = 0; brksp = 0; if(tag < Numtags) { brk = blockbrk[tag]; if(brk&SPBefore) brksp = 1; } else if(tag < Numtags + RBRA) { brk = blockbrk[tag - RBRA]; if(brk&SPAfter) brksp = 1; } if(brk) { addbrk(ps, brksp, 0); if(ps->inpar) { popjust(ps); ps->inpar = 0; } } // check common case first (Data), then switch statement on tag if(tag == Data) { // Lexing didn't pay attention to SGML record boundary rules: // \n after start tag or before end tag to be discarded. // (Lex has already discarded all \r's). // Some pages assume this doesn't happen in
 text,
			// so we won't do it if literal is true.
			// BUG: won't discard \n before a start tag that begins
			// the next bufferful of tokens.
			s = tok->text;
			n = _Strlen(s);
			if(!ps->literal) {
				i = 0;
				j = n;
				if(toki > 0) {
					pt = toks[toki - 1].tag;
					// IE and Netscape both ignore this rule (contrary to spec)
					// if previous tag was img
					if(pt < Numtags && pt != Timg && j > 0 && s[0] == '\n')
						i++;
				}
				if(toki < tokslen - 1) {
					nt = toks[toki + 1].tag;
					if(nt >= RBRA && nt < Numtags + RBRA && j > i && s[j - 1] == '\n')
						j--;
				}
				if(i > 0 || j < n) {
					t = s;
					s = _Strsubstr(s, i, j);
					free(t);
					n = j-i;
				}
			}
			if(ps->skipwhite) {
				_trimwhite(s, n, &t, &nt);
				if(t == nil) {
					free(s);
					s = nil;
				}
				else if(t != s) {
					t = _Strndup(t, nt);
					free(s);
					s = t;
				}
				if(s != nil)
					ps->skipwhite = 0;
			}
			tok->text = nil;		// token doesn't own string anymore
			if(s != nil)
				addtext(ps, s);
		}
		else
			switch(tag) {
			// Some abbrevs used in following DTD comments
			// %text = 	#PCDATA
			//		| TT | I | B | U | STRIKE | BIG | SMALL | SUB | SUP
			//		| EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE
			//		| A | IMG | APPLET | FONT | BASEFONT | BR | SCRIPT | MAP
			//		| INPUT | SELECT | TEXTAREA
			// %block = P | UL | OL | DIR | MENU | PRE | DL | DIV | CENTER
			//		| BLOCKQUOTE | FORM | ISINDEX | HR | TABLE
			// %flow = (%text | %block)*
			// %body.content = (%heading | %text | %block | ADDRESS)*

			// 
			// Anchors are not supposed to be nested, but you sometimes see
			// href anchors inside destination anchors.
			case Ta:
				if(ps->curanchor != 0) {
					if(warn)
						fprint(2, "warning: nested  or missing \n");
					ps->curanchor = 0;
				}
				name = aval(tok, Aname);
				href = aurlval(tok, Ahref, nil, di->base);
				// ignore rel, rev, and title attrs
				if(href != nil) {
					target = atargval(tok, di->target);
					di->anchors = newanchor(++is->nanchors, name, href, target, di->anchors);
					if(name != nil)
						name = _Strdup(name);	// for DestAnchor construction, below
					ps->curanchor = is->nanchors;
					ps->curfg = push(&ps->fgstk, di->link);
					ps->curul = push(&ps->ulstk, ULunder);
				}
				if(name != nil) {
					// add a null item to be destination
					additem(ps, newispacer(ISPnull), tok);
					di->dests = newdestanchor(++is->nanchors, name, ps->lastit, di->dests);
				}
				break;

			case Ta+RBRA :
				if(ps->curanchor != 0) {
					ps->curfg = popretnewtop(&ps->fgstk, di->text);
					ps->curul = popretnewtop(&ps->ulstk, ULnone);
					ps->curanchor = 0;
				}
				break;

			// 
			// We can't do applets, so ignore PARAMS, and let
			// the %text contents appear for the alternative rep
			case Tapplet:
			case Tapplet+RBRA:
				if(warn && tag == Tapplet)
					fprint(2, "warning:  ignored\n");
				break;

			// 
			case Tarea:
				map = di->maps;
				if(map == nil) {
					if(warn)
						fprint(2, "warning:  not inside \n");
					continue;
				}
				map->areas = newarea(atabval(tok, Ashape, shape_tab, NSHAPETAB, SHrect),
					aurlval(tok, Ahref, nil, di->base),
					atargval(tok, di->target),
					map->areas);
				setdimarray(tok, Acoords, &map->areas->coords, &map->areas->ncoords);
				break;

			// 
			case Tb:
			case Tstrong:
				pushfontstyle(ps, FntB);
				break;

			case Tb+RBRA:
			case Tcite+RBRA:
			case Tcode+RBRA:
			case Tdfn+RBRA:
			case Tem+RBRA:
			case Tkbd+RBRA:
			case Ti+RBRA:
			case Tsamp+RBRA:
			case Tstrong+RBRA:
			case Ttt+RBRA:
			case Tvar+RBRA :
			case Taddress+RBRA:
				popfontstyle(ps);
				break;

			// 
			case Tbase:
				t = di->base;
				di->base = aurlval(tok, Ahref, di->base, di->base);
				if(t != nil)
					free(t);
				di->target = atargval(tok, di->target);
				break;

			// 
			case Tbasefont:
				ps->adjsize = aintval(tok, Asize, 3) - 3;
				break;

			// 
			case Tbig:
			case Tsmall:
				sz = ps->adjsize;
				if(tag == Tbig)
					sz += Large;
				else
					sz += Small;
				pushfontsize(ps, sz);
				break;

			case Tbig+RBRA:
			case Tsmall+RBRA:
				popfontsize(ps);
				break;

			// 
			case Tblockquote:
				changeindent(ps, BQTAB);
				break;

			case Tblockquote+RBRA:
				changeindent(ps, -BQTAB);
				break;

			// 
			case Tbody:
				ps->skipping = 0;
				bg = makebackground(nil, acolorval(tok, Abgcolor, di->background.color));
				bgurl = aurlval(tok, Abackground, nil, di->base);
				if(bgurl != nil) {
					if(di->backgrounditem != nil)
						freeitem((Item*)di->backgrounditem);
						// really should remove old item from di->images list,
						// but there should only be one BODY element ...
					di->backgrounditem = (Iimage*)newiimage(bgurl, nil, ALnone, 0, 0, 0, 0, 0, 0, nil);
					di->backgrounditem->nextimage = di->images;
					di->images = di->backgrounditem;
				}
				ps->curbg = bg;
				di->background = bg;
				di->text = acolorval(tok, Atext, di->text);
				di->link = acolorval(tok, Alink, di->link);
				di->vlink = acolorval(tok, Avlink, di->vlink);
				di->alink = acolorval(tok, Aalink, di->alink);
				if(di->text != ps->curfg) {
					ps->curfg = di->text;
					ps->fgstk.n = 0;
				}
				break;

			case Tbody+RBRA:
				// HTML spec says ignore things after ,
				// but IE and Netscape don't
				// ps.skipping = 1;
				break;

			// 
			case Tbr:
				addlinebrk(ps, atabval(tok, Aclear, clear_tab, NCLEARTAB, 0));
				break;

			// 
			case Tcaption:
				if(curtab == nil) {
					if(warn)
						fprint(2, "warning:  outside \n");
					continue;
				}
				if(curtab->caption != nil) {
					if(warn)
						fprint(2, "warning: more than one 
in \n"); continue; } ps = newpstate(ps); curtab->caption_place = atabval(tok, Aalign, align_tab, NALIGNTAB, ALtop); break; case Tcaption+RBRA: nextps = ps->next; if(curtab == nil || nextps == nil) { if(warn) fprint(2, "warning: unexpected \n"); continue; } curtab->caption = ps->items->next; free(ps); ps = nextps; break; case Tcenter: case Tdiv: if(tag == Tcenter) al = ALcenter; else al = atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust); pushjust(ps, al); break; case Tcenter+RBRA: case Tdiv+RBRA: popjust(ps); break; // case Tdd: if(ps->hangstk.n == 0) { if(warn) fprint(2, "warning:
not inside hangstk, 0); if(h != 0) changehang(ps, -10*LISTTAB); else addbrk(ps, 0, 0); push(&ps->hangstk, 0); break; // // case Tdir: case Tmenu: case Tol: case Tul: changeindent(ps, LISTTAB); push(&ps->listtypestk, listtyval(tok, (tag==Tol)? LT1 : LTdisc)); push(&ps->listcntstk, aintval(tok, Astart, 1)); break; case Tdir+RBRA: case Tmenu+RBRA: case Tol+RBRA: case Tul+RBRA: if(ps->listtypestk.n == 0) { if(warn) fprint(2, "warning: %T ended no list\n", tok); continue; } addbrk(ps, 0, 0); pop(&ps->listtypestk); pop(&ps->listcntstk); changeindent(ps, -LISTTAB); break; // case Tdl: changeindent(ps, LISTTAB); push(&ps->hangstk, 0); break; case Tdl+RBRA: if(ps->hangstk.n == 0) { if(warn) fprint(2, "warning: unexpected \n"); continue; } changeindent(ps, -LISTTAB); if(top(&ps->hangstk, 0) != 0) changehang(ps, -10*LISTTAB); pop(&ps->hangstk); break; // case Tdt: if(ps->hangstk.n == 0) { if(warn) fprint(2, "warning:
not inside
\n"); continue; } h = top(&ps->hangstk, 0); pop(&ps->hangstk); if(h != 0) changehang(ps, -10*LISTTAB); changehang(ps, 10*LISTTAB); push(&ps->hangstk, 1); break; // case Tfont: sz = top(&ps->fntsizestk, Normal); if(_tokaval(tok, Asize, &nsz, 0)) { if(_prefix(L(Lplus), nsz)) sz = Normal + _Strtol(nsz+1, nil, 10) + ps->adjsize; else if(_prefix(L(Lminus), nsz)) sz = Normal - _Strtol(nsz+1, nil, 10) + ps->adjsize; else if(nsz != nil) sz = Normal + (_Strtol(nsz, nil, 10) - 3); } ps->curfg = push(&ps->fgstk, acolorval(tok, Acolor, ps->curfg)); pushfontsize(ps, sz); break; case Tfont+RBRA: if(ps->fgstk.n == 0) { if(warn) fprint(2, "warning: unexpected \n"); continue; } ps->curfg = popretnewtop(&ps->fgstk, di->text); popfontsize(ps); break; // case Tform: if(is->curform != nil) { if(warn) fprint(2, "warning:
nested inside another\n"); continue; } action = aurlval(tok, Aaction, di->base, di->base); s = aval(tok, Aid); name = astrval(tok, Aname, s); if(s) free(s); target = atargval(tok, di->target); method = atabval(tok, Amethod, method_tab, NMETHODTAB, HGet); if(warn && _tokaval(tok, Aenctype, &enctype, 0) && _Strcmp(enctype, L(Lappl_form))) fprint(2, "form enctype %S not handled\n", enctype); frm = newform(++is->nforms, name, action, target, method, di->forms); di->forms = frm; is->curform = frm; break; case Tform+RBRA: if(is->curform == nil) { if(warn) fprint(2, "warning: unexpected \n"); continue; } // put fields back in input order is->curform->fields = (Formfield*)_revlist((List*)is->curform->fields); is->curform = nil; break; // case Tframe: ks = is->kidstk; if(ks == nil) { if(warn) fprint(2, "warning: not in \n"); continue; } ks->kidinfos = kd = newkidinfo(0, ks->kidinfos); kd->src = aurlval(tok, Asrc, nil, di->base); kd->name = aval(tok, Aname); if(kd->name == nil) { s = _ltoStr(++is->nframes); kd->name = _Strdup2(L(Lfr), s); free(s); } kd->marginw = auintval(tok, Amarginwidth, 0); kd->marginh = auintval(tok, Amarginheight, 0); kd->framebd = auintval(tok, Aframeborder, 1); kd->flags = atabval(tok, Ascrolling, fscroll_tab, NFSCROLLTAB, kd->flags); norsz = aflagval(tok, Anoresize); if(norsz) kd->flags |= FRnoresize; break; // case Tframeset: ks = newkidinfo(1, nil); pks = is->kidstk; if(pks == nil) di->kidinfo = ks; else { ks->next = pks->kidinfos; pks->kidinfos = ks; } ks->nextframeset = pks; is->kidstk = ks; setdimarray(tok, Arows, &ks->rows, &ks->nrows); if(ks->nrows == 0) { ks->rows = (Dimen*)emalloc(sizeof(Dimen)); ks->nrows = 1; ks->rows[0] = makedimen(Dpercent, 100); } setdimarray(tok, Acols, &ks->cols, &ks->ncols); if(ks->ncols == 0) { ks->cols = (Dimen*)emalloc(sizeof(Dimen)); ks->ncols = 1; ks->cols[0] = makedimen(Dpercent, 100); } break; case Tframeset+RBRA: if(is->kidstk == nil) { if(warn) fprint(2, "warning: unexpected \n"); continue; } ks = 
is->kidstk; // put kids back in original order // and add blank frames to fill out cells n = ks->nrows*ks->ncols; nblank = n - _listlen((List*)ks->kidinfos); while(nblank-- > 0) ks->kidinfos = newkidinfo(0, ks->kidinfos); ks->kidinfos = (Kidinfo*)_revlist((List*)ks->kidinfos); is->kidstk = is->kidstk->nextframeset; if(is->kidstk == nil) { // end input ans = nil; goto return_ans; } break; // , etc. case Th1: case Th2: case Th3: case Th4: case Th5: case Th6: bramt = 1; if(ps->items == ps->lastit) bramt = 0; addbrk(ps, bramt, IFcleft|IFcright); sz = Verylarge - (tag - Th1); if(sz < Tiny) sz = Tiny; pushfontsize(ps, sz); sty = top(&ps->fntstylestk, FntR); if(tag == Th1) sty = FntB; pushfontstyle(ps, sty); pushjust(ps, atabval(tok, Aalign, align_tab, NALIGNTAB, ps->curjust)); ps->skipwhite = 1; break; case Th1+RBRA: case Th2+RBRA: case Th3+RBRA: case Th4+RBRA: case Th5+RBRA: case Th6+RBRA: addbrk(ps, 1, IFcleft|IFcright); popfontsize(ps); popfontstyle(ps); popjust(ps); break; case Thead: // HTML spec says ignore regular markup in head, // but Netscape and IE don't // ps.skipping = 1; break; case Thead+RBRA: ps->skipping = 0; break; // case Thr: al = atabval(tok, Aalign, align_tab, NALIGNTAB, ALcenter); sz = auintval(tok, Asize, HRSZ); wd = adimen(tok, Awidth); if(dimenkind(wd) == Dnone) wd = makedimen(Dpercent, 100); nosh = aflagval(tok, Anoshade); additem(ps, newirule(al, sz, nosh, wd), tok); addbrk(ps, 0, 0); break; case Ti: case Tcite: case Tdfn: case Tem: case Tvar: case Taddress: pushfontstyle(ps, FntI); break; // case Timg: map = nil; oldcuranchor = ps->curanchor; if(_tokaval(tok, Ausemap, &usemap, 0)) { if(!_prefix(L(Lhash), usemap)) { if(warn) fprint(2, "warning: can't handle non-local map %S\n", usemap); } else { map = getmap(di, usemap+1); if(ps->curanchor == 0) { di->anchors = newanchor(++is->nanchors, nil, nil, di->target, di->anchors); ps->curanchor = is->nanchors; } } } align = atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom); dfltbd = 0; 
if(ps->curanchor != 0) dfltbd = 2; src = aurlval(tok, Asrc, nil, di->base); if(src == nil) { if(warn) fprint(2, "warning: has no src attribute\n"); ps->curanchor = oldcuranchor; continue; } img = newiimage(src, aval(tok, Aalt), align, auintval(tok, Awidth, 0), auintval(tok, Aheight, 0), auintval(tok, Ahspace, IMGHSPACE), auintval(tok, Avspace, IMGVSPACE), auintval(tok, Aborder, dfltbd), aflagval(tok, Aismap), map); if(align == ALleft || align == ALright) { additem(ps, newifloat(img, align), tok); // if no hspace specified, use FLTIMGHSPACE if(!_tokaval(tok, Ahspace, &val, 0)) ((Iimage*)img)->hspace = FLTIMGHSPACE; } else { ps->skipwhite = 0; additem(ps, img, tok); } if(!ps->skipping) { ((Iimage*)img)->nextimage = di->images; di->images = (Iimage*)img; } ps->curanchor = oldcuranchor; break; // case Tinput: ps->skipwhite = 0; if(is->curform == nil) { if(warn) fprint(2, " not inside
\n"); continue; } is->curform->fields = field = newformfield( atabval(tok, Atype, input_tab, NINPUTTAB, Ftext), ++is->curform->nfields, is->curform, aval(tok, Aname), aval(tok, Avalue), auintval(tok, Asize, 0), auintval(tok, Amaxlength, 1000), is->curform->fields); if(aflagval(tok, Achecked)) field->flags = FFchecked; switch(field->ftype) { case Ftext: case Fpassword: case Ffile: if(field->size == 0) field->size = 20; break; case Fcheckbox: if(field->name == nil) { if(warn) fprint(2, "warning: checkbox form field missing name\n"); continue; } if(field->value == nil) field->value = _Strdup(L(Lone)); break; case Fradio: if(field->name == nil || field->value == nil) { if(warn) fprint(2, "warning: radio form field missing name or value\n"); continue; } break; case Fsubmit: if(field->value == nil) field->value = _Strdup(L(Lsubmit)); if(field->name == nil) field->name = _Strdup(L(Lnoname)); break; case Fimage: src = aurlval(tok, Asrc, nil, di->base); if(src == nil) { if(warn) fprint(2, "warning: image form field missing src\n"); continue; } // width and height attrs aren't specified in HTML 3.2, // but some people provide them and they help avoid // a relayout field->image = newiimage(src, astrval(tok, Aalt, L(Lsubmit)), atabval(tok, Aalign, align_tab, NALIGNTAB, ALbottom), auintval(tok, Awidth, 0), auintval(tok, Aheight, 0), 0, 0, 0, 0, nil); ii = (Iimage*)field->image; ii->nextimage = di->images; di->images = ii; break; case Freset: if(field->value == nil) field->value = _Strdup(L(Lreset)); break; case Fbutton: if(field->value == nil) field->value = _Strdup(L(Lspace)); break; } ffit = newiformfield(field); additem(ps, ffit, tok); if(ffit->genattr != nil) field->events = ffit->genattr->events; break; // case Tisindex: ps->skipwhite = 0; prompt = astrval(tok, Aprompt, L(Lindex)); target = atargval(tok, di->target); additem(ps, textit(ps, prompt), tok); frm = newform(++is->nforms, nil, di->base, target, HGet, di->forms); di->forms = frm; ff = newformfield(Ftext, 1, frm, 
_Strdup(L(Lisindex)), nil, 50, 1000, nil); frm->fields = ff; frm->nfields = 1; additem(ps, newiformfield(ff), tok); addbrk(ps, 1, 0); break; // case Tli: if(ps->listtypestk.n == 0) { if(warn) fprint(2, "
  • not in list\n"); continue; } ty = top(&ps->listtypestk, 0); ty2 = listtyval(tok, ty); if(ty != ty2) { ty = ty2; push(&ps->listtypestk, ty2); } v = aintval(tok, Avalue, top(&ps->listcntstk, 1)); if(ty == LTdisc || ty == LTsquare || ty == LTcircle) hang = 10*LISTTAB - 3; else hang = 10*LISTTAB - 1; changehang(ps, hang); addtext(ps, listmark(ty, v)); push(&ps->listcntstk, v + 1); changehang(ps, -hang); ps->skipwhite = 1; break; // case Tmap: if(_tokaval(tok, Aname, &name, 0)) is->curmap = getmap(di, name); break; case Tmap+RBRA: map = is->curmap; if(map == nil) { if(warn) fprint(2, "warning: unexpected \n"); continue; } map->areas = (Area*)_revlist((List*)map->areas); break; case Tmeta: if(ps->skipping) continue; if(_tokaval(tok, Ahttp_equiv, &equiv, 0)) { val = aval(tok, Acontent); n = _Strlen(equiv); if(!_Strncmpci(equiv, n, L(Lrefresh))) di->refresh = val; else if(!_Strncmpci(equiv, n, L(Lcontent))) { n = _Strlen(val); if(!_Strncmpci(val, n, L(Ljavascript)) || !_Strncmpci(val, n, L(Ljscript1)) || !_Strncmpci(val, n, L(Ljscript))) di->scripttype = TextJavascript; else { if(warn) fprint(2, "unimplemented script type %S\n", val); di->scripttype = UnknownType; } } } break; // Nobr is NOT in HMTL 4.0, but it is ubiquitous on the web case Tnobr: ps->skipwhite = 0; ps->curstate &= ~IFwrap; break; case Tnobr+RBRA: ps->curstate |= IFwrap; break; // We do frames, so skip stuff in noframes case Tnoframes: ps->skipping = 1; break; case Tnoframes+RBRA: ps->skipping = 0; break; // We do scripts (if enabled), so skip stuff in noscripts case Tnoscript: if(doscripts) ps->skipping = 1; break; case Tnoscript+RBRA: if(doscripts) ps->skipping = 0; break; // case Toption: if(is->curform == nil || is->curform->fields == nil) { if(warn) fprint(2, "warning: