From 78e51a8c6678b6e3dff3d619aa786669f531f4bc Mon Sep 17 00:00:00 2001 From: rsc Date: Fri, 14 Jan 2005 03:45:44 +0000 Subject: checkpoint --- man/man3/html.html | 1206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1206 insertions(+) create mode 100644 man/man3/html.html (limited to 'man/man3/html.html') diff --git a/man/man3/html.html b/man/man3/html.html new file mode 100644 index 00000000..465edcdb --- /dev/null +++ b/man/man3/html.html @@ -0,0 +1,1206 @@ + +html(3) - Plan 9 from User Space + + + + +
+
+
HTML(3)HTML(3) +
+
+

NAME
+ +
+ + parsehtml, printitems, validitems, freeitems, freedocinfo, dimenkind, + dimenspec, targetid, targetname, fromStr, toStr – HTML parser
+ +
+

SYNOPSIS
+ +
+ + +
+ + #include <u.h>
+ #include <libc.h>
+ #include <html.h>
+ +
+
+ Item*    parsehtml(uchar* data, int datalen, Rune* src, int mtype,
+ +
+ + +
+ + int chset, Docinfo** pdi)
+ +
+ +
+ +
+
+
+ + +
+ + + +
+ +
+ void     printitems(Item* items, char* msg)
+ +
+
+ int      validitems(Item* items)
+ +
+
+ void     freeitems(Item* items)
+ +
+
+ void     freedocinfo(Docinfo* d)
+ +
+
+ int      dimenkind(Dimen d)
+ +
+
+ int      dimenspec(Dimen d)
+ +
+
+ int      targetid(Rune* s)
+ +
+
+ Rune*    targetname(int targid)
+ +
+
+ uchar* fromStr(Rune* buf, int n, int chset)
+ +
+
+ Rune*    toStr(uchar* buf, int n, int chset)
+
+
+

DESCRIPTION
+ +
+ + +
+ + This library implements a parser for HTML 4.0 documents. The parsed + HTML is converted into an intermediate representation that describes + how the formatted HTML should be laid out. +
+ + Parsehtml parses an entire HTML document contained in the buffer + data and having length datalen. The URL of the document should + be passed in as src. Mtype is the media type of the document, + which should be either TextHtml or TextPlain. The character set + of the document is described in chset, which can be + one of US_Ascii, ISO_8859_1, UTF_8 or Unicode. The return value + is a linked list of Item structures, described in detail below. + As a side effect, *pdi is set to point to a newly created Docinfo + structure, containing information pertaining to the entire document. + +
+ + The library expects two allocation routines to be provided by + the caller, emalloc and erealloc. These routines are analogous + to the standard malloc and realloc routines, except that they + should not return if the memory allocation fails. In addition, + emalloc is required to zero the memory. +
+ + For debugging purposes, printitems may be called to display the + contents of an item list; individual items may be printed using + the %I print verb, installed on the first call to parsehtml. Validitems + traverses the item list, checking that all of the pointers are + valid. It returns 1 if everything is ok, and 0 if an error was + found. Normally, one would not call these routines directly. Instead, + one sets the global variable dbgbuild and the library calls them + automatically. One can also set warn, to cause the library to + print a warning whenever it finds a problem with the input document, + and dbglex, to print debugging information in the + lexer. +
+ + When an item list is finished with, it should be freed with freeitems. + Then, freedocinfo should be called on the pointer returned in + *pdi. +
+ + Dimenkind and dimenspec are provided to interpret the Dimen type, + as described in the section Dimension Specifications. +
+ + Frame target names are mapped to integer ids via a global, permanent + mapping. To find the value for a given name, call targetid, which + allocates a new id if the name hasn’t been seen before. The name + of a given, known id may be retrieved using targetname. The library + predefines FTtop, FTself, FTparent and + FTblank. +
+ + The library handles all text as Unicode strings (type Rune*). + Character set conversion is provided by fromStr and toStr. FromStr + takes n Unicode characters from buf and converts them to the character + set described by chset. ToStr takes n bytes from buf, interpreted + as belonging to character set chset, and converts + them to a Unicode string. Both routines null-terminate the result, + and use emalloc to allocate space for it.
+

Items
+ The return value of parsehtml is a linked list of variant structures, + with the generic portion described by the following definition: + +
+ + typedef struct Item Item;
+ struct Item
+ {
+ +
+ + Item*      next;
+ int        width;
+ int        height;
+ int        ascent;
+ int        anchorid;
+ int        state;
+ Genattr* genattr;
+ int        tag;
+ +
+ };
+ +
+
+ The field next points to the successor in the linked list of items, + while width, height, and ascent are intended for use by the caller + as part of the layout process. Anchorid, if non-zero, gives the + integer id assigned by the parser to the anchor that this item + is in (see section Anchors). State is a collection of + flags and values described as follows: +
+ + enum
+ {
+ +
+ + IFbrk =           0x80000000,
+ IFbrksp =         0x40000000,
+ IFnobrk =         0x20000000,
+ IFcleft =         0x10000000,
+ IFcright =        0x08000000,
+ IFwrap =          0x04000000,
+ IFhang =          0x02000000,
+ IFrjust =         0x01000000,
+ IFcjust =         0x00800000,
+ IFsmap =          0x00400000,
+ IFindentshift = 8,
+ IFindentmask =    (255<<IFindentshift),
+ IFhangmask =      255
+ +
+ };
+ +
+
+ IFbrk is set if a break is to be forced before placing this item. + IFbrksp is set if a 1 line space should be added to the break + (in which case IFbrk is also set). IFnobrk is set if a break is + not permitted before the item. IFcleft is set if left floats should + be cleared (that is, if the list of pending left floats should + be placed) before this item is placed, and IFcright is set for + right floats. In both cases, IFbrk is also set. IFwrap is set + if the line containing this item is allowed to wrap. IFhang is + set if this item hangs into the left indent. IFrjust is set if + the line containing this item should be right justified, and IFcjust + is + set for center justified lines. IFsmap is used to indicate that + an image is a server-side map. The low 8 bits, represented by + IFhangmask, indicate the current hang into left indent, in tenths + of a tabstop. The next 8 bits, represented by IFindentmask and + IFindentshift, indicate the current indent in tab + stops. +
+ + The field genattr is an optional pointer to an auxiliary structure, + described in the section Generic Attributes. +
+ + Finally, tag describes which variant type this item has. It can + have one of the values Itexttag, Iruletag, Iimagetag, Iformfieldtag, + Itabletag, Ifloattag or Ispacertag. For each of these values, + there is an additional structure defined, which includes Item + as an unnamed initial substructure, + and then defines additional fields. +
+ + Items of type Itexttag represent a piece of text, using the following + structure: +
+ + struct Itext
+ {
+ +
+ + Item;
+ Rune* s;
+ int     fnt;
+ int     fg;
+ uchar voff;
+ uchar ul;
+ +
+ };
+ +
+
+ Here s is a null-terminated Unicode string of the actual characters + making up this text item, fnt is the font number (described in + the section Font Numbers), and fg is the RGB encoded color for + the text. Voff measures the vertical offset from the baseline; + subtract Voffbias to get the actual value (negative values + represent a displacement down the page). The field ul is the underline + style: ULnone if no underline, ULunder for conventional underline, + and ULmid for strike-through. +
+ + Items of type Iruletag represent a horizontal rule, as follows: + +
+ + struct Irule
+ {
+ +
+ + Item;
+ uchar align;
+ uchar noshade;
+ int     size;
+ Dimen wspec;
+ +
+ };
+ +
+
+ Here align is the alignment specification (described in the corresponding + section), noshade is set if the rule should not be shaded, size + is the height of the rule (as set by the size attribute), and + wspec is the desired width (see section Dimension Specifications). + +
+ + Items of type Iimagetag describe embedded images, for which the + following structure is defined: +
+ + struct Iimage
+ {
+ +
+ + Item;
+ Rune*     imsrc;
+ int       imwidth;
+ int       imheight;
+ Rune*     altrep;
+ Map*      map;
+ int       ctlid;
+ uchar     align;
+ uchar     hspace;
+ uchar     vspace;
+ uchar     border;
+ Iimage* nextimage;
+ +
+ };
+ +
+
+ Here imsrc is the URL of the image source, imwidth and imheight, + if non-zero, contain the specified width and height for the image, + and altrep is the text to use as an alternative to the image, + if the image is not displayed. Map, if set, points to a structure + describing an associated client-side image map. + Ctlid is reserved for use by the application, for handling animated + images. Align encodes the alignment specification of the image. + Hspace contains the number of pixels to pad the image with on + either side, and Vspace the padding above and below. Border is + the width of the border to draw around the + image. Nextimage points to the next image in the document (the + head of this list is Docinfo.images). +
+ + For items of type Iformfieldtag, the following structure is defined: + +
+ + struct Iformfield
+ {
+ +
+ + Item;
+ Formfield* formfield;
+ +
+ };
+ +
+
+ This adds a single field, formfield, which points to a structure + describing a field in a form, described in section Forms. +
+ + For items of type Itabletag, the following structure is defined: + +
+ + struct Itable
+ {
+ +
+ + Item;
+ Table* table;
+ +
+ };
+ +
+
+ Table points to a structure describing the table, described in + the section Tables. +
+ + For items of type Ifloattag, the following structure is defined: + +
+ + struct Ifloat
+ {
+ +
+ + Item;
+ Item*     item;
+ int       x;
+ int       y;
+ uchar     side;
+ uchar     infloats;
+ Ifloat* nextfloat;
+ +
+ };
+ +
+
+ The item points to a single item (either a table or an image) + that floats (the text of the document flows around it), and side + indicates the margin that this float sticks to; it is either ALleft + or ALright. X and y are reserved for use by the caller; these + are typically used for the coordinates of the top of the float. + Infloats is used by the caller to keep track of whether it has + placed the float. Nextfloat is used by the caller to link together + all of the floats that it has placed. +
+ + For items of type Ispacertag, the following structure is defined: + +
+ + struct Ispacer
+ {
+ +
+ + Item;
+ int     spkind;
+ +
+ };
+ +
+
+ Spkind encodes the kind of spacer, and may be one of ISPnull (zero + height and width), ISPvline (takes on height and ascent of the + current font), ISPhspace (has the width of a space in the current + font) and ISPgeneral (for all other purposes, such as between + markers and lists). +

Generic Attributes
+ +
+ + The genattr field of an item, if non-nil, points to a structure + that holds the values of attributes not specific to any particular + item type, as they occur on a wide variety of underlying HTML + tags. The structure is as follows: +
+ + typedef struct Genattr Genattr;
+ struct Genattr
+ {
+ +
+ + Rune*     id;
+ Rune*     class;
+ Rune*     style;
+ Rune*     title;
+ SEvent* events;
+ +
+ };
+ +
+
+ Fields id, class, style and title, when non-nil, contain values + of correspondingly named attributes of the HTML tag associated + with this item. Events is a linked list of events (with corresponding + scripted actions) associated with the item: +
+ + typedef struct SEvent SEvent;
+ struct SEvent
+ {
+ +
+ + SEvent* next;
+ int       type;
+ Rune*     script;
+ +
+ };
+ +
+
+ Here, next points to the next event in the list, type is one of + SEonblur, SEonchange, SEonclick, SEondblclick, SEonfocus, SEonkeypress, + SEonkeyup, SEonload, SEonmousedown, SEonmousemove, SEonmouseout, + SEonmouseover, SEonmouseup, SEonreset, SEonselect, + SEonsubmit or SEonunload, and script is the text of the associated + script.
+

Dimension Specifications
+ +
+ + Some structures include a dimension specification, used where + a number can be followed by a % or a * to indicate percentage + of total or relative weight. This is encoded using the following + structure: +
+ + typedef struct Dimen Dimen;
+ struct Dimen
+ {
+ +
+ + int kindspec;
+ +
+ };
+ +
+
+ Separate kind and spec values are extracted using dimenkind and + dimenspec. Dimenkind returns one of Dnone, Dpixels, Dpercent or + Drelative. Dnone means that no dimension was specified. In all + other cases, dimenspec should be called to find the absolute number + of pixels, the percentage of total, or the + relative weight.
+

Background Specifications
+ +
+ + It is possible to set the background of the entire document, and + also for some parts of the document (such as tables). This is + encoded as follows: +
+ + typedef struct Background Background;
+ struct Background
+ {
+ +
+ + Rune* image;
+ int     color;
+ +
+ };
+ +
+
+ Image, if non-nil, is the URL of an image to use as the background. + If this is nil, color is used instead, as the RGB value for a + solid fill color.
+

Alignment Specifications
+ +
+ + Certain items have alignment specifiers taken from the following + enumerated type: +
+ + enum
+ {
+ +
+ + ALnone = 0, ALleft, ALcenter, ALright, ALjustify,
+ ALchar, ALtop, ALmiddle, ALbottom, ALbaseline
+ +
+ };
+ +
+
+ These values correspond to the various alignment types named in + the HTML 4.0 standard. If an item has an alignment of ALleft or + ALright, the library automatically encapsulates it inside a float + item. +
+ + Tables, and the various rows, columns and cells within them, have + a more complex alignment specification, composed of separate vertical + and horizontal alignments: +
+ + typedef struct Align Align;
+ struct Align
+ {
+ +
+ + uchar halign;
+ uchar valign;
+ +
+ };
+ +
+
+ Halign can be one of ALnone, ALleft, ALcenter, ALright, ALjustify + or ALchar. Valign can be one of ALnone, ALmiddle, ALbottom, ALtop + or ALbaseline.
+

Font Numbers
+ +
+ + Text items have an associated font number (the fnt field), which + is encoded as style*NumSize+size. Here, style is one of FntR, + FntI, FntB or FntT, for roman, italic, bold and typewriter font + styles, respectively, and size is Tiny, Small, Normal, Large or + Verylarge. The total number of possible + font numbers is NumFnt, and the default font number is DefFnt + (which is roman style, normal size).
+

Document Info
+ +
+ + Global information about an HTML page is stored in the following + structure: +
+ + typedef struct Docinfo Docinfo;
+ struct Docinfo
+ {
+ +
+ + // stuff from HTTP headers, doc head, and body tag
+ Rune*         src;
+ Rune*         base;
+ Rune*         doctitle;
+ Background    background;
+ Iimage*       backgrounditem;
+ int           text;
+ int           link;
+ int           vlink;
+ int           alink;
+ int           target;
+ int           chset;
+ int           mediatype;
+ int           scripttype;
+ int           hasscripts;
+ Rune*         refresh;
+ Kidinfo*      kidinfo;
+ int           frameid;
+ // info needed to respond to user actions
+ Anchor*       anchors;
+ DestAnchor* dests;
+ Form*         forms;
+ Table*        tables;
+ Map*          maps;
+ Iimage*       images;
+ +
+ };
+ +
+
+ + Src gives the URL of the original source of the document, and + base is the base URL. Doctitle is the document’s title, as set + by a <title> element. Background is as described in the section + Background Specifications, and backgrounditem is set to be an + image item for the document’s background image + (if given as a URL), or else nil. Text gives the default foreground + text color of the document, link the unvisited hyperlink color, + vlink the visited hyperlink color, and alink the color for highlighting + hyperlinks (all in 24-bit RGB format). Target is the default target + frame id. Chset and mediatype are as for + the chset and mtype parameters to parsehtml. Scripttype is the + type of any scripts contained in the document, and is always TextJavascript. + Hasscripts is set if the document contains any scripts. Scripting + is currently unsupported. Refresh is the contents of a <meta http−equiv=Refresh + ...> tag, if any. Kidinfo is set if this document is a frameset + (see section Frames). Frameid is this document’s frame id. +
+ + Anchors is a list of hyperlinks contained in the document, and + dests is a list of hyperlink destinations within the page (see + the following section for details). Forms, tables and maps are + lists of the various forms, tables and client-side maps contained + in the document, as described in subsequent sections. + Images is a list of all the image items in the document.
+

Anchors
+ +
+ + The library builds two lists for all of the <a> elements (anchors) + in a document. Each anchor is assigned a unique anchor id within + the document. For anchors which are hyperlinks (the href attribute + was supplied), the following structure is defined: +
+ + typedef struct Anchor Anchor;
+ struct Anchor
+ {
+ +
+ + Anchor* next;
+ int       index;
+ Rune*     name;
+ Rune*     href;
+ int       target;
+ +
+ };
+ +
+
+ Next points to the next anchor in the list (the head of this list + is Docinfo.anchors). Index is the anchor id; each item within + this hyperlink is tagged with this value in its anchorid field. + Name and href are the values of the correspondingly named attributes + of the anchor (in particular, href is the URL to go + to). Target is the value of the target attribute (if provided) + converted to a frame id. +
+ + Destinations within the document (anchors with the name attribute + set) are held in the Docinfo.dests list, using the following structure: + +
+ + typedef struct DestAnchor DestAnchor;
+ struct DestAnchor
+ {
+ +
+ + DestAnchor* next;
+ int           index;
+ Rune*         name;
+ Item*         item;
+ +
+ };
+ +
+
+ + Next is the next element of the list, index is the anchor id, + name is the value of the name attribute, and item points to + the item within the parsed document that should be considered + to be the destination.
+

Forms
+ +
+ + Any forms within a document are kept in a list, headed by Docinfo.forms. + The elements of this list are as follows: +
+ + typedef struct Form Form;
+ struct Form
+ {
+ +
+ + Form*        next;
+ int          formid;
+ Rune*        name;
+ Rune*        action;
+ int          target;
+ int          method;
+ int          nfields;
+ Formfield* fields;
+ +
+ };
+ +
+
+ Next points to the next form in the list. Formid is a serial number + for the form within the document. Name is the value of the form’s + name or id attribute. Action is the value of any action attribute. + Target is the value of the target attribute (if any) converted + to a frame target id. Method is one of HGet or + HPost. Nfields is the number of fields in the form, and fields + is a linked list of the actual fields. +
+ + The individual fields in a form are described by the following + structure: +
+ + typedef struct Formfield Formfield;
+ struct Formfield
+ {
+ +
+ + Formfield* next;
+ int          ftype;
+ int          fieldid;
+ Form*        form;
+ Rune*        name;
+ Rune*        value;
+ int          size;
+ int          maxlength;
+ int          rows;
+ int          cols;
+ uchar        flags;
+ Option*      options;
+ Item*        image;
+ int          ctlid;
+ SEvent*      events;
+ +
+ };
+ +
+
+ Here, next points to the next field in the list. Ftype is the + type of the field, which can be one of Ftext, Fpassword, Fcheckbox, + Fradio, Fsubmit, Fhidden, Fimage, Freset, Ffile, Fbutton, Fselect + or Ftextarea. Fieldid is a serial number for the field within + the form. Form points back + to the form containing this field. Name, value, size, maxlength, + rows and cols each contain the values of corresponding attributes + of the field, if present. Flags contains per-field flags, of which + FFchecked and FFmultiple are defined. Image is only used for fields + of type Fimage; it points to an + image item containing the image to be displayed. Ctlid is reserved + for use by the caller, typically to store a unique id of an associated + control used to implement the field. Events is the same as the + corresponding field of the generic attributes associated with + the item containing this field. Options is only used by + fields of type Fselect; it consists of a list of possible options + that may be selected for that field, using the following structure: + +
+ + typedef struct Option Option;
+ struct Option
+ {
+ +
+ + Option* next;
+ int       selected;
+ Rune*     value;
+ Rune*     display;
+ +
+ };
+ +
+
+ Next points to the next element of the list. Selected is set if + this option is to be displayed initially. Value is the value to + send when the form is submitted if this option is selected. Display + is the string to display on the screen for this option.
+

Tables
+ +
+ + The library builds a list of all the tables in the document, headed + by Docinfo.tables. Each element of this list has the following + format: +
+ + typedef struct Table Table;
+ struct Table
+ {
+ +
+ + Table*         next;
+ int           tableid;
+ Tablerow*      rows;
+ int           nrow;
+ Tablecol*      cols;
+ int           ncol;
+ Tablecell*     cells;
+ int           ncell;
+ Tablecell*** grid;
+ Align          align;
+ Dimen          width;
+ int           border;
+ int           cellspacing;
+ int           cellpadding;
+ Background     background;
+ Item*          caption;
+ uchar          caption_place;
+ Lay*           caption_lay;
+ int           totw;
+ int           toth;
+ int           caph;
+ int           availw;
+ Token*         tabletok;
+ uchar          flags;
+ +
+ };
+ +
+
+ Next points to the next element in the list of tables. Tableid + is a serial number for the table within the document. Rows is + an array of row specifications (described below) and nrow is the + number of elements in this array. Similarly, cols is an array + of column specifications, and ncol the size of this array. + Cells is a list of all cells within the table (structure described + below) and ncell is the number of elements in this list. Note + that a cell may span multiple rows and/or columns, thus ncell + may be smaller than nrow*ncol. Grid is a two-dimensional array + of cells within the table; the cell at row i and column j is + Table.grid[i][j]. A cell that spans multiple rows and/or columns + will be referenced by grid multiple times, however it will only + occur once in cells. Align gives the alignment specification for + the entire table, and width gives the requested width as a dimension + specification. Border, cellspacing + and cellpadding give the values of the corresponding attributes + for the table, and background gives the requested background for + the table. Caption is a linked list of items to be displayed as + the caption of the table, either above or below depending on whether + caption_place is ALtop or ALbottom. + Most of the remaining fields are reserved for use by the caller, + except tabletok, which is reserved for internal use. The type + Lay is not defined by the library; the caller can provide its + own definition. +
+ + The Tablecol structure is defined for use by the caller. The library + ensures that the correct number of these is allocated, but leaves + them blank. The fields are as follows: +
+ + typedef struct Tablecol Tablecol;
+ struct Tablecol
+ {
+ +
+ + int     width;
+ Align align;
+ Point pos;
+ +
+ };
+ +
+
+ The rows in the table are specified as follows: +
+ + typedef struct Tablerow Tablerow;
+ struct Tablerow
+ {
+ +
+ + Tablerow*    next;
+ Tablecell* cells;
+ int          height;
+ int          ascent;
+ Align        align;
+ Background background;
+ Point        pos;
+ uchar        flags;
+ +
+ };
+ +
+
+ Next is only used during parsing; it should be ignored by the + caller. Cells provides a list of all the cells in a row, linked + through their nextinrow fields (see below). Height, ascent and + pos are reserved for use by the caller. Align is the alignment + specification for the row, and background is the + background to use, if specified. Flags is used by the parser; + ignore this field. +
+ + The individual cells of the table are described as follows: +
+ + typedef struct Tablecell Tablecell;
+ struct Tablecell
+ {
+ +
+ + Tablecell* next;
+ Tablecell* nextinrow;
+ int          cellid;
+ Item*        content;
+ Lay*         lay;
+ int          rowspan;
+ int          colspan;
+ Align        align;
+ uchar        flags;
+ Dimen        wspec;
+ int          hspec;
+ Background background;
+ int          minw;
+ int          maxw;
+ int          ascent;
+ int          row;
+ int          col;
+ Point        pos;
+ +
+ };
+ +
+
+ Next is used to link together the list of all cells within a table + (Table.cells), whereas nextinrow is used to link together all + the cells within a single row (Tablerow.cells). Cellid provides + a serial number for the cell within the table. Content is a linked + list of the items to be laid out within the cell. Lay + is reserved for the user to describe how these items have been + laid out. Rowspan and colspan are the number of rows and columns + spanned by this cell, respectively. Align is the alignment specification + for the cell. Flags is some combination of TFparsing, TFnowrap + and TFisth or’d together. Here + TFparsing is used internally by the parser, and should be ignored. + TFnowrap means that the contents of the cell should not be wrapped + if they don’t fit the available width, rather, the table should + be expanded if need be (this is set when the nowrap attribute + is supplied). TFisth means that the cell was created + by the <th> element (rather than the <td> element), indicating that + it is a header cell rather than a data cell. Wspec provides a + suggested width as a dimension specification, and hspec provides + a suggested height in pixels. Background gives a background specification + for the individual cell. Minw, maxw, + ascent and pos are reserved for use by the caller during layout. + Row and col give the indices of the row and column of the top + left-hand corner of the cell within the table grid.
+

Client-side Maps
+ +
+ + The library builds a list of client-side maps, headed by Docinfo.maps, + and having the following structure: +
+ + typedef struct Map Map;
+ struct Map
+ {
+ +
+ + Map*    next;
+ Rune* name;
+ Area* areas;
+ +
+ };
+ +
+
+ + Next points to the next element in the list, name is the name + of the map (used to bind it to an image), and areas is a list of + the areas within the image that comprise the map, using the following + structure: +
+ + typedef struct Area Area;
+ struct Area
+ {
+ +
+ + Area*    next;
+ int      shape;
+ Rune*    href;
+ int      target;
+ Dimen* coords;
+ int      ncoords;
+ +
+ };
+ +
+
+ Next points to the next element in the map’s list of areas. Shape + describes the shape of the area, and is one of SHrect, SHcircle + or SHpoly. Href is the URL associated with this area in its role + as a hypertext link, and target is the target frame it should + be loaded in. Coords is an array of coordinates for + the shape, and ncoords is the size of this array (number of elements).
+

Frames
+ +
+ + If the Docinfo.kidinfo field is set, the document is a frameset. + In this case, it is typical for parsehtml to return nil, as a + document which is a frameset should have no actual items that + need to be laid out (such will appear only in subsidiary documents). + It is possible that items will be returned by a malformed + document; the caller should check for this and free any such items. + +
+ + The Kidinfo structure itself reflects the fact that framesets + can be nested within a document. It is defined as follows: +
+ + typedef struct Kidinfo Kidinfo;
+ struct Kidinfo
+ {
+ +
+ + Kidinfo* next;
+ int        isframeset;
+ // fields for "frame"
+ Rune*      src;
+ Rune*      name;
+ int        marginw;
+ int        marginh;
+ int        framebd;
+ int        flags;
+ // fields for "frameset"
+ Dimen*     rows;
+ int        nrows;
+ Dimen*     cols;
+ int        ncols;
+ Kidinfo* kidinfos;
+ Kidinfo* nextframeset;
+ +
+ };
+ +
+
+ Next is only used if this structure is part of a containing frameset; + it points to the next element in the list of children of that + frameset. Isframeset is set when this structure represents a frameset; + if clear, it is an individual frame. +
+ + Some fields are used only for framesets. Rows is an array of dimension + specifications for rows in the frameset, and nrows is the length + of this array. Cols is the corresponding array for columns, of + length ncols. Kidinfos points to a list of components contained + within this frameset, each of which may be a + frameset or a frame. Nextframeset is only used during parsing, + and should be ignored. +
+ + The remaining fields are used if the structure describes a frame, + not a frameset. Src provides the URL for the document that should + be initially loaded into this frame. Note that this may be a relative + URL, in which case it should be interpreted using the containing + document’s URL as the base. Name gives the name of + the frame, typically supplied via a name attribute in the HTML. + If no name was given, the library allocates one. Marginw, marginh + and framebd are the values of the marginwidth, marginheight and + frameborder attributes, respectively. Flags can contain some combination + of the following: FRnoresize (the + frame had the noresize attribute set, and the user should not + be allowed to resize it), FRnoscroll (the frame should not have + any scroll bars), FRhscroll (the frame should have a horizontal + scroll bar), FRvscroll (the frame should have a vertical scroll + bar), FRhscrollauto (the frame should be automatically + given a horizontal scroll bar if its contents would not otherwise + fit), and FRvscrollauto (the frame gets a vertical scrollbar only + if required).
+ +

+

SOURCE
+ +
+ + /usr/local/plan9/src/libhtml
+
+
+

SEE ALSO
+ +
+ + fmt(1) +
+ + W3C World Wide Web Consortium, “HTML 4.01 Specification”.
+ +
+

BUGS
+ +
+ + The entire HTML document must be loaded into memory before any + of it can be parsed.
+ +
+ +

+
+
+ + +
+
+
+Space Glenda +
+
+ + -- cgit v1.2.3