From 357621cd82c8cf27c4e6a6514b779fd77721b907 Mon Sep 17 00:00:00 2001 From: rsc Date: Thu, 13 Jan 2005 04:50:11 +0000 Subject: more small changes --- src/cmd/troff2html/chars.h | 195 +++++++++ src/cmd/troff2html/mkfile | 8 + src/cmd/troff2html/troff2html.c | 846 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1049 insertions(+) create mode 100644 src/cmd/troff2html/chars.h create mode 100644 src/cmd/troff2html/mkfile create mode 100644 src/cmd/troff2html/troff2html.c (limited to 'src/cmd/troff2html') diff --git a/src/cmd/troff2html/chars.h b/src/cmd/troff2html/chars.h new file mode 100644 index 00000000..7db2f85f --- /dev/null +++ b/src/cmd/troff2html/chars.h @@ -0,0 +1,195 @@ +/* sorted by unicode value */ +Htmlchar htmlchars[] = +{ + { "\"", """ }, + { "&", "&" }, + { "<", "<" }, + { ">", ">" }, + { "¡", "¡" }, + { "¢", "¢" }, + { "£", "£" }, + { "¤", "¤" }, + { "¥", "¥" }, + { "¦", "¦" }, + { "§", "§" }, + { "¨", "¨" }, + { "©", "©" }, + { "ª", "ª" }, + { "«", "«" }, + { "¬", "¬" }, + { "­", "–" }, + { "®", "®" }, + { "¯", "¯" }, + { "°", "°" }, + { "±", "±" }, + { "²", "²" }, + { "³", "³" }, + { "´", "´" }, + { "µ", "µ" }, + { "¶", "¶" }, + { "·", "·" }, + { "¸", "¸" }, + { "¹", "¹" }, + { "º", "º" }, + { "»", "»" }, + { "¼", "¼" }, + { "½", "½" }, + { "¾", "¾" }, + { "¿", "¿" }, + { "À", "À" }, + { "Á", "Á" }, + { "Â", "Â" }, + { "Ã", "Ã" }, + { "Ä", "Ä" }, + { "Å", "Å" }, + { "Æ", "Æ" }, + { "Ç", "Ç" }, + { "È", "È" }, + { "É", "É" }, + { "Ê", "Ê" }, + { "Ë", "Ë" }, + { "Ì", "Ì" }, + { "Í", "Í" }, + { "Î", "Î" }, + { "Ï", "Ï" }, + { "Ð", "Ð" }, + { "Ñ", "Ñ" }, + { "Ò", "Ò" }, + { "Ó", "Ó" }, + { "Ô", "Ô" }, + { "Õ", "Õ" }, + { "Ö", "Ö" }, + { "×", "x" }, + { "Ø", "Ø" }, + { "Ù", "Ù" }, + { "Ú", "Ú" }, + { "Û", "Û" }, + { "Ü", "Ü" }, + { "Ý", "Ý" }, + { "Þ", "Þ" }, + { "ß", "ß" }, + { "à", "à" }, + { "á", "á" }, + { "â", "â" }, + { "ã", "ã" }, + { "ä", "ä" }, + { "å", "å" }, + { "æ", "æ" }, + { "ç", "ç" }, + { "è", "è" }, + { "é", "é" }, + { "ê", 
"ê" }, + { "ë", "ë" }, + { "ì", "ì" }, + { "í", "í" }, + { "î", "î" }, + { "ï", "ï" }, + { "ð", "ð" }, + { "ñ", "ñ" }, + { "ò", "ò" }, + { "ó", "ó" }, + { "ô", "ô" }, + { "õ", "õ" }, + { "ö", "ö" }, + { "ø", "ø" }, + { "ù", "ù" }, + { "ú", "ú" }, + { "û", "û" }, + { "ü", "ü" }, + { "ý", "ý" }, + { "þ", "þ" }, + { "ÿ", "ÿ" }, + { "•", "*" }, + { "™", "(tm)" }, + { "←", "←" }, + { "↑", "↑" }, + { "→", "→" }, + { "↓", "↓" }, + { "≠", "!=" }, + { "≤", "≤" }, +/* { "□", "¤" }, + { "◊", "º" }, */ +}; + +/* unsorted */ +Troffchar troffchars[] = +{ + { "A*", "Å", }, + { "o\"", "ö", }, + { "ff", "ff", }, + { "fi", "fi", }, + { "fl", "fl", }, + { "Fi", "ffi", }, + { "ru", "_", }, + { "em", "--", }, + { "en", "-", }, + { "\\-", "–", }, + { "14", "¼", }, + { "12", "½", }, + { "co", "©", }, + { "de", "°", }, + { "dg", "¡", }, + { "fm", "´", }, + { "rg", "®", }, + { "bu", "*", }, + { "sq", "¤", }, + { "hy", "–", }, + { "pl", "+", }, + { "mi", "-", }, + { "mu", "×", }, + { "di", "÷", }, + { "eq", "=", }, + { "==", "==", }, + { ">=", ">=", }, + { "<=", "<=", }, + { "!=", "!=", }, + { "+-", "±", }, + { "no", "¬", }, + { "sl", "/", }, + { "ap", "&", }, + { "~=", "~=", }, + { "pt", "oc", }, + { "gr", "GRAD", }, + { "->", "->", }, + { "<-", "<-", }, + { "ua", "^", }, + { "da", "v", }, + { "is", "Integral", }, + { "pd", "DIV", }, + { "if", "oo", }, + { "sr", "-/", }, + { "sb", "(~", }, + { "sp", "~)", }, + { "cu", "U", }, + { "ca", "(^)", }, + { "ib", "(=", }, + { "ip", "=)", }, + { "mo", "C", }, + { "es", "Ø", }, + { "aa", "´", }, + { "ga", "`", }, + { "ci", "O", }, + { "L1", "DEATHSTAR", }, + { "sc", "§", }, + { "dd", "++", }, + { "lh", "<=", }, + { "rh", "=>", }, + { "lt", "(", }, + { "rt", ")", }, + { "lc", "|", }, + { "rc", "|", }, + { "lb", "(", }, + { "rb", ")", }, + { "lf", "|", }, + { "rf", "|", }, + { "lk", "|", }, + { "rk", "|", }, + { "bv", "|", }, + { "ts", "s", }, + { "br", "|", }, + { "or", "|", }, + { "ul", "_", }, + { "rn", " ", }, + { "**", "*", }, + { nil, nil, }, 
+}; diff --git a/src/cmd/troff2html/mkfile b/src/cmd/troff2html/mkfile new file mode 100644 index 00000000..8b79c6f1 --- /dev/null +++ b/src/cmd/troff2html/mkfile @@ -0,0 +1,8 @@ +<$PLAN9/src/mkhdr + +TARG=troff2html +HFILES=chars.h +OFILES=troff2html.$O\ + +<$PLAN9/src/mkone + diff --git a/src/cmd/troff2html/troff2html.c b/src/cmd/troff2html/troff2html.c new file mode 100644 index 00000000..1bb40cff --- /dev/null +++ b/src/cmd/troff2html/troff2html.c @@ -0,0 +1,846 @@ +#include +#include +#include + +enum{ + Nfont = 11, + Wid = 20, /* tmac.anhtml sets page width to 20" so we can recognize .nf text */ +}; + +typedef ulong Char; +typedef struct Troffchar Troffchar; +typedef struct Htmlchar Htmlchar; +typedef struct Font Font; +typedef struct HTMLfont HTMLfont; + +/* a Char is 32 bits. low 16 bits are the rune. higher are attributes */ +enum +{ + Italic = 16, + Bold, + CW, + Indent1, + Indent2, + Indent3, + Heading = 25, + Anchor = 26, /* must be last */ +}; + +enum /* magic emissions */ +{ + Estring = 0, + Epp = 1<<16, +}; + +int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW }; + +int nest[10]; +int nnest; + +struct Troffchar +{ + char *name; + char *value; +}; + +struct Htmlchar +{ + char *utf; + char *name; + int value; +}; + +#include "chars.h" + +struct Font{ + char *name; + HTMLfont *htmlfont; +}; + +struct HTMLfont{ + char *name; + char *htmlname; + int bit; +}; + +/* R must be first; it's the default representation for fonts we don't recognize */ +HTMLfont htmlfonts[] = +{ + "R", nil, 0, + "LucidaSans", nil, 0, + "I", "i", Italic, + "LucidaSansI", "i", Italic, + "CW", "tt", CW, + "LucidaCW", "tt", CW, + nil, nil, +}; + +#define TABLE "" + +char* +onattr[8*sizeof(ulong)] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + "", /* italic */ + "", /* bold */ + "", /* cw */ + "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent1 */ + "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent2 */ + "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent3 */ + 0, + 0, + 0, + "

", /* heading 25 */ + "", /* anchor 26 */ +}; + +char* +offattr[8*sizeof(ulong)] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + "", /* italic */ + "", /* bold */ + "", /* cw */ + "<-/table>", /* indent1 */ + "<-/table>", /* indent2 */ + "<-/table>", /* indent3 */ + 0, + 0, + 0, + "", /* heading 25 */ + "", /* anchor 26 */ +}; + +Font *font[Nfont]; + +Biobuf bout; +int debug = 0; + +/* troff state */ +int page = 1; +int ft = 1; +int vp = 0; +int hp = 0; +int ps = 1; +int res = 720; + +int didP = 0; +int atnewline = 1; +int prevlineH = 0; +ulong attr = 0; /* or'ed into each Char */ + +Char *chars; +int nchars; +int nalloc; +char** anchors; /* allocated in order */ +int nanchors; + +char *pagename; +char *section; + +char *filename; +int cno; +char buf[8192]; +char *title = "Plan 9 man page"; + +void process(Biobuf*, char*); +void mountfont(int, char*); +void switchfont(int); +void header(char*); +void flush(void); +void trailer(void); + +void* +emalloc(ulong n) +{ + void *p; + + p = malloc(n); + if(p == nil) + sysfatal("malloc failed: %r"); + return p; +} + +void* +erealloc(void *p, ulong n) +{ + + p = realloc(p, n); + if(p == nil) + sysfatal("realloc failed: %r"); + return p; +} + +char* +estrdup(char *s) +{ + char *t; + + t = strdup(s); + if(t == nil) + sysfatal("strdup failed: %r"); + return t; +} + +void +usage(void) +{ + fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n"); + exits("usage"); +} + +int +hccmp(const void *va, const void *vb) +{ + Htmlchar *a, *b; + + a = (Htmlchar*)va; + b = (Htmlchar*)vb; + return a->value - b->value; +} + +void +main(int argc, char *argv[]) +{ + int i; + Biobuf in, *inp; + Rune r; + + for(i=0; i"); + }else{ + for(i=0; i 0){ + if(c == 0x2018 && (chars[nchars-1]&0xFFFF) == 0x2018 + && a==(chars[nchars-1]&~0xFFFF)){ + chars[nchars-1] = (ul&~0xFFFF) | 0x201C; + return; + } + if(c == 0x2019 && (chars[nchars-1]&0xFFFF) == 0x2019 + && a==(chars[nchars-1]&~0xFFFF)){ + chars[nchars-1] = (ul&~0xFFFF) | 0x201D; + return; 
+ } + } + } + chars[nchars++] = ul; +} + +void +emit(Rune r) +{ + emitul(r | attr, 0); + /* + * Close man page references early, so that + * .IR proof (1), + * doesn't make the comma part of the link. + */ + if(r == ')') + attr &= ~(1< 60 && r == ' ') + r = '\n'; + if(r >= 0x80) + Bprint(b, "&#%d;", r); + else + Bputrune(b, r); + if(r == '\n'){ + for(i=0; i=i; j--) + iputs(&bout, offattr[nest[j]]); + + /* turn on everything we just turned off but didn't want to */ + for(j=i; j

"); + iputrune(&bout, '\n'); + continue; + } + a = c & ~0xFFFF; + c &= 0xFFFF; + /* + * If we're going to something off after a space, + * let's just turn it off before. + */ + if(c==' ' && i= 32) + a ^= a & ~chars[i+1]; + setattr(a); + if(c == Estring){ + /* next word is string to print */ + iputs(&bout, (char*)chars[++i]); + continue; + } + iputrune(&bout, c & 0xFFFF); + } +} + +void +header(char *s) +{ + Bprint(&bout, "\n"); + Bprint(&bout, "%s\n", s); + Bprint(&bout, "\n"); + Bprint(&bout, "\n"); + Bprint(&bout, "\n"); + Bprint(&bout, "\n"); + Bprint(&bout, "
\n"); + Bprint(&bout, "
\n"); + if(pagename && section){ + Bprint(&bout, "
%s(%s)%s(%s)\n", + pagename, section, pagename, section); + } + Bprint(&bout, "
\n"); +} + +void +trailer(void) +{ + Bprint(&bout, "\n"); + Bprint(&bout, "
\n"); + Bprint(&bout, "
\n"); + +#ifdef LUCENT + { + Tm *t; + + t = localtime(time(nil)); + Bprint(&bout, TABLE "\n"); + Bprint(&bout, "\n"); + Bprint(&bout, "Portions Copyright © %d Lucent Technologies. All rights reserved.\n", t->year+1900); + } +#endif + Bprint(&bout, "\n"); + Bprint(&bout, "\n"); +} + +int +getc(Biobuf *b) +{ + cno++; + return Bgetrune(b); +} + +void +ungetc(Biobuf *b) +{ + cno--; + Bungetrune(b); +} + +char* +getline(Biobuf *b) +{ + int i, c; + + for(i=0; i 2) + fprint(2, "set %s = %d\n", name, i); + if(min<=i && i"); + return; + case 's': + /* stop */ + return; + case 't': + /* trailer */ + return; + case 'T': + if(nfld!=2 || strcmp(fld[1], "utf")!=0) + sysfatal("output for unknown typesetter type %s", fld[1]); + return; + case 'X': + if(nfld<3 || strcmp(fld[1], "html")!=0) + break; + /* is it a man reference of the form cp(1)? */ + /* X manref start/end cp (1) */ + if(nfld==6 && strcmp(fld[2], "manref")==0){ + /* was the right macro; is it the right form? */ + if(strlen(fld[5])>=3 && + fld[5][0]=='(' && fld[5][2]==')' && + '0'<=fld[5][1] && fld[5][1]<='9'){ + if(strcmp(fld[3], "start") == 0){ + /* set anchor attribute and remember string */ + attr |= (1<", + fld[5][1], fld[4]); + nanchors++; + anchors = erealloc(anchors, nanchors*sizeof(char*)); + anchors[nanchors-1] = estrdup(buf); + }else if(strcmp(fld[3], "end") == 0) + attr &= ~(1<=5 && strcmp(fld[2], "manhead") == 0){ + pagename = strdup(fld[3]); + section = strdup(fld[4]); + }else if(nfld<4 || strcmp(fld[2], "manref")!=0){ + if(nfld>2 && strcmp(fld[2], "

")==0){ /* avoid triggering extra
*/ + didP = 1; + /* clear all font attributes before paragraph */ + emitul(' ' | (attr & ~(0xFFFF|((1<"); + /* next emitted char will turn font attributes back on */ + }else if(nfld>2 && strcmp(fld[2], "

")==0) + attr |= (1<2 && strcmp(fld[2], "

")==0) + attr &= ~(1< tab[mid].value) + low = mid + 1; + else + return mid; + } + return -1; /* no match */ +} + +void +emithtmlchar(int r) +{ + int i; + + i = lookup(r, htmlchars, nelem(htmlchars)); + if(i >= 0) + emitstr(htmlchars[i].name); + else + emit(r); +} + +char* +troffchar(char *s) +{ + int i; + + for(i=0; troffchars[i].name!=nil; i++) + if(strcmp(s, troffchars[i].name) == 0) + return troffchars[i].value; + return "??"; +} + +void +indent(void) +{ + int nind; + + didP = 0; + if(atnewline){ + if(hp != prevlineH){ + prevlineH = hp; + /* these most peculiar numbers appear in the troff -man output */ + nind = ((prevlineH-1*res)+323)/324; + attr &= ~((1<= 1) + attr |= (1<= 2) + attr |= (1<= 3) + attr |= (1<2*72) + for(i=0; i"); + emit('\n'); + break; + case 'p': + page = setnum(b, "ps", -10000, 10000); + break; + case 's': + ps = setnum(b, "ps", 1, 1000); + break; + case 'v': + vp += setnum(b, "vpos", -10000, 10000); + /* BUG: ignore motion */ + break; + case 'x': + xcmd(b); + break; + case 'w': + emit(' '); + break; + case 'C': + indent(); + p = getstr(b); + emitstr(troffchar(p)); + break; + case 'H': + hp = setnum(b, "hpos", 0, 20000); + //Bprint(&bout, " H=%d ", hp); + break; + case 'V': + vp = setnum(b, "vpos", 0, 10000); + break; + default: + fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno); + return; + } + } +} + +HTMLfont* +htmlfont(char *name) +{ + int i; + + for(i=0; htmlfonts[i].name!=nil; i++) + if(strcmp(name, htmlfonts[i].name) == 0) + return &htmlfonts[i]; + return &htmlfonts[0]; +} + +void +mountfont(int pos, char *name) +{ + if(debug) + fprint(2, "mount font %s on %d\n", name, pos); + if(font[pos] != nil){ + free(font[pos]->name); + free(font[pos]); + } + font[pos] = emalloc(sizeof(Font)); + font[pos]->name = estrdup(name); + font[pos]->htmlfont = htmlfont(name); +} + +void +switchfont(int pos) +{ + HTMLfont *hf; + + if(debug) + fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, 
font[pos]->name); + if(pos == ft) + return; + hf = font[ft]->htmlfont; + if(hf->bit != 0) + attr &= ~(1<bit); + ft = pos; + hf = font[ft]->htmlfont; + if(hf->bit != 0) + attr |= (1<bit); +} -- cgit v1.2.3