aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/deroff.c
diff options
context:
space:
mode:
authorrsc <devnull@localhost>2003-11-23 18:04:47 +0000
committerrsc <devnull@localhost>2003-11-23 18:04:47 +0000
commitbc7cb1a15a67c859c8c71c4b52bb35fe9425a63d (patch)
tree8ca0fe4e2418e6aa18dc74a236c577a719f6c6ed /src/cmd/deroff.c
parentf08fdedcee12c06e3ce9ac9bec363915978e8289 (diff)
downloadplan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.gz
plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.bz2
plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.zip
new utilities.
the .C files compile but are renamed to avoid building automatically.
Diffstat (limited to 'src/cmd/deroff.c')
-rw-r--r--src/cmd/deroff.c969
1 files changed, 969 insertions, 0 deletions
diff --git a/src/cmd/deroff.c b/src/cmd/deroff.c
new file mode 100644
index 00000000..914c5a3f
--- /dev/null
+++ b/src/cmd/deroff.c
@@ -0,0 +1,969 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/*
+ * Deroff command -- strip troff, eqn, and tbl sequences from
+ * a file. Has three flags argument, -w, to cause output one word per line
+ * rather than in the original format.
+ * -mm (or -ms) causes the corresponding macro's to be interpreted
+ * so that just sentences are output
+ * -ml also gets rid of lists.
+ * -i causes deroff to ignore .so and .nx commands.
+ * Deroff follows .so and .nx commands, removes contents of macro
+ * definitions, equations (both .EQ ... .EN and $...$),
+ * Tbl command sequences, and Troff backslash vconstructions.
+ *
+ * All input is through the C macro; the most recently read character is in c.
+ */
+
+/*
+#define C ((c = Bgetrune(infile)) < 0?\
+ eof():\
+ ((c == ldelim) && (filesp == files)?\
+ skeqn():\
+ (c == '\n'?\
+ (linect++,c):\
+ c)))
+
+#define C1 ((c = Bgetrune(infile)) == Beof?\
+ eof():\
+ (c == '\n'?\
+ (linect++,c):\
+ c))
+*/
+
+/* lose those macros! */
+#define C fC()
+#define C1 fC1()
+
+#define SKIP while(C != '\n')
+#define SKIP1 while(C1 != '\n')
+#define SKIP_TO_COM SKIP;\
+ SKIP;\
+ pc=c;\
+ while(C != '.' || pc != '\n' || C > 'Z')\
+ pc=c
+
+#define YES 1
+#define NO 0
+#define MS 0
+#define MM 1
+#define ONE 1
+#define TWO 2
+
+#define NOCHAR -2
+#define EXTENDED -1 /* All runes above 0x7F */
+#define SPECIAL 0
+#define APOS 1
+#define PUNCT 2
+#define DIGIT 3
+#define LETTER 4
+
+
+int linect = 0;
+int wordflag= NO;
+int underscoreflag = NO;
+int msflag = NO;
+int iflag = NO;
+int mac = MM;
+int disp = 0;
+int inmacro = NO;
+int intable = NO;
+int eqnflag = 0;
+
+#define MAX_ASCII 0X80
+
+char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
+
+Rune line[30000];
+Rune* lp;
+
+long c;
+long pc;
+int ldelim = NOCHAR;
+int rdelim = NOCHAR;
+
+
+char** argv;
+
+char fname[50];
+Biobuf* files[15];
+Biobuf**filesp;
+Biobuf* infile;
+char* devnull = "/dev/null";
+Biobuf *infile;
+Biobuf bout;
+
+long skeqn(void);
+Biobuf* opn(char *p);
+int eof(void);
+int charclass(int);
+void getfname(void);
+void fatal(char *s, char *p);
+void usage(void);
+void work(void);
+void putmac(Rune *rp, int vconst);
+void regline(int macline, int vconst);
+void putwords(void);
+void comline(void);
+void macro(void);
+void eqn(void);
+void tbl(void);
+void stbl(void);
+void sdis(char a1, char a2);
+void sce(void);
+void backsl(void);
+char* copys(char *s);
+void refer(int c1);
+void inpic(void);
+
+int
+fC(void)
+{
+ c = Bgetrune(infile);
+ if(c < 0)
+ return eof();
+ if(c == ldelim && filesp == files)
+ return skeqn();
+ if(c == '\n')
+ linect++;
+ return c;
+}
+
+int
+fC1(void)
+{
+ c = Bgetrune(infile);
+ if(c == Beof)
+ return eof();
+ if(c == '\n')
+ linect++;
+ return c;
+}
+
+void
+main(int argc, char *av[])
+{
+ int i;
+ char *f;
+
+ argv = av;
+ Binit(&bout, 1, OWRITE);
+ ARGBEGIN{
+ case 'w':
+ wordflag = YES;
+ break;
+ case '_':
+ wordflag = YES;
+ underscoreflag = YES;
+ break;
+ case 'm':
+ msflag = YES;
+ if(f = ARGF())
+ switch(*f)
+ {
+ case 'm': mac = MM; break;
+ case 's': mac = MS; break;
+ case 'l': disp = 1; break;
+ default: usage();
+ }
+ else
+ usage();
+ break;
+ case 'i':
+ iflag = YES;
+ break;
+ default:
+ usage();
+ }ARGEND
+ if(*argv)
+ infile = opn(*argv++);
+ else{
+ infile = malloc(sizeof(Biobuf));
+ Binit(infile, 0, OREAD);
+ }
+ files[0] = infile;
+ filesp = &files[0];
+
+ for(i='a'; i<='z' ; ++i)
+ chars[i] = LETTER;
+ for(i='A'; i<='Z'; ++i)
+ chars[i] = LETTER;
+ for(i='0'; i<='9'; ++i)
+ chars[i] = DIGIT;
+ chars['\''] = APOS;
+ chars['&'] = APOS;
+ chars['\b'] = APOS;
+ chars['.'] = PUNCT;
+ chars[','] = PUNCT;
+ chars[';'] = PUNCT;
+ chars['?'] = PUNCT;
+ chars[':'] = PUNCT;
+ work();
+}
+
+long
+skeqn(void)
+{
+ while(C1 != rdelim)
+ if(c == '\\')
+ c = C1;
+ else if(c == '"')
+ while(C1 != '"')
+ if(c == '\\')
+ C1;
+ if (msflag)
+ eqnflag = 1;
+ return(c = ' ');
+}
+
+Biobuf*
+opn(char *p)
+{
+ Biobuf *fd;
+
+ while ((fd = Bopen(p, OREAD)) == 0) {
+ if(msflag || p == devnull)
+ fatal("Cannot open file %s - quitting\n", p);
+ else {
+ fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
+ p = devnull;
+ }
+ }
+ linect = 0;
+ return(fd);
+}
+
+int
+eof(void)
+{
+ if(Bfildes(infile) != 0)
+ Bterm(infile);
+ if(filesp > files)
+ infile = *--filesp;
+ else
+ if(*argv)
+ infile = opn(*argv++);
+ else
+ exits(0);
+ return(C);
+}
+
+void
+getfname(void)
+{
+ char *p;
+ Rune r;
+ Dir *dir;
+ struct chain
+ {
+ struct chain* nextp;
+ char* datap;
+ } *q;
+
+ static struct chain *namechain= 0;
+
+ while(C == ' ')
+ ;
+ for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)
+ p += runetochar(p, &r);
+ *p = '\0';
+ while(c != '\n')
+ C;
+ if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
+ || !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
+ fname[0] = '\0';
+ return;
+ }
+ dir = dirstat(fname);
+ if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
+ free(dir);
+ fname[0] = '\0';
+ return;
+ }
+ free(dir);
+ /*
+ * see if this name has already been used
+ */
+
+ for(q = namechain; q; q = q->nextp)
+ if( !strcmp(fname, q->datap)) {
+ fname[0] = '\0';
+ return;
+ }
+ q = (struct chain*)malloc(sizeof(struct chain));
+ q->nextp = namechain;
+ q->datap = copys(fname);
+ namechain = q;
+}
+
+void
+usage(void)
+{
+ fprint(2,"usage: deroff [-nw_pi] [-m (m s l)] [file ...] \n");
+ exits("usage");
+}
+
+void
+fatal(char *s, char *p)
+{
+ fprint(2, "deroff: ");
+ fprint(2, s, p);
+ exits(s);
+}
+
+void
+work(void)
+{
+
+ for(;;) {
+ eqnflag = 0;
+ if(C == '.' || c == '\'')
+ comline();
+ else
+ regline(NO, TWO);
+ }
+}
+
+void
+regline(int macline, int vconst)
+{
+ line[0] = c;
+ lp = line;
+ for(;;) {
+ if(c == '\\') {
+ *lp = ' ';
+ backsl();
+ if(c == '%') /* no blank for hyphenation char */
+ lp--;
+ }
+ if(c == '\n')
+ break;
+ if(intable && c=='T') {
+ *++lp = C;
+ if(c=='{' || c=='}') {
+ lp[-1] = ' ';
+ *lp = C;
+ }
+ } else {
+ if(msflag == 1 && eqnflag == 1) {
+ eqnflag = 0;
+ *++lp = 'x';
+ }
+ *++lp = C;
+ }
+ }
+ *lp = '\0';
+ if(lp != line) {
+ if(wordflag)
+ putwords();
+ else
+ if(macline)
+ putmac(line,vconst);
+ else
+ Bprint(&bout, "%S\n", line);
+ }
+}
+
+void
+putmac(Rune *rp, int vconst)
+{
+ Rune *t;
+ int found;
+ Rune last;
+
+ found = 0;
+ last = 0;
+ while(*rp) {
+ while(*rp == ' ' || *rp == '\t')
+ Bputrune(&bout, *rp++);
+ for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)
+ ;
+ if(*rp == '\"')
+ rp++;
+ if(t > rp+vconst && charclass(*rp) == LETTER
+ && charclass(rp[1]) == LETTER) {
+ while(rp < t)
+ if(*rp == '\"')
+ rp++;
+ else
+ Bputrune(&bout, *rp++);
+ last = t[-1];
+ found++;
+ } else
+ if(found && charclass(*rp) == PUNCT && rp[1] == '\0')
+ Bputrune(&bout, *rp++);
+ else {
+ last = t[-1];
+ rp = t;
+ }
+ }
+ Bputc(&bout, '\n');
+ if(msflag && charclass(last) == PUNCT)
+ Bprint(&bout, " %C\n", last);
+}
+
+/*
+ * break into words for -w option
+ */
+void
+putwords(void)
+{
+ Rune *p, *p1;
+ int i, nlet;
+
+
+ for(p1 = line;;) {
+ /*
+ * skip initial specials ampersands and apostrophes
+ */
+ while((i = charclass(*p1)) != EXTENDED && i < DIGIT)
+ if(*p1++ == '\0')
+ return;
+ nlet = 0;
+ for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)
+ if(i == LETTER || (underscoreflag && *p == '_'))
+ nlet++;
+ /*
+ * MDM definition of word
+ */
+ if(nlet > 1) {
+ /*
+ * delete trailing ampersands and apostrophes
+ */
+ while(*--p == '\'' || *p == '&'
+ || charclass(*p) == PUNCT)
+ ;
+ while(p1 <= p)
+ Bputrune(&bout, *p1++);
+ Bputc(&bout, '\n');
+ } else
+ p1 = p;
+ }
+}
+
+void
+comline(void)
+{
+ long c1, c2;
+
+ while(C==' ' || c=='\t')
+ ;
+comx:
+ if((c1=c) == '\n')
+ return;
+ c2 = C;
+ if(c1=='.' && c2!='.')
+ inmacro = NO;
+ if(msflag && c1 == '['){
+ refer(c2);
+ return;
+ }
+ if(c2 == '\n')
+ return;
+ if(c1 == '\\' && c2 == '\"')
+ SKIP;
+ else
+ if (filesp==files && c1=='E' && c2=='Q')
+ eqn();
+ else
+ if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
+ if(msflag)
+ stbl();
+ else
+ tbl();
+ }
+ else
+ if(c1=='T' && c2=='E')
+ intable = NO;
+ else if (!inmacro &&
+ ((c1 == 'd' && c2 == 'e') ||
+ (c1 == 'i' && c2 == 'g') ||
+ (c1 == 'a' && c2 == 'm')))
+ macro();
+ else
+ if(c1=='s' && c2=='o') {
+ if(iflag)
+ SKIP;
+ else {
+ getfname();
+ if(fname[0]) {
+ if(infile = opn(fname))
+ *++filesp = infile;
+ else infile = *filesp;
+ }
+ }
+ }
+ else
+ if(c1=='n' && c2=='x')
+ if(iflag)
+ SKIP;
+ else {
+ getfname();
+ if(fname[0] == '\0')
+ exits(0);
+ if(Bfildes(infile) != 0)
+ Bterm(infile);
+ infile = *filesp = opn(fname);
+ }
+ else
+ if(c1 == 't' && c2 == 'm')
+ SKIP;
+ else
+ if(c1=='h' && c2=='w')
+ SKIP;
+ else
+ if(msflag && c1 == 'T' && c2 == 'L') {
+ SKIP_TO_COM;
+ goto comx;
+ }
+ else
+ if(msflag && c1=='N' && c2 == 'R')
+ SKIP;
+ else
+ if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
+ if(mac==MM)SKIP;
+ else {
+ SKIP_TO_COM;
+ goto comx;
+ }
+ } else
+ if(msflag && c1=='F' && c2=='S') {
+ SKIP_TO_COM;
+ goto comx;
+ }
+ else
+ if(msflag && (c1=='S' || c1=='N') && c2=='H') {
+ SKIP_TO_COM;
+ goto comx;
+ } else
+ if(c1 == 'U' && c2 == 'X') {
+ if(wordflag)
+ Bprint(&bout, "UNIX\n");
+ else
+ Bprint(&bout, "UNIX ");
+ } else
+ if(msflag && c1=='O' && c2=='K') {
+ SKIP_TO_COM;
+ goto comx;
+ } else
+ if(msflag && c1=='N' && c2=='D')
+ SKIP;
+ else
+ if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
+ SKIP;
+ else
+ if(msflag && mac==MM && c2=='L') {
+ if(disp || c1=='R')
+ sdis('L', 'E');
+ else {
+ SKIP;
+ Bprint(&bout, " .");
+ }
+ } else
+ if(!msflag && c1=='P' && c2=='S') {
+ inpic();
+ } else
+ if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
+ sdis(c1, 'E');
+ } else
+ if(msflag && (c1 == 'K' && c2 == 'F')) {
+ sdis(c1,'E');
+ } else
+ if(msflag && c1=='n' && c2=='f')
+ sdis('f','i');
+ else
+ if(msflag && c1=='c' && c2=='e')
+ sce();
+ else {
+ if(c1=='.' && c2=='.') {
+ if(msflag) {
+ SKIP;
+ return;
+ }
+ while(C == '.')
+ ;
+ }
+ inmacro++;
+ if(c1 <= 'Z' && msflag)
+ regline(YES,ONE);
+ else {
+ if(wordflag)
+ C;
+ regline(YES,TWO);
+ }
+ inmacro--;
+ }
+}
+
+void
+macro(void)
+{
+ if(msflag) {
+ do {
+ SKIP1;
+ } while(C1 != '.' || C1 != '.' || C1 == '.');
+ if(c != '\n')
+ SKIP;
+ return;
+ }
+ SKIP;
+ inmacro = YES;
+}
+
+void
+sdis(char a1, char a2)
+{
+ int c1, c2;
+ int eqnf;
+ int lct;
+
+ if(a1 == 'P'){
+ while(C1 == ' ')
+ ;
+ if(c == '<') {
+ SKIP1;
+ return;
+ }
+ }
+ lct = 0;
+ eqnf = 1;
+ if(c != '\n')
+ SKIP1;
+ for(;;) {
+ while(C1 != '.')
+ if(c == '\n')
+ continue;
+ else
+ SKIP1;
+ if((c1=C1) == '\n')
+ continue;
+ if((c2=C1) == '\n') {
+ if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
+ return;
+ continue;
+ }
+ if(c1==a1 && c2 == a2) {
+ SKIP1;
+ if(lct != 0){
+ lct--;
+ continue;
+ }
+ if(eqnf)
+ Bprint(&bout, " .");
+ Bputc(&bout, '\n');
+ return;
+ } else
+ if(a1 == 'L' && c2 == 'L') {
+ lct++;
+ SKIP1;
+ } else
+ if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {
+ eqn();
+ eqnf = 0;
+ } else
+ if(a1 == 'f') {
+ if((mac == MS && c2 == 'P') ||
+ (mac == MM && c1 == 'H' && c2 == 'U')){
+ SKIP1;
+ return;
+ }
+ SKIP1;
+ }
+ else
+ SKIP1;
+ }
+}
+
+void
+tbl(void)
+{
+ while(C != '.')
+ ;
+ SKIP;
+ intable = YES;
+}
+
+void
+stbl(void)
+{
+ while(C != '.')
+ ;
+ SKIP_TO_COM;
+ if(c != 'T' || C != 'E') {
+ SKIP;
+ pc = c;
+ while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
+ pc = c;
+ }
+}
+
+void
+eqn(void)
+{
+ long c1, c2;
+ int dflg;
+ char last;
+
+ last = 0;
+ dflg = 1;
+ SKIP;
+
+ for(;;) {
+ if(C1 == '.' || c == '\'') {
+ while(C1==' ' || c=='\t')
+ ;
+ if(c=='E' && C1=='N') {
+ SKIP;
+ if(msflag && dflg) {
+ Bputc(&bout, 'x');
+ Bputc(&bout, ' ');
+ if(last) {
+ Bputc(&bout, last);
+ Bputc(&bout, '\n');
+ }
+ }
+ return;
+ }
+ } else
+ if(c == 'd') {
+ if(C1=='e' && C1=='l')
+ if(C1=='i' && C1=='m') {
+ while(C1 == ' ')
+ ;
+ if((c1=c)=='\n' || (c2=C1)=='\n' ||
+ (c1=='o' && c2=='f' && C1=='f')) {
+ ldelim = NOCHAR;
+ rdelim = NOCHAR;
+ } else {
+ ldelim = c1;
+ rdelim = c2;
+ }
+ }
+ dflg = 0;
+ }
+ if(c != '\n')
+ while(C1 != '\n') {
+ if(chars[c] == PUNCT)
+ last = c;
+ else
+ if(c != ' ')
+ last = 0;
+ }
+ }
+}
+
+/*
+ * skip over a complete backslash vconstruction
+ */
+void
+backsl(void)
+{
+ int bdelim;
+
+sw:
+ switch(C1)
+ {
+ case '"':
+ SKIP1;
+ return;
+
+ case 's':
+ if(C1 == '\\')
+ backsl();
+ else {
+ while(C1>='0' && c<='9')
+ ;
+ Bungetrune(infile);
+ c = '0';
+ }
+ lp--;
+ return;
+
+ case 'f':
+ case 'n':
+ case '*':
+ if(C1 != '(')
+ return;
+
+ case '(':
+ if(msflag) {
+ if(C == 'e') {
+ if(C1 == 'm') {
+ *lp = '-';
+ return;
+ }
+ } else
+ if(c != '\n')
+ C1;
+ return;
+ }
+ if(C1 != '\n')
+ C1;
+ return;
+
+ case '$':
+ C1; /* discard argument number */
+ return;
+
+ case 'b':
+ case 'x':
+ case 'v':
+ case 'h':
+ case 'w':
+ case 'o':
+ case 'l':
+ case 'L':
+ if((bdelim=C1) == '\n')
+ return;
+ while(C1!='\n' && c!=bdelim)
+ if(c == '\\')
+ backsl();
+ return;
+
+ case '\\':
+ if(inmacro)
+ goto sw;
+ default:
+ return;
+ }
+}
+
+char*
+copys(char *s)
+{
+ char *t, *t0;
+
+ if((t0 = t = malloc((strlen(s)+1))) == 0)
+ fatal("Cannot allocate memory", (char*)0);
+ while(*t++ = *s++)
+ ;
+ return(t0);
+}
+
+void
+sce(void)
+{
+ int n = 1;
+
+ while (C != L'\n' && !(L'0' <= c && c <= L'9'))
+ ;
+ if (c != L'\n') {
+ for (n = c-L'0';'0' <= C && c <= L'9';)
+ n = n*10 + c-L'0';
+ }
+ while(n) {
+ if(C == '.') {
+ if(C == 'c') {
+ if(C == 'e') {
+ while(C == ' ')
+ ;
+ if(c == '0') {
+ SKIP;
+ break;
+ } else
+ SKIP;
+ } else
+ SKIP;
+ } else
+ if(c == 'P' || C == 'P') {
+ if(c != '\n')
+ SKIP;
+ break;
+ } else
+ if(c != '\n')
+ SKIP;
+ } else {
+ SKIP;
+ n--;
+ }
+ }
+}
+
+void
+refer(int c1)
+{
+ int c2;
+
+ if(c1 != '\n')
+ SKIP;
+ c2 = 0;
+ for(;;) {
+ if(C != '.')
+ SKIP;
+ else {
+ if(C != ']')
+ SKIP;
+ else {
+ while(C != '\n')
+ c2 = c;
+ if(charclass(c2) == PUNCT)
+ Bprint(&bout, " %C",c2);
+ return;
+ }
+ }
+ }
+}
+
+void
+inpic(void)
+{
+ int c1;
+ Rune *p1;
+
+/* SKIP1;*/
+ while(C1 != '\n')
+ if(c == '<'){
+ SKIP1;
+ return;
+ }
+ p1 = line;
+ c = '\n';
+ for(;;) {
+ c1 = c;
+ if(C1 == '.' && c1 == '\n') {
+ if(C1 != 'P' || C1 != 'E') {
+ if(c != '\n'){
+ SKIP1;
+ c = '\n';
+ }
+ continue;
+ }
+ SKIP1;
+ return;
+ } else
+ if(c == '\"') {
+ while(C1 != '\"') {
+ if(c == '\\') {
+ if(C1 == '\"')
+ continue;
+ Bungetrune(infile);
+ backsl();
+ } else
+ *p1++ = c;
+ }
+ *p1++ = ' ';
+ } else
+ if(c == '\n' && p1 != line) {
+ *p1 = '\0';
+ if(wordflag)
+ putwords();
+ else
+ Bprint(&bout, "%S\n\n", line);
+ p1 = line;
+ }
+ }
+}
+
+int
+charclass(int c)
+{
+ if(c < MAX_ASCII)
+ return chars[c];
+ switch(c){
+ case 0x2013: case 0x2014: /* en dash, em dash */
+ return SPECIAL;
+ }
+ return EXTENDED;
+}