diff options
author | rsc <devnull@localhost> | 2005-01-11 20:57:41 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2005-01-11 20:57:41 +0000 |
commit | c70667367f0d720fd6ddc62041ba5fcf10dc7f4c (patch) | |
tree | 6e02f612afe640fb864ab820026bf552c6fb4458 | |
parent | 1c096fa40aa3a74e0886b62f8686f294968a1130 (diff) | |
download | plan9port-c70667367f0d720fd6ddc62041ba5fcf10dc7f4c.tar.gz plan9port-c70667367f0d720fd6ddc62041ba5fcf10dc7f4c.tar.bz2 plan9port-c70667367f0d720fd6ddc62041ba5fcf10dc7f4c.zip |
get rid of the 9foo commands in favor of the 9 script
-rw-r--r-- | src/cmd/9sed.c | 1447 | ||||
-rw-r--r-- | src/cmd/9yacc.c | 2942 | ||||
-rw-r--r-- | src/cmd/grep/mkfile | 2 | ||||
-rw-r--r-- | src/cmd/lex/mkfile | 2 | ||||
-rw-r--r-- | src/cmd/mkfile | 10 |
5 files changed, 7 insertions, 4396 deletions
diff --git a/src/cmd/9sed.c b/src/cmd/9sed.c deleted file mode 100644 index 182163eb..00000000 --- a/src/cmd/9sed.c +++ /dev/null @@ -1,1447 +0,0 @@ -/* - * sed -- stream editor - * - * - */ -#include <u.h> -#include <libc.h> -#include <bio.h> -#include <regexp.h> - -enum { - DEPTH = 20, /* max nesting depth of {} */ - MAXCMDS = 512, /* max sed commands */ - ADDSIZE = 10000, /* size of add & read buffer */ - MAXADDS = 20, /* max pending adds and reads */ - LBSIZE = 8192, /* input line size */ - LABSIZE = 50, /* max label name size */ - MAXSUB = 10, /* max number of sub reg exp */ - MAXFILES = 120, /* max output files */ -}; - /* An address is a line #, a R.E., "$", a reference to the last - * R.E., or nothing. - */ -typedef struct { - enum { - A_NONE, - A_DOL, - A_LINE, - A_RE, - A_LAST, - }type; - union { - long line; /* Line # */ - Reprog *rp; /* Compiled R.E. */ - } u; -} Addr; - -typedef struct SEDCOM { - Addr ad1; /* optional start address */ - Addr ad2; /* optional end address */ - union { - Reprog *re1; /* compiled R.E. */ - Rune *text; /* added text or file name */ - struct SEDCOM *lb1; /* destination command of branch */ - } u; - Rune *rhs; /* Right-hand side of substitution */ - Biobuf* fcode; /* File ID for read and write */ - char command; /* command code -see below */ - char gfl; /* 'Global' flag for substitutions */ - char pfl; /* 'print' flag for substitutions */ - char active; /* 1 => data between start and end */ - char negfl; /* negation flag */ -} SedCom; - - /* Command Codes for field SedCom.command */ -#define ACOM 01 -#define BCOM 020 -#define CCOM 02 -#define CDCOM 025 -#define CNCOM 022 -#define COCOM 017 -#define CPCOM 023 -#define DCOM 03 -#define ECOM 015 -#define EQCOM 013 -#define FCOM 016 -#define GCOM 027 -#define CGCOM 030 -#define HCOM 031 -#define CHCOM 032 -#define ICOM 04 -#define LCOM 05 -#define NCOM 012 -#define PCOM 010 -#define QCOM 011 -#define RCOM 06 -#define SCOM 07 -#define TCOM 021 -#define WCOM 014 -#define CWCOM 024 -#define YCOM 026 -#define XCOM 033 - - -typedef struct label { /* Label symbol table */ - Rune asc[9]; /* Label name */ - SedCom *chain; - SedCom *address; /* Command associated with label */ -} Label; - -typedef struct FILE_CACHE { /* Data file control block */ - struct FILE_CACHE *next; /* Forward Link */ - char *name; /* Name of file */ -} FileCache; - -SedCom pspace[MAXCMDS]; /* Command storage */ -SedCom *pend = pspace+MAXCMDS; /* End of command storage */ -SedCom *rep = pspace; /* Current fill point */ - -Reprog *lastre = 0; /* Last regular expression */ -Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ - -Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ -Rune *addend = addspace+ADDSIZE; - -SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ -SedCom **aptr = abuf; - -struct { /* Sed program input control block */ - enum PTYPE /* Either on command line or in file */ - { P_ARG, - P_FILE - } type; - union PCTL { /* Pointer to data */ - Biobuf *bp; - char *curr; - } pctl; -} prog; - -Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ - -FileCache *fhead = 0; /* Head of File Cache Chain */ -FileCache *ftail = 0; /* Tail of File Cache Chain */ - -Rune *loc1; /* Start of pattern match */ -Rune *loc2; /* End of pattern match */ -Rune seof; /* Pattern delimiter char */ - -Rune linebuf[LBSIZE+1]; /* Input data buffer */ -Rune *lbend = linebuf+LBSIZE; /* End of buffer */ -Rune *spend = linebuf; /* End of input data */ -Rune *cp; /* Current scan point in linebuf */ - -Rune holdsp[LBSIZE+1]; /* Hold buffer */ -Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ -Rune *hspend = holdsp; /* End of hold data */ - -int nflag; /* Command line flags */ -int gflag; - -int dolflag; /* Set when at true EOF */ -int sflag; /* Set when substitution done */ -int jflag; /* Set when jump required */ -int delflag; /* Delete current line when set */ - -long lnum = 0; /* Input line count */ - -char fname[MAXFILES][40]; /* File name cache */ -Biobuf *fcode[MAXFILES]; /* File ID cache */ -int nfiles = 0; /* Cache fill point */ - -Biobuf fout; /* Output stream */ -Biobuf bstdin; /* Default input */ -Biobuf* f = 0; /* Input data */ - -Label ltab[LABSIZE]; /* Label name symbol table */ -Label *labend = ltab+LABSIZE; /* End of label table */ -Label *lab = ltab+1; /* Current Fill point */ - -int depth = 0; /* {} stack pointer */ - -Rune bad; /* Dummy err ptr reference */ -Rune *badp = &bad; - - -char CGMES[] = "Command garbled: %S"; -char TMMES[] = "Too much text: %S"; -char LTL[] = "Label too long: %S"; -char AD0MES[] = "No addresses allowed: %S"; -char AD1MES[] = "Only one address allowed: %S"; - -void address(Addr *); -void arout(void); -int cmp(char *, char *); -int rcmp(Rune *, Rune *); -void command(SedCom *); -Reprog *compile(void); -Rune *compsub(Rune *, Rune *); -void dechain(void); -void dosub(Rune *); -int ecmp(Rune *, Rune *, int); -void enroll(char *); -void errexit(void); -int executable(SedCom *); -void execute(void); -void fcomp(void); -long getrune(void); -Rune *gline(Rune *); -int match(Reprog *, Rune *); -void newfile(enum PTYPE, char *); -int opendata(void); -Biobuf *open_file(char *); -Rune *place(Rune *, Rune *, Rune *); -void quit(char *, char *); -int rline(Rune *, Rune *); -Label *search(Label *); -int substitute(SedCom *); -char *text(char *); -Rune *stext(Rune *, Rune *); -int ycomp(SedCom *); -char * trans(int c); -void putline(Biobuf *bp, Rune *buf, int n); - -void -main(int argc, char **argv) -{ - int compfl; - - lnum = 0; - Binit(&fout, 1, OWRITE); - fcode[nfiles++] = &fout; - compfl = 0; - - if(argc == 1) - exits(0); - ARGBEGIN{ - case 'n': - nflag++; - continue; - case 'f': - if(argc <= 1) - quit("no pattern-file", 0); - newfile(P_FILE, ARGF()); - fcomp(); - compfl = 1; - continue; - case 'e': - if (argc <= 1) - quit("missing pattern", 0); - newfile(P_ARG, ARGF()); - fcomp(); - compfl = 1; - continue; - case 'g': - gflag++; - continue; - default: - fprint(2, "sed: Unknown flag: %c\n", ARGC()); - continue; - } ARGEND - - if(compfl == 0) { - if (--argc < 0) - quit("missing pattern", 0); - newfile(P_ARG, *argv++); - fcomp(); - } - - if(depth) - quit("Too many {'s", 0); - - ltab[0].address = rep; - - dechain(); - - if(argc <= 0) - enroll(0); /* Add stdin to cache */ - else while(--argc >= 0) { - enroll(*argv++); - } - execute(); - exits(0); -} -void -fcomp(void) -{ - Rune *tp; - SedCom *pt, *pt1; - int i; - Label *lpt; - - static Rune *p = addspace; - static SedCom **cmpend[DEPTH]; /* stack of {} operations */ - - while (rline(linebuf, lbend) >= 0) { - cp = linebuf; -comploop: - while(*cp == ' ' || *cp == '\t') - cp++; - if(*cp == '\0' || *cp == '#') - continue; - if(*cp == ';') { - cp++; - goto comploop; - } - - address(&rep->ad1); - if (rep->ad1.type != A_NONE) { - if (rep->ad1.type == A_LAST) { - if (!lastre) - quit("First RE may not be null", 0); - rep->ad1.type = A_RE; - rep->ad1.u.rp = lastre; - } - if(*cp == ',' || *cp == ';') { - cp++; - address(&rep->ad2); - if (rep->ad2.type == A_LAST) { - rep->ad1.type = A_RE; - rep->ad2.u.rp = lastre; - } - } else - rep->ad2.type = A_NONE; - } - while(*cp == ' ' || *cp == '\t') - cp++; - -swit: - switch(*cp++) { - - default: - quit("Unrecognized command: %S", (char *)linebuf); - - case '!': - rep->negfl = 1; - goto swit; - - case '{': - rep->command = BCOM; - rep->negfl = !(rep->negfl); - cmpend[depth++] = &rep->u.lb1; - if(++rep >= pend) - quit("Too many commands: %S", (char *) linebuf); - if(*cp == '\0') continue; - goto comploop; - - case '}': - if(rep->ad1.type != A_NONE) - quit(AD0MES, (char *) linebuf); - if(--depth < 0) - quit("Too many }'s", 0); - *cmpend[depth] = rep; - if(*cp == 0) continue; - goto comploop; - - case '=': - rep->command = EQCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - break; - - case ':': - if(rep->ad1.type != A_NONE) - quit(AD0MES, (char *) linebuf); - - while(*cp == ' ') - cp++; - tp = lab->asc; - while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { - *tp++ = *cp++; - if(tp >= &(lab->asc[8])) - quit(LTL, (char *) linebuf); - } - *tp = '\0'; - - if(lpt = search(lab)) { - if(lpt->address) - quit("Duplicate labels: %S", (char *) linebuf); - } else { - lab->chain = 0; - lpt = lab; - if(++lab >= labend) - quit("Too many labels: %S", (char *) linebuf); - } - lpt->address = rep; - if (*cp == '#') - continue; - rep--; /* reuse this slot */ - break; - - case 'a': - rep->command = ACOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - case 'c': - rep->command = CCOM; - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - case 'i': - rep->command = ICOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp == '\\') cp++; - if(*cp++ != '\n') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - - case 'g': - rep->command = GCOM; - break; - - case 'G': - rep->command = CGCOM; - break; - - case 'h': - rep->command = HCOM; - break; - - case 'H': - rep->command = CHCOM; - break; - - case 't': - rep->command = TCOM; - goto jtcommon; - - case 'b': - rep->command = BCOM; -jtcommon: - while(*cp == ' ')cp++; - if(*cp == '\0') { - if(pt = ltab[0].chain) { - while(pt1 = pt->u.lb1) - pt = pt1; - pt->u.lb1 = rep; - } else - ltab[0].chain = rep; - break; - } - tp = lab->asc; - while((*tp++ = *cp++)) - if(tp >= &(lab->asc[8])) - quit(LTL, (char *) linebuf); - cp--; - tp[-1] = '\0'; - - if(lpt = search(lab)) { - if(lpt->address) { - rep->u.lb1 = lpt->address; - } else { - pt = lpt->chain; - while(pt1 = pt->u.lb1) - pt = pt1; - pt->u.lb1 = rep; - } - } else { - lab->chain = rep; - lab->address = 0; - if(++lab >= labend) - quit("Too many labels: %S", - (char *) linebuf); - } - break; - - case 'n': - rep->command = NCOM; - break; - - case 'N': - rep->command = CNCOM; - break; - - case 'p': - rep->command = PCOM; - break; - - case 'P': - rep->command = CPCOM; - break; - - case 'r': - rep->command = RCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - rep->u.text = p; - p = stext(p, addend); - break; - - case 'd': - rep->command = DCOM; - break; - - case 'D': - rep->command = CDCOM; - rep->u.lb1 = pspace; - break; - - case 'q': - rep->command = QCOM; - if(rep->ad2.type != A_NONE) - quit(AD1MES, (char *) linebuf); - break; - - case 'l': - rep->command = LCOM; - break; - - case 's': - rep->command = SCOM; - seof = *cp++; - if ((rep->u.re1 = compile()) == 0) { - if(!lastre) - quit("First RE may not be null.", 0); - rep->u.re1 = lastre; - } - rep->rhs = p; - if((p = compsub(p, addend)) == 0) - quit(CGMES, (char *) linebuf); - if(*cp == 'g') { - cp++; - rep->gfl++; - } else if(gflag) - rep->gfl++; - - if(*cp == 'p') { - cp++; - rep->pfl = 1; - } - - if(*cp == 'P') { - cp++; - rep->pfl = 2; - } - - if(*cp == 'w') { - cp++; - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - text(fname[nfiles]); - for(i = nfiles - 1; i >= 0; i--) - if(cmp(fname[nfiles],fname[i]) == 0) { - rep->fcode = fcode[i]; - goto done; - } - if(nfiles >= MAXFILES) - quit("Too many files in w commands 1", 0); - rep->fcode = open_file(fname[nfiles]); - } - break; - - case 'w': - rep->command = WCOM; - if(*cp++ != ' ') - quit(CGMES, (char *) linebuf); - text(fname[nfiles]); - for(i = nfiles - 1; i >= 0; i--) - if(cmp(fname[nfiles], fname[i]) == 0) { - rep->fcode = fcode[i]; - goto done; - } - if(nfiles >= MAXFILES){ - fprint(2, "sed: Too many files in w commands 2 \n"); - fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); - errexit(); - } - rep->fcode = open_file(fname[nfiles]); - break; - - case 'x': - rep->command = XCOM; - break; - - case 'y': - rep->command = YCOM; - seof = *cp++; - if (ycomp(rep) == 0) - quit(CGMES, (char *) linebuf); - break; - - } -done: - if(++rep >= pend) - quit("Too many commands, last: %S", (char *) linebuf); - - if(*cp++ != '\0') { - if(cp[-1] == ';') - goto comploop; - quit(CGMES, (char *) linebuf); - } - - } -} - -Biobuf * -open_file(char *name) -{ - Biobuf *bp; - int fd; - - if ((bp = malloc(sizeof(Biobuf))) == 0) - quit("Out of memory", 0); - if ((fd = open(name, OWRITE)) < 0 && - (fd = create(name, OWRITE, 0666)) < 0) - quit("Cannot create %s", name); - Binit(bp, fd, OWRITE); - Bseek(bp, 0, 2); - fcode[nfiles++] = bp; - return bp; -} - -Rune * -compsub(Rune *rhs, Rune *end) -{ - Rune r; - - while ((r = *cp++) != '\0') { - if(r == '\\') { - if (rhs < end) - *rhs++ = 0xFFFF; - else - return 0; - r = *cp++; - if(r == 'n') - r = '\n'; - } else { - if(r == seof) { - if (rhs < end) - *rhs++ = '\0'; - else - return 0; - return rhs; - } - } - if (rhs < end) - *rhs++ = r; - else - return 0; - - } - return 0; -} - -Reprog * -compile(void) -{ - Rune c; - char *ep; - char expbuf[512]; - - if((c = *cp++) == seof) /* '//' */ - return 0; - ep = expbuf; - do { - if (c == 0 || c == '\n') - quit(TMMES, (char *) linebuf); - if (c == '\\') { - if (ep >= expbuf+sizeof(expbuf)) - quit(TMMES, (char *) linebuf); - ep += runetochar(ep, &c); - if ((c = *cp++) == 'n') - c = '\n'; - } - if (ep >= expbuf+sizeof(expbuf)) - quit(TMMES, (char *) linebuf); - ep += runetochar(ep, &c); - } while ((c = *cp++) != seof); - *ep = 0; - return lastre = regcomp(expbuf); -} - -void -regerror(char *s) -{ - USED(s); - quit(CGMES, (char *) linebuf); -} - -void -newfile(enum PTYPE type, char *name) -{ - if (type == P_ARG) - prog.pctl.curr = name; - else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) - quit("Cannot open pattern-file: %s\n", name); - prog.type = type; -} - -int -rline(Rune *buf, Rune *end) -{ - long c; - Rune r; - - while ((c = getrune()) >= 0) { - r = c; - if (r == '\\') { - if (buf <= end) - *buf++ = r; - if ((c = getrune()) < 0) - break; - r = c; - } else if (r == '\n') { - *buf = '\0'; - return(1); - } - if (buf <= end) - *buf++ = r; - } - *buf = '\0'; - return(-1); -} - -long -getrune(void) -{ - char *p; - long c; - Rune r; - - if (prog.type == P_ARG) { - if ((p = prog.pctl.curr) != 0) { - if (*p) { - prog.pctl.curr += chartorune(&r, p); - c = r; - } else { - c = '\n'; /* fake an end-of-line */ - prog.pctl.curr = 0; - } - } else - c = -1; - } else if ((c = Bgetrune(prog.pctl.bp)) < 0) - Bterm(prog.pctl.bp); - return c; -} - -void -address(Addr *ap) -{ - int c; - long lno; - - if((c = *cp++) == '$') - ap->type = A_DOL; - else if(c == '/') { - seof = c; - if (ap->u.rp = compile()) - ap->type = A_RE; - else - ap->type = A_LAST; - } - else if (c >= '0' && c <= '9') { - lno = c-'0'; - while ((c = *cp) >= '0' && c <= '9') - lno = lno*10 + *cp++-'0'; - if(!lno) - quit("line number 0 is illegal",0); - ap->type = A_LINE; - ap->u.line = lno; - } - else { - cp--; - ap->type = A_NONE; - } -} - -int -cmp(char *a, char *b) /* compare characters */ -{ - while(*a == *b++) - if (*a == '\0') - return(0); - else a++; - return(1); -} - -int -rcmp(Rune *a, Rune *b) /* compare runes */ -{ - while(*a == *b++) - if (*a == '\0') - return(0); - else a++; - return(1); -} - -char * -text(char *p) /* extract character string */ -{ - Rune r; - - while(*cp == '\t' || *cp == ' ') - cp++; - while (*cp) { - if ((r = *cp++) == '\\') - if ((r = *cp++) == 0) - break;; - if (r == '\n') - while (*cp == '\t' || *cp == ' ') - cp++; - p += runetochar(p, &r); - } - *p++ = '\0'; - return p; -} - -Rune * -stext(Rune *p, Rune *end) /* extract rune string */ -{ - while(*cp == '\t' || *cp == ' ') - cp++; - while (*cp) { - if (*cp == '\\') - if (*++cp == 0) - break; - if (p >= end-1) - quit(TMMES, (char *) linebuf); - if ((*p++ = *cp++) == '\n') - while(*cp == '\t' || *cp == ' ') - cp++; - } - *p++ = 0; - return p; -} - - -Label * -search (Label *ptr) -{ - Label *rp; - - for (rp = ltab; rp < ptr; rp++) - if(rcmp(rp->asc, ptr->asc) == 0) - return(rp); - return(0); -} - -void -dechain(void) -{ - Label *lptr; - SedCom *rptr, *trptr; - - for(lptr = ltab; lptr < lab; lptr++) { - - if(lptr->address == 0) - quit("Undefined label: %S", (char *) lptr->asc); - - if(lptr->chain) { - rptr = lptr->chain; - while(trptr = rptr->u.lb1) { - rptr->u.lb1 = lptr->address; - rptr = trptr; - } - rptr->u.lb1 = lptr->address; - } - } -} - -int -ycomp(SedCom *r) -{ - int i; - Rune *rp; - Rune c, *tsp, highc; - Rune *sp; - - highc = 0; - for(tsp = cp; *tsp != seof; tsp++) { - if(*tsp == '\\') - tsp++; - if(*tsp == '\n' || *tsp == '\0') - return(0); - if (*tsp > highc) highc = *tsp; - } - tsp++; - if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) - quit("Out of memory", 0); - *rp++ = highc; /* save upper bound */ - for (i = 0; i <= highc; i++) - rp[i] = i; - sp = cp; - while((c = *sp++) != seof) { - if(c == '\\' && *sp == 'n') { - sp++; - c = '\n'; - } - if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { - rp[c] = '\n'; - tsp++; - } - if(rp[c] == seof || rp[c] == '\0') { - free(r->u.re1); - r->u.re1 = 0; - return(0); - } - } - if(*tsp != seof) { - free(r->u.re1); - r->u.re1 = 0; - return(0); - } - cp = tsp+1; - return(1); -} - -void -execute(void) -{ - SedCom *ipc; - - while (spend = gline(linebuf)){ - for(ipc = pspace; ipc->command; ) { - if (!executable(ipc)) { - ipc++; - continue; - } - command(ipc); - - if(delflag) - break; - if(jflag) { - jflag = 0; - if((ipc = ipc->u.lb1) == 0) - break; - } else - ipc++; - - } - if(!nflag && !delflag) - putline(&fout, linebuf, spend-linebuf); - if(aptr > abuf) { - arout(); - } - delflag = 0; - } -} - /* determine if a statement should be applied to an input line */ -int -executable(SedCom *ipc) -{ - if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ - if (ipc->active == 1) /* Second line */ - ipc->active = 2; - switch(ipc->ad2.type) { - case A_NONE: /* No second addr; use first */ - ipc->active = 0; - break; - case A_DOL: /* Accept everything */ - return !ipc->negfl; - case A_LINE: /* Line at end of range? */ - if (lnum <= ipc->ad2.u.line) { - if (ipc->ad2.u.line == lnum) - ipc->active = 0; - return !ipc->negfl; - } - ipc->active = 0; /* out of range */ - return ipc->negfl; - case A_RE: /* Check for matching R.E. */ - if (match(ipc->ad2.u.rp, linebuf)) - ipc->active = 0; - return !ipc->negfl; - default: /* internal error */ - quit("Internal error", 0); - } - } - switch (ipc->ad1.type) { /* Check first address */ - case A_NONE: /* Everything matches */ - return !ipc->negfl; - case A_DOL: /* Only last line */ - if (dolflag) - return !ipc->negfl; - break; - case A_LINE: /* Check line number */ - if (ipc->ad1.u.line == lnum) { - ipc->active = 1; /* In range */ - return !ipc->negfl; - } - break; - case A_RE: /* Check R.E. */ - if (match(ipc->ad1.u.rp, linebuf)) { - ipc->active = 1; /* In range */ - return !ipc->negfl; - } - break; - default: - quit("Internal error", 0); - } - return ipc->negfl; -} - -int -match(Reprog *pattern, Rune *buf) -{ - if (!pattern) - return 0; - subexp[0].s.rsp = buf; - subexp[0].e.rep = 0; - if (rregexec(pattern, linebuf, subexp, MAXSUB)) { - loc1 = subexp[0].s.rsp; - loc2 = subexp[0].e.rep; - return 1; - } - loc1 = loc2 = 0; - return 0; -} - -int -substitute(SedCom *ipc) -{ - int len; - - if(!match(ipc->u.re1, linebuf)) - return 0; - - /* - * we have at least one match. some patterns, e.g. '$' or '^', can - * produce zero-length matches, so during a global substitute we - * must bump to the character after a zero-length match to keep from looping. - */ - sflag = 1; - if(ipc->gfl == 0) /* single substitution */ - dosub(ipc->rhs); - else - do{ /* global substitution */ - len = loc2-loc1; /* length of match */ - dosub(ipc->rhs); /* dosub moves loc2 */ - if(*loc2 == 0) /* end of string */ - break; - if(len == 0) /* zero-length R.E. match */ - loc2++; /* bump over zero-length match */ - if(*loc2 == 0) /* end of string */ - break; - } while(match(ipc->u.re1, loc2)); - return 1; -} - -void -dosub(Rune *rhsbuf) -{ - Rune *lp, *sp; - Rune *rp; - int c, n; - - lp = linebuf; - sp = genbuf; - rp = rhsbuf; - while (lp < loc1) - *sp++ = *lp++; - while(c = *rp++) { - if (c == '&') { - sp = place(sp, loc1, loc2); - continue; - } - if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { - n = c-'0'; - if (subexp[n].s.rsp && subexp[n].e.rep) { - sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); - continue; - } - else { - fprint(2, "sed: Invalid back reference \\%d\n",n); - errexit(); - } - } - *sp++ = c; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - } - lp = loc2; - loc2 = sp - genbuf + linebuf; - while (*sp++ = *lp++) - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - lp = linebuf; - sp = genbuf; - while (*lp++ = *sp++) - ; - spend = lp-1; -} - -Rune * -place(Rune *sp, Rune *l1, Rune *l2) -{ - while (l1 < l2) { - *sp++ = *l1++; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); - } - return(sp); -} - -char * -trans(int c) -{ - static char buf[] = "\\x0000"; - static char hex[] = "0123456789abcdef"; - - switch(c) { - case '\b': - return "\\b"; - case '\n': - return "\\n"; - case '\r': - return "\\r"; - case '\t': - return "\\t"; - case '\\': - return "\\\\"; - } - buf[2] = hex[(c>>12)&0xF]; - buf[3] = hex[(c>>8)&0xF]; - buf[4] = hex[(c>>4)&0xF]; - buf[5] = hex[c&0xF]; - return buf; -} - -void -command(SedCom *ipc) -{ - int i, c; - Rune *p1, *p2; - char *ucp; - Rune *rp; - Rune *execp; - - switch(ipc->command) { - - case ACOM: - *aptr++ = ipc; - if(aptr >= abuf+MAXADDS) { - quit("sed: Too many appends after line %ld\n", - (char *) lnum); - } - *aptr = 0; - break; - case CCOM: - delflag = 1; - if(ipc->active == 1) { - for(rp = ipc->u.text; *rp; rp++) - Bputrune(&fout, *rp); - Bputc(&fout, '\n'); - } - break; - case DCOM: - delflag++; - break; - case CDCOM: - p1 = p2 = linebuf; - while(*p1 != '\n') { - if(*p1++ == 0) { - delflag++; - return; - } - } - p1++; - while(*p2++ = *p1++) - ; - spend = p2-1; - jflag++; - break; - case EQCOM: - Bprint(&fout, "%ld\n", lnum); - break; - case GCOM: - p1 = linebuf; - p2 = holdsp; - while(*p1++ = *p2++) - ; - spend = p1-1; - break; - case CGCOM: - *spend++ = '\n'; - p1 = spend; - p2 = holdsp; - while(*p1++ = *p2++) - if(p1 >= lbend) - break; - spend = p1-1; - break; - case HCOM: - p1 = holdsp; - p2 = linebuf; - while(*p1++ = *p2++); - hspend = p1-1; - break; - case CHCOM: - *hspend++ = '\n'; - p1 = hspend; - p2 = linebuf; - while(*p1++ = *p2++) - if(p1 >= hend) - break; - hspend = p1-1; - break; - case ICOM: - for(rp = ipc->u.text; *rp; rp++) - Bputrune(&fout, *rp); - Bputc(&fout, '\n'); - break; - case BCOM: - jflag = 1; - break; - case LCOM: - c = 0; - for (i = 0, rp = linebuf; *rp; rp++) { - c = *rp; - if(c >= 0x20 && c < 0x7F && c != '\\') { - Bputc(&fout, c); - if(i++ > 71) { - Bprint(&fout, "\\\n"); - i = 0; - } - } else { - for (ucp = trans(*rp); *ucp; ucp++){ - c = *ucp; - Bputc(&fout, c); - if(i++ > 71) { - Bprint(&fout, "\\\n"); - i = 0; - } - } - } - } - if(c == ' ') - Bprint(&fout, "\\n"); - Bputc(&fout, '\n'); - break; - case NCOM: - if(!nflag) - putline(&fout, linebuf, spend-linebuf); - - if(aptr > abuf) - arout(); - if((execp = gline(linebuf)) == 0) { - delflag = 1; - break; - } - spend = execp; - break; - case CNCOM: - if(aptr > abuf) - arout(); - *spend++ = '\n'; - if((execp = gline(spend)) == 0) { - delflag = 1; - break; - } - spend = execp; - break; - case PCOM: - putline(&fout, linebuf, spend-linebuf); - break; - case CPCOM: - cpcom: - for(rp = linebuf; *rp && *rp != '\n'; rp++) - Bputc(&fout, *rp); - Bputc(&fout, '\n'); - break; - case QCOM: - if(!nflag) - putline(&fout, linebuf, spend-linebuf); - if(aptr > abuf) - arout(); - exits(0); - case RCOM: - *aptr++ = ipc; - if(aptr >= &abuf[MAXADDS]) - quit("sed: Too many reads after line %ld\n", - (char *) lnum); - *aptr = 0; - break; - case SCOM: - i = substitute(ipc); - if(i && ipc->pfl) - if(ipc->pfl == 1) - putline(&fout, linebuf, spend-linebuf); - else - goto cpcom; - if(i && ipc->fcode) - goto wcom; - break; - - case TCOM: - if(sflag == 0) break; - sflag = 0; - jflag = 1; - break; - - wcom: - case WCOM: - putline(ipc->fcode,linebuf, spend-linebuf); - break; - case XCOM: - p1 = linebuf; - p2 = genbuf; - while(*p2++ = *p1++); - p1 = holdsp; - p2 = linebuf; - while(*p2++ = *p1++); - spend = p2 - 1; - p1 = genbuf; - p2 = holdsp; - while(*p2++ = *p1++); - hspend = p2 - 1; - break; - case YCOM: - p1 = linebuf; - p2 = ipc->u.text; - for (i = *p2++; *p1; p1++){ - if (*p1 <= i) *p1 = p2[*p1]; - } - break; - } - -} - -void -putline(Biobuf *bp, Rune *buf, int n) -{ - while (n--) - Bputrune(bp, *buf++); - Bputc(bp, '\n'); -} - -int -ecmp(Rune *a, Rune *b, int count) -{ - while(count--) - if(*a++ != *b++) return(0); - return(1); -} - -void -arout(void) -{ - Rune *p1; - Biobuf *fi; - int c; - char *s; - char buf[128]; - - for (aptr = abuf; *aptr; aptr++) { - if((*aptr)->command == ACOM) { - for(p1 = (*aptr)->u.text; *p1; p1++ ) - Bputrune(&fout, *p1); - Bputc(&fout, '\n'); - } else { - for(s = buf, p1= (*aptr)->u.text; *p1; p1++) - s += runetochar(s, p1); - *s = '\0'; - if((fi = Bopen(buf, OREAD)) == 0) - continue; - while((c = Bgetc(fi)) >= 0) - Bputc(&fout, c); - Bterm(fi); - } - } - aptr = abuf; - *aptr = 0; -} - -void -errexit(void) -{ - exits("error"); -} - -void -quit (char *msg, char *arg) -{ - fprint(2, "sed: "); - fprint(2, msg, arg); - fprint(2, "\n"); - errexit(); -} - -Rune * -gline(Rune *addr) -{ - long c; - Rune *p; - - static long peekc = 0; - - if (f == 0 && opendata() < 0) - return 0; - sflag = 0; - lnum++; -/* Bflush(&fout);********* dumped 4/30/92 - bobf****/ - do { - p = addr; - for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { - if (c == '\n') { - if ((peekc = Bgetrune(f)) < 0) { - if (fhead == 0) - dolflag = 1; - } - *p = '\0'; - return p; - } - if (c && p < lbend) - *p++ = c; - } - /* return partial final line, adding implicit newline */ - if(p != addr) { - *p = '\0'; - peekc = -1; - if (fhead == 0) - dolflag = 1; - return p; - } - peekc = 0; - Bterm(f); - } while (opendata() > 0); /* Switch to next stream */ - f = 0; - return 0; -} - - /* Data file input section - the intent is to transparently - * catenate all data input streams. - */ -void -enroll(char *filename) /* Add a file to the input file cache */ -{ - FileCache *fp; - - if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) - quit("Out of memory", 0); - if (ftail == 0) - fhead = fp; - else - ftail->next = fp; - ftail = fp; - fp->next = 0; - fp->name = filename; /* 0 => stdin */ -} - -int -opendata(void) -{ - if (fhead == 0) - return -1; - if (fhead->name) { - if ((f = Bopen(fhead->name, OREAD)) == 0) - quit("Can't open %s", fhead->name); - } else { - Binit(&bstdin, 0, OREAD); - f = &bstdin; - } - fhead = fhead->next; - return 1; -} diff --git a/src/cmd/9yacc.c b/src/cmd/9yacc.c deleted file mode 100644 index 3db6768f..00000000 --- a/src/cmd/9yacc.c +++ /dev/null @@ -1,2942 +0,0 @@ -#include <u.h> -#include <libc.h> -#include <bio.h> -#include <ctype.h> - -#define Bungetrune Bungetc /* ok for now. */ - -/* - * all these are 32 bit - */ -#define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */ -#define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) -#define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) -#define NWORDS(n) (((n)+32)/32) - -#define PARSER "#9/lib/yaccpar" -#define PARSERS "#9/lib/yaccpars" -#define TEMPNAME "y.tmp.XXXXXX" -#define ACTNAME "y.acts.XXXXXX" -#define OFILE "tab.c" -#define FILEU "output" -#define FILED "tab.h" -#define FILEDEBUG "debug" - -enum -{ -/* - * the following are adjustable - * according to memory size - */ - ACTSIZE = 40000, - MEMSIZE = 40000, - NSTATES = 2000, - NTERMS = 511, - NPROD = 1600, - NNONTERM = 600, - TEMPSIZE = 2000, - CNAMSZ = 10000, - LSETSIZE = 2400, - WSETSIZE = 350, - - NAMESIZE = 50, - NTYPES = 63, - ISIZE = 400, - - PRIVATE = 0xE000, /* unicode private use */ - - /* relationships which must hold: - TBITSET ints must hold NTERMS+1 bits... - WSETSIZE >= NNONTERM - LSETSIZE >= NNONTERM - TEMPSIZE >= NTERMS + NNONTERM + 1 - TEMPSIZE >= NSTATES - */ - - NTBASE = 010000, - ERRCODE = 8190, - ACCEPTCODE = 8191, - - NOASC = 0, /* no assoc. */ - LASC = 1, /* left assoc. */ - RASC = 2, /* right assoc. */ - BASC = 3, /* binary assoc. */ - - /* flags for state generation */ - - DONE = 0, - MUSTDO = 1, - MUSTLOOKAHEAD = 2, - - /* flags for a rule having an action, and being reduced */ - - ACTFLAG = 04, - REDFLAG = 010, - - /* output parser flags */ - YYFLAG1 = -1000, - - /* parse tokens */ - IDENTIFIER = PRIVATE, - MARK, - TERM, - LEFT, - RIGHT, - BINARY, - PREC, - LCURLY, - IDENTCOLON, - NUMBER, - START, - TYPEDEF, - TYPENAME, - UNION, - - ENDFILE = 0, - - EMPTY = 1, - WHOKNOWS = 0, - OK = 1, - NOMORE = -1000, -}; - - /* macros for getting associativity and precedence levels */ - -#define ASSOC(i) ((i)&03) -#define PLEVEL(i) (((i)>>4)&077) -#define TYPE(i) (((i)>>10)&077) - - /* macros for setting associativity and precedence levels */ - -#define SETASC(i,j) i |= j -#define SETPLEV(i,j) i |= (j<<4) -#define SETTYPE(i,j) i |= (j<<10) - - /* looping macros */ - -#define TLOOP(i) for(i=1; i<=ntokens; i++) -#define NTLOOP(i) for(i=0; i<=nnonter; i++) -#define PLOOP(s,i) for(i=s; i<nprod; i++) -#define SLOOP(i) for(i=0; i<nstate; i++) -#define WSBUMP(x) x++ -#define WSLOOP(s,j) for(j=s; j<cwp; j++) -#define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++) -#define SETLOOP(i) for(i=0; i<tbitset; i++) - - /* command to clobber tempfiles after use */ - -#define ZAPFILE(x) if(x) remove(x) - - /* I/O descriptors */ - -Biobuf* faction; /* file for saving actions */ -Biobuf* fdefine; /* file for #defines */ -Biobuf* fdebug; /* y.debug for strings for debugging */ -Biobuf* ftable; /* y.tab.c file */ -Biobuf* ftemp; /* tempfile to pass 2 */ -Biobuf* finput; /* input file */ -Biobuf* foutput; /* y.output file */ - - /* communication variables between various I/O routines */ - -char* infile; /* input file name */ -int numbval; /* value of an input number */ -char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */ - - /* structure declarations */ - -typedef -struct -{ - int lset[TBITSET]; -} Lkset; - -typedef -struct -{ - int* pitem; - Lkset* look; -} Item; - -typedef -struct -{ - char* name; - int value; -} Symb; - -typedef -struct -{ - int* pitem; - int flag; - Lkset ws; -} Wset; - - /* storage of names */ - -char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */ -int cnamsz = CNAMSZ; /* size of cnames */ -char* cnamp = cnames; /* place where next name is to be put in */ -int ndefout = 4; /* number of defined symbols output */ -char* tempname; -char* actname; -char ttempname[] = TEMPNAME; -char tactname[] = ACTNAME; -char* parser = PARSER; -char* yydebug; - - /* storage of types */ -int ntypes; /* number of types defined */ -char* typeset[NTYPES]; /* pointers to type tags */ - - /* token information */ - -int ntokens = 0 ; /* number of tokens */ -Symb tokset[NTERMS]; -int toklev[NTERMS]; /* vector with the precedence of the terminals */ - - /* nonterminal information */ - -int nnonter = -1; /* the number of nonterminals */ -Symb nontrst[NNONTERM]; -int start; /* start symbol */ - - /* assigned token type values */ -int extval = 0; - -char* ytabc = OFILE; /* name of y.tab.c */ - - /* grammar rule information */ - -int mem0[MEMSIZE] ; /* production storage */ -int* mem = mem0; -int nprod = 1; /* number of productions */ -int* prdptr[NPROD]; /* pointers to descriptions of productions */ -int levprd[NPROD]; /* precedence levels for the productions */ -int rlines[NPROD]; /* line number for this rule */ - - /* state information */ - -int nstate = 0; /* number of states */ -Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */ -int tystate[NSTATES]; /* contains type information about the states */ -int defact[NSTATES]; /* the default actions of states */ -int tstates[NTERMS]; /* states generated by terminal gotos */ -int ntstates[NNONTERM]; /* states generated by nonterminal gotos */ -int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */ -int lastred; /* the number of the last reduction of a state */ - - /* lookahead set information */ - -Lkset lkst[LSETSIZE]; -int nolook; /* flag to turn off lookahead computations */ -int tbitset; /* size of lookahead sets */ -int nlset = 0; /* next lookahead set index */ -int nolook = 0; /* flag to suppress lookahead computations */ -Lkset clset; /* temporary storage for lookahead computations */ - - /* working set information */ - -Wset wsets[WSETSIZE]; -Wset* cwp; - - /* storage for action table */ - -int amem[ACTSIZE]; /* action table storage */ -int* memp = amem; /* next free action table position */ -int indgo[NSTATES]; /* index to the stored goto table */ - - /* temporary vector, indexable by states, terms, or ntokens */ - -int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ -int lineno = 1; /* current input line number */ -int fatfl = 1; /* if on, error is fatal */ -int nerrors = 0; /* number of errors */ - - /* statistics collection variables */ - -int zzgoent; -int zzgobest; -int zzacent; -int zzexcp; -int zzclose; -int zzrrconf; -int zzsrconf; - -int* ggreed = lkst[0].lset; -int* pgo = wsets[0].ws.lset; -int* yypgo = &nontrst[0].value; - -int maxspr = 0; /* maximum spread of any entry */ -int maxoff = 0; /* maximum offset into a array */ -int* pmem = mem0; -int* maxa; -int nxdb = 0; -int adb = 0; - - - /* storage for information about the nonterminals */ - -int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ -Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ -int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ - - /* random stuff picked out from between functions */ - -int indebug = 0; -Wset* zzcwp = wsets; -int zzgoent = 0; -int zzgobest = 0; -int zzacent = 0; -int zzexcp = 0; -int zzclose = 0; -int zzsrconf = 0; -int* zzmemsz = mem0; -int zzrrconf = 0; -int pidebug = 0; /* debugging flag for putitem */ -int gsdebug = 0; -int cldebug = 0; /* debugging flag for closure */ -int pkdebug = 0; -int g2debug = 0; - -struct -{ - char* name; - long value; -} resrv[] = -{ - "binary", BINARY, - "left", LEFT, - "nonassoc", BINARY, - "prec", PREC, - "right", RIGHT, - "start", START, - "term", TERM, - "token", TERM, - "type", TYPEDEF, - "union", UNION, - 0, -}; - - /* define functions */ - -void main(int, char**); -void others(void); -char* chcopy(char*, char*); -char* writem(int*); -char* symnam(int); -void summary(void); -void error(char*, ...); -void aryfil(int*, int, int); -int setunion(int*, int*); -void prlook(Lkset*); -void cpres(void); -void cpfir(void); -int state(int); -void putitem(int*, Lkset*); -void cempty(void); -void stagen(void); -void closure(int); -Lkset* flset(Lkset*); -void cleantmp(void); -void intr(void); -void setup(int, char**); -void finact(void); -int defin(int, char*); -void defout(int); -char* cstash(char*); -long gettok(void); -int fdtype(int); -int chfind(int, char*); -void cpyunion(void); -void cpycode(void); -int skipcom(void); -void cpyact(int); -void openup(char*, int, int, int, char*); -void output(void); -int apack(int*, int); -void go2out(void); -void go2gen(int); -void precftn(int, int, int); -void wract(int); -void wrstate(int); -void warray(char*, int*, int); -void hideprod(void); -void callopt(void); -void gin(int); -void stin(int); -int nxti(void); -void osummary(void); -void aoutput(void); -void arout(char*, int*, int); -int gtnm(void); - -void -main(int argc, char *argv[]) -{ - - setup(argc, argv); /* initialize and read productions */ - tbitset = NWORDS(ntokens); - cpres(); /* make table of which productions yield a given nonterminal */ - cempty(); /* make a table of which nonterminals can match the empty string */ - cpfir(); /* make a table of firsts of nonterminals */ - stagen(); /* generate the states */ - output(); /* write the states and the tables */ - go2out(); - hideprod(); - summary(); - callopt(); - others(); - exits(0); -} - -/* - * put out other arrays, copy the parsers - */ -void -others(void) -{ - int c, i, j; - - finput = Bopen(unsharp(parser), OREAD); - if(finput == 0) - error("cannot open parser %s: %r", parser); - warray("yyr1", levprd, nprod); - aryfil(temp1, nprod, 0); - PLOOP(1, i) - temp1[i] = prdptr[i+1]-prdptr[i]-2; - warray("yyr2", temp1, nprod); - - aryfil(temp1, nstate, -1000); - TLOOP(i) - for(j=tstates[i]; j!=0; j=mstates[j]) - temp1[j] = i; - NTLOOP(i) - for(j=ntstates[i]; j!=0; j=mstates[j]) - temp1[j] = -i; - warray("yychk", temp1, nstate); - warray("yydef", defact, nstate); - - /* put out token translation tables */ - /* table 1 has 0-256 */ - aryfil(temp1, 256, 0); - c = 0; - TLOOP(i) { - j = tokset[i].value; - if(j >= 0 && j < 256) { - if(temp1[j]) { - print("yacc bug -- cant have 2 different Ts with same value\n"); - print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); - nerrors++; - } - temp1[j] = i; - if(j > c) - c = j; - } - } - warray("yytok1", temp1, c+1); - - /* table 2 has PRIVATE-PRIVATE+256 */ - aryfil(temp1, 256, 0); - c = 0; - TLOOP(i) { - j = tokset[i].value - PRIVATE; - if(j >= 0 && j < 256) { - if(temp1[j]) { - print("yacc bug -- cant have 2 different Ts with same value\n"); - print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); - nerrors++; - } - temp1[j] = i; - if(j > c) - c = j; - } - } - warray("yytok2", temp1, c+1); - - /* table 3 has everything else */ - Bprint(ftable, "long yytok3[] =\n{\n"); - c = 0; - TLOOP(i) { - j = tokset[i].value; - if(j >= 0 && j < 256) - continue; - if(j >= PRIVATE && j < 256+PRIVATE) - continue; - - Bprint(ftable, "%4d,%4d,", j, i); - c++; - if(c%5 == 0) - Bprint(ftable, "\n"); - } - Bprint(ftable, "%4d\n};\n", 0); - - /* copy parser text */ - while((c=Bgetrune(finput)) != Beof) { - if(c == '$') { - if((c = Bgetrune(finput)) != 'A') - Bputrune(ftable, '$'); - else { /* copy actions */ - faction = Bopen(actname, OREAD); - if(faction == 0) - error("cannot reopen action tempfile"); - while((c=Bgetrune(faction)) != Beof) - Bputrune(ftable, c); - Bterm(faction); - ZAPFILE(actname); - c = Bgetrune(finput); - } - } - Bputrune(ftable, c); - } - Bterm(ftable); -} - -/* - * copies string q into p, returning next free char ptr - */ -char* -chcopy(char* p, char* q) -{ - int c; - - while(c = *q) { - if(c == '"') - *p++ = '\\'; - *p++ = c; - q++; - } - *p = 0; - return p; -} - -/* - * creates output string for item pointed to by pp - */ -char* -writem(int *pp) -{ - int i,*p; - static char sarr[ISIZE]; - char* q; - - for(p=pp; *p>0; p++) - ; - p = prdptr[-*p]; - q = chcopy(sarr, nontrst[*p-NTBASE].name); - q = chcopy(q, ": "); - for(;;) { - *q = ' '; - p++; - if(p == pp) - *q = '.'; - q++; - *q = '\0'; - i = *p; - if(i <= 0) - break; - q = chcopy(q, symnam(i)); - if(q > &sarr[ISIZE-30]) - error("item too big"); - } - - /* an item calling for a reduction */ - i = *pp; - if(i < 0 ) { - q = chcopy(q, " ("); - sprint(q, "%d)", -i); - } - return sarr; -} - -/* - * return a pointer to the name of symbol i - */ -char* -symnam(int i) -{ - char* cp; - - cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; - if(*cp == ' ') - cp++; - return cp; -} - -/* - * output the summary on y.output - */ -void -summary(void) -{ - - if(foutput != 0) { - Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n", - ntokens, NTERMS, nnonter, NNONTERM); - Bprint(foutput, "%d/%d grammar rules, %d/%d states\n", - nprod, NPROD, nstate, NSTATES); - Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", - zzsrconf, zzrrconf); - Bprint(foutput, "%d/%d working sets used\n", - (int)(zzcwp-wsets), WSETSIZE); - Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n", - (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE); - Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE); - Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate); - Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp); - Bprint(foutput, "%d goto entries\n", zzgoent); - Bprint(foutput, "%d entries saved by goto default\n", zzgobest); - } - if(zzsrconf != 0 || zzrrconf != 0) { - print("\nconflicts: "); - if(zzsrconf) - print("%d shift/reduce", zzsrconf); - if(zzsrconf && zzrrconf) - print(", "); - if(zzrrconf) - print("%d reduce/reduce", zzrrconf); - print("\n"); - } - if(ftemp != 0) { - Bterm(ftemp); - ftemp = 0; - } - if(fdefine != 0) { - Bterm(fdefine); - fdefine = 0; - } -} - -/* - * write out error comment -- NEEDS WORK - */ -void -error(char *s, ...) -{ - va_list arg; - - nerrors++; - fprint(2, "\n fatal error:"); - va_start(arg, s); - vfprint(2, s, arg); - va_end(arg); - fprint(2, ", %s:%d\n", infile, lineno); - if(!fatfl) - return; - summary(); - cleantmp(); - exits("error"); -} - -/* - * set elements 0 through n-1 to c - */ -void -aryfil(int *v, int n, int c) -{ - int i; - - for(i=0; i<n; i++) - v[i] = c; -} - -/* - * set a to the union of a and b - * return 1 if b is not a subset of a, 0 otherwise - */ -int -setunion(int *a, int *b) -{ - int i, x, sub; - - sub = 0; - SETLOOP(i) { - x = *a; - *a |= *b; - if(*a != x) - sub = 1; - a++; - b++; - } - return sub; -} - -void -prlook(Lkset* p) -{ - int j, *pp; - - pp = p->lset; - if(pp == 0) - Bprint(foutput, "\tNULL"); - else { - Bprint(foutput, " { "); - TLOOP(j) - if(BIT(pp,j)) - Bprint(foutput, "%s ", symnam(j)); - Bprint(foutput, "}"); - } -} - -/* - * compute an array with the beginnings of productions yielding given nonterminals - * The array pres points to these lists - * the array pyield has the lists: the total size is only NPROD+1 - */ -void -cpres(void) -{ - int c, j, i, **pmem; - static int *pyield[NPROD]; - - pmem = pyield; - NTLOOP(i) { - c = i+NTBASE; - pres[i] = pmem; - fatfl = 0; /* make undefined symbols nonfatal */ - PLOOP(0, j) - if(*prdptr[j] == c) - *pmem++ = prdptr[j]+1; - if(pres[i] == pmem) - error("nonterminal %s not defined!", nontrst[i].name); - } - pres[i] = pmem; - fatfl = 1; - if(nerrors) { - summary(); - cleantmp(); - exits("error"); - } - if(pmem != &pyield[nprod]) - error("internal Yacc error: pyield %d", pmem-&pyield[nprod]); -} - -/* - * compute an array with the first of nonterminals - */ -void -cpfir(void) -{ - int *p, **s, i, **t, ch, changes; - - zzcwp = &wsets[nnonter]; - NTLOOP(i) { - aryfil(wsets[i].ws.lset, tbitset, 0); - t = pres[i+1]; - /* initially fill the sets */ - for(s=pres[i]; s<t; ++s) - for(p = *s; (ch = *p) > 0; ++p) { - if(ch < NTBASE) { - SETBIT(wsets[i].ws.lset, ch); - break; - } - if(!pempty[ch-NTBASE]) - break; - } - } - - /* now, reflect transitivity */ - changes = 1; - while(changes) { - changes = 0; - NTLOOP(i) { - t = pres[i+1]; - for(s = pres[i]; s < t; ++s) - for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { - changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); - if(!pempty[ch]) - break; - } - } - } - - NTLOOP(i) - pfirst[i] = flset(&wsets[i].ws); - if(!indebug) - return; - if(foutput != 0) - NTLOOP(i) { - Bprint(foutput, "\n%s: ", nontrst[i].name); - prlook(pfirst[i]); - Bprint(foutput, " %d\n", pempty[i]); - } -} - -/* - * sorts last state,and sees if it equals earlier ones. returns state number - */ -int -state(int c) -{ - Item *p1, *p2, *k, *l, *q1, *q2; - int size1, size2, i; - - p1 = pstate[nstate]; - p2 = pstate[nstate+1]; - if(p1 == p2) - return 0; /* null state */ - /* sort the items */ - for(k = p2-1; k > p1; k--) /* make k the biggest */ - for(l = k-1; l >= p1; --l) - if(l->pitem > k->pitem) { - int *s; - Lkset *ss; - - s = k->pitem; - k->pitem = l->pitem; - l->pitem = s; - ss = k->look; - k->look = l->look; - l->look = ss; - } - size1 = p2 - p1; /* size of state */ - - for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { - /* get ith state */ - q1 = pstate[i]; - q2 = pstate[i+1]; - size2 = q2 - q1; - if(size1 != size2) - continue; - k = p1; - for(l = q1; l < q2; l++) { - if(l->pitem != k->pitem) - break; - k++; - } - if(l != q2) - continue; - /* found it */ - pstate[nstate+1] = pstate[nstate]; /* delete last state */ - /* fix up lookaheads */ - if(nolook) - return i; - for(l = q1, k = p1; l < q2; ++l, ++k ) { - int s; - - SETLOOP(s) - clset.lset[s] = l->look->lset[s]; - if(setunion(clset.lset, k->look->lset)) { - tystate[i] = MUSTDO; - /* register the new set */ - l->look = flset( &clset ); - } - } - return i; - } - /* state is new */ - if(nolook) - error("yacc state/nolook error"); - pstate[nstate+2] = p2; - if(nstate+1 >= NSTATES) - error("too many states"); - if(c >= NTBASE) { - mstates[nstate] = ntstates[c-NTBASE]; - ntstates[c-NTBASE] = nstate; - } else { - mstates[nstate] = tstates[c]; - tstates[c] = nstate; - } - tystate[nstate] = MUSTDO; - return nstate++; -} - -void -putitem(int *ptr, Lkset *lptr) -{ - Item *j; - - if(pidebug && foutput != 0) - Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); - j = pstate[nstate+1]; - j->pitem = ptr; - if(!nolook) - j->look = flset(lptr); - pstate[nstate+1] = ++j; - if((int*)j > zzmemsz) { - zzmemsz = (int*)j; - if(zzmemsz >= &mem0[MEMSIZE]) - error("out of state space"); - } -} - -/* - * mark nonterminals which derive the empty string - * also, look for nonterminals which don't derive any token strings - */ -void -cempty(void) -{ - - int i, *p; - - /* first, use the array pempty to detect productions that can never be reduced */ - /* set pempty to WHONOWS */ - aryfil(pempty, nnonter+1, WHOKNOWS); - - /* now, look at productions, marking nonterminals which derive something */ -more: - PLOOP(0, i) { - if(pempty[*prdptr[i] - NTBASE]) - continue; - for(p = prdptr[i]+1; *p >= 0; ++p) - if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) - break; - /* production can be derived */ - if(*p < 0) { - pempty[*prdptr[i]-NTBASE] = OK; - goto more; - } - } - - /* now, look at the nonterminals, to see if they are all OK */ - NTLOOP(i) { - /* the added production rises or falls as the start symbol ... */ - if(i == 0) - continue; - if(pempty[i] != OK) { - fatfl = 0; - error("nonterminal %s never derives any token string", nontrst[i].name); - } - } - - if(nerrors) { - summary(); - cleantmp(); - exits("error"); - } - - /* now, compute the pempty array, to see which nonterminals derive the empty string */ - /* set pempty to WHOKNOWS */ - aryfil( pempty, nnonter+1, WHOKNOWS); - - /* loop as long as we keep finding empty nonterminals */ - -again: - PLOOP(1, i) { - /* not known to be empty */ - if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { - for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) - ; - /* we have a nontrivially empty nonterminal */ - if(*p < 0) { - pempty[*prdptr[i]-NTBASE] = EMPTY; - /* got one ... try for another */ - goto again; - } - } - } -} - -/* - * generate the states - */ -void -stagen(void) -{ - - int c, i, j, more; - Wset *p, *q; - - /* initialize */ - nstate = 0; - - /* THIS IS FUNNY from the standpoint of portability - * it represents the magic moment when the mem0 array, which has - * been holding the productions, starts to hold item pointers, of a - * different type... - * someday, alloc should be used to allocate all this stuff... for now, we - * accept that if pointers don't fit in integers, there is a problem... - */ - - pstate[0] = pstate[1] = (Item*)mem; - aryfil(clset.lset, tbitset, 0); - putitem(prdptr[0]+1, &clset); - tystate[0] = MUSTDO; - nstate = 1; - pstate[2] = pstate[1]; - - aryfil(amem, ACTSIZE, 0); - - /* now, the main state generation loop */ - for(more=1; more;) { - more = 0; - SLOOP(i) { - if(tystate[i] != MUSTDO) - continue; - tystate[i] = DONE; - aryfil(temp1, nnonter+1, 0); - /* take state i, close it, and do gotos */ - closure(i); - /* generate goto's */ - WSLOOP(wsets, p) { - if(p->flag) - continue; - p->flag = 1; - c = *(p->pitem); - if(c <= 1) { - if(pstate[i+1]-pstate[i] <= p-wsets) - tystate[i] = MUSTLOOKAHEAD; - continue; - } - /* do a goto on c */ - WSLOOP(p, q) - /* this item contributes to the goto */ - if(c == *(q->pitem)) { - putitem(q->pitem+1, &q->ws); - q->flag = 1; - } - if(c < NTBASE) - state(c); /* register new state */ - else - temp1[c-NTBASE] = state(c); - } - if(gsdebug && foutput != 0) { - Bprint(foutput, "%d: ", i); - NTLOOP(j) - if(temp1[j]) - Bprint(foutput, "%s %d, ", - nontrst[j].name, temp1[j]); - Bprint(foutput, "\n"); - } - indgo[i] = apack(&temp1[1], nnonter-1) - 1; - /* do some more */ - more = 1; - } - } -} - -/* - * generate the closure of state i - */ -void -closure(int i) -{ - - Wset *u, *v; - Item *p, *q; - int c, ch, work, k, *pi, **s, **t; - - zzclose++; - - /* first, copy kernel of state i to wsets */ - cwp = wsets; - ITMLOOP(i, p, q) { - cwp->pitem = p->pitem; - cwp->flag = 1; /* this item must get closed */ - SETLOOP(k) - cwp->ws.lset[k] = p->look->lset[k]; - WSBUMP(cwp); - } - - /* now, go through the loop, closing each item */ - work = 1; - while(work) { - work = 0; - WSLOOP(wsets, u) { - if(u->flag == 0) - continue; - /* dot is before c */ - c = *(u->pitem); - if(c < NTBASE) { - u->flag = 0; - /* only interesting case is where . is before nonterminal */ - continue; - } - - /* compute the lookahead */ - aryfil(clset.lset, tbitset, 0); - - /* find items involving c */ - WSLOOP(u, v) - if(v->flag == 1 && *(pi=v->pitem) == c) { - v->flag = 0; - if(nolook) - continue; - while((ch = *++pi) > 0) { - /* terminal symbol */ - if(ch < NTBASE) { - SETBIT(clset.lset, ch); - break; - } - /* nonterminal symbol */ - setunion(clset.lset, pfirst[ch-NTBASE]->lset); - if(!pempty[ch-NTBASE]) - break; - } - if(ch <= 0) - setunion(clset.lset, v->ws.lset); - } - - /* - * now loop over productions derived from c - * c is now nonterminal number - */ - c -= NTBASE; - t = pres[c+1]; - for(s = pres[c]; s < t; ++s) { - /* - * put these items into the closure - * is the item there - */ - WSLOOP(wsets, v) - /* yes, it is there */ - if(v->pitem == *s) { - if(nolook) - goto nexts; - if(setunion(v->ws.lset, clset.lset)) - v->flag = work = 1; - goto nexts; - } - - /* not there; make a new entry */ - if(cwp-wsets+1 >= WSETSIZE) - error( "working set overflow"); - cwp->pitem = *s; - cwp->flag = 1; - if(!nolook) { - work = 1; - SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; - } - WSBUMP(cwp); - - nexts:; - } - } - } - - /* have computed closure; flags are reset; return */ - if(cwp > zzcwp) - zzcwp = cwp; - if(cldebug && foutput != 0) { - Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); - WSLOOP(wsets, u) { - if(u->flag) - Bprint(foutput, "flag set!\n"); - u->flag = 0; - Bprint(foutput, "\t%s", writem(u->pitem)); - prlook(&u->ws); - Bprint(foutput, "\n"); - } - } -} - -/* - * decide if the lookahead set pointed to by p is known - * return pointer to a perminent location for the set - */ -Lkset* -flset(Lkset *p) -{ - Lkset *q; - int *u, *v, *w, j; - - for(q = &lkst[nlset]; q-- > lkst;) { - u = p->lset; - v = q->lset; - w = &v[tbitset]; - while(v < w) - if(*u++ != *v++) - goto more; - /* we have matched */ - return q; - more:; - } - /* add a new one */ - q = &lkst[nlset++]; - if(nlset >= LSETSIZE) - error("too many lookahead sets"); - SETLOOP(j) - q->lset[j] = p->lset[j]; - return q; -} - -void -cleantmp(void) -{ - ZAPFILE(actname); - ZAPFILE(tempname); -} - -void -intr(void) -{ - cleantmp(); - exits("interrupted"); -} - -void -setup(int argc, char *argv[]) -{ - long c, t; - int i, j, fd, lev, ty, ytab, *p; - int vflag, dflag, stem; - char actnm[8], *stemc, *s, dirbuf[128]; - - ytab = 0; - vflag = 0; - dflag = 0; - stem = 0; - stemc = "y"; - foutput = 0; - fdefine = 0; - fdebug = 0; - ARGBEGIN{ - case 'v': - case 'V': - vflag++; - break; - case 'D': - yydebug = ARGF(); - break; - case 'd': - dflag++; - break; - case 'o': - ytab++; - ytabc = ARGF(); - break; - case 's': - stem++; - stemc = ARGF(); - break; - case 'S': - parser = PARSERS; - break; - default: - error("illegal option: %c", ARGC()); - }ARGEND - openup(stemc, dflag, vflag, ytab, ytabc); - - if((fd = mkstemp(ttempname)) >= 0){ - tempname = ttempname; - ftemp = Bfdopen(fd, OWRITE); - } - if((fd = mkstemp(tactname)) >= 0){ - actname = tactname; - faction = Bfdopen(fd, OWRITE); - } - if(ftemp == 0 || faction == 0) - error("cannot open temp file"); - if(argc < 1) - error("no input file"); - infile = argv[0]; - if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){ - i = strlen(infile)+1+strlen(dirbuf)+1+10; - s = malloc(i); - if(s != nil){ - snprint(s, i, "%s/%s", dirbuf, infile); - cleanname(s); - infile = s; - } - } - finput = Bopen(infile, OREAD); - if(finput == 0) - error("cannot open '%s'", argv[0]); - cnamp = cnames; - - defin(0, "$end"); - extval = PRIVATE; /* tokens start in unicode 'private use' */ - defin(0, "error"); - defin(1, "$accept"); - defin(0, "$unk"); - mem = mem0; - i = 0; - - for(t = gettok(); t != MARK && t != ENDFILE;) - switch(t) { - case ';': - t = gettok(); - break; - - case START: - if(gettok() != IDENTIFIER) - error("bad %%start construction"); - start = chfind(1, tokname); - t = gettok(); - continue; - - case TYPEDEF: - if(gettok() != TYPENAME) - error("bad syntax in %%type"); - ty = numbval; - for(;;) { - t = gettok(); - switch(t) { - case IDENTIFIER: - if((t=chfind(1, tokname)) < NTBASE) { - j = TYPE(toklev[t]); - if(j != 0 && j != ty) - error("type redeclaration of token %s", - tokset[t].name); - else - SETTYPE(toklev[t], ty); - } else { - j = nontrst[t-NTBASE].value; - if(j != 0 && j != ty) - error("type redeclaration of nonterminal %s", - nontrst[t-NTBASE].name ); - else - nontrst[t-NTBASE].value = ty; - } - case ',': - continue; - case ';': - t = gettok(); - default: - break; - } - break; - } - continue; - - case UNION: - /* copy the union declaration to the output */ - cpyunion(); - t = gettok(); - continue; - - case LEFT: - case BINARY: - case RIGHT: - i++; - - case TERM: - /* nonzero means new prec. and assoc. */ - lev = t-TERM; - ty = 0; - - /* get identifiers so defined */ - t = gettok(); - - /* there is a type defined */ - if(t == TYPENAME) { - ty = numbval; - t = gettok(); - } - for(;;) { - switch(t) { - case ',': - t = gettok(); - continue; - - case ';': - break; - - case IDENTIFIER: - j = chfind(0, tokname); - if(j >= NTBASE) - error("%s defined earlier as nonterminal", tokname); - if(lev) { - if(ASSOC(toklev[j])) - error("redeclaration of precedence of %s", tokname); - SETASC(toklev[j], lev); - SETPLEV(toklev[j], i); - } - if(ty) { - if(TYPE(toklev[j])) - error("redeclaration of type of %s", tokname); - SETTYPE(toklev[j],ty); - } - t = gettok(); - if(t == NUMBER) { - tokset[j].value = numbval; - if(j < ndefout && j > 3) - error("please define type number of %s earlier", - tokset[j].name); - t = gettok(); - } - continue; - } - break; - } - continue; - - case LCURLY: - defout(0); - cpycode(); - t = gettok(); - continue; - - default: - error("syntax error"); - } - if(t == ENDFILE) - error("unexpected EOF before %%"); - - /* t is MARK */ - Bprint(ftable, "extern int yyerrflag;\n"); - Bprint(ftable, "#ifndef YYMAXDEPTH\n"); - Bprint(ftable, "#define YYMAXDEPTH 150\n"); - Bprint(ftable, "#endif\n" ); - if(!ntypes) { - Bprint(ftable, "#ifndef YYSTYPE\n"); - Bprint(ftable, "#define YYSTYPE int\n"); - Bprint(ftable, "#endif\n"); - } - Bprint(ftable, "YYSTYPE yylval;\n"); - Bprint(ftable, "YYSTYPE yyval;\n"); - - prdptr[0] = mem; - - /* added production */ - *mem++ = NTBASE; - - /* if start is 0, we will overwrite with the lhs of the first rule */ - *mem++ = start; - *mem++ = 1; - *mem++ = 0; - prdptr[1] = mem; - while((t=gettok()) == LCURLY) - cpycode(); - if(t != IDENTCOLON) - error("bad syntax on first rule"); - - if(!start) - prdptr[0][1] = chfind(1, tokname); - - /* read rules */ - while(t != MARK && t != ENDFILE) { - /* process a rule */ - rlines[nprod] = lineno; - if(t == '|') - *mem++ = *prdptr[nprod-1]; - else - if(t == IDENTCOLON) { - *mem = chfind(1, tokname); - if(*mem < NTBASE) - error("token illegal on LHS of grammar rule"); - mem++; - } else - error("illegal rule: missing semicolon or | ?"); - /* read rule body */ - t = gettok(); - - more_rule: - while(t == IDENTIFIER) { - *mem = chfind(1, tokname); - if(*mem < NTBASE) - levprd[nprod] = toklev[*mem]; - mem++; - t = gettok(); - } - if(t == PREC) { - if(gettok() != IDENTIFIER) - error("illegal %%prec syntax"); - j = chfind(2, tokname); - if(j >= NTBASE) - error("nonterminal %s illegal after %%prec", - nontrst[j-NTBASE].name); - levprd[nprod] = toklev[j]; - t = gettok(); - } - if(t == '=') { - levprd[nprod] |= ACTFLAG; - Bprint(faction, "\ncase %d:", nprod); - cpyact(mem-prdptr[nprod]-1); - Bprint(faction, " break;"); - if((t=gettok()) == IDENTIFIER) { - - /* action within rule... */ - sprint(actnm, "$$%d", nprod); - - /* make it a nonterminal */ - j = chfind(1, actnm); - - /* - * the current rule will become rule number nprod+1 - * move the contents down, and make room for the null - */ - for(p = mem; p >= prdptr[nprod]; --p) - p[2] = *p; - mem += 2; - - /* enter null production for action */ - p = prdptr[nprod]; - *p++ = j; - *p++ = -nprod; - - /* update the production information */ - levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; - levprd[nprod] = ACTFLAG; - if(++nprod >= NPROD) - error("more than %d rules", NPROD); - prdptr[nprod] = p; - - /* make the action appear in the original rule */ - *mem++ = j; - - /* get some more of the rule */ - goto more_rule; - } - } - - while(t == ';') - t = gettok(); - *mem++ = -nprod; - - /* check that default action is reasonable */ - if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) { - - /* no explicit action, LHS has value */ - int tempty; - - tempty = prdptr[nprod][1]; - if(tempty < 0) - error("must return a value, since LHS has a type"); - else - if(tempty >= NTBASE) - tempty = nontrst[tempty-NTBASE].value; - else - tempty = TYPE(toklev[tempty]); - if(tempty != nontrst[*prdptr[nprod]-NTBASE].value) - error("default action causes potential type clash"); - } - nprod++; - if(nprod >= NPROD) - error("more than %d rules", NPROD); - prdptr[nprod] = mem; - levprd[nprod] = 0; - } - - /* end of all rules */ - defout(1); - - finact(); - if(t == MARK) { - Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); - while((c=Bgetrune(finput)) != Beof) - Bputrune(ftable, c); - } - Bterm(finput); -} - -/* - * finish action routine - */ -void -finact(void) -{ - - Bterm(faction); - Bprint(ftable, "#define YYEOFCODE %d\n", 1); - Bprint(ftable, "#define YYERRCODE %d\n", 2); -} - -/* - * define s to be a terminal if t=0 - * or a nonterminal if t=1 - */ -int -defin(int nt, char *s) -{ - int val; - Rune rune; - - val = 0; - if(nt) { - nnonter++; - if(nnonter >= NNONTERM) - error("too many nonterminals, limit %d",NNONTERM); - nontrst[nnonter].name = cstash(s); - return NTBASE + nnonter; - } - - /* must be a token */ - ntokens++; - if(ntokens >= NTERMS) - error("too many terminals, limit %d", NTERMS); - tokset[ntokens].name = cstash(s); - - /* establish value for token */ - /* single character literal */ - if(s[0] == ' ') { - val = chartorune(&rune, &s[1]); - if(s[val+1] == 0) { - val = rune; - goto out; - } - } - - /* escape sequence */ - if(s[0] == ' ' && s[1] == '\\') { - if(s[3] == 0) { - /* single character escape sequence */ - switch(s[2]) { - case 'n': val = '\n'; break; - case 'r': val = '\r'; break; - case 'b': val = '\b'; break; - case 't': val = '\t'; break; - case 'f': val = '\f'; break; - case '\'': val = '\''; break; - case '"': val = '"'; break; - case '\\': val = '\\'; break; - default: error("invalid escape"); - } - goto out; - } - - /* \nnn sequence */ - if(s[2] >= '0' && s[2] <= '7') { - if(s[3] < '0' || - s[3] > '7' || - s[4] < '0' || - s[4] > '7' || - s[5] != 0) - error("illegal \\nnn construction"); - val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; - if(val == 0) - error("'\\000' is illegal"); - goto out; - } - error("unknown escape"); - } - val = extval++; - -out: - tokset[ntokens].value = val; - toklev[ntokens] = 0; - return ntokens; -} - -/* - * write out the defines (at the end of the declaration section) - */ -void -defout(int last) -{ - int i, c; - char sar[NAMESIZE+10]; - - for(i=ndefout; i<=ntokens; i++) { - /* non-literals */ - c = tokset[i].name[0]; - if(c != ' ' && c != '$') { - Bprint(ftable, "#define %s %d\n", - tokset[i].name, tokset[i].value); - if(fdefine) - Bprint(fdefine, "#define\t%s\t%d\n", - tokset[i].name, tokset[i].value); - } - } - ndefout = ntokens+1; - if(last && fdebug) { - Bprint(fdebug, "char* yytoknames[] =\n{\n"); - TLOOP(i) { - if(tokset[i].name) { - chcopy(sar, tokset[i].name); - Bprint(fdebug, "\t\"%s\",\n", sar); - continue; - } - Bprint(fdebug, "\t0,\n"); - } - Bprint(fdebug, "};\n"); - } -} - -char* -cstash(char *s) -{ - char *temp; - - temp = cnamp; - do { - if(cnamp >= &cnames[cnamsz]) - error("too many characters in id's and literals"); - else - *cnamp++ = *s; - } while(*s++); - return temp; -} - -long -gettok(void) -{ - long c; - Rune rune; - int i, base, match, reserve; - static int peekline; - -begin: - reserve = 0; - lineno += peekline; - peekline = 0; - c = Bgetrune(finput); - while(c == ' ' || c == '\n' || c == '\t' || c == '\f') { - if(c == '\n') - lineno++; - c = Bgetrune(finput); - } - - /* skip comment */ - if(c == '/') { - lineno += skipcom(); - goto begin; - } - switch(c) { - case Beof: - return ENDFILE; - - case '{': - Bungetrune(finput); - return '='; - - case '<': - /* get, and look up, a type name (union member name) */ - i = 0; - while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') { - rune = c; - c = runetochar(&tokname[i], &rune); - if(i < NAMESIZE) - i += c; - } - if(c != '>') - error("unterminated < ... > clause"); - tokname[i] = 0; - for(i=1; i<=ntypes; i++) - if(!strcmp(typeset[i], tokname)) { - numbval = i; - return TYPENAME; - } - ntypes++; - numbval = ntypes; - typeset[numbval] = cstash(tokname); - return TYPENAME; - - case '"': - case '\'': - match = c; - tokname[0] = ' '; - i = 1; - for(;;) { - c = Bgetrune(finput); - if(c == '\n' || c <= 0) - error("illegal or missing ' or \"" ); - if(c == '\\') { - tokname[i] = '\\'; - if(i < NAMESIZE) - i++; - c = Bgetrune(finput); - } else - if(c == match) - break; - rune = c; - c = runetochar(&tokname[i], &rune); - if(i < NAMESIZE) - i += c; - } - break; - - case '%': - case '\\': - switch(c = Bgetrune(finput)) { - case '0': return TERM; - case '<': return LEFT; - case '2': return BINARY; - case '>': return RIGHT; - case '%': - case '\\': return MARK; - case '=': return PREC; - case '{': return LCURLY; - default: reserve = 1; - } - - default: - /* number */ - if(isdigit(c)) { - numbval = c-'0'; - base = (c=='0')? 8: 10; - for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput)) - numbval = numbval*base + (c-'0'); - Bungetrune(finput); - return NUMBER; - } - if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') { - i = 0; - while(islower(c) || isupper(c) || isdigit(c) || - c == '-' || c=='_' || c=='.' || c=='$') { - if(reserve && isupper(c)) - c += 'a'-'A'; - rune = c; - c = runetochar(&tokname[i], &rune); - if(i < NAMESIZE) - i += c; - c = Bgetrune(finput); - } - } else - return c; - Bungetrune(finput); - } - tokname[i] = 0; - - /* find a reserved word */ - if(reserve) { - for(c=0; resrv[c].name; c++) - if(strcmp(tokname, resrv[c].name) == 0) - return resrv[c].value; - error("invalid escape, or illegal reserved word: %s", tokname); - } - - /* look ahead to distinguish IDENTIFIER from IDENTCOLON */ - c = Bgetrune(finput); - while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') { - if(c == '\n') - peekline++; - /* look for comments */ - if(c == '/') - peekline += skipcom(); - c = Bgetrune(finput); - } - if(c == ':') - return IDENTCOLON; - Bungetrune(finput); - return IDENTIFIER; -} - -/* - * determine the type of a symbol - */ -int -fdtype(int t) -{ - int v; - - if(t >= NTBASE) - v = nontrst[t-NTBASE].value; - else - v = TYPE(toklev[t]); - if(v <= 0) - error("must specify type for %s", (t>=NTBASE)? - nontrst[t-NTBASE].name: tokset[t].name); - return v; -} - -int -chfind(int t, char *s) -{ - int i; - - if(s[0] == ' ') - t = 0; - TLOOP(i) - if(!strcmp(s, tokset[i].name)) - return i; - NTLOOP(i) - if(!strcmp(s, nontrst[i].name)) - return NTBASE+i; - - /* cannot find name */ - if(t > 1) - error("%s should have been defined earlier", s); - return defin(t, s); -} - -/* - * copy the union declaration to the output, and the define file if present - */ -void -cpyunion(void) -{ - long c; - int level; - - Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); - Bprint(ftable, "typedef union "); - if(fdefine != 0) - Bprint(fdefine, "\ntypedef union "); - - level = 0; - for(;;) { - if((c=Bgetrune(finput)) == Beof) - error("EOF encountered while processing %%union"); - Bputrune(ftable, c); - if(fdefine != 0) - Bputrune(fdefine, c); - switch(c) { - case '\n': - lineno++; - break; - case '{': - level++; - break; - case '}': - level--; - - /* we are finished copying */ - if(level == 0) { - Bprint(ftable, " YYSTYPE;\n"); - if(fdefine != 0) - Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n"); - return; - } - } - } -} - -/* - * copies code between \{ and \} - */ -void -cpycode(void) -{ - - long c; - - c = Bgetrune(finput); - if(c == '\n') { - c = Bgetrune(finput); - lineno++; - } - Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); - while(c != Beof) { - if(c == '\\') { - if((c=Bgetrune(finput)) == '}') - return; - Bputc(ftable, '\\'); - } - if(c == '%') { - if((c=Bgetrune(finput)) == '}') - return; - Bputc(ftable, '%'); - } - Bputrune(ftable, c); - if(c == '\n') - lineno++; - c = Bgetrune(finput); - } - error("eof before %%}"); -} - -/* - * skip over comments - * skipcom is called after reading a '/' - */ -int -skipcom(void) -{ - long c; - int i; - - /* i is the number of lines skipped */ - i = 0; - if(Bgetrune(finput) != '*') - error("illegal comment"); - c = Bgetrune(finput); - while(c != Beof) { - while(c == '*') - if((c=Bgetrune(finput)) == '/') - return i; - if(c == '\n') - i++; - c = Bgetrune(finput); - } - error("EOF inside comment"); - return 0; -} - -/* - * copy C action to the next ; or closing } - */ -void -cpyact(int offset) -{ - long c; - int brac, match, j, s, fnd, tok; - - Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile); - brac = 0; - -loop: - c = Bgetrune(finput); -swt: - switch(c) { - case ';': - if(brac == 0) { - Bputrune(faction, c); - return; - } - goto lcopy; - - case '{': - brac++; - goto lcopy; - - case '$': - s = 1; - tok = -1; - c = Bgetrune(finput); - - /* type description */ - if(c == '<') { - Bungetrune(finput); - if(gettok() != TYPENAME) - error("bad syntax on $<ident> clause"); - tok = numbval; - c = Bgetrune(finput); - } - if(c == '$') { - Bprint(faction, "yyval"); - - /* put out the proper tag... */ - if(ntypes) { - if(tok < 0) - tok = fdtype(*prdptr[nprod]); - Bprint(faction, ".%s", typeset[tok]); - } - goto loop; - } - if(c == '-') { - s = -s; - c = Bgetrune(finput); - } - if(isdigit(c)) { - j = 0; - while(isdigit(c)) { - j = j*10 + (c-'0'); - c = Bgetrune(finput); - } - Bungetrune(finput); - j = j*s - offset; - if(j > 0) - error("Illegal use of $%d", j+offset); - - dollar: - Bprint(faction, "yypt[-%d].yyv", -j); - - /* put out the proper tag */ - if(ntypes) { - if(j+offset <= 0 && tok < 0) - error("must specify type of $%d", j+offset); - if(tok < 0) - tok = fdtype(prdptr[nprod][j+offset]); - Bprint(faction, ".%s", typeset[tok]); - } - goto loop; - } - if(isupper(c) || islower(c) || c == '_' || c == '.') { - int tok; /* tok used oustide for type info */ - - /* look for $name */ - Bungetrune(finput); - if(gettok() != IDENTIFIER) - error("$ must be followed by an identifier"); - tok = chfind(2, tokname); - if((c = Bgetrune(finput)) != '#') { - Bungetrune(finput); - fnd = -1; - } else - if(gettok() != NUMBER) { - error("# must be followed by number"); - fnd = -1; - } else - fnd = numbval; - for(j=1; j<=offset; ++j) - if(tok == prdptr[nprod][j]) { - if(--fnd <= 0) { - j -= offset; - goto dollar; - } - } - error("$name or $name#number not found"); - } - Bputc(faction, '$'); - if(s < 0 ) - Bputc(faction, '-'); - goto swt; - - case '}': - brac--; - if(brac) - goto lcopy; - Bputrune(faction, c); - return; - - case '/': - /* look for comments */ - Bputrune(faction, c); - c = Bgetrune(finput); - if(c != '*') - goto swt; - - /* it really is a comment */ - Bputrune(faction, c); - c = Bgetrune(finput); - while(c >= 0) { - while(c == '*') { - Bputrune(faction, c); - if((c=Bgetrune(finput)) == '/') - goto lcopy; - } - Bputrune(faction, c); - if(c == '\n') - lineno++; - c = Bgetrune(finput); - } - error("EOF inside comment"); - - case '\'': - /* character constant */ - match = '\''; - goto string; - - case '"': - /* character string */ - match = '"'; - - string: - Bputrune(faction, c); - while(c = Bgetrune(finput)) { - if(c == '\\') { - Bputrune(faction, c); - c = Bgetrune(finput); - if(c == '\n') - lineno++; - } else - if(c == match) - goto lcopy; - if(c == '\n') - error("newline in string or char. const."); - Bputrune(faction, c); - } - error("EOF in string or character constant"); - - case Beof: - error("action does not terminate"); - - case '\n': - lineno++; - goto lcopy; - } - -lcopy: - Bputrune(faction, c); - goto loop; -} - -void -openup(char *stem, int dflag, int vflag, int ytab, char *ytabc) -{ - char buf[256]; - - if(vflag) { - sprint(buf, "%s.%s", stem, FILEU); - foutput = Bopen(buf, OWRITE); - if(foutput == 0) - error("cannot open %s", buf); - } - if(yydebug) { - sprint(buf, "%s.%s", stem, FILEDEBUG); - if((fdebug = Bopen(buf, OWRITE)) == 0) - error("can't open %s", buf); - } - if(dflag) { - sprint(buf, "%s.%s", stem, FILED); - fdefine = Bopen(buf, OWRITE); - if(fdefine == 0) - error("can't create %s", buf); - } - if(ytab == 0) - sprint(buf, "%s.%s", stem, OFILE); - else - strcpy(buf, ytabc); - ftable = Bopen(buf, OWRITE); - if(ftable == 0) - error("cannot open table file %s", buf); -} - -/* - * print the output for the states - */ -void -output(void) -{ - int i, k, c; - Wset *u, *v; - - Bprint(ftable, "short yyexca[] =\n{"); - if(fdebug) - Bprint(fdebug, "char* yystates[] =\n{\n"); - - /* output the stuff for state i */ - SLOOP(i) { - nolook = tystate[i]!=MUSTLOOKAHEAD; - closure(i); - - /* output actions */ - nolook = 1; - aryfil(temp1, ntokens+nnonter+1, 0); - WSLOOP(wsets, u) { - c = *(u->pitem); - if(c > 1 && c < NTBASE && temp1[c] == 0) { - WSLOOP(u, v) - if(c == *(v->pitem)) - putitem(v->pitem+1, (Lkset*)0); - temp1[c] = state(c); - } else - if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) - temp1[c+ntokens] = amem[indgo[i]+c]; - } - if(i == 1) - temp1[1] = ACCEPTCODE; - - /* now, we have the shifts; look at the reductions */ - lastred = 0; - WSLOOP(wsets, u) { - c = *u->pitem; - - /* reduction */ - if(c <= 0) { - lastred = -c; - TLOOP(k) - if(BIT(u->ws.lset, k)) { - if(temp1[k] == 0) - temp1[k] = c; - else - if(temp1[k] < 0) { /* reduce/reduce conflict */ - if(foutput) - Bprint(foutput, - "\n%d: reduce/reduce conflict" - " (red'ns %d and %d ) on %s", - i, -temp1[k], lastred, - symnam(k)); - if(-temp1[k] > lastred) - temp1[k] = -lastred; - zzrrconf++; - } else - /* potential shift/reduce conflict */ - precftn( lastred, k, i ); - } - } - } - wract(i); - } - - if(fdebug) - Bprint(fdebug, "};\n"); - Bprint(ftable, "};\n"); - Bprint(ftable, "#define YYNPROD %d\n", nprod); - Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE); - if(yydebug) - Bprint(ftable, "#define yydebug %s\n", yydebug); -} - -/* - * pack state i from temp1 into amem - */ -int -apack(int *p, int n) -{ - int *pp, *qq, *rr, off, *q, *r; - - /* we don't need to worry about checking because - * we will only look at entries known to be there... - * eliminate leading and trailing 0's - */ - - q = p+n; - for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) - ; - /* no actions */ - if(pp > q) - return 0; - p = pp; - - /* now, find a place for the elements from p to q, inclusive */ - r = &amem[ACTSIZE-1]; - for(rr = amem; rr <= r; rr++, off++) { - for(qq = rr, pp = p; pp <= q; pp++, qq++) - if(*pp != 0) - if(*pp != *qq && *qq != 0) - goto nextk; - - /* we have found an acceptable k */ - if(pkdebug && foutput != 0) - Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem)); - for(qq = rr, pp = p; pp <= q; pp++, qq++) - if(*pp) { - if(qq > r) - error("action table overflow"); - if(qq > memp) - memp = qq; - *qq = *pp; - } - if(pkdebug && foutput != 0) - for(pp = amem; pp <= memp; pp += 10) { - Bprint(foutput, "\t"); - for(qq = pp; qq <= pp+9; qq++) - Bprint(foutput, "%d ", *qq); - Bprint(foutput, "\n"); - } - return(off); - nextk:; - } - error("no space in action table"); - return 0; -} - -/* - * output the gotos for the nontermninals - */ -void -go2out(void) -{ - int i, j, k, best, count, cbest, times; - - /* mark begining of gotos */ - Bprint(ftemp, "$\n"); - for(i = 1; i <= nnonter; i++) { - go2gen(i); - - /* find the best one to make default */ - best = -1; - times = 0; - - /* is j the most frequent */ - for(j = 0; j <= nstate; j++) { - if(tystate[j] == 0) - continue; - if(tystate[j] == best) - continue; - - /* is tystate[j] the most frequent */ - count = 0; - cbest = tystate[j]; - for(k = j; k <= nstate; k++) - if(tystate[k] == cbest) - count++; - if(count > times) { - best = cbest; - times = count; - } - } - - /* best is now the default entry */ - zzgobest += times-1; - for(j = 0; j <= nstate; j++) - if(tystate[j] != 0 && tystate[j] != best) { - Bprint(ftemp, "%d,%d,", j, tystate[j]); - zzgoent++; - } - - /* now, the default */ - if(best == -1) - best = 0; - zzgoent++; - Bprint(ftemp, "%d\n", best); - } -} - -/* - * output the gotos for nonterminal c - */ -void -go2gen(int c) -{ - int i, work, cc; - Item *p, *q; - - - /* first, find nonterminals with gotos on c */ - aryfil(temp1, nnonter+1, 0); - temp1[c] = 1; - work = 1; - while(work) { - work = 0; - PLOOP(0, i) - - /* cc is a nonterminal */ - if((cc=prdptr[i][1]-NTBASE) >= 0) - /* cc has a goto on c */ - if(temp1[cc] != 0) { - - /* thus, the left side of production i does too */ - cc = *prdptr[i]-NTBASE; - if(temp1[cc] == 0) { - work = 1; - temp1[cc] = 1; - } - } - } - - /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ - if(g2debug && foutput != 0) { - Bprint(foutput, "%s: gotos on ", nontrst[c].name); - NTLOOP(i) - if(temp1[i]) - Bprint(foutput, "%s ", nontrst[i].name); - Bprint(foutput, "\n"); - } - - /* now, go through and put gotos into tystate */ - aryfil(tystate, nstate, 0); - SLOOP(i) - ITMLOOP(i, p, q) - if((cc = *p->pitem) >= NTBASE) - /* goto on c is possible */ - if(temp1[cc-NTBASE]) { - tystate[i] = amem[indgo[i]+c]; - break; - } -} - -/* - * decide a shift/reduce conflict by precedence. - * r is a rule number, t a token number - * the conflict is in state s - * temp1[t] is changed to reflect the action - */ -void -precftn(int r, int t, int s) -{ - int lp, lt, action; - - lp = levprd[r]; - lt = toklev[t]; - if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { - - /* conflict */ - if(foutput != 0) - Bprint(foutput, - "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s", - s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t)); - zzsrconf++; - return; - } - if(PLEVEL(lt) == PLEVEL(lp)) - action = ASSOC(lt); - else - if(PLEVEL(lt) > PLEVEL(lp)) - action = RASC; /* shift */ - else - action = LASC; /* reduce */ - switch(action) { - case BASC: /* error action */ - temp1[t] = ERRCODE; - break; - case LASC: /* reduce */ - temp1[t] = -r; - break; - } -} - -/* - * output state i - * temp1 has the actions, lastred the default - */ -void -wract(int i) -{ - int p, p0, p1, ntimes, tred, count, j, flag; - - /* find the best choice for lastred */ - lastred = 0; - ntimes = 0; - TLOOP(j) { - if(temp1[j] >= 0) - continue; - if(temp1[j]+lastred == 0) - continue; - /* count the number of appearances of temp1[j] */ - count = 0; - tred = -temp1[j]; - levprd[tred] |= REDFLAG; - TLOOP(p) - if(temp1[p]+tred == 0) - count++; - if(count > ntimes) { - lastred = tred; - ntimes = count; - } - } - - /* - * for error recovery, arrange that, if there is a shift on the - * error recovery token, `error', that the default be the error action - */ - if(temp1[2] > 0) - lastred = 0; - - /* clear out entries in temp1 which equal lastred */ - TLOOP(p) - if(temp1[p]+lastred == 0) - temp1[p] = 0; - - wrstate(i); - defact[i] = lastred; - flag = 0; - TLOOP(p0) - if((p1=temp1[p0]) != 0) { - if(p1 < 0) { - p1 = -p1; - goto exc; - } - if(p1 == ACCEPTCODE) { - p1 = -1; - goto exc; - } - if(p1 == ERRCODE) { - p1 = 0; - exc: - if(flag++ == 0) - Bprint(ftable, "-1, %d,\n", i); - Bprint(ftable, "\t%d, %d,\n", p0, p1); - zzexcp++; - continue; - } - Bprint(ftemp, "%d,%d,", p0, p1); - zzacent++; - } - if(flag) { - defact[i] = -2; - Bprint(ftable, "\t-2, %d,\n", lastred); - } - Bprint(ftemp, "\n"); -} - -/* - * writes state i - */ -void -wrstate(int i) -{ - int j0, j1; - Item *pp, *qq; - Wset *u; - - if(fdebug) { - if(lastred) { - Bprint(fdebug, " 0, /*%d*/\n", i); - } else { - Bprint(fdebug, " \""); - ITMLOOP(i, pp, qq) - Bprint(fdebug, "%s\\n", writem(pp->pitem)); - if(tystate[i] == MUSTLOOKAHEAD) - WSLOOP(wsets + (pstate[i+1] - pstate[i]), u) - if(*u->pitem < 0) - Bprint(fdebug, "%s\\n", writem(u->pitem)); - Bprint(fdebug, "\", /*%d*/\n", i); - } - } - if(foutput == 0) - return; - Bprint(foutput, "\nstate %d\n", i); - ITMLOOP(i, pp, qq) - Bprint(foutput, "\t%s\n", writem(pp->pitem)); - if(tystate[i] == MUSTLOOKAHEAD) - /* print out empty productions in closure */ - WSLOOP(wsets+(pstate[i+1]-pstate[i]), u) - if(*u->pitem < 0) - Bprint(foutput, "\t%s\n", writem(u->pitem)); - - /* check for state equal to another */ - TLOOP(j0) - if((j1=temp1[j0]) != 0) { - Bprint(foutput, "\n\t%s ", symnam(j0)); - /* shift, error, or accept */ - if(j1 > 0) { - if(j1 == ACCEPTCODE) - Bprint(foutput, "accept"); - else - if(j1 == ERRCODE) - Bprint(foutput, "error"); - else - Bprint(foutput, "shift %d", j1); - } else - Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]); - } - - /* output the final production */ - if(lastred) - Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n", - lastred, rlines[lastred]); - else - Bprint(foutput, "\n\t. error\n\n"); - - /* now, output nonterminal actions */ - j1 = ntokens; - for(j0 = 1; j0 <= nnonter; j0++) { - j1++; - if(temp1[j1]) - Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]); - } -} - -void -warray(char *s, int *v, int n) -{ - int i; - - Bprint(ftable, "short %s[] =\n{", s); - for(i=0;;) { - if(i%10 == 0) - Bprint(ftable, "\n"); - Bprint(ftable, "%4d", v[i]); - i++; - if(i >= n) { - Bprint(ftable, "\n};\n"); - break; - } - Bprint(ftable, ","); - } -} - -/* - * in order to free up the mem and amem arrays for the optimizer, - * and still be able to output yyr1, etc., after the sizes of - * the action array is known, we hide the nonterminals - * derived by productions in levprd. - */ - -void -hideprod(void) -{ - int i, j; - - j = 0; - levprd[0] = 0; - PLOOP(1, i) { - if(!(levprd[i] & REDFLAG)) { - j++; - if(foutput != 0) - Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i])); - } - levprd[i] = *prdptr[i] - NTBASE; - } - if(j) - print("%d rules never reduced\n", j); -} - -void -callopt(void) -{ - int i, *p, j, k, *q; - - /* read the arrays from tempfile and set parameters */ - finput = Bopen(tempname, OREAD); - if(finput == 0) - error("optimizer cannot open tempfile"); - - pgo[0] = 0; - temp1[0] = 0; - nstate = 0; - nnonter = 0; - for(;;) { - switch(gtnm()) { - case '\n': - nstate++; - pmem--; - temp1[nstate] = pmem - mem0; - case ',': - continue; - case '$': - break; - default: - error("bad tempfile %s", tempname); - } - break; - } - - pmem--; - temp1[nstate] = yypgo[0] = pmem - mem0; - for(;;) { - switch(gtnm()) { - case '\n': - nnonter++; - yypgo[nnonter] = pmem-mem0; - case ',': - continue; - case -1: - break; - default: - error("bad tempfile"); - } - break; - } - pmem--; - yypgo[nnonter--] = pmem - mem0; - for(i = 0; i < nstate; i++) { - k = 32000; - j = 0; - q = mem0 + temp1[i+1]; - for(p = mem0 + temp1[i]; p < q ; p += 2) { - if(*p > j) - j = *p; - if(*p < k) - k = *p; - } - /* nontrivial situation */ - if(k <= j) { - /* j is now the range */ -/* j -= k; *//* call scj */ - if(k > maxoff) - maxoff = k; - } - tystate[i] = (temp1[i+1]-temp1[i]) + 2*j; - if(j > maxspr) - maxspr = j; - } - - /* initialize ggreed table */ - for(i = 1; i <= nnonter; i++) { - ggreed[i] = 1; - j = 0; - - /* minimum entry index is always 0 */ - q = mem0 + yypgo[i+1] - 1; - for(p = mem0+yypgo[i]; p < q ; p += 2) { - ggreed[i] += 2; - if(*p > j) - j = *p; - } - ggreed[i] = ggreed[i] + 2*j; - if(j > maxoff) - maxoff = j; - } - - /* now, prepare to put the shift actions into the amem array */ - for(i = 0; i < ACTSIZE; i++) - amem[i] = 0; - maxa = amem; - for(i = 0; i < nstate; i++) { - if(tystate[i] == 0 && adb > 1) - Bprint(ftable, "State %d: null\n", i); - indgo[i] = YYFLAG1; - } - while((i = nxti()) != NOMORE) - if(i >= 0) - stin(i); - else - gin(-i); - - /* print amem array */ - if(adb > 2 ) - for(p = amem; p <= maxa; p += 10) { - Bprint(ftable, "%4d ", (int)(p-amem)); - for(i = 0; i < 10; ++i) - Bprint(ftable, "%4d ", p[i]); - Bprint(ftable, "\n"); - } - - /* write out the output appropriate to the language */ - aoutput(); - osummary(); - ZAPFILE(tempname); -} - -void -gin(int i) -{ - int *p, *r, *s, *q1, *q2; - - /* enter gotos on nonterminal i into array amem */ - ggreed[i] = 0; - - q2 = mem0+ yypgo[i+1] - 1; - q1 = mem0 + yypgo[i]; - - /* now, find amem place for it */ - for(p = amem; p < &amem[ACTSIZE]; p++) { - if(*p) - continue; - for(r = q1; r < q2; r += 2) { - s = p + *r + 1; - if(*s) - goto nextgp; - if(s > maxa) - if((maxa = s) > &amem[ACTSIZE]) - error("a array overflow"); - } - /* we have found amem spot */ - *p = *q2; - if(p > maxa) - if((maxa = p) > &amem[ACTSIZE]) - error("a array overflow"); - for(r = q1; r < q2; r += 2) { - s = p + *r + 1; - *s = r[1]; - } - pgo[i] = p-amem; - if(adb > 1) - Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]); - return; - - nextgp:; - } - error("cannot place goto %d\n", i); -} - -void -stin(int i) -{ - int *r, *s, n, flag, j, *q1, *q2; - - tystate[i] = 0; - - /* enter state i into the amem array */ - q2 = mem0+temp1[i+1]; - q1 = mem0+temp1[i]; - /* find an acceptable place */ - for(n = -maxoff; n < ACTSIZE; n++) { - flag = 0; - for(r = q1; r < q2; r += 2) { - if((s = *r + n + amem) < amem) - goto nextn; - if(*s == 0) - flag++; - else - if(*s != r[1]) - goto nextn; - } - - /* check that the position equals another only if the states are identical */ - for(j=0; j<nstate; j++) { - if(indgo[j] == n) { - - /* we have some disagreement */ - if(flag) - goto nextn; - if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) { - - /* states are equal */ - indgo[i] = n; - if(adb > 1) - Bprint(ftable, - "State %d: entry at %d equals state %d\n", - i, n, j); - return; - } - - /* we have some disagreement */ - goto nextn; - } - } - - for(r = q1; r < q2; r += 2) { - if((s = *r+n+amem) >= &amem[ACTSIZE]) - error("out of space in optimizer a array"); - if(s > maxa) - maxa = s; - if(*s != 0 && *s != r[1]) - error("clobber of a array, pos'n %d, by %d", s-amem, r[1]); - *s = r[1]; - } - indgo[i] = n; - if(adb > 1) - Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]); - return; - nextn:; - } - error("Error; failure to place state %d\n", i); -} - -/* - * finds the next i - */ -int -nxti(void) -{ - int i, max, maxi; - - max = 0; - maxi = 0; - for(i = 1; i <= nnonter; i++) - if(ggreed[i] >= max) { - max = ggreed[i]; - maxi = -i; - } - for(i = 0; i < nstate; ++i) - if(tystate[i] >= max) { - max = tystate[i]; - maxi = i; - } - if(nxdb) - Bprint(ftable, "nxti = %d, max = %d\n", maxi, max); - if(max == 0) - return NOMORE; - return maxi; -} - -/* - * write summary - */ -void -osummary(void) -{ - - int i, *p; - - if(foutput == 0) - return; - i = 0; - for(p = maxa; p >= amem; p--) - if(*p == 0) - i++; - - Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", - (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE); - Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i); - Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); -} - -/* - * this version is for C - * write out the optimized parser - */ -void -aoutput(void) -{ - Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1)); - arout("yyact", amem, (maxa-amem)+1); - arout("yypact", indgo, nstate); - arout("yypgo", pgo, nnonter+1); -} - -void -arout(char *s, int *v, int n) -{ - int i; - - Bprint(ftable, "short %s[] =\n{", s); - for(i = 0; i < n;) { - if(i%10 == 0) - Bprint(ftable, "\n"); - Bprint(ftable, "%4d", v[i]); - i++; - if(i == n) - Bprint(ftable, "\n};\n"); - else - Bprint(ftable, ","); - } -} - -/* - * read and convert an integer from the standard input - * return the terminating character - * blanks, tabs, and newlines are ignored - */ -int -gtnm(void) -{ - int sign, val, c; - - sign = 0; - val = 0; - while((c=Bgetrune(finput)) != Beof) { - if(isdigit(c)) { - val = val*10 + c-'0'; - continue; - } - if(c == '-') { - sign = 1; - continue; - } - break; - } - if(sign) - val = -val; - *pmem++ = val; - if(pmem >= &mem0[MEMSIZE]) - error("out of space"); - return c; -} diff --git a/src/cmd/grep/mkfile b/src/cmd/grep/mkfile index aa240094..7c82c5a4 100644 --- a/src/cmd/grep/mkfile +++ b/src/cmd/grep/mkfile @@ -3,7 +3,7 @@ # Calling this grep breaks a LOT. Like egrep on Linux. # And probably configure. -TARG=9grep +TARG=grep HFILES=\ grep.h\ diff --git a/src/cmd/lex/mkfile b/src/cmd/lex/mkfile index 7b6e8fdc..5283c13a 100644 --- a/src/cmd/lex/mkfile +++ b/src/cmd/lex/mkfile @@ -1,6 +1,6 @@ <$PLAN9/src/mkhdr -TARG=9lex +TARG=lex OFILES=lmain.$O\ y.tab.$O\ sub1.$O\ diff --git a/src/cmd/mkfile b/src/cmd/mkfile index 8414dabb..42ca0e9f 100644 --- a/src/cmd/mkfile +++ b/src/cmd/mkfile @@ -9,9 +9,9 @@ DIRS=lex `ls -l |sed -n 's/^d.* //p' |egrep -v "^($BUGGERED)$"|egrep -v '^lex$'` <$PLAN9/src/mkdirs -dir-all dir-install: $PLAN9/bin/9yacc $PLAN9/bin/9lex +dir-all dir-install: $PLAN9/bin/yacc $PLAN9/bin/lex -bc.tab.c units.tab.c: $PLAN9/bin/9yacc +bc.tab.c units.tab.c: $PLAN9/bin/yacc %.tab.h %.tab.c: %.y $YACC $YFLAGS -s $stem $stem.y @@ -19,10 +19,10 @@ bc.tab.c units.tab.c: $PLAN9/bin/9yacc %.o: %.tab.c 9c -o $target $stem.tab.c -delatex.c:D: delatex.lx $PLAN9/bin/9lex - 9lex -t delatex.lx >delatex.c +delatex.c:D: delatex.lx $PLAN9/bin/lex + 9 lex -t delatex.lx >delatex.c -$PLAN9/bin/9lex: $PLAN9/bin/9yacc +$PLAN9/bin/lex: $PLAN9/bin/yacc cd lex; mk install CLEANFILES=$CLEANFILES bc.tab.[ch] units.tab.[ch] delatex.c |