aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/lex
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/lex')
-rw-r--r--src/cmd/lex/header.c115
-rw-r--r--src/cmd/lex/ldefs.h180
-rw-r--r--src/cmd/lex/lmain.c292
-rw-r--r--src/cmd/lex/mkfile30
-rw-r--r--src/cmd/lex/ncform188
-rw-r--r--src/cmd/lex/parser.y651
-rw-r--r--src/cmd/lex/sub1.c591
-rw-r--r--src/cmd/lex/sub2.c856
8 files changed, 2903 insertions, 0 deletions
diff --git a/src/cmd/lex/header.c b/src/cmd/lex/header.c
new file mode 100644
index 00000000..8fa63d62
--- /dev/null
+++ b/src/cmd/lex/header.c
@@ -0,0 +1,115 @@
+# include "ldefs.h"
+
+extern int nine;
+
+void
+phead1(void)
+{
+ Bprint(&fout,"typedef unsigned char Uchar;\n");
+ if (nine) {
+ Bprint(&fout,"# include <u.h>\n");
+ Bprint(&fout,"# include <libc.h>\n");
+ }
+ Bprint(&fout,"# include <stdio.h>\n");
+ Bprint(&fout, "# define U(x) x\n");
+ Bprint(&fout, "# define NLSTATE yyprevious=YYNEWLINE\n");
+ Bprint(&fout,"# define BEGIN yybgin = yysvec + 1 +\n");
+ Bprint(&fout,"# define INITIAL 0\n");
+ Bprint(&fout,"# define YYLERR yysvec\n");
+ Bprint(&fout,"# define YYSTATE (yyestate-yysvec-1)\n");
+ Bprint(&fout,"# define YYOPTIM 1\n");
+# ifdef DEBUG
+ Bprint(&fout,"# define LEXDEBUG 1\n");
+# endif
+ Bprint(&fout,"# define YYLMAX 200\n");
+ Bprint(&fout,
+"# define unput(c) {yytchar= (c);if(yytchar=='\\n')yylineno--;*yysptr++=yytchar;}\n");
+ Bprint(&fout,"# define yymore() (yymorfg=1)\n");
+ Bprint(&fout,"# define ECHO fprintf(yyout, \"%%s\",yytext)\n");
+ Bprint(&fout,"# define REJECT { nstr = yyreject(); goto yyfussy;}\n");
+ Bprint(&fout,"int yyleng; extern char yytext[];\n");
+ Bprint(&fout,"int yymorfg;\n");
+ Bprint(&fout,"extern Uchar *yysptr, yysbuf[];\n");
+ Bprint(&fout,"int yytchar;\n");
+// Bprint(&fout,"FILE *yyin = stdin, *yyout = stdout;\n");
+ Bprint(&fout,"FILE *yyin, *yyout;\n");
+ Bprint(&fout,"extern int yylineno;\n");
+ Bprint(&fout,"struct yysvf { \n");
+ Bprint(&fout,"\tstruct yywork *yystoff;\n");
+ Bprint(&fout,"\tstruct yysvf *yyother;\n");
+ Bprint(&fout,"\tint *yystops;};\n");
+ Bprint(&fout,"struct yysvf *yyestate;\n");
+ Bprint(&fout,"extern struct yysvf yysvec[], *yybgin;\n");
+ Bprint(&fout,"int yylook(void), yywrap(void), yyback(int *, int);\n");
+ if(nine) {
+ Bprint(&fout,
+ "int infd, outfd;\n"
+ "\n"
+ "void\n"
+ "output(char c)\n"
+ "{\n"
+ " int rv;\n"
+ " if ((rv = write(outfd, &c, 1)) < 0)\n"
+ " sysfatal(\"output: %%r\");\n"
+ " if (rv == 0)\n"
+ " sysfatal(\"output: EOF?\");\n"
+ "}\n"
+ "\n"
+ "int\n"
+ "input(void)\n"
+ "{\n"
+ " if(yysptr > yysbuf)\n"
+ " yytchar = U(*--yysptr);\n"
+ " else {\n"
+ " int rv;\n"
+ " if ((rv = read(infd, &yytchar, 1)) < 0)\n"
+ " sysfatal(\"input: %%r\");\n"
+ " if (rv == 0)\n"
+ " return 0;\n"
+ " }\n"
+ " if (yytchar == '\\n')\n"
+ " yylineno++;\n"
+ " return yytchar;\n"
+ "}\n");
+ }
+ else {
+ Bprint(&fout,"# define output(c) putc(c,yyout)\n");
+ Bprint(&fout, "%s%d%s\n",
+ "# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==",
+ '\n',
+ "?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)");
+ }
+}
+
+void
+phead2(void)
+{
+ Bprint(&fout,"while((nstr = yylook()) >= 0)\n");
+ Bprint(&fout,"yyfussy: switch(nstr){\n");
+ Bprint(&fout,"case 0:\n");
+ Bprint(&fout,"if(yywrap()) return(0); break;\n");
+}
+
+void
+ptail(void)
+{
+ if(!pflag){
+ Bprint(&fout,"case -1:\nbreak;\n"); /* for reject */
+ Bprint(&fout,"default:\n");
+ Bprint(&fout,"fprintf(yyout,\"bad switch yylook %%d\",nstr);\n");
+ Bprint(&fout,"} return(0); }\n");
+ Bprint(&fout,"/* end of yylex */\n");
+ }
+ pflag = 1;
+}
+
+void
+statistics(void)
+{
+ fprint(errorf,"%d/%d nodes(%%e), %d/%d positions(%%p), %d/%d (%%n), %ld transitions\n",
+ tptr, treesize, (int)(nxtpos-positions), maxpos, stnum+1, nstates, rcount);
+ fprint(errorf, ", %d/%d packed char classes(%%k)", (int)(pcptr-pchar), pchlen);
+ fprint(errorf,", %d/%d packed transitions(%%a)",nptr, ntrans);
+ fprint(errorf, ", %d/%d output slots(%%o)", yytop, outsize);
+ fprint(errorf,"\n");
+}
diff --git a/src/cmd/lex/ldefs.h b/src/cmd/lex/ldefs.h
new file mode 100644
index 00000000..ea95fb51
--- /dev/null
+++ b/src/cmd/lex/ldefs.h
@@ -0,0 +1,180 @@
+# include <u.h>
+# include <libc.h>
+# include <ctype.h>
+# include <bio.h>
+# define PP 1
+
+#ifdef NOTDEF
+# define CWIDTH 8
+# define CMASK 0377
+#endif
+# define NCH 256
+
+
+# define TOKENSIZE 1000
+# define DEFSIZE 40
+# define DEFCHAR 1000
+# define STARTCHAR 100
+# define STARTSIZE 256
+# define CCLSIZE 1000
+
+# define TREESIZE 1000
+# define NSTATES 500
+# define MAXPOS 2500
+# define NTRANS 2000
+# define NOUTPUT 5000
+
+# define NACTIONS 100
+# define ALITTLEEXTRA 30
+
+# define RCCL NCH+90
+# define RNCCL NCH+91
+# define RSTR NCH+92
+# define RSCON NCH+93
+# define RNEWE NCH+94
+# define FINAL NCH+95
+# define RNULLS NCH+96
+# define RCAT NCH+97
+# define STAR NCH+98
+# define PLUS NCH+99
+# define QUEST NCH+100
+# define DIV NCH+101
+# define BAR NCH+102
+# define CARAT NCH+103
+# define S1FINAL NCH+104
+# define S2FINAL NCH+105
+
+# define DEFSECTION 1
+# define RULESECTION 2
+# define ENDSECTION 5
+# define TRUE 1
+# define FALSE 0
+
+# define PC 1
+# define PS 1
+
+# ifdef DEBUG
+# define LINESIZE 110
+extern int yydebug;
+extern int debug; /* 1 = on */
+extern int charc;
+# endif
+
+# ifdef DEBUG
+extern int freturn(int);
+# else
+# define freturn(s) s
+# endif
+
+extern int sargc;
+extern char **sargv;
+extern uchar buf[520];
+extern int yyline; /* line number of file */
+extern int sect;
+extern int eof;
+extern int lgatflg;
+extern int divflg;
+extern int funcflag;
+extern int pflag;
+extern int casecount;
+extern int chset; /* 1 = char set modified */
+extern Biobuf *fin, fout, *fother;
+extern int foutopen;
+extern int errorf;
+extern int fptr;
+extern char *cname;
+extern int prev; /* previous input character */
+extern int pres; /* present input character */
+extern int peek; /* next input character */
+extern int *name;
+extern int *left;
+extern int *right;
+extern int *parent;
+extern uchar *nullstr;
+extern int tptr;
+extern uchar pushc[TOKENSIZE];
+extern uchar *pushptr;
+extern uchar slist[STARTSIZE];
+extern uchar *slptr;
+extern uchar **def, **subs, *dchar;
+extern uchar **sname, *stchar;
+extern uchar *ccl;
+extern uchar *ccptr;
+extern uchar *dp, *sp;
+extern int dptr, sptr;
+extern uchar *bptr; /* store input position */
+extern uchar *tmpstat;
+extern int count;
+extern int **foll;
+extern int *nxtpos;
+extern int *positions;
+extern int *gotof;
+extern int *nexts;
+extern uchar *nchar;
+extern int **state;
+extern int *sfall; /* fallback state num */
+extern uchar *cpackflg; /* true if state has been character packed */
+extern int *atable, aptr;
+extern int nptr;
+extern uchar symbol[NCH];
+extern uchar cindex[NCH];
+extern int xstate;
+extern int stnum;
+extern int ccount;
+extern uchar match[NCH];
+extern uchar extra[NACTIONS];
+extern uchar *pcptr, *pchar;
+extern int pchlen;
+extern int nstates, maxpos;
+extern int yytop;
+extern int report;
+extern int ntrans, treesize, outsize;
+extern long rcount;
+extern int *verify, *advance, *stoff;
+extern int scon;
+extern uchar *psave;
+
+extern void acompute(int);
+extern void add(int **, int);
+extern void allprint(int);
+extern void cclinter(int);
+extern void cgoto(void);
+extern void cfoll(int);
+extern int cpyact(void);
+extern int dupl(int);
+extern void error(char *,...);
+extern void first(int);
+extern void follow(int);
+extern int gch(void);
+extern uchar *getl(uchar *);
+extern void layout(void);
+extern void lgate(void);
+extern int lookup(uchar *, uchar **);
+extern int member(int, uchar *);
+extern void mkmatch(void);
+extern int mn0(int);
+extern int mn1(int, int);
+extern int mn2(int, int, int);
+extern void munputc(int);
+extern void munputs(uchar *);
+extern void *myalloc(int, int);
+extern void nextstate(int, int);
+extern int notin(int);
+extern void packtrans(int, uchar *, int *, int, int);
+extern void padd(int **, int);
+extern void pccl(void);
+extern void pfoll(void);
+extern void phead1(void);
+extern void phead2(void);
+extern void pstate(int);
+extern void ptail(void);
+extern void sect1dump(void);
+extern void sect2dump(void);
+extern void statistics(void);
+extern void stprt(int);
+extern void strpt(uchar *);
+extern void treedump(void);
+extern int usescape(int);
+extern void warning(char *,...);
+extern int yyparse(void);
+extern void yyerror(char *);
diff --git a/src/cmd/lex/lmain.c b/src/cmd/lex/lmain.c
new file mode 100644
index 00000000..ec43869a
--- /dev/null
+++ b/src/cmd/lex/lmain.c
@@ -0,0 +1,292 @@
+/* lex [-[dynvt]] [file] ... [file] */
+
+/* Copyright 1976, Bell Telephone Laboratories, Inc.,
+ written by Eric Schmidt, August 27, 1976 */
+
+# include "ldefs.h"
+Biobuf fout;
+int foutopen;
+int errorf = 1;
+int sect = DEFSECTION;
+int prev = '\n'; /* previous input character */
+int pres = '\n'; /* present input character */
+int peek = '\n'; /* next input character */
+uchar *pushptr = pushc;
+uchar *slptr = slist;
+
+char *cname = SYS9 "/sys/lib/lex/ncform";
+
+int nine;
+int ccount = 1;
+int casecount = 1;
+int aptr = 1;
+int nstates = NSTATES, maxpos = MAXPOS;
+int treesize = TREESIZE, ntrans = NTRANS;
+int yytop;
+int outsize = NOUTPUT;
+int sptr = 1;
+int report = 2;
+int debug; /* 1 = on */
+int charc;
+int sargc;
+char **sargv;
+uchar buf[520];
+int yyline; /* line number of file */
+int eof;
+int lgatflg;
+int divflg;
+int funcflag;
+int pflag;
+int chset; /* 1 = char set modified */
+Biobuf *fin, *fother;
+int fptr;
+int *name;
+int *left;
+int *right;
+int *parent;
+uchar *nullstr;
+int tptr;
+uchar pushc[TOKENSIZE];
+uchar slist[STARTSIZE];
+uchar **def, **subs, *dchar;
+uchar **sname, *stchar;
+uchar *ccl;
+uchar *ccptr;
+uchar *dp, *sp;
+int dptr;
+uchar *bptr; /* store input position */
+uchar *tmpstat;
+int count;
+int **foll;
+int *nxtpos;
+int *positions;
+int *gotof;
+int *nexts;
+uchar *nchar;
+int **state;
+int *sfall; /* fallback state num */
+uchar *cpackflg; /* true if state has been character packed */
+int *atable;
+int nptr;
+uchar symbol[NCH];
+uchar cindex[NCH];
+int xstate;
+int stnum;
+uchar match[NCH];
+uchar extra[NACTIONS];
+uchar *pchar, *pcptr;
+int pchlen = TOKENSIZE;
+ long rcount;
+int *verify, *advance, *stoff;
+int scon;
+uchar *psave;
+
+static void free1core(void);
+static void free2core(void);
+#ifdef DEBUG
+static void free3core(void);
+#endif
+static void get1core(void);
+static void get2core(void);
+static void get3core(void);
+
+void
+main(int argc, char **argv)
+{
+ int i;
+
+ ARGBEGIN {
+# ifdef DEBUG
+ case 'd': debug++; break;
+ case 'y': yydebug = TRUE; break;
+# endif
+ case 't': case 'T':
+ Binit(&fout, 1, OWRITE);
+ errorf= 2;
+ foutopen = 1;
+ break;
+ case 'v': case 'V':
+ report = 1;
+ break;
+ case 'n': case 'N':
+ report = 0;
+ break;
+ case '9':
+ nine = 1;
+ break;
+ default:
+ warning("Unknown option %c", ARGC());
+ } ARGEND
+ sargc = argc;
+ sargv = argv;
+ if (argc > 0){
+ fin = Bopen(argv[fptr++], OREAD);
+ if(fin == 0)
+ error ("Can't read input file %s",argv[0]);
+ sargc--;
+ sargv++;
+ }
+ else {
+ fin = myalloc(sizeof(Biobuf), 1);
+ if(fin == 0)
+ exits("core");
+ Binit(fin, 0, OREAD);
+ }
+ if(Bgetc(fin) == Beof) /* no input */
+ exits(0);
+ Bseek(fin, 0, 0);
+ gch();
+ /* may be gotten: def, subs, sname, stchar, ccl, dchar */
+ get1core();
+ /* may be gotten: name, left, right, nullstr, parent */
+ strcpy((char*)sp, "INITIAL");
+ sname[0] = sp;
+ sp += strlen("INITIAL") + 1;
+ sname[1] = 0;
+ if(yyparse()) exits("error"); /* error return code */
+ /* may be disposed of: def, subs, dchar */
+ free1core();
+ /* may be gotten: tmpstat, foll, positions, gotof, nexts, nchar, state, atable, sfall, cpackflg */
+ get2core();
+ ptail();
+ mkmatch();
+# ifdef DEBUG
+ if(debug) pccl();
+# endif
+ sect = ENDSECTION;
+ if(tptr>0)cfoll(tptr-1);
+# ifdef DEBUG
+ if(debug)pfoll();
+# endif
+ cgoto();
+# ifdef DEBUG
+ if(debug){
+ print("Print %d states:\n",stnum+1);
+ for(i=0;i<=stnum;i++)stprt(i);
+ }
+# endif
+ /* may be disposed of: positions, tmpstat, foll, state, name, left, right, parent, ccl, stchar, sname */
+ /* may be gotten: verify, advance, stoff */
+ free2core();
+ get3core();
+ layout();
+ /* may be disposed of: verify, advance, stoff, nexts, nchar,
+ gotof, atable, ccpackflg, sfall */
+# ifdef DEBUG
+ free3core();
+# endif
+ fother = Bopen(cname,OREAD);
+ if(fother == 0)
+ error("Lex driver missing, file %s",cname);
+ while ( (i=Bgetc(fother)) != Beof)
+ Bputc(&fout, i);
+
+ Bterm(fother);
+ Bterm(&fout);
+ if(
+# ifdef DEBUG
+ debug ||
+# endif
+ report == 1)statistics();
+ Bterm(fin);
+ exits(0); /* success return code */
+}
+
+static void
+get1core(void)
+{
+ ccptr = ccl = myalloc(CCLSIZE,sizeof(*ccl));
+ pcptr = pchar = myalloc(pchlen, sizeof(*pchar));
+ def = myalloc(DEFSIZE,sizeof(*def));
+ subs = myalloc(DEFSIZE,sizeof(*subs));
+ dp = dchar = myalloc(DEFCHAR,sizeof(*dchar));
+ sname = myalloc(STARTSIZE,sizeof(*sname));
+ sp = stchar = myalloc(STARTCHAR,sizeof(*stchar));
+ if(ccl == 0 || def == 0 || subs == 0 || dchar == 0 || sname == 0 || stchar == 0)
+ error("Too little core to begin");
+}
+
+static void
+free1core(void)
+{
+ free(def);
+ free(subs);
+ free(dchar);
+}
+
+static void
+get2core(void)
+{
+ int i;
+
+ gotof = myalloc(nstates,sizeof(*gotof));
+ nexts = myalloc(ntrans,sizeof(*nexts));
+ nchar = myalloc(ntrans,sizeof(*nchar));
+ state = myalloc(nstates,sizeof(*state));
+ atable = myalloc(nstates,sizeof(*atable));
+ sfall = myalloc(nstates,sizeof(*sfall));
+ cpackflg = myalloc(nstates,sizeof(*cpackflg));
+ tmpstat = myalloc(tptr+1,sizeof(*tmpstat));
+ foll = myalloc(tptr+1,sizeof(*foll));
+ nxtpos = positions = myalloc(maxpos,sizeof(*positions));
+ if(tmpstat == 0 || foll == 0 || positions == 0 ||
+ gotof == 0 || nexts == 0 || nchar == 0 || state == 0 || atable == 0 || sfall == 0 || cpackflg == 0 )
+ error("Too little core for state generation");
+ for(i=0;i<=tptr;i++)foll[i] = 0;
+}
+
+static void
+free2core(void)
+{
+ free(positions);
+ free(tmpstat);
+ free(foll);
+ free(name);
+ free(left);
+ free(right);
+ free(parent);
+ free(nullstr);
+ free(state);
+ free(sname);
+ free(stchar);
+ free(ccl);
+}
+
+static void
+get3core(void)
+{
+ verify = myalloc(outsize,sizeof(*verify));
+ advance = myalloc(outsize,sizeof(*advance));
+ stoff = myalloc(stnum+2,sizeof(*stoff));
+ if(verify == 0 || advance == 0 || stoff == 0)
+ error("Too little core for final packing");
+}
+# ifdef DEBUG
+static void
+free3core(void){
+ free(advance);
+ free(verify);
+ free(stoff);
+ free(gotof);
+ free(nexts);
+ free(nchar);
+ free(atable);
+ free(sfall);
+ free(cpackflg);
+}
+# endif
+void *
+myalloc(int a, int b)
+{
+ void *i;
+ i = calloc(a, b);
+ if(i==0)
+ warning("OOPS - calloc returns a 0");
+ return(i);
+}
+
+void
+yyerror(char *s)
+{
+ fprint(2, "line %d: %s\n", yyline, s);
+}
diff --git a/src/cmd/lex/mkfile b/src/cmd/lex/mkfile
new file mode 100644
index 00000000..a748409f
--- /dev/null
+++ b/src/cmd/lex/mkfile
@@ -0,0 +1,30 @@
+<$PLAN9/src/mkhdr
+
+#TARG=lex
+TARG=lex.9
+OFILES=lmain.$O\
+ y.tab.$O\
+ sub1.$O\
+ sub2.$O\
+ header.$O\
+
+HFILES=ldefs.h\
+
+YFILES=parser.y\
+
+SHORTLIB=bio 9
+UPDATE=\
+ mkfile\
+ $HFILES\
+ lmain.c\
+ sub1.c\
+ sub2.c\
+ header.c
+
+<$PLAN9/src/mkone
+
+installall:V:
+ for objtype in $CPUS ; do
+ mk install
+ done
+ cp ncform $SYS9/lib/lex
diff --git a/src/cmd/lex/ncform b/src/cmd/lex/ncform
new file mode 100644
index 00000000..dd7a1ea6
--- /dev/null
+++ b/src/cmd/lex/ncform
@@ -0,0 +1,188 @@
+#pragma lib "libl.a"
+int yylineno =1;
+# define YYU(x) x
+char yytext[YYLMAX];
+struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp;
+Uchar yysbuf[YYLMAX];
+Uchar *yysptr = yysbuf;
+int *yyfnd;
+extern struct yysvf *yyestate;
+int yyprevious = YYNEWLINE;
+# ifdef LEXDEBUG
+extern void allprint(char);
+# endif
+yylook(void){
+ struct yysvf *yystate, **lsp;
+ struct yywork *yyt;
+ struct yysvf *yyz;
+ int yych;
+ struct yywork *yyr;
+# ifdef LEXDEBUG
+ int debug;
+# endif
+ Uchar *yylastch;
+ /* start off machines */
+# ifdef LEXDEBUG
+ debug = 0;
+# endif
+ if (!yymorfg)
+ yylastch = (Uchar*)yytext;
+ else {
+ yymorfg=0;
+ yylastch = (Uchar*)yytext+yyleng;
+ }
+ for(;;){
+ lsp = yylstate;
+ yyestate = yystate = yybgin;
+ if (yyprevious==YYNEWLINE) yystate++;
+ for (;;){
+# ifdef LEXDEBUG
+ if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1);
+# endif
+ yyt = yystate->yystoff;
+ if(yyt == yycrank){ /* may not be any transitions */
+ yyz = yystate->yyother;
+ if(yyz == 0)break;
+ if(yyz->yystoff == yycrank)break;
+ }
+ *yylastch++ = yych = input();
+ tryagain:
+# ifdef LEXDEBUG
+ if(debug){
+ fprintf(yyout,"char ");
+ allprint(yych);
+ putchar('\n');
+ }
+# endif
+ yyr = yyt;
+ if ( (int)yyt > (int)yycrank){
+ yyt = yyr + yych;
+ if (yyt <= yytop && yyt->verify+yysvec == yystate){
+ if(yyt->advance+yysvec == YYLERR) /* error transitions */
+ {unput(*--yylastch);break;}
+ *lsp++ = yystate = yyt->advance+yysvec;
+ goto contin;
+ }
+ }
+# ifdef YYOPTIM
+ else if((int)yyt < (int)yycrank) { /* r < yycrank */
+ yyt = yyr = yycrank+(yycrank-yyt);
+# ifdef LEXDEBUG
+ if(debug)fprintf(yyout,"compressed state\n");
+# endif
+ yyt = yyt + yych;
+ if(yyt <= yytop && yyt->verify+yysvec == yystate){
+ if(yyt->advance+yysvec == YYLERR) /* error transitions */
+ {unput(*--yylastch);break;}
+ *lsp++ = yystate = yyt->advance+yysvec;
+ goto contin;
+ }
+ yyt = yyr + YYU(yymatch[yych]);
+# ifdef LEXDEBUG
+ if(debug){
+ fprintf(yyout,"try fall back character ");
+ allprint(YYU(yymatch[yych]));
+ putchar('\n');
+ }
+# endif
+ if(yyt <= yytop && yyt->verify+yysvec == yystate){
+ if(yyt->advance+yysvec == YYLERR) /* error transition */
+ {unput(*--yylastch);break;}
+ *lsp++ = yystate = yyt->advance+yysvec;
+ goto contin;
+ }
+ }
+ if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){
+# ifdef LEXDEBUG
+ if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1);
+# endif
+ goto tryagain;
+ }
+# endif
+ else
+ {unput(*--yylastch);break;}
+ contin:
+# ifdef LEXDEBUG
+ if(debug){
+ fprintf(yyout,"state %d char ",yystate-yysvec-1);
+ allprint(yych);
+ putchar('\n');
+ }
+# endif
+ ;
+ }
+# ifdef LEXDEBUG
+ if(debug){
+ fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1);
+ allprint(yych);
+ putchar('\n');
+ }
+# endif
+ while (lsp-- > yylstate){
+ *yylastch-- = 0;
+ if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){
+ yyolsp = lsp;
+ if(yyextra[*yyfnd]){ /* must backup */
+ while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){
+ lsp--;
+ unput(*yylastch--);
+ }
+ }
+ yyprevious = YYU(*yylastch);
+ yylsp = lsp;
+ yyleng = yylastch-(Uchar*)yytext+1;
+ yytext[yyleng] = 0;
+# ifdef LEXDEBUG
+ if(debug){
+ fprintf(yyout,"\nmatch '%s'", yytext);
+ fprintf(yyout," action %d\n",*yyfnd);
+ }
+# endif
+ return(*yyfnd++);
+ }
+ unput(*yylastch);
+ }
+ if (yytext[0] == 0 /* && feof(yyin) */)
+ {
+ yysptr=yysbuf;
+ return(0);
+ }
+ yyprevious = input();
+ yytext[0] = yyprevious;
+ if (yyprevious>0)
+ output(yyprevious);
+ yylastch = (Uchar*)yytext;
+# ifdef LEXDEBUG
+ if(debug)putchar('\n');
+# endif
+ }
+ return(0); /* shut up the compiler; i have no idea what should be returned */
+ }
+yyback(int *p, int m)
+{
+if (p==0) return(0);
+while (*p)
+ {
+ if (*p++ == m)
+ return(1);
+ }
+return(0);
+}
+ /* the following are only used in the lex library */
+yyinput(void){
+ if(yyin == ((void*)0))
+ yyin = stdin;
+ return(input());
+}
+void
+yyoutput(int c)
+{
+ if(yyout == ((void*)0))
+ yyout = stdin;
+ output(c);
+}
+void
+yyunput(int c)
+{
+ unput(c);
+}
diff --git a/src/cmd/lex/parser.y b/src/cmd/lex/parser.y
new file mode 100644
index 00000000..5ac33a28
--- /dev/null
+++ b/src/cmd/lex/parser.y
@@ -0,0 +1,651 @@
+%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
+%left SCON '/' NEWE
+%left '|'
+%left '$' '^'
+%left CHAR CCL NCCL '(' '.' STR NULLS
+%left ITER
+%left CAT
+%left '*' '+' '?'
+
+%{
+# include "ldefs.h"
+#define YYSTYPE union _yystype_
+union _yystype_
+{
+ int i;
+ uchar *cp;
+};
+%}
+%%
+%{
+int i;
+int j,k;
+int g;
+uchar *p;
+%}
+acc : lexinput
+ ={
+# ifdef DEBUG
+ if(debug) sect2dump();
+# endif
+ }
+ ;
+lexinput: defns delim prods end
+ | defns delim end
+ ={
+ if(!funcflag)phead2();
+ funcflag = TRUE;
+ }
+ | error
+ ={
+# ifdef DEBUG
+ if(debug) {
+ sect1dump();
+ sect2dump();
+ }
+# endif
+ }
+ ;
+end: delim | ;
+defns: defns STR STR
+ ={ strcpy((char*)dp,(char*)$2.cp);
+ def[dptr] = dp;
+ dp += strlen((char*)$2.cp) + 1;
+ strcpy((char*)dp,(char*)$3.cp);
+ subs[dptr++] = dp;
+ if(dptr >= DEFSIZE)
+ error("Too many definitions");
+ dp += strlen((char*)$3.cp) + 1;
+ if(dp >= dchar+DEFCHAR)
+ error("Definitions too long");
+ subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
+ }
+ |
+ ;
+delim: DELIM
+ ={
+# ifdef DEBUG
+ if(sect == DEFSECTION && debug) sect1dump();
+# endif
+ sect++;
+ }
+ ;
+prods: prods pr
+ ={ $$.i = mn2(RNEWE,$1.i,$2.i);
+ }
+ | pr
+ ={ $$.i = $1.i;}
+ ;
+pr: r NEWE
+ ={
+ if(divflg == TRUE)
+ i = mn1(S1FINAL,casecount);
+ else i = mn1(FINAL,casecount);
+ $$.i = mn2(RCAT,$1.i,i);
+ divflg = FALSE;
+ casecount++;
+ }
+ | error NEWE
+ ={
+# ifdef DEBUG
+ if(debug) sect2dump();
+# endif
+ }
+r: CHAR
+ ={ $$.i = mn0($1.i); }
+ | STR
+ ={
+ p = $1.cp;
+ i = mn0(*p++);
+ while(*p)
+ i = mn2(RSTR,i,*p++);
+ $$.i = i;
+ }
+ | '.'
+ ={ symbol['\n'] = 0;
+ if(psave == FALSE){
+ p = ccptr;
+ psave = ccptr;
+ for(i=1;i<'\n';i++){
+ symbol[i] = 1;
+ *ccptr++ = i;
+ }
+ for(i='\n'+1;i<NCH;i++){
+ symbol[i] = 1;
+ *ccptr++ = i;
+ }
+ *ccptr++ = 0;
+ if(ccptr > ccl+CCLSIZE)
+ error("Too many large character classes");
+ }
+ else
+ p = psave;
+ $$.i = mn1(RCCL,(int)p);
+ cclinter(1);
+ }
+ | CCL
+ ={ $$.i = mn1(RCCL,$1.i); }
+ | NCCL
+ ={ $$.i = mn1(RNCCL,$1.i); }
+ | r '*'
+ ={ $$.i = mn1(STAR,$1.i); }
+ | r '+'
+ ={ $$.i = mn1(PLUS,$1.i); }
+ | r '?'
+ ={ $$.i = mn1(QUEST,$1.i); }
+ | r '|' r
+ ={ $$.i = mn2(BAR,$1.i,$3.i); }
+ | r r %prec CAT
+ ={ $$.i = mn2(RCAT,$1.i,$2.i); }
+ | r '/' r
+ ={ if(!divflg){
+ j = mn1(S2FINAL,-casecount);
+ i = mn2(RCAT,$1.i,j);
+ $$.i = mn2(DIV,i,$3.i);
+ }
+ else {
+ $$.i = mn2(RCAT,$1.i,$3.i);
+ warning("Extra slash removed");
+ }
+ divflg = TRUE;
+ }
+ | r ITER ',' ITER '}'
+ ={ if($2.i > $4.i){
+ i = $2.i;
+ $2.i = $4.i;
+ $4.i = i;
+ }
+ if($4.i <= 0)
+ warning("Iteration range must be positive");
+ else {
+ j = $1.i;
+ for(k = 2; k<=$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ for(i = $2.i+1; i<=$4.i; i++){
+ g = dupl($1.i);
+ for(k=2;k<=i;k++)
+ g = mn2(RCAT,g,dupl($1.i));
+ j = mn2(BAR,j,g);
+ }
+ $$.i = j;
+ }
+ }
+ | r ITER '}'
+ ={
+ if($2.i < 0)warning("Can't have negative iteration");
+ else if($2.i == 0) $$.i = mn0(RNULLS);
+ else {
+ j = $1.i;
+ for(k=2;k<=$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ $$.i = j;
+ }
+ }
+ | r ITER ',' '}'
+ ={
+ /* from n to infinity */
+ if($2.i < 0)warning("Can't have negative iteration");
+ else if($2.i == 0) $$.i = mn1(STAR,$1.i);
+ else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
+ else { /* >= 2 iterations minimum */
+ j = $1.i;
+ for(k=2;k<$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ k = mn1(PLUS,dupl($1.i));
+ $$.i = mn2(RCAT,j,k);
+ }
+ }
+ | SCON r
+ ={ $$.i = mn2(RSCON,$2.i,$1.i); }
+ | '^' r
+ ={ $$.i = mn1(CARAT,$2.i); }
+ | r '$'
+ ={ i = mn0('\n');
+ if(!divflg){
+ j = mn1(S2FINAL,-casecount);
+ k = mn2(RCAT,$1.i,j);
+ $$.i = mn2(DIV,k,i);
+ }
+ else $$.i = mn2(RCAT,$1.i,i);
+ divflg = TRUE;
+ }
+ | '(' r ')'
+ ={ $$.i = $2.i; }
+ | NULLS
+ ={ $$.i = mn0(RNULLS); }
+ ;
+%%
+int
+yylex(void)
+{
+ uchar *p;
+ int c, i;
+ uchar *t, *xp;
+ int n, j, k, x;
+ static int sectbegin;
+ static uchar token[TOKENSIZE];
+ static int iter;
+
+# ifdef DEBUG
+ yylval.i = 0;
+# endif
+
+ if(sect == DEFSECTION) { /* definitions section */
+ while(!eof) {
+ if(prev == '\n'){ /* next char is at beginning of line */
+ getl(p=buf);
+ switch(*p){
+ case '%':
+ switch(*(p+1)){
+ case '%':
+ lgate();
+ Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
+ Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
+ sectbegin = TRUE;
+ i = treesize*(sizeof(*name)+sizeof(*left)+
+ sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
+ p = myalloc(i,1);
+ if(p == 0)
+ error("Too little core for parse tree");
+ free(p);
+ name = myalloc(treesize,sizeof(*name));
+ left = myalloc(treesize,sizeof(*left));
+ right = myalloc(treesize,sizeof(*right));
+ nullstr = myalloc(treesize,sizeof(*nullstr));
+ parent = myalloc(treesize,sizeof(*parent));
+ if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
+ error("Too little core for parse tree");
+ return(freturn(DELIM));
+ case 'p': case 'P': /* has overridden number of positions */
+ while(*p && !isdigit(*p))p++;
+ maxpos = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print("positions (%%p) now %d\n",maxpos);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'n': case 'N': /* has overridden number of states */
+ while(*p && !isdigit(*p))p++;
+ nstates = atol((char*)p);
+# ifdef DEBUG
+ if(debug)print( " no. states (%%n) now %d\n",nstates);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'e': case 'E': /* has overridden number of tree nodes */
+ while(*p && !isdigit(*p))p++;
+ treesize = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print("treesize (%%e) now %d\n",treesize);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'o': case 'O':
+ while (*p && !isdigit(*p))p++;
+ outsize = atol((char*)p);
+ if (report ==2) report=1;
+ continue;
+ case 'a': case 'A': /* has overridden number of transitions */
+ while(*p && !isdigit(*p))p++;
+ if(report == 2)report = 1;
+ ntrans = atol((char*)p);
+# ifdef DEBUG
+ if (debug)print("N. trans (%%a) now %d\n",ntrans);
+# endif
+ continue;
+ case 'k': case 'K': /* overriden packed char classes */
+ while (*p && !isdigit(*p))p++;
+ if (report==2) report=1;
+ free(pchar);
+ pchlen = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print( "Size classes (%%k) now %d\n",pchlen);
+# endif
+ pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
+ continue;
+ case '{':
+ lgate();
+ while(getl(p) && strcmp((char*)p,"%}") != 0)
+ Bprint(&fout, "%s\n",(char*)p);
+ if(p[0] == '%') continue;
+ error("Premature eof");
+ case 's': case 'S': /* start conditions */
+ lgate();
+ while(*p && strchr(" \t,", *p) == 0) p++;
+ n = TRUE;
+ while(n){
+ while(*p && strchr(" \t,", *p)) p++;
+ t = p;
+ while(*p && strchr(" \t,", *p) == 0)p++;
+ if(!*p) n = FALSE;
+ *p++ = 0;
+ if (*t == 0) continue;
+ i = sptr*2;
+ Bprint(&fout,"#define %s %d\n",(char*)t,i);
+ strcpy((char*)sp, (char*)t);
+ sname[sptr++] = sp;
+ sname[sptr] = 0; /* required by lookup */
+ if(sptr >= STARTSIZE)
+ error("Too many start conditions");
+ sp += strlen((char*)sp) + 1;
+ if(sp >= stchar+STARTCHAR)
+ error("Start conditions too long");
+ }
+ continue;
+ default:
+ warning("Invalid request %s",p);
+ continue;
+ } /* end of switch after seeing '%' */
+ case ' ': case '\t': /* must be code */
+ lgate();
+ Bprint(&fout, "%s\n",(char*)p);
+ continue;
+ default: /* definition */
+ while(*p && !isspace(*p)) p++;
+ if(*p == 0)
+ continue;
+ prev = *p;
+ *p = 0;
+ bptr = p+1;
+ yylval.cp = buf;
+ if(isdigit(buf[0]))
+ warning("Substitution strings may not begin with digits");
+ return(freturn(STR));
+ }
+ }
+ /* still sect 1, but prev != '\n' */
+ else {
+ p = bptr;
+ while(*p && isspace(*p)) p++;
+ if(*p == 0)
+ warning("No translation given - null string assumed");
+ strcpy((char*)token, (char*)p);
+ yylval.cp = token;
+ prev = '\n';
+ return(freturn(STR));
+ }
+ }
+ /* end of section one processing */
+ } else if(sect == RULESECTION){ /* rules and actions */
+ while(!eof){
+ switch(c=gch()){
+ case '\0':
+ return(freturn(0));
+ case '\n':
+ if(prev == '\n') continue;
+ x = NEWE;
+ break;
+ case ' ':
+ case '\t':
+ if(sectbegin == TRUE){
+ cpyact();
+ while((c=gch()) && c != '\n');
+ continue;
+ }
+ if(!funcflag)phead2();
+ funcflag = TRUE;
+ Bprint(&fout,"case %d:\n",casecount);
+ if(cpyact())
+ Bprint(&fout,"break;\n");
+ while((c=gch()) && c != '\n');
+ if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
+ warning("Executable statements should occur right after %%");
+ continue;
+ }
+ x = NEWE;
+ break;
+ case '%':
+ if(prev != '\n') goto character;
+ if(peek == '{'){ /* included code */
+ getl(buf);
+ while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
+ Bprint(&fout,"%s\n",(char*)buf);
+ continue;
+ }
+ if(peek == '%'){
+ gch();
+ gch();
+ x = DELIM;
+ break;
+ }
+ goto character;
+ case '|':
+ if(peek == ' ' || peek == '\t' || peek == '\n'){
+ Bprint(&fout,"%d\n",30000+casecount++);
+ continue;
+ }
+ x = '|';
+ break;
+ case '$':
+ if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
+ x = c;
+ break;
+ }
+ goto character;
+ case '^':
+ if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
+ x = c;
+ break;
+ case '?':
+ case '+':
+ case '.':
+ case '*':
+ case '(':
+ case ')':
+ case ',':
+ case '/':
+ x = c;
+ break;
+ case '}':
+ iter = FALSE;
+ x = c;
+ break;
+ case '{': /* either iteration or definition */
+ if(isdigit(c=gch())){ /* iteration */
+ iter = TRUE;
+ ieval:
+ i = 0;
+ while(isdigit(c)){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ yylval.i = atol((char*)token);
+ munputc(c);
+ x = ITER;
+ break;
+ } else { /* definition */
+ i = 0;
+ while(c && c!='}'){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ i = lookup(token,def);
+ if(i < 0)
+ warning("Definition %s not found",token);
+ else
+ munputs(subs[i]);
+ continue;
+ }
+ case '<': /* start condition ? */
+ if(prev != '\n') /* not at line begin, not start */
+ goto character;
+ t = slptr;
+ do {
+ i = 0;
+ c = gch();
+ while(c != ',' && c && c != '>'){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ if(i == 0)
+ goto character;
+ i = lookup(token,sname);
+ if(i < 0) {
+ warning("Undefined start condition %s",token);
+ continue;
+ }
+ *slptr++ = i+1;
+ } while(c && c != '>');
+ *slptr++ = 0;
+ /* check if previous value re-usable */
+ for (xp=slist; xp<t; ){
+ if (strcmp((char*)xp, (char*)t)==0)
+ break;
+ while (*xp++);
+ }
+ if (xp<t){
+ /* re-use previous pointer to string */
+ slptr=t;
+ t=xp;
+ }
+ if(slptr > slist+STARTSIZE) /* note not packed ! */
+ error("Too many start conditions used");
+ yylval.cp = t;
+ x = SCON;
+ break;
+ case '"':
+ i = 0;
+ while((c=gch()) && c != '"' && c != '\n'){
+ if(c == '\\') c = usescape(gch());
+ token[i++] = c;
+ if(i > TOKENSIZE){
+ warning("String too long");
+ i = TOKENSIZE-1;
+ break;
+ }
+ }
+ if(c == '\n') {
+ yyline--;
+ warning("Non-terminated string");
+ yyline++;
+ }
+ token[i] = 0;
+ if(i == 0)x = NULLS;
+ else if(i == 1){
+ yylval.i = token[0];
+ x = CHAR;
+ } else {
+ yylval.cp = token;
+ x = STR;
+ }
+ break;
+ case '[':
+ for(i=1;i<NCH;i++) symbol[i] = 0;
+ x = CCL;
+ if((c = gch()) == '^'){
+ x = NCCL;
+ c = gch();
+ }
+ while(c != ']' && c){
+ if(c == '\\') c = usescape(gch());
+ symbol[c] = 1;
+ j = c;
+ if((c=gch()) == '-' && peek != ']'){ /* range specified */
+ c = gch();
+ if(c == '\\') c = usescape(gch());
+ k = c;
+ if(j > k) {
+ n = j;
+ j = k;
+ k = n;
+ }
+ if(!(('A' <= j && k <= 'Z') ||
+ ('a' <= j && k <= 'z') ||
+ ('0' <= j && k <= '9')))
+ warning("Non-portable Character Class");
+ for(n=j+1;n<=k;n++)
+ symbol[n] = 1; /* implementation dependent */
+ c = gch();
+ }
+ }
+ /* try to pack ccl's */
+ i = 0;
+ for(j=0;j<NCH;j++)
+ if(symbol[j])token[i++] = j;
+ token[i] = 0;
+ p = ccl;
+ while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
+ if(p < ccptr) /* found it */
+ yylval.cp = p;
+ else {
+ yylval.cp = ccptr;
+ strcpy((char*)ccptr,(char*)token);
+ ccptr += strlen((char*)token) + 1;
+ if(ccptr >= ccl+CCLSIZE)
+ error("Too many large character classes");
+ }
+ cclinter(x==CCL);
+ break;
+ case '\\':
+ c = usescape(gch());
+ default:
+ character:
+ if(iter){ /* second part of an iteration */
+ iter = FALSE;
+ if('0' <= c && c <= '9')
+ goto ieval;
+ }
+ if(isalpha(peek)){
+ i = 0;
+ yylval.cp = token;
+ token[i++] = c;
+ while(isalpha(peek))
+ token[i++] = gch();
+ if(peek == '?' || peek == '*' || peek == '+')
+ munputc(token[--i]);
+ token[i] = 0;
+ if(i == 1){
+ yylval.i = token[0];
+ x = CHAR;
+ }
+ else x = STR;
+ } else {
+ yylval.i = c;
+ x = CHAR;
+ }
+ }
+ scon = FALSE;
+ if(x == SCON)scon = TRUE;
+ sectbegin = FALSE;
+ return(freturn(x));
+ }
+ }
+ /* section three */
+ ptail();
+# ifdef DEBUG
+ if(debug)
+ Bprint(&fout,"\n/*this comes from section three - debug */\n");
+# endif
+ while(getl(buf) && !eof)
+ Bprint(&fout,"%s\n",(char*)buf);
+ return(freturn(0));
+}
+/* end of yylex */
+# ifdef DEBUG
+int
+freturn(int i)
+{
+ if(yydebug) {
+ print("now return ");
+ if(i < NCH) allprint(i);
+ else print("%d",i);
+ printf(" yylval = ");
+ switch(i){
+ case STR: case CCL: case NCCL:
+ strpt(yylval.cp);
+ break;
+ case CHAR:
+ allprint(yylval.i);
+ break;
+ default:
+ print("%d",yylval.i);
+ break;
+ }
+ print("\n");
+ }
+ return(i);
+}
+# endif
diff --git a/src/cmd/lex/sub1.c b/src/cmd/lex/sub1.c
new file mode 100644
index 00000000..cb393a46
--- /dev/null
+++ b/src/cmd/lex/sub1.c
@@ -0,0 +1,591 @@
+# include "ldefs.h"
+uchar *
+getl(uchar *p) /* return next line of input, throw away trailing '\n' */
+ /* returns 0 if eof is had immediately */
+{
+ int c;
+ uchar *s, *t;
+
+ t = s = p;
+ while(((c = gch()) != 0) && c != '\n')
+ *t++ = c;
+ *t = 0;
+ if(c == 0 && s == t) return((uchar *)0);
+ prev = '\n';
+ pres = '\n';
+ return(s);
+}
+
+void
+printerr(char *type, char *fmt, va_list argl)
+{
+ char buf[1024];
+
+ if(!eof)fprint(errorf,"%d: ",yyline);
+ fprint(errorf,"(%s) ", type);
+ vseprint(buf, buf+sizeof(buf), fmt, argl);
+ fprint(errorf, "%s\n", buf);
+}
+
+
+void
+error(char *s,...)
+{
+ va_list argl;
+
+ va_start(argl, s);
+ printerr("Error", s, argl);
+ va_end(argl);
+# ifdef DEBUG
+ if(debug && sect != ENDSECTION) {
+ sect1dump();
+ sect2dump();
+ }
+# endif
+ if(
+# ifdef DEBUG
+ debug ||
+# endif
+ report == 1) statistics();
+ exits("error"); /* error return code */
+}
+
+void
+warning(char *s,...)
+{
+ va_list argl;
+
+ va_start(argl, s);
+ printerr("Warning", s, argl);
+ va_end(argl);
+ Bflush(&fout);
+}
+
+void
+lgate(void)
+{
+ int fd;
+
+ if (lgatflg) return;
+ lgatflg=1;
+ if(foutopen == 0){
+ fd = create("lex.yy.c", OWRITE, 0666);
+ if(fd < 0)
+ error("Can't open lex.yy.c");
+ Binit(&fout, fd, OWRITE);
+ foutopen = 1;
+ }
+ phead1();
+}
+
+void
+cclinter(int sw)
+{
+ /* sw = 1 ==> ccl */
+ int i, j, k;
+ int m;
+ if(!sw){ /* is NCCL */
+ for(i=1;i<NCH;i++)
+ symbol[i] ^= 1; /* reverse value */
+ }
+ for(i=1;i<NCH;i++)
+ if(symbol[i]) break;
+ if(i >= NCH) return;
+ i = cindex[i];
+ /* see if ccl is already in our table */
+ j = 0;
+ if(i){
+ for(j=1;j<NCH;j++){
+ if((symbol[j] && cindex[j] != i) ||
+ (!symbol[j] && cindex[j] == i)) break;
+ }
+ }
+ if(j >= NCH) return; /* already in */
+ m = 0;
+ k = 0;
+ for(i=1;i<NCH;i++)
+ if(symbol[i]){
+ if(!cindex[i]){
+ cindex[i] = ccount;
+ symbol[i] = 0;
+ m = 1;
+ } else k = 1;
+ }
+ /* m == 1 implies last value of ccount has been used */
+ if(m)ccount++;
+ if(k == 0) return; /* is now in as ccount wholly */
+ /* intersection must be computed */
+ for(i=1;i<NCH;i++){
+ if(symbol[i]){
+ m = 0;
+ j = cindex[i]; /* will be non-zero */
+ for(k=1;k<NCH;k++){
+ if(cindex[k] == j){
+ if(symbol[k]) symbol[k] = 0;
+ else {
+ cindex[k] = ccount;
+ m = 1;
+ }
+ }
+ }
+ if(m)ccount++;
+ }
+ }
+}
+
+int
+usescape(int c)
+{
+ int d;
+ switch(c){
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = 014; break; /* form feed for ascii */
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ c -= '0';
+ while('0' <= (d=gch()) && d <= '7'){
+ c = c * 8 + (d-'0');
+ if(!('0' <= peek && peek <= '7')) break;
+ }
+ break;
+ }
+ return(c);
+}
+
+int
+lookup(uchar *s, uchar **t)
+{
+ int i;
+ i = 0;
+ while(*t){
+ if(strcmp((char *)s, *(char **)t) == 0)
+ return(i);
+ i++;
+ t++;
+ }
+ return(-1);
+}
+
+int
+cpyact(void)
+{ /* copy C action to the next ; or closing } */
+ int brac, c, mth;
+ int savline, sw;
+
+ brac = 0;
+ sw = TRUE;
+ savline = 0;
+
+while(!eof){
+ c = gch();
+swt:
+ switch( c ){
+
+case '|': if(brac == 0 && sw == TRUE){
+ if(peek == '|')gch(); /* eat up an extra '|' */
+ return(0);
+ }
+ break;
+
+case ';':
+ if( brac == 0 ){
+ Bputc(&fout, c);
+ Bputc(&fout, '\n');
+ return(1);
+ }
+ break;
+
+case '{':
+ brac++;
+ savline=yyline;
+ break;
+
+case '}':
+ brac--;
+ if( brac == 0 ){
+ Bputc(&fout, c);
+ Bputc(&fout, '\n');
+ return(1);
+ }
+ break;
+
+case '/': /* look for comments */
+ Bputc(&fout, c);
+ c = gch();
+ if( c != '*' ) goto swt;
+
+ /* it really is a comment */
+
+ Bputc(&fout, c);
+ savline=yyline;
+ while( c=gch() ){
+ if( c=='*' ){
+ Bputc(&fout, c);
+ if( (c=gch()) == '/' ) goto loop;
+ }
+ Bputc(&fout, c);
+ }
+ yyline=savline;
+ error( "EOF inside comment" );
+
+case '\'': /* character constant */
+ mth = '\'';
+ goto string;
+
+case '"': /* character string */
+ mth = '"';
+
+ string:
+
+ Bputc(&fout, c);
+ while( c=gch() ){
+ if( c=='\\' ){
+ Bputc(&fout, c);
+ c=gch();
+ }
+ else if( c==mth ) goto loop;
+ Bputc(&fout, c);
+ if (c == '\n') {
+ yyline--;
+ error( "Non-terminated string or character constant");
+ }
+ }
+ error( "EOF in string or character constant" );
+
+case '\0':
+ yyline = savline;
+ error("Action does not terminate");
+default:
+ break; /* usual character */
+ }
+loop:
+ if(c != ' ' && c != '\t' && c != '\n') sw = FALSE;
+ Bputc(&fout, c);
+ }
+ error("Premature EOF");
+ return(0);
+}
+
+int
+gch(void){
+ int c;
+ prev = pres;
+ c = pres = peek;
+ peek = pushptr > pushc ? *--pushptr : Bgetc(fin);
+ if(peek == Beof && sargc > 1){
+ Bterm(fin);
+ fin = Bopen(sargv[fptr++],OREAD);
+ if(fin == 0)
+ error("Cannot open file %s",sargv[fptr-1]);
+ peek = Bgetc(fin);
+ sargc--;
+ sargv++;
+ }
+ if(c == Beof) {
+ eof = TRUE;
+ Bterm(fin);
+ return(0);
+ }
+ if(c == '\n')yyline++;
+ return(c);
+}
+
+int
+mn2(int a, int d, int c)
+{
+ name[tptr] = a;
+ left[tptr] = d;
+ right[tptr] = c;
+ parent[tptr] = 0;
+ nullstr[tptr] = 0;
+ switch(a){
+ case RSTR:
+ parent[d] = tptr;
+ break;
+ case BAR:
+ case RNEWE:
+ if(nullstr[d] || nullstr[c]) nullstr[tptr] = TRUE;
+ parent[d] = parent[c] = tptr;
+ break;
+ case RCAT:
+ case DIV:
+ if(nullstr[d] && nullstr[c])nullstr[tptr] = TRUE;
+ parent[d] = parent[c] = tptr;
+ break;
+ case RSCON:
+ parent[d] = tptr;
+ nullstr[tptr] = nullstr[d];
+ break;
+# ifdef DEBUG
+ default:
+ warning("bad switch mn2 %d %d",a,d);
+ break;
+# endif
+ }
+ if(tptr > treesize)
+ error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":""));
+ return(tptr++);
+}
+
+int
+mn1(int a, int d)
+{
+ name[tptr] = a;
+ left[tptr] = d;
+ parent[tptr] = 0;
+ nullstr[tptr] = 0;
+ switch(a){
+ case RCCL:
+ case RNCCL:
+ if(strlen((char *)d) == 0) nullstr[tptr] = TRUE;
+ break;
+ case STAR:
+ case QUEST:
+ nullstr[tptr] = TRUE;
+ parent[d] = tptr;
+ break;
+ case PLUS:
+ case CARAT:
+ nullstr[tptr] = nullstr[d];
+ parent[d] = tptr;
+ break;
+ case S2FINAL:
+ nullstr[tptr] = TRUE;
+ break;
+# ifdef DEBUG
+ case FINAL:
+ case S1FINAL:
+ break;
+ default:
+ warning("bad switch mn1 %d %d",a,d);
+ break;
+# endif
+ }
+ if(tptr > treesize)
+ error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":""));
+ return(tptr++);
+}
+
+int
+mn0(int a)
+{
+ name[tptr] = a;
+ parent[tptr] = 0;
+ nullstr[tptr] = 0;
+ if(a >= NCH) switch(a){
+ case RNULLS: nullstr[tptr] = TRUE; break;
+# ifdef DEBUG
+ default:
+ warning("bad switch mn0 %d",a);
+ break;
+# endif
+ }
+ if(tptr > treesize)
+ error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":""));
+ return(tptr++);
+}
+
+void
+munputc(int p)
+{
+ *pushptr++ = peek; /* watch out for this */
+ peek = p;
+ if(pushptr >= pushc+TOKENSIZE)
+ error("Too many characters pushed");
+}
+
+void
+munputs(uchar *p)
+{
+ int i,j;
+ *pushptr++ = peek;
+ peek = p[0];
+ i = strlen((char*)p);
+ for(j = i-1; j>=1; j--)
+ *pushptr++ = p[j];
+ if(pushptr >= pushc+TOKENSIZE)
+ error("Too many characters pushed");
+}
+
+int
+dupl(int n)
+{
+ /* duplicate the subtree whose root is n, return ptr to it */
+ int i;
+
+ i = name[n];
+ if(i < NCH) return(mn0(i));
+ switch(i){
+ case RNULLS:
+ return(mn0(i));
+ case RCCL: case RNCCL: case FINAL: case S1FINAL: case S2FINAL:
+ return(mn1(i,left[n]));
+ case STAR: case QUEST: case PLUS: case CARAT:
+ return(mn1(i,dupl(left[n])));
+ case RSTR: case RSCON:
+ return(mn2(i,dupl(left[n]),right[n]));
+ case BAR: case RNEWE: case RCAT: case DIV:
+ return(mn2(i,dupl(left[n]),dupl(right[n])));
+# ifdef DEBUG
+ default:
+ warning("bad switch dupl %d",n);
+# endif
+ }
+ return(0);
+}
+
+# ifdef DEBUG
+void
+allprint(int c)
+{
+ switch(c){
+ case 014:
+ print("\\f");
+ charc++;
+ break;
+ case '\n':
+ print("\\n");
+ charc++;
+ break;
+ case '\t':
+ print("\\t");
+ charc++;
+ break;
+ case '\b':
+ print("\\b");
+ charc++;
+ break;
+ case ' ':
+ print("\\\bb");
+ break;
+ default:
+ if(!isprint(c)){
+ print("\\%-3o",c);
+ charc += 3;
+ } else
+ print("%c", c);
+ break;
+ }
+ charc++;
+}
+
+void
+strpt(uchar *s)
+{
+ charc = 0;
+ while(*s){
+ allprint(*s++);
+ if(charc > LINESIZE){
+ charc = 0;
+ print("\n\t");
+ }
+ }
+}
+
+void
+sect1dump(void)
+{
+ int i;
+
+ print("Sect 1:\n");
+ if(def[0]){
+ print("str trans\n");
+ i = -1;
+ while(def[++i])
+ print("%s\t%s\n",def[i],subs[i]);
+ }
+ if(sname[0]){
+ print("start names\n");
+ i = -1;
+ while(sname[++i])
+ print("%s\n",sname[i]);
+ }
+}
+
+void
+sect2dump(void)
+{
+ print("Sect 2:\n");
+ treedump();
+}
+
+void
+treedump(void)
+{
+ int t;
+ uchar *p;
+ print("treedump %d nodes:\n",tptr);
+ for(t=0;t<tptr;t++){
+ print("%4d ",t);
+ parent[t] ? print("p=%4d",parent[t]) : print(" ");
+ print(" ");
+ if(name[t] < NCH)
+ allprint(name[t]);
+ else switch(name[t]){
+ case RSTR:
+ print("%d ",left[t]);
+ allprint(right[t]);
+ break;
+ case RCCL:
+ print("ccl ");
+ strpt(left[t]);
+ break;
+ case RNCCL:
+ print("nccl ");
+ strpt(left[t]);
+ break;
+ case DIV:
+ print("/ %d %d",left[t],right[t]);
+ break;
+ case BAR:
+ print("| %d %d",left[t],right[t]);
+ break;
+ case RCAT:
+ print("cat %d %d",left[t],right[t]);
+ break;
+ case PLUS:
+ print("+ %d",left[t]);
+ break;
+ case STAR:
+ print("* %d",left[t]);
+ break;
+ case CARAT:
+ print("^ %d",left[t]);
+ break;
+ case QUEST:
+ print("? %d",left[t]);
+ break;
+ case RNULLS:
+ print("nullstring");
+ break;
+ case FINAL:
+ print("final %d",left[t]);
+ break;
+ case S1FINAL:
+ print("s1final %d",left[t]);
+ break;
+ case S2FINAL:
+ print("s2final %d",left[t]);
+ break;
+ case RNEWE:
+ print("new %d %d",left[t],right[t]);
+ break;
+ case RSCON:
+ p = (uchar *)right[t];
+ print("start %s",sname[*p++-1]);
+ while(*p)
+ print(", %s",sname[*p++-1]);
+ print(" %d",left[t]);
+ break;
+ default:
+ print("unknown %d %d %d",name[t],left[t],right[t]);
+ break;
+ }
+ if(nullstr[t])print("\t(null poss.)");
+ print("\n");
+ }
+}
+# endif
diff --git a/src/cmd/lex/sub2.c b/src/cmd/lex/sub2.c
new file mode 100644
index 00000000..f9950a8d
--- /dev/null
+++ b/src/cmd/lex/sub2.c
@@ -0,0 +1,856 @@
+# include "ldefs.h"
+void
+cfoll(int v)
+{
+ int i,j,k;
+ uchar *p;
+ i = name[v];
+ if(i < NCH) i = 1; /* character */
+ switch(i){
+ case 1: case RSTR: case RCCL: case RNCCL: case RNULLS:
+ for(j=0;j<tptr;j++)
+ tmpstat[j] = FALSE;
+ count = 0;
+ follow(v);
+# ifdef PP
+ padd(foll,v); /* packing version */
+# endif
+# ifndef PP
+ add(foll,v); /* no packing version */
+# endif
+ if(i == RSTR) cfoll(left[v]);
+ else if(i == RCCL || i == RNCCL){ /* compress ccl list */
+ for(j=1; j<NCH;j++)
+ symbol[j] = (i==RNCCL);
+ p = (uchar *)left[v];
+ while(*p)
+ symbol[*p++] = (i == RCCL);
+ p = pcptr;
+ for(j=1;j<NCH;j++)
+ if(symbol[j]){
+ for(k=0;p+k < pcptr; k++)
+ if(cindex[j] == *(p+k))
+ break;
+ if(p+k >= pcptr)*pcptr++ = cindex[j];
+ }
+ *pcptr++ = 0;
+ if(pcptr > pchar + pchlen)
+ error("Too many packed character classes");
+ left[v] = (int)p;
+ name[v] = RCCL; /* RNCCL eliminated */
+# ifdef DEBUG
+ if(debug && *p){
+ print("ccl %d: %d",v,*p++);
+ while(*p)
+ print(", %d",*p++);
+ print("\n");
+ }
+# endif
+ }
+ break;
+ case CARAT:
+ cfoll(left[v]);
+ break;
+ case STAR: case PLUS: case QUEST: case RSCON:
+ cfoll(left[v]);
+ break;
+ case BAR: case RCAT: case DIV: case RNEWE:
+ cfoll(left[v]);
+ cfoll(right[v]);
+ break;
+# ifdef DEBUG
+ case FINAL:
+ case S1FINAL:
+ case S2FINAL:
+ break;
+ default:
+ warning("bad switch cfoll %d",v);
+# endif
+ }
+}
+
+# ifdef DEBUG
+void
+pfoll(void)
+ {
+ int i,k,*p;
+ int j;
+ /* print sets of chars which may follow positions */
+ print("pos\tchars\n");
+ for(i=0;i<tptr;i++)
+ if(p=foll[i]){
+ j = *p++;
+ if(j >= 1){
+ print("%d:\t%d",i,*p++);
+ for(k=2;k<=j;k++)
+ print(", %d",*p++);
+ print("\n");
+ }
+ }
+}
+# endif
+
+void
+add(int **array, int n)
+{
+ int i, *temp;
+ uchar *ctemp;
+
+ temp = nxtpos;
+ ctemp = tmpstat;
+ array[n] = nxtpos; /* note no packing is done in positions */
+ *temp++ = count;
+ for(i=0;i<tptr;i++)
+ if(ctemp[i] == TRUE)
+ *temp++ = i;
+ nxtpos = temp;
+ if(nxtpos >= positions+maxpos)
+ error("Too many positions %s",(maxpos== MAXPOS?"\nTry using %p num":""));
+ return;
+}
+
+void
+follow(int v)
+{
+ int p;
+ if(v >= tptr-1)return;
+ p = parent[v];
+ if(p == 0) return;
+ switch(name[p]){
+ /* will not be CHAR RNULLS FINAL S1FINAL S2FINAL RCCL RNCCL */
+ case RSTR:
+ if(tmpstat[p] == FALSE){
+ count++;
+ tmpstat[p] = TRUE;
+ }
+ break;
+ case STAR: case PLUS:
+ first(v);
+ follow(p);
+ break;
+ case BAR: case QUEST: case RNEWE:
+ follow(p);
+ break;
+ case RCAT: case DIV:
+ if(v == left[p]){
+ if(nullstr[right[p]])
+ follow(p);
+ first(right[p]);
+ }
+ else follow(p);
+ break;
+ case RSCON: case CARAT:
+ follow(p);
+ break;
+# ifdef DEBUG
+ default:
+ warning("bad switch follow %d",p);
+# endif
+ }
+}
+
+void
+first(int v) /* calculate set of positions with v as root which can be active initially */
+{
+ int i;
+ uchar *p;
+ i = name[v];
+ if(i < NCH)i = 1;
+ switch(i){
+ case 1: case RCCL: case RNCCL: case RNULLS: case FINAL: case S1FINAL: case S2FINAL:
+ if(tmpstat[v] == FALSE){
+ count++;
+ tmpstat[v] = TRUE;
+ }
+ break;
+ case BAR: case RNEWE:
+ first(left[v]);
+ first(right[v]);
+ break;
+ case CARAT:
+ if(stnum % 2 == 1)
+ first(left[v]);
+ break;
+ case RSCON:
+ i = stnum/2 +1;
+ p = (uchar *)right[v];
+ while(*p)
+ if(*p++ == i){
+ first(left[v]);
+ break;
+ }
+ break;
+ case STAR: case QUEST: case PLUS: case RSTR:
+ first(left[v]);
+ break;
+ case RCAT: case DIV:
+ first(left[v]);
+ if(nullstr[left[v]])
+ first(right[v]);
+ break;
+# ifdef DEBUG
+ default:
+ warning("bad switch first %d",v);
+# endif
+ }
+}
+
+void
+cgoto(void)
+{
+ int i, j, s;
+ int npos, curpos, n;
+ int tryit;
+ uchar tch[NCH];
+ int tst[NCH];
+ uchar *q;
+
+ /* generate initial state, for each start condition */
+ Bprint(&fout,"int yyvstop[] = {\n0,\n");
+ while(stnum < 2 || stnum/2 < sptr){
+ for(i = 0; i<tptr; i++) tmpstat[i] = 0;
+ count = 0;
+ if(tptr > 0)first(tptr-1);
+ add(state,stnum);
+# ifdef DEBUG
+ if(debug){
+ if(stnum > 1)
+ print("%s:\n",sname[stnum/2]);
+ pstate(stnum);
+ }
+# endif
+ stnum++;
+ }
+ stnum--;
+ /* even stnum = might not be at line begin */
+ /* odd stnum = must be at line begin */
+ /* even states can occur anywhere, odd states only at line begin */
+ for(s = 0; s <= stnum; s++){
+ tryit = FALSE;
+ cpackflg[s] = FALSE;
+ sfall[s] = -1;
+ acompute(s);
+ for(i=0;i<NCH;i++) symbol[i] = 0;
+ npos = *state[s];
+ for(i = 1; i<=npos; i++){
+ curpos = *(state[s]+i);
+ if(name[curpos] < NCH) symbol[name[curpos]] = TRUE;
+ else switch(name[curpos]){
+ case RCCL:
+ tryit = TRUE;
+ q = (uchar *)left[curpos];
+ while(*q){
+ for(j=1;j<NCH;j++)
+ if(cindex[j] == *q)
+ symbol[j] = TRUE;
+ q++;
+ }
+ break;
+ case RSTR:
+ symbol[right[curpos]] = TRUE;
+ break;
+# ifdef DEBUG
+ case RNULLS:
+ case FINAL:
+ case S1FINAL:
+ case S2FINAL:
+ break;
+ default:
+ warning("bad switch cgoto %d state %d",curpos,s);
+ break;
+# endif
+ }
+ }
+# ifdef DEBUG
+ if(debug){
+ print("State %d transitions on:\n\t",s);
+ charc = 0;
+ for(i = 1; i<NCH; i++){
+ if(symbol[i]) allprint(i);
+ if(charc > LINESIZE){
+ charc = 0;
+ print("\n\t");
+ }
+ }
+ print("\n");
+ }
+# endif
+ /* for each char, calculate next state */
+ n = 0;
+ for(i = 1; i<NCH; i++){
+ if(symbol[i]){
+ nextstate(s,i); /* executed for each state, transition pair */
+ xstate = notin(stnum);
+ if(xstate == -2) warning("bad state %d %o",s,i);
+ else if(xstate == -1){
+ if(stnum >= nstates)
+ error("Too many states %s",(nstates == NSTATES ? "\nTry using %n num":""));
+ add(state,++stnum);
+# ifdef DEBUG
+ if(debug)pstate(stnum);
+# endif
+ tch[n] = i;
+ tst[n++] = stnum;
+ } else { /* xstate >= 0 ==> state exists */
+ tch[n] = i;
+ tst[n++] = xstate;
+ }
+ }
+ }
+ tch[n] = 0;
+ tst[n] = -1;
+ /* pack transitions into permanent array */
+ if(n > 0) packtrans(s,tch,tst,n,tryit);
+ else gotof[s] = -1;
+ }
+ Bprint(&fout,"0};\n");
+}
+
+/* Beware -- 70% of total CPU time is spent in this subroutine -
+ if you don't believe me - try it yourself ! */
+void
+nextstate(int s, int c)
+{
+ int j, *newpos;
+ uchar *temp, *tz;
+ int *pos, i, *f, num, curpos, number;
+ /* state to goto from state s on char c */
+ num = *state[s];
+ temp = tmpstat;
+ pos = state[s] + 1;
+ for(i = 0; i<num; i++){
+ curpos = *pos++;
+ j = name[curpos];
+ if(j < NCH && j == c
+ || j == RSTR && c == right[curpos]
+ || j == RCCL && member(c, (uchar *)left[curpos])){
+ f = foll[curpos];
+ number = *f;
+ newpos = f+1;
+ for(j=0;j<number;j++)
+ temp[*newpos++] = 2;
+ }
+ }
+ j = 0;
+ tz = temp + tptr;
+ while(temp < tz){
+ if(*temp == 2){
+ j++;
+ *temp++ = 1;
+ }
+ else *temp++ = 0;
+ }
+ count = j;
+}
+
+int
+notin(int n)
+{ /* see if tmpstat occurs previously */
+ int *j,k;
+ uchar *temp;
+ int i;
+
+ if(count == 0)
+ return(-2);
+ temp = tmpstat;
+ for(i=n;i>=0;i--){ /* for each state */
+ j = state[i];
+ if(count == *j++){
+ for(k=0;k<count;k++)
+ if(!temp[*j++])break;
+ if(k >= count)
+ return(i);
+ }
+ }
+ return(-1);
+}
+
+void
+packtrans(int st, uchar *tch, int *tst, int cnt, int tryit)
+{
+ /* pack transitions into nchar, nexts */
+ /* nchar is terminated by '\0', nexts uses cnt, followed by elements */
+ /* gotof[st] = index into nchr, nexts for state st */
+
+ /* sfall[st] = t implies t is fall back state for st */
+ /* == -1 implies no fall back */
+
+ int i,j,k;
+ int cmin, cval, tcnt, diff, p, *ast;
+ uchar *ach;
+ int go[NCH], temp[NCH], c;
+ int swork[NCH];
+ uchar cwork[NCH];
+ int upper;
+
+ rcount += cnt;
+ cmin = -1;
+ cval = NCH;
+ ast = tst;
+ ach = tch;
+ /* try to pack transitions using ccl's */
+ if(tryit){ /* ccl's used */
+ for(i=1;i<NCH;i++){
+ go[i] = temp[i] = -1;
+ symbol[i] = 1;
+ }
+ for(i=0;i<cnt;i++){
+ go[tch[i]] = tst[i];
+ symbol[tch[i]] = 0;
+ }
+ for(i=0; i<cnt;i++){
+ c = match[tch[i]];
+ if(go[c] != tst[i] || c == tch[i])
+ temp[tch[i]] = tst[i];
+ }
+ /* fill in error entries */
+ for(i=1;i<NCH;i++)
+ if(symbol[i]) temp[i] = -2; /* error trans */
+ /* count them */
+ k = 0;
+ for(i=1;i<NCH;i++)
+ if(temp[i] != -1)k++;
+ if(k <cnt){ /* compress by char */
+# ifdef DEBUG
+ if(debug) print("use compression %d, %d vs %d\n",st,k,cnt);
+# endif
+ k = 0;
+ for(i=1;i<NCH;i++)
+ if(temp[i] != -1){
+ cwork[k] = i;
+ swork[k++] = (temp[i] == -2 ? -1 : temp[i]);
+ }
+ cwork[k] = 0;
+# ifdef PC
+ ach = cwork;
+ ast = swork;
+ cnt = k;
+ cpackflg[st] = TRUE;
+# endif
+ }
+ }
+ for(i=0; i<st; i++){ /* get most similar state */
+ /* reject state with more transitions, state already represented by a third state,
+ and state which is compressed by char if ours is not to be */
+ if(sfall[i] != -1) continue;
+ if(cpackflg[st] == 1) if(!(cpackflg[i] == 1)) continue;
+ p = gotof[i];
+ if(p == -1) /* no transitions */ continue;
+ tcnt = nexts[p];
+ if(tcnt > cnt) continue;
+ diff = 0;
+ j = 0;
+ upper = p + tcnt;
+ while(ach[j] && p < upper){
+ while(ach[j] < nchar[p] && ach[j]){diff++; j++; }
+ if(ach[j] == 0)break;
+ if(ach[j] > nchar[p]){diff=NCH;break;}
+ /* ach[j] == nchar[p] */
+ if(ast[j] != nexts[++p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]]))diff++;
+ j++;
+ }
+ while(ach[j]){
+ diff++;
+ j++;
+ }
+ if(p < upper)diff = NCH;
+ if(diff < cval && diff < tcnt){
+ cval = diff;
+ cmin = i;
+ if(cval == 0)break;
+ }
+ }
+ /* cmin = state "most like" state st */
+# ifdef DEBUG
+ if(debug)print("select st %d for st %d diff %d\n",cmin,st,cval);
+# endif
+# ifdef PS
+ if(cmin != -1){ /* if we can use st cmin */
+ gotof[st] = nptr;
+ k = 0;
+ sfall[st] = cmin;
+ p = gotof[cmin]+1;
+ j = 0;
+ while(ach[j]){
+ /* if cmin has a transition on c, then so will st */
+ /* st may be "larger" than cmin, however */
+ while(ach[j] < nchar[p-1] && ach[j]){
+ k++;
+ nchar[nptr] = ach[j];
+ nexts[++nptr] = ast[j];
+ j++;
+ }
+ if(nchar[p-1] == 0)break;
+ if(ach[j] > nchar[p-1]){
+ warning("bad transition %d %d",st,cmin);
+ goto nopack;
+ }
+ /* ach[j] == nchar[p-1] */
+ if(ast[j] != nexts[p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]])){
+ k++;
+ nchar[nptr] = ach[j];
+ nexts[++nptr] = ast[j];
+ }
+ p++;
+ j++;
+ }
+ while(ach[j]){
+ nchar[nptr] = ach[j];
+ nexts[++nptr] = ast[j++];
+ k++;
+ }
+ nexts[gotof[st]] = cnt = k;
+ nchar[nptr++] = 0;
+ } else {
+# endif
+nopack:
+ /* stick it in */
+ gotof[st] = nptr;
+ nexts[nptr] = cnt;
+ for(i=0;i<cnt;i++){
+ nchar[nptr] = ach[i];
+ nexts[++nptr] = ast[i];
+ }
+ nchar[nptr++] = 0;
+# ifdef PS
+ }
+# endif
+ if(cnt < 1){
+ gotof[st] = -1;
+ nptr--;
+ } else
+ if(nptr > ntrans)
+ error("Too many transitions %s",(ntrans==NTRANS?"\nTry using %a num":""));
+}
+
+# ifdef DEBUG
+void
+pstate(int s)
+{
+ int *p,i,j;
+ print("State %d:\n",s);
+ p = state[s];
+ i = *p++;
+ if(i == 0) return;
+ print("%4d",*p++);
+ for(j = 1; j<i; j++){
+ print(", %4d",*p++);
+ if(j%30 == 0)print("\n");
+ }
+ print("\n");
+ return;
+}
+# endif
+
+int
+member(int d, uchar *t)
+{
+ int c;
+ uchar *s;
+
+ c = d;
+ s = t;
+ c = cindex[c];
+ while(*s)
+ if(*s++ == c) return(1);
+ return(0);
+}
+
+# ifdef DEBUG
+void
+stprt(int i)
+{
+ int p, t;
+
+ print("State %d:",i);
+ /* print actions, if any */
+ t = atable[i];
+ if(t != -1)print(" final");
+ print("\n");
+ if(cpackflg[i] == TRUE)print("backup char in use\n");
+ if(sfall[i] != -1)print("fall back state %d\n",sfall[i]);
+ p = gotof[i];
+ if(p == -1) return;
+ print("(%d transitions)\n",nexts[p]);
+ while(nchar[p]){
+ charc = 0;
+ if(nexts[p+1] >= 0)
+ print("%d\t",nexts[p+1]);
+ else print("err\t");
+ allprint(nchar[p++]);
+ while(nexts[p] == nexts[p+1] && nchar[p]){
+ if(charc > LINESIZE){
+ charc = 0;
+ print("\n\t");
+ }
+ allprint(nchar[p++]);
+ }
+ print("\n");
+ }
+ print("\n");
+}
+# endif
+
+void
+acompute(int s) /* compute action list = set of poss. actions */
+{
+ int *p, i, j;
+ int cnt, m;
+ int temp[300], k, neg[300], n;
+
+ k = 0;
+ n = 0;
+ p = state[s];
+ cnt = *p++;
+ if(cnt > 300)
+ error("Too many positions for one state - acompute");
+ for(i=0;i<cnt;i++){
+ if(name[*p] == FINAL)temp[k++] = left[*p];
+ else if(name[*p] == S1FINAL){temp[k++] = left[*p];
+ if (left[*p] >= NACTIONS) error("Too many right contexts");
+ extra[left[*p]] = 1;
+ } else if(name[*p] == S2FINAL)neg[n++] = left[*p];
+ p++;
+ }
+ atable[s] = -1;
+ if(k < 1 && n < 1) return;
+# ifdef DEBUG
+ if(debug) print("final %d actions:",s);
+# endif
+ /* sort action list */
+ for(i=0; i<k; i++)
+ for(j=i+1;j<k;j++)
+ if(temp[j] < temp[i]){
+ m = temp[j];
+ temp[j] = temp[i];
+ temp[i] = m;
+ }
+ /* remove dups */
+ for(i=0;i<k-1;i++)
+ if(temp[i] == temp[i+1]) temp[i] = 0;
+ /* copy to permanent quarters */
+ atable[s] = aptr;
+# ifdef DEBUG
+ Bprint(&fout,"/* actions for state %d */",s);
+# endif
+ Bputc(&fout, '\n');
+ for(i=0;i<k;i++)
+ if(temp[i] != 0){
+ Bprint(&fout,"%d,\n",temp[i]);
+# ifdef DEBUG
+ if(debug)
+ print("%d ",temp[i]);
+# endif
+ aptr++;
+ }
+ for(i=0;i<n;i++){ /* copy fall back actions - all neg */
+ Bprint(&fout,"%d,\n",neg[i]);
+ aptr++;
+# ifdef DEBUG
+ if(debug)print("%d ",neg[i]);
+# endif
+ }
+# ifdef DEBUG
+ if(debug)print("\n");
+# endif
+ Bprint(&fout,"0,\n");
+ aptr++;
+ return;
+}
+
+# ifdef DEBUG
+void
+pccl(void) {
+ /* print character class sets */
+ int i, j;
+
+ print("char class intersection\n");
+ for(i=0; i< ccount; i++){
+ charc = 0;
+ print("class %d:\n\t",i);
+ for(j=1;j<NCH;j++)
+ if(cindex[j] == i){
+ allprint(j);
+ if(charc > LINESIZE){
+ print("\n\t");
+ charc = 0;
+ }
+ }
+ print("\n");
+ }
+ charc = 0;
+ print("match:\n");
+ for(i=0;i<NCH;i++){
+ allprint(match[i]);
+ if(charc > LINESIZE){
+ print("\n");
+ charc = 0;
+ }
+ }
+ print("\n");
+ return;
+}
+# endif
+
+void
+mkmatch(void)
+{
+ int i;
+ uchar tab[NCH];
+
+ for(i=0; i<ccount; i++)
+ tab[i] = 0;
+ for(i=1;i<NCH;i++)
+ if(tab[cindex[i]] == 0)
+ tab[cindex[i]] = i;
+ /* tab[i] = principal char for new ccl i */
+ for(i = 1; i<NCH; i++)
+ match[i] = tab[cindex[i]];
+ return;
+}
+
+void
+layout(void)
+{
+ /* format and output final program's tables */
+ int i, j, k;
+ int top, bot, startup, omin;
+
+ for(i=0; i<outsize;i++)
+ verify[i] = advance[i] = 0;
+ omin = 0;
+ yytop = 0;
+ for(i=0; i<= stnum; i++){ /* for each state */
+ j = gotof[i];
+ if(j == -1){
+ stoff[i] = 0;
+ continue;
+ }
+ bot = j;
+ while(nchar[j])j++;
+ top = j - 1;
+# ifdef DEBUG
+ if (debug) {
+ print("State %d: (layout)\n", i);
+ for(j=bot; j<=top;j++) {
+ print(" %o", nchar[j]);
+ if (j%10==0) print("\n");
+ }
+ print("\n");
+ }
+# endif
+ while(verify[omin+NCH]) omin++;
+ startup = omin;
+# ifdef DEBUG
+ if (debug) print("bot,top %d, %d startup begins %d\n",bot,top,startup);
+# endif
+ do {
+ startup += 1;
+ if(startup > outsize - NCH)
+ error("output table overflow");
+ for(j = bot; j<= top; j++){
+ k = startup + nchar[j];
+ if(verify[k])break;
+ }
+ } while (j <= top);
+ /* have found place */
+# ifdef DEBUG
+ if (debug) print(" startup going to be %d\n", startup);
+# endif
+ for(j = bot; j<= top; j++){
+ k = startup + nchar[j];
+ verify[k] = i+1; /* state number + 1*/
+ advance[k] = nexts[j+1]+1; /* state number + 1*/
+ if(yytop < k) yytop = k;
+ }
+ stoff[i] = startup;
+ }
+
+ /* stoff[i] = offset into verify, advance for trans for state i */
+ /* put out yywork */
+ Bprint(&fout,"# define YYTYPE %s\n",stnum+1 >= NCH ? "int" : "Uchar");
+ Bprint(&fout,"struct yywork { YYTYPE verify, advance; } yycrank[] = {\n");
+ for(i=0;i<=yytop;i+=4){
+ for(j=0;j<4;j++){
+ k = i+j;
+ if(verify[k])
+ Bprint(&fout,"%d,%d,\t",verify[k],advance[k]);
+ else
+ Bprint(&fout,"0,0,\t");
+ }
+ Bputc(&fout, '\n');
+ }
+ Bprint(&fout,"0,0};\n");
+
+ /* put out yysvec */
+
+ Bprint(&fout,"struct yysvf yysvec[] = {\n");
+ Bprint(&fout,"0,\t0,\t0,\n");
+ for(i=0;i<=stnum;i++){ /* for each state */
+ if(cpackflg[i])stoff[i] = -stoff[i];
+ Bprint(&fout,"yycrank+%d,\t",stoff[i]);
+ if(sfall[i] != -1)
+ Bprint(&fout,"yysvec+%d,\t", sfall[i]+1); /* state + 1 */
+ else Bprint(&fout,"0,\t\t");
+ if(atable[i] != -1)
+ Bprint(&fout,"yyvstop+%d,",atable[i]);
+ else Bprint(&fout,"0,\t");
+# ifdef DEBUG
+ Bprint(&fout,"\t\t/* state %d */",i);
+# endif
+ Bputc(&fout, '\n');
+ }
+ Bprint(&fout,"0,\t0,\t0};\n");
+
+ /* put out yymatch */
+
+ Bprint(&fout,"struct yywork *yytop = yycrank+%d;\n",yytop);
+ Bprint(&fout,"struct yysvf *yybgin = yysvec+1;\n");
+ Bprint(&fout,"Uchar yymatch[] = {\n");
+ for(i=0; i<NCH; i+=8){
+ for(j=0; j<8; j++){
+ int fbch;
+ fbch = match[i+j];
+ if(isprint(fbch) && fbch != '\'' && fbch != '\\')
+ Bprint(&fout,"'%c' ,",fbch);
+ else Bprint(&fout,"0%-3o,",fbch);
+ }
+ Bputc(&fout, '\n');
+ }
+ Bprint(&fout,"0};\n");
+ /* put out yyextra */
+ Bprint(&fout,"Uchar yyextra[] = {\n");
+ for(i=0;i<casecount;i+=8){
+ for(j=0;j<8;j++)
+ Bprint(&fout, "%d,", i+j<NACTIONS ?
+ extra[i+j] : 0);
+ Bputc(&fout, '\n');
+ }
+ Bprint(&fout,"0};\n");
+}
+
+# ifdef PP
+void
+padd(int **array, int n)
+{
+ int i, *j, k;
+
+ array[n] = nxtpos;
+ if(count == 0){
+ *nxtpos++ = 0;
+ return;
+ }
+ for(i=tptr-1;i>=0;i--){
+ j = array[i];
+ if(j && *j++ == count){
+ for(k=0;k<count;k++)
+ if(!tmpstat[*j++])break;
+ if(k >= count){
+ array[n] = array[i];
+ return;
+ }
+ }
+ }
+ add(array,n);
+}
+# endif