aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/lex/parser.y
diff options
context:
space:
mode:
authorwkj <devnull@localhost>2004-04-21 01:22:09 +0000
committerwkj <devnull@localhost>2004-04-21 01:22:09 +0000
commite37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41 (patch)
treec4d4944ccfc614f0ad0ad8aa16126e336f210853 /src/cmd/lex/parser.y
parent7d3bbe16529792999d0627256748f9b780c084f3 (diff)
downloadplan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.tar.gz
plan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.tar.bz2
plan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.zip
Plan 9 lex (to be installed as lex.9, if at all).
Diffstat (limited to 'src/cmd/lex/parser.y')
-rw-r--r--src/cmd/lex/parser.y651
1 files changed, 651 insertions, 0 deletions
diff --git a/src/cmd/lex/parser.y b/src/cmd/lex/parser.y
new file mode 100644
index 00000000..5ac33a28
--- /dev/null
+++ b/src/cmd/lex/parser.y
@@ -0,0 +1,651 @@
+%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS
+%left SCON '/' NEWE
+%left '|'
+%left '$' '^'
+%left CHAR CCL NCCL '(' '.' STR NULLS
+%left ITER
+%left CAT
+%left '*' '+' '?'
+
+%{
+# include "ldefs.h"
+#define YYSTYPE union _yystype_
+union _yystype_
+{
+ int i;
+ uchar *cp;
+};
+%}
+%%
+%{
+int i;
+int j,k;
+int g;
+uchar *p;
+%}
+acc : lexinput
+ ={
+# ifdef DEBUG
+ if(debug) sect2dump();
+# endif
+ }
+ ;
+lexinput: defns delim prods end
+ | defns delim end
+ ={
+ if(!funcflag)phead2();
+ funcflag = TRUE;
+ }
+ | error
+ ={
+# ifdef DEBUG
+ if(debug) {
+ sect1dump();
+ sect2dump();
+ }
+# endif
+ }
+ ;
+end: delim | ;
+defns: defns STR STR
+ ={ strcpy((char*)dp,(char*)$2.cp);
+ def[dptr] = dp;
+ dp += strlen((char*)$2.cp) + 1;
+ strcpy((char*)dp,(char*)$3.cp);
+ subs[dptr++] = dp;
+ if(dptr >= DEFSIZE)
+ error("Too many definitions");
+ dp += strlen((char*)$3.cp) + 1;
+ if(dp >= dchar+DEFCHAR)
+ error("Definitions too long");
+ subs[dptr]=def[dptr]=0; /* for lookup - require ending null */
+ }
+ |
+ ;
+delim: DELIM
+ ={
+# ifdef DEBUG
+ if(sect == DEFSECTION && debug) sect1dump();
+# endif
+ sect++;
+ }
+ ;
+prods: prods pr
+ ={ $$.i = mn2(RNEWE,$1.i,$2.i);
+ }
+ | pr
+ ={ $$.i = $1.i;}
+ ;
+pr: r NEWE
+ ={
+ if(divflg == TRUE)
+ i = mn1(S1FINAL,casecount);
+ else i = mn1(FINAL,casecount);
+ $$.i = mn2(RCAT,$1.i,i);
+ divflg = FALSE;
+ casecount++;
+ }
+ | error NEWE
+ ={
+# ifdef DEBUG
+ if(debug) sect2dump();
+# endif
+ }
+r: CHAR
+ ={ $$.i = mn0($1.i); }
+ | STR
+ ={
+ p = $1.cp;
+ i = mn0(*p++);
+ while(*p)
+ i = mn2(RSTR,i,*p++);
+ $$.i = i;
+ }
+ | '.'
+ ={ symbol['\n'] = 0;
+ if(psave == FALSE){
+ p = ccptr;
+ psave = ccptr;
+ for(i=1;i<'\n';i++){
+ symbol[i] = 1;
+ *ccptr++ = i;
+ }
+ for(i='\n'+1;i<NCH;i++){
+ symbol[i] = 1;
+ *ccptr++ = i;
+ }
+ *ccptr++ = 0;
+ if(ccptr > ccl+CCLSIZE)
+ error("Too many large character classes");
+ }
+ else
+ p = psave;
+ $$.i = mn1(RCCL,(int)p);
+ cclinter(1);
+ }
+ | CCL
+ ={ $$.i = mn1(RCCL,$1.i); }
+ | NCCL
+ ={ $$.i = mn1(RNCCL,$1.i); }
+ | r '*'
+ ={ $$.i = mn1(STAR,$1.i); }
+ | r '+'
+ ={ $$.i = mn1(PLUS,$1.i); }
+ | r '?'
+ ={ $$.i = mn1(QUEST,$1.i); }
+ | r '|' r
+ ={ $$.i = mn2(BAR,$1.i,$3.i); }
+ | r r %prec CAT
+ ={ $$.i = mn2(RCAT,$1.i,$2.i); }
+ | r '/' r
+ ={ if(!divflg){
+ j = mn1(S2FINAL,-casecount);
+ i = mn2(RCAT,$1.i,j);
+ $$.i = mn2(DIV,i,$3.i);
+ }
+ else {
+ $$.i = mn2(RCAT,$1.i,$3.i);
+ warning("Extra slash removed");
+ }
+ divflg = TRUE;
+ }
+ | r ITER ',' ITER '}'
+ ={ if($2.i > $4.i){
+ i = $2.i;
+ $2.i = $4.i;
+ $4.i = i;
+ }
+ if($4.i <= 0)
+ warning("Iteration range must be positive");
+ else {
+ j = $1.i;
+ for(k = 2; k<=$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ for(i = $2.i+1; i<=$4.i; i++){
+ g = dupl($1.i);
+ for(k=2;k<=i;k++)
+ g = mn2(RCAT,g,dupl($1.i));
+ j = mn2(BAR,j,g);
+ }
+ $$.i = j;
+ }
+ }
+ | r ITER '}'
+ ={
+ if($2.i < 0)warning("Can't have negative iteration");
+ else if($2.i == 0) $$.i = mn0(RNULLS);
+ else {
+ j = $1.i;
+ for(k=2;k<=$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ $$.i = j;
+ }
+ }
+ | r ITER ',' '}'
+ ={
+ /* from n to infinity */
+ if($2.i < 0)warning("Can't have negative iteration");
+ else if($2.i == 0) $$.i = mn1(STAR,$1.i);
+ else if($2.i == 1)$$.i = mn1(PLUS,$1.i);
+ else { /* >= 2 iterations minimum */
+ j = $1.i;
+ for(k=2;k<$2.i;k++)
+ j = mn2(RCAT,j,dupl($1.i));
+ k = mn1(PLUS,dupl($1.i));
+ $$.i = mn2(RCAT,j,k);
+ }
+ }
+ | SCON r
+ ={ $$.i = mn2(RSCON,$2.i,$1.i); }
+ | '^' r
+ ={ $$.i = mn1(CARAT,$2.i); }
+ | r '$'
+ ={ i = mn0('\n');
+ if(!divflg){
+ j = mn1(S2FINAL,-casecount);
+ k = mn2(RCAT,$1.i,j);
+ $$.i = mn2(DIV,k,i);
+ }
+ else $$.i = mn2(RCAT,$1.i,i);
+ divflg = TRUE;
+ }
+ | '(' r ')'
+ ={ $$.i = $2.i; }
+ | NULLS
+ ={ $$.i = mn0(RNULLS); }
+ ;
+%%
+int
+yylex(void)
+{
+ uchar *p;
+ int c, i;
+ uchar *t, *xp;
+ int n, j, k, x;
+ static int sectbegin;
+ static uchar token[TOKENSIZE];
+ static int iter;
+
+# ifdef DEBUG
+ yylval.i = 0;
+# endif
+
+ if(sect == DEFSECTION) { /* definitions section */
+ while(!eof) {
+ if(prev == '\n'){ /* next char is at beginning of line */
+ getl(p=buf);
+ switch(*p){
+ case '%':
+ switch(*(p+1)){
+ case '%':
+ lgate();
+ Bprint(&fout,"#define YYNEWLINE %d\n",'\n');
+ Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n");
+ sectbegin = TRUE;
+ i = treesize*(sizeof(*name)+sizeof(*left)+
+ sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA;
+ p = myalloc(i,1);
+ if(p == 0)
+ error("Too little core for parse tree");
+ free(p);
+ name = myalloc(treesize,sizeof(*name));
+ left = myalloc(treesize,sizeof(*left));
+ right = myalloc(treesize,sizeof(*right));
+ nullstr = myalloc(treesize,sizeof(*nullstr));
+ parent = myalloc(treesize,sizeof(*parent));
+ if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0)
+ error("Too little core for parse tree");
+ return(freturn(DELIM));
+ case 'p': case 'P': /* has overridden number of positions */
+ while(*p && !isdigit(*p))p++;
+ maxpos = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print("positions (%%p) now %d\n",maxpos);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'n': case 'N': /* has overridden number of states */
+ while(*p && !isdigit(*p))p++;
+ nstates = atol((char*)p);
+# ifdef DEBUG
+ if(debug)print( " no. states (%%n) now %d\n",nstates);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'e': case 'E': /* has overridden number of tree nodes */
+ while(*p && !isdigit(*p))p++;
+ treesize = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print("treesize (%%e) now %d\n",treesize);
+# endif
+ if(report == 2)report = 1;
+ continue;
+ case 'o': case 'O':
+ while (*p && !isdigit(*p))p++;
+ outsize = atol((char*)p);
+ if (report ==2) report=1;
+ continue;
+ case 'a': case 'A': /* has overridden number of transitions */
+ while(*p && !isdigit(*p))p++;
+ if(report == 2)report = 1;
+ ntrans = atol((char*)p);
+# ifdef DEBUG
+ if (debug)print("N. trans (%%a) now %d\n",ntrans);
+# endif
+ continue;
+ case 'k': case 'K': /* overriden packed char classes */
+ while (*p && !isdigit(*p))p++;
+ if (report==2) report=1;
+ free(pchar);
+ pchlen = atol((char*)p);
+# ifdef DEBUG
+ if (debug) print( "Size classes (%%k) now %d\n",pchlen);
+# endif
+ pchar=pcptr=myalloc(pchlen, sizeof(*pchar));
+ continue;
+ case '{':
+ lgate();
+ while(getl(p) && strcmp((char*)p,"%}") != 0)
+ Bprint(&fout, "%s\n",(char*)p);
+ if(p[0] == '%') continue;
+ error("Premature eof");
+ case 's': case 'S': /* start conditions */
+ lgate();
+ while(*p && strchr(" \t,", *p) == 0) p++;
+ n = TRUE;
+ while(n){
+ while(*p && strchr(" \t,", *p)) p++;
+ t = p;
+ while(*p && strchr(" \t,", *p) == 0)p++;
+ if(!*p) n = FALSE;
+ *p++ = 0;
+ if (*t == 0) continue;
+ i = sptr*2;
+ Bprint(&fout,"#define %s %d\n",(char*)t,i);
+ strcpy((char*)sp, (char*)t);
+ sname[sptr++] = sp;
+ sname[sptr] = 0; /* required by lookup */
+ if(sptr >= STARTSIZE)
+ error("Too many start conditions");
+ sp += strlen((char*)sp) + 1;
+ if(sp >= stchar+STARTCHAR)
+ error("Start conditions too long");
+ }
+ continue;
+ default:
+ warning("Invalid request %s",p);
+ continue;
+ } /* end of switch after seeing '%' */
+ case ' ': case '\t': /* must be code */
+ lgate();
+ Bprint(&fout, "%s\n",(char*)p);
+ continue;
+ default: /* definition */
+ while(*p && !isspace(*p)) p++;
+ if(*p == 0)
+ continue;
+ prev = *p;
+ *p = 0;
+ bptr = p+1;
+ yylval.cp = buf;
+ if(isdigit(buf[0]))
+ warning("Substitution strings may not begin with digits");
+ return(freturn(STR));
+ }
+ }
+ /* still sect 1, but prev != '\n' */
+ else {
+ p = bptr;
+ while(*p && isspace(*p)) p++;
+ if(*p == 0)
+ warning("No translation given - null string assumed");
+ strcpy((char*)token, (char*)p);
+ yylval.cp = token;
+ prev = '\n';
+ return(freturn(STR));
+ }
+ }
+ /* end of section one processing */
+ } else if(sect == RULESECTION){ /* rules and actions */
+ while(!eof){
+ switch(c=gch()){
+ case '\0':
+ return(freturn(0));
+ case '\n':
+ if(prev == '\n') continue;
+ x = NEWE;
+ break;
+ case ' ':
+ case '\t':
+ if(sectbegin == TRUE){
+ cpyact();
+ while((c=gch()) && c != '\n');
+ continue;
+ }
+ if(!funcflag)phead2();
+ funcflag = TRUE;
+ Bprint(&fout,"case %d:\n",casecount);
+ if(cpyact())
+ Bprint(&fout,"break;\n");
+ while((c=gch()) && c != '\n');
+ if(peek == ' ' || peek == '\t' || sectbegin == TRUE){
+ warning("Executable statements should occur right after %%");
+ continue;
+ }
+ x = NEWE;
+ break;
+ case '%':
+ if(prev != '\n') goto character;
+ if(peek == '{'){ /* included code */
+ getl(buf);
+ while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0)
+ Bprint(&fout,"%s\n",(char*)buf);
+ continue;
+ }
+ if(peek == '%'){
+ gch();
+ gch();
+ x = DELIM;
+ break;
+ }
+ goto character;
+ case '|':
+ if(peek == ' ' || peek == '\t' || peek == '\n'){
+ Bprint(&fout,"%d\n",30000+casecount++);
+ continue;
+ }
+ x = '|';
+ break;
+ case '$':
+ if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){
+ x = c;
+ break;
+ }
+ goto character;
+ case '^':
+ if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */
+ x = c;
+ break;
+ case '?':
+ case '+':
+ case '.':
+ case '*':
+ case '(':
+ case ')':
+ case ',':
+ case '/':
+ x = c;
+ break;
+ case '}':
+ iter = FALSE;
+ x = c;
+ break;
+ case '{': /* either iteration or definition */
+ if(isdigit(c=gch())){ /* iteration */
+ iter = TRUE;
+ ieval:
+ i = 0;
+ while(isdigit(c)){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ yylval.i = atol((char*)token);
+ munputc(c);
+ x = ITER;
+ break;
+ } else { /* definition */
+ i = 0;
+ while(c && c!='}'){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ i = lookup(token,def);
+ if(i < 0)
+ warning("Definition %s not found",token);
+ else
+ munputs(subs[i]);
+ continue;
+ }
+ case '<': /* start condition ? */
+ if(prev != '\n') /* not at line begin, not start */
+ goto character;
+ t = slptr;
+ do {
+ i = 0;
+ c = gch();
+ while(c != ',' && c && c != '>'){
+ token[i++] = c;
+ c = gch();
+ }
+ token[i] = 0;
+ if(i == 0)
+ goto character;
+ i = lookup(token,sname);
+ if(i < 0) {
+ warning("Undefined start condition %s",token);
+ continue;
+ }
+ *slptr++ = i+1;
+ } while(c && c != '>');
+ *slptr++ = 0;
+ /* check if previous value re-usable */
+ for (xp=slist; xp<t; ){
+ if (strcmp((char*)xp, (char*)t)==0)
+ break;
+ while (*xp++);
+ }
+ if (xp<t){
+ /* re-use previous pointer to string */
+ slptr=t;
+ t=xp;
+ }
+ if(slptr > slist+STARTSIZE) /* note not packed ! */
+ error("Too many start conditions used");
+ yylval.cp = t;
+ x = SCON;
+ break;
+ case '"':
+ i = 0;
+ while((c=gch()) && c != '"' && c != '\n'){
+ if(c == '\\') c = usescape(gch());
+ token[i++] = c;
+ if(i > TOKENSIZE){
+ warning("String too long");
+ i = TOKENSIZE-1;
+ break;
+ }
+ }
+ if(c == '\n') {
+ yyline--;
+ warning("Non-terminated string");
+ yyline++;
+ }
+ token[i] = 0;
+ if(i == 0)x = NULLS;
+ else if(i == 1){
+ yylval.i = token[0];
+ x = CHAR;
+ } else {
+ yylval.cp = token;
+ x = STR;
+ }
+ break;
+ case '[':
+ for(i=1;i<NCH;i++) symbol[i] = 0;
+ x = CCL;
+ if((c = gch()) == '^'){
+ x = NCCL;
+ c = gch();
+ }
+ while(c != ']' && c){
+ if(c == '\\') c = usescape(gch());
+ symbol[c] = 1;
+ j = c;
+ if((c=gch()) == '-' && peek != ']'){ /* range specified */
+ c = gch();
+ if(c == '\\') c = usescape(gch());
+ k = c;
+ if(j > k) {
+ n = j;
+ j = k;
+ k = n;
+ }
+ if(!(('A' <= j && k <= 'Z') ||
+ ('a' <= j && k <= 'z') ||
+ ('0' <= j && k <= '9')))
+ warning("Non-portable Character Class");
+ for(n=j+1;n<=k;n++)
+ symbol[n] = 1; /* implementation dependent */
+ c = gch();
+ }
+ }
+ /* try to pack ccl's */
+ i = 0;
+ for(j=0;j<NCH;j++)
+ if(symbol[j])token[i++] = j;
+ token[i] = 0;
+ p = ccl;
+ while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++;
+ if(p < ccptr) /* found it */
+ yylval.cp = p;
+ else {
+ yylval.cp = ccptr;
+ strcpy((char*)ccptr,(char*)token);
+ ccptr += strlen((char*)token) + 1;
+ if(ccptr >= ccl+CCLSIZE)
+ error("Too many large character classes");
+ }
+ cclinter(x==CCL);
+ break;
+ case '\\':
+ c = usescape(gch());
+ default:
+ character:
+ if(iter){ /* second part of an iteration */
+ iter = FALSE;
+ if('0' <= c && c <= '9')
+ goto ieval;
+ }
+ if(isalpha(peek)){
+ i = 0;
+ yylval.cp = token;
+ token[i++] = c;
+ while(isalpha(peek))
+ token[i++] = gch();
+ if(peek == '?' || peek == '*' || peek == '+')
+ munputc(token[--i]);
+ token[i] = 0;
+ if(i == 1){
+ yylval.i = token[0];
+ x = CHAR;
+ }
+ else x = STR;
+ } else {
+ yylval.i = c;
+ x = CHAR;
+ }
+ }
+ scon = FALSE;
+ if(x == SCON)scon = TRUE;
+ sectbegin = FALSE;
+ return(freturn(x));
+ }
+ }
+ /* section three */
+ ptail();
+# ifdef DEBUG
+ if(debug)
+ Bprint(&fout,"\n/*this comes from section three - debug */\n");
+# endif
+ while(getl(buf) && !eof)
+ Bprint(&fout,"%s\n",(char*)buf);
+ return(freturn(0));
+}
+/* end of yylex */
+# ifdef DEBUG
+int
+freturn(int i)
+{
+ if(yydebug) {
+ print("now return ");
+ if(i < NCH) allprint(i);
+ else print("%d",i);
+ printf(" yylval = ");
+ switch(i){
+ case STR: case CCL: case NCCL:
+ strpt(yylval.cp);
+ break;
+ case CHAR:
+ allprint(yylval.i);
+ break;
+ default:
+ print("%d",yylval.i);
+ break;
+ }
+ print("\n");
+ }
+ return(i);
+}
+# endif