diff options
author | wkj <devnull@localhost> | 2004-04-21 01:22:09 +0000 |
---|---|---|
committer | wkj <devnull@localhost> | 2004-04-21 01:22:09 +0000 |
commit | e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41 (patch) | |
tree | c4d4944ccfc614f0ad0ad8aa16126e336f210853 /src/cmd/lex/parser.y | |
parent | 7d3bbe16529792999d0627256748f9b780c084f3 (diff) | |
download | plan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.tar.gz plan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.tar.bz2 plan9port-e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41.zip |
Plan 9 lex (to be installed as lex.9, if at all).
Diffstat (limited to 'src/cmd/lex/parser.y')
-rw-r--r-- | src/cmd/lex/parser.y | 651 |
1 files changed, 651 insertions, 0 deletions
diff --git a/src/cmd/lex/parser.y b/src/cmd/lex/parser.y new file mode 100644 index 00000000..5ac33a28 --- /dev/null +++ b/src/cmd/lex/parser.y @@ -0,0 +1,651 @@ +%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS +%left SCON '/' NEWE +%left '|' +%left '$' '^' +%left CHAR CCL NCCL '(' '.' STR NULLS +%left ITER +%left CAT +%left '*' '+' '?' + +%{ +# include "ldefs.h" +#define YYSTYPE union _yystype_ +union _yystype_ +{ + int i; + uchar *cp; +}; +%} +%% +%{ +int i; +int j,k; +int g; +uchar *p; +%} +acc : lexinput + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } + ; +lexinput: defns delim prods end + | defns delim end + ={ + if(!funcflag)phead2(); + funcflag = TRUE; + } + | error + ={ +# ifdef DEBUG + if(debug) { + sect1dump(); + sect2dump(); + } +# endif + } + ; +end: delim | ; +defns: defns STR STR + ={ strcpy((char*)dp,(char*)$2.cp); + def[dptr] = dp; + dp += strlen((char*)$2.cp) + 1; + strcpy((char*)dp,(char*)$3.cp); + subs[dptr++] = dp; + if(dptr >= DEFSIZE) + error("Too many definitions"); + dp += strlen((char*)$3.cp) + 1; + if(dp >= dchar+DEFCHAR) + error("Definitions too long"); + subs[dptr]=def[dptr]=0; /* for lookup - require ending null */ + } + | + ; +delim: DELIM + ={ +# ifdef DEBUG + if(sect == DEFSECTION && debug) sect1dump(); +# endif + sect++; + } + ; +prods: prods pr + ={ $$.i = mn2(RNEWE,$1.i,$2.i); + } + | pr + ={ $$.i = $1.i;} + ; +pr: r NEWE + ={ + if(divflg == TRUE) + i = mn1(S1FINAL,casecount); + else i = mn1(FINAL,casecount); + $$.i = mn2(RCAT,$1.i,i); + divflg = FALSE; + casecount++; + } + | error NEWE + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } +r: CHAR + ={ $$.i = mn0($1.i); } + | STR + ={ + p = $1.cp; + i = mn0(*p++); + while(*p) + i = mn2(RSTR,i,*p++); + $$.i = i; + } + | '.' + ={ symbol['\n'] = 0; + if(psave == FALSE){ + p = ccptr; + psave = ccptr; + for(i=1;i<'\n';i++){ + symbol[i] = 1; + *ccptr++ = i; + } + for(i='\n'+1;i<NCH;i++){ + symbol[i] = 1; + *ccptr++ = i; + } + *ccptr++ = 0; + if(ccptr > ccl+CCLSIZE) + error("Too many large character classes"); + } + else + p = psave; + $$.i = mn1(RCCL,(int)p); + cclinter(1); + } + | CCL + ={ $$.i = mn1(RCCL,$1.i); } + | NCCL + ={ $$.i = mn1(RNCCL,$1.i); } + | r '*' + ={ $$.i = mn1(STAR,$1.i); } + | r '+' + ={ $$.i = mn1(PLUS,$1.i); } + | r '?' + ={ $$.i = mn1(QUEST,$1.i); } + | r '|' r + ={ $$.i = mn2(BAR,$1.i,$3.i); } + | r r %prec CAT + ={ $$.i = mn2(RCAT,$1.i,$2.i); } + | r '/' r + ={ if(!divflg){ + j = mn1(S2FINAL,-casecount); + i = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,i,$3.i); + } + else { + $$.i = mn2(RCAT,$1.i,$3.i); + warning("Extra slash removed"); + } + divflg = TRUE; + } + | r ITER ',' ITER '}' + ={ if($2.i > $4.i){ + i = $2.i; + $2.i = $4.i; + $4.i = i; + } + if($4.i <= 0) + warning("Iteration range must be positive"); + else { + j = $1.i; + for(k = 2; k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + for(i = $2.i+1; i<=$4.i; i++){ + g = dupl($1.i); + for(k=2;k<=i;k++) + g = mn2(RCAT,g,dupl($1.i)); + j = mn2(BAR,j,g); + } + $$.i = j; + } + } + | r ITER '}' + ={ + if($2.i < 0)warning("Can't have negative iteration"); + else if($2.i == 0) $$.i = mn0(RNULLS); + else { + j = $1.i; + for(k=2;k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + $$.i = j; + } + } + | r ITER ',' '}' + ={ + /* from n to infinity */ + if($2.i < 0)warning("Can't have negative iteration"); + else if($2.i == 0) $$.i = mn1(STAR,$1.i); + else if($2.i == 1)$$.i = mn1(PLUS,$1.i); + else { /* >= 2 iterations minimum */ + j = $1.i; + for(k=2;k<$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + k = mn1(PLUS,dupl($1.i)); + $$.i = mn2(RCAT,j,k); + } + } + | SCON r + ={ $$.i = mn2(RSCON,$2.i,$1.i); } + | '^' r + ={ $$.i = mn1(CARAT,$2.i); } + | r '$' + ={ i = mn0('\n'); + if(!divflg){ + j = mn1(S2FINAL,-casecount); + k = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,k,i); + } + else $$.i = mn2(RCAT,$1.i,i); + divflg = TRUE; + } + | '(' r ')' + ={ $$.i = $2.i; } + | NULLS + ={ $$.i = mn0(RNULLS); } + ; +%% +int +yylex(void) +{ + uchar *p; + int c, i; + uchar *t, *xp; + int n, j, k, x; + static int sectbegin; + static uchar token[TOKENSIZE]; + static int iter; + +# ifdef DEBUG + yylval.i = 0; +# endif + + if(sect == DEFSECTION) { /* definitions section */ + while(!eof) { + if(prev == '\n'){ /* next char is at beginning of line */ + getl(p=buf); + switch(*p){ + case '%': + switch(*(p+1)){ + case '%': + lgate(); + Bprint(&fout,"#define YYNEWLINE %d\n",'\n'); + Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n"); + sectbegin = TRUE; + i = treesize*(sizeof(*name)+sizeof(*left)+ + sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA; + p = myalloc(i,1); + if(p == 0) + error("Too little core for parse tree"); + free(p); + name = myalloc(treesize,sizeof(*name)); + left = myalloc(treesize,sizeof(*left)); + right = myalloc(treesize,sizeof(*right)); + nullstr = myalloc(treesize,sizeof(*nullstr)); + parent = myalloc(treesize,sizeof(*parent)); + if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0) + error("Too little core for parse tree"); + return(freturn(DELIM)); + case 'p': case 'P': /* has overridden number of positions */ + while(*p && !isdigit(*p))p++; + maxpos = atol((char*)p); +# ifdef DEBUG + if (debug) print("positions (%%p) now %d\n",maxpos); +# endif + if(report == 2)report = 1; + continue; + case 'n': case 'N': /* has overridden number of states */ + while(*p && !isdigit(*p))p++; + nstates = atol((char*)p); +# ifdef DEBUG + if(debug)print( " no. states (%%n) now %d\n",nstates); +# endif + if(report == 2)report = 1; + continue; + case 'e': case 'E': /* has overridden number of tree nodes */ + while(*p && !isdigit(*p))p++; + treesize = atol((char*)p); +# ifdef DEBUG + if (debug) print("treesize (%%e) now %d\n",treesize); +# endif + if(report == 2)report = 1; + continue; + case 'o': case 'O': + while (*p && !isdigit(*p))p++; + outsize = atol((char*)p); + if (report ==2) report=1; + continue; + case 'a': case 'A': /* has overridden number of transitions */ + while(*p && !isdigit(*p))p++; + if(report == 2)report = 1; + ntrans = atol((char*)p); +# ifdef DEBUG + if (debug)print("N. trans (%%a) now %d\n",ntrans); +# endif + continue; + case 'k': case 'K': /* overriden packed char classes */ + while (*p && !isdigit(*p))p++; + if (report==2) report=1; + free(pchar); + pchlen = atol((char*)p); +# ifdef DEBUG + if (debug) print( "Size classes (%%k) now %d\n",pchlen); +# endif + pchar=pcptr=myalloc(pchlen, sizeof(*pchar)); + continue; + case '{': + lgate(); + while(getl(p) && strcmp((char*)p,"%}") != 0) + Bprint(&fout, "%s\n",(char*)p); + if(p[0] == '%') continue; + error("Premature eof"); + case 's': case 'S': /* start conditions */ + lgate(); + while(*p && strchr(" \t,", *p) == 0) p++; + n = TRUE; + while(n){ + while(*p && strchr(" \t,", *p)) p++; + t = p; + while(*p && strchr(" \t,", *p) == 0)p++; + if(!*p) n = FALSE; + *p++ = 0; + if (*t == 0) continue; + i = sptr*2; + Bprint(&fout,"#define %s %d\n",(char*)t,i); + strcpy((char*)sp, (char*)t); + sname[sptr++] = sp; + sname[sptr] = 0; /* required by lookup */ + if(sptr >= STARTSIZE) + error("Too many start conditions"); + sp += strlen((char*)sp) + 1; + if(sp >= stchar+STARTCHAR) + error("Start conditions too long"); + } + continue; + default: + warning("Invalid request %s",p); + continue; + } /* end of switch after seeing '%' */ + case ' ': case '\t': /* must be code */ + lgate(); + Bprint(&fout, "%s\n",(char*)p); + continue; + default: /* definition */ + while(*p && !isspace(*p)) p++; + if(*p == 0) + continue; + prev = *p; + *p = 0; + bptr = p+1; + yylval.cp = buf; + if(isdigit(buf[0])) + warning("Substitution strings may not begin with digits"); + return(freturn(STR)); + } + } + /* still sect 1, but prev != '\n' */ + else { + p = bptr; + while(*p && isspace(*p)) p++; + if(*p == 0) + warning("No translation given - null string assumed"); + strcpy((char*)token, (char*)p); + yylval.cp = token; + prev = '\n'; + return(freturn(STR)); + } + } + /* end of section one processing */ + } else if(sect == RULESECTION){ /* rules and actions */ + while(!eof){ + switch(c=gch()){ + case '\0': + return(freturn(0)); + case '\n': + if(prev == '\n') continue; + x = NEWE; + break; + case ' ': + case '\t': + if(sectbegin == TRUE){ + cpyact(); + while((c=gch()) && c != '\n'); + continue; + } + if(!funcflag)phead2(); + funcflag = TRUE; + Bprint(&fout,"case %d:\n",casecount); + if(cpyact()) + Bprint(&fout,"break;\n"); + while((c=gch()) && c != '\n'); + if(peek == ' ' || peek == '\t' || sectbegin == TRUE){ + warning("Executable statements should occur right after %%"); + continue; + } + x = NEWE; + break; + case '%': + if(prev != '\n') goto character; + if(peek == '{'){ /* included code */ + getl(buf); + while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0) + Bprint(&fout,"%s\n",(char*)buf); + continue; + } + if(peek == '%'){ + gch(); + gch(); + x = DELIM; + break; + } + goto character; + case '|': + if(peek == ' ' || peek == '\t' || peek == '\n'){ + Bprint(&fout,"%d\n",30000+casecount++); + continue; + } + x = '|'; + break; + case '$': + if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){ + x = c; + break; + } + goto character; + case '^': + if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */ + x = c; + break; + case '?': + case '+': + case '.': + case '*': + case '(': + case ')': + case ',': + case '/': + x = c; + break; + case '}': + iter = FALSE; + x = c; + break; + case '{': /* either iteration or definition */ + if(isdigit(c=gch())){ /* iteration */ + iter = TRUE; + ieval: + i = 0; + while(isdigit(c)){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + yylval.i = atol((char*)token); + munputc(c); + x = ITER; + break; + } else { /* definition */ + i = 0; + while(c && c!='}'){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + i = lookup(token,def); + if(i < 0) + warning("Definition %s not found",token); + else + munputs(subs[i]); + continue; + } + case '<': /* start condition ? */ + if(prev != '\n') /* not at line begin, not start */ + goto character; + t = slptr; + do { + i = 0; + c = gch(); + while(c != ',' && c && c != '>'){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + if(i == 0) + goto character; + i = lookup(token,sname); + if(i < 0) { + warning("Undefined start condition %s",token); + continue; + } + *slptr++ = i+1; + } while(c && c != '>'); + *slptr++ = 0; + /* check if previous value re-usable */ + for (xp=slist; xp<t; ){ + if (strcmp((char*)xp, (char*)t)==0) + break; + while (*xp++); + } + if (xp<t){ + /* re-use previous pointer to string */ + slptr=t; + t=xp; + } + if(slptr > slist+STARTSIZE) /* note not packed ! */ + error("Too many start conditions used"); + yylval.cp = t; + x = SCON; + break; + case '"': + i = 0; + while((c=gch()) && c != '"' && c != '\n'){ + if(c == '\\') c = usescape(gch()); + token[i++] = c; + if(i > TOKENSIZE){ + warning("String too long"); + i = TOKENSIZE-1; + break; + } + } + if(c == '\n') { + yyline--; + warning("Non-terminated string"); + yyline++; + } + token[i] = 0; + if(i == 0)x = NULLS; + else if(i == 1){ + yylval.i = token[0]; + x = CHAR; + } else { + yylval.cp = token; + x = STR; + } + break; + case '[': + for(i=1;i<NCH;i++) symbol[i] = 0; + x = CCL; + if((c = gch()) == '^'){ + x = NCCL; + c = gch(); + } + while(c != ']' && c){ + if(c == '\\') c = usescape(gch()); + symbol[c] = 1; + j = c; + if((c=gch()) == '-' && peek != ']'){ /* range specified */ + c = gch(); + if(c == '\\') c = usescape(gch()); + k = c; + if(j > k) { + n = j; + j = k; + k = n; + } + if(!(('A' <= j && k <= 'Z') || + ('a' <= j && k <= 'z') || + ('0' <= j && k <= '9'))) + warning("Non-portable Character Class"); + for(n=j+1;n<=k;n++) + symbol[n] = 1; /* implementation dependent */ + c = gch(); + } + } + /* try to pack ccl's */ + i = 0; + for(j=0;j<NCH;j++) + if(symbol[j])token[i++] = j; + token[i] = 0; + p = ccl; + while(p <ccptr && strcmp((char*)token,(char*)p) != 0)p++; + if(p < ccptr) /* found it */ + yylval.cp = p; + else { + yylval.cp = ccptr; + strcpy((char*)ccptr,(char*)token); + ccptr += strlen((char*)token) + 1; + if(ccptr >= ccl+CCLSIZE) + error("Too many large character classes"); + } + cclinter(x==CCL); + break; + case '\\': + c = usescape(gch()); + default: + character: + if(iter){ /* second part of an iteration */ + iter = FALSE; + if('0' <= c && c <= '9') + goto ieval; + } + if(isalpha(peek)){ + i = 0; + yylval.cp = token; + token[i++] = c; + while(isalpha(peek)) + token[i++] = gch(); + if(peek == '?' || peek == '*' || peek == '+') + munputc(token[--i]); + token[i] = 0; + if(i == 1){ + yylval.i = token[0]; + x = CHAR; + } + else x = STR; + } else { + yylval.i = c; + x = CHAR; + } + } + scon = FALSE; + if(x == SCON)scon = TRUE; + sectbegin = FALSE; + return(freturn(x)); + } + } + /* section three */ + ptail(); +# ifdef DEBUG + if(debug) + Bprint(&fout,"\n/*this comes from section three - debug */\n"); +# endif + while(getl(buf) && !eof) + Bprint(&fout,"%s\n",(char*)buf); + return(freturn(0)); +} +/* end of yylex */ +# ifdef DEBUG +int +freturn(int i) +{ + if(yydebug) { + print("now return "); + if(i < NCH) allprint(i); + else print("%d",i); + printf(" yylval = "); + switch(i){ + case STR: case CCL: case NCCL: + strpt(yylval.cp); + break; + case CHAR: + allprint(yylval.i); + break; + default: + print("%d",yylval.i); + break; + } + print("\n"); + } + return(i); +} +# endif |