From f08fdedcee12c06e3ce9ac9bec363915978e8289 Mon Sep 17 00:00:00 2001 From: rsc Date: Sun, 23 Nov 2003 18:04:08 +0000 Subject: Plan 9's rc. not a clear win over byron's, but at least it has the right syntax. --- src/cmd/rc/lex.c | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100644 src/cmd/rc/lex.c (limited to 'src/cmd/rc/lex.c') diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c new file mode 100644 index 00000000..dd58151d --- /dev/null +++ b/src/cmd/rc/lex.c @@ -0,0 +1,322 @@ +#include "rc.h" +#include "exec.h" +#include "io.h" +#include "getflags.h" +#include "fns.h" +int getnext(void); +int wordchr(int c) +{ + return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; +} +int idchr(int c) +{ + /* + * Formerly: + * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' + * || c=='_' || c=='*'; + */ + return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); +} +int future=EOF; +int doprompt=1; +int inquote; +/* + * Look ahead in the input stream + */ +int nextc(void){ + if(future==EOF) future=getnext(); + return future; +} +/* + * Consume the lookahead character. + */ +int advance(void){ + int c=nextc(); + lastc=future; + future=EOF; + return c; +} +/* + * read a character from the input stream + */ +int getnext(void){ + register int c; + static int peekc=EOF; + if(peekc!=EOF){ + c=peekc; + peekc=EOF; + return c; + } + if(runq->eof) return EOF; + if(doprompt) pprompt(); + c=rchr(runq->cmdfd); + if(!inquote && c=='\\'){ + c=rchr(runq->cmdfd); + if(c=='\n'){ + doprompt=1; + c=' '; + } + else{ + peekc=c; + c='\\'; + } + } + doprompt=doprompt || c=='\n' || c==EOF; + if(c==EOF) runq->eof++; + else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); + return c; +} +void pprompt(void){ + var *prompt; + if(runq->iflag){ + pstr(err, promptstr); + flush(err); + prompt=vlook("prompt"); + if(prompt->val && prompt->val->next) + promptstr=prompt->val->next->word; + else + promptstr="\t"; + } + runq->lineno++; + doprompt=0; +} +void skipwhite(void){ + int c; + for(;;){ + c=nextc(); + if(c=='#'){ /* Why did this used to be if(!inquote && c=='#') ?? */ + for(;;){ + c=nextc(); + if(c=='\n' || c==EOF) break; + advance(); + } + } + if(c==' ' || c=='\t') advance(); + else return; + } +} +void skipnl(void){ + register int c; + for(;;){ + skipwhite(); + c=nextc(); + if(c!='\n') return; + advance(); + } +} +int nextis(int c){ + if(nextc()==c){ + advance(); + return 1; + } + return 0; +} +char *addtok(char *p, int val){ + if(p==0) return 0; + if(p==&tok[NTOK]){ + *p=0; + yyerror("token buffer too short"); + return 0; + } + *p++=val; + return p; +} +char *addutf(char *p, int c){ + p=addtok(p, c); + if(twobyte(c)) /* 2-byte escape */ + return addtok(p, advance()); + if(threebyte(c)){ /* 3-byte escape */ + p=addtok(p, advance()); + return addtok(p, advance()); + } + return p; +} +int lastdol; /* was the last token read '$' or '$#' or '"'? */ +int lastword; /* was the last token read a word or compound word terminator? */ +int yylex(void){ + register int c, d=nextc(); + register char *w=tok; + register struct tree *t; + yylval.tree=0; + /* + * Embarassing sneakiness: if the last token read was a quoted or unquoted + * WORD then we alter the meaning of what follows. If the next character + * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise, + * if the next character is the first character of a simple or compound word, + * we insert a `^' before it. + */ + if(lastword){ + lastword=0; + if(d=='('){ + advance(); + strcpy(tok, "( [SUB]"); + return SUB; + } + if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ + strcpy(tok, "^"); + return '^'; + } + } + inquote=0; + skipwhite(); + switch(c=advance()){ + case EOF: + lastdol=0; + strcpy(tok, "EOF"); + return EOF; + case '$': + lastdol=1; + if(nextis('#')){ + strcpy(tok, "$#"); + return COUNT; + } + if(nextis('"')){ + strcpy(tok, "$\""); + return '"'; + } + strcpy(tok, "$"); + return '$'; + case '&': + lastdol=0; + if(nextis('&')){ + skipnl(); + strcpy(tok, "&&"); + return ANDAND; + } + strcpy(tok, "&"); + return '&'; + case '|': + lastdol=0; + if(nextis(c)){ + skipnl(); + strcpy(tok, "||"); + return OROR; + } + case '<': + case '>': + lastdol=0; + /* + * funny redirection tokens: + * redir: arrow | arrow '[' fd ']' + * arrow: '<' | '<<' | '>' | '>>' | '|' + * fd: digit | digit '=' | digit '=' digit + * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' + * some possibilities are nonsensical and get a message. + */ + *w++=c; + t=newtree(); + switch(c){ + case '|': + t->type=PIPE; + t->fd0=1; + t->fd1=0; + break; + case '>': + t->type=REDIR; + if(nextis(c)){ + t->rtype=APPEND; + *w++=c; + } + else t->rtype=WRITE; + t->fd0=1; + break; + case '<': + t->type=REDIR; + if(nextis(c)){ + t->rtype=HERE; + *w++=c; + } + else t->rtype=READ; + t->fd0=0; + break; + } + if(nextis('[')){ + *w++='['; + c=advance(); + *w++=c; + if(c<'0' || '9'type==PIPE?"pipe syntax" + :"redirection syntax"); + return EOF; + } + t->fd0=0; + do{ + t->fd0=t->fd0*10+c-'0'; + *w++=c; + c=advance(); + }while('0'<=c && c<='9'); + if(c=='='){ + *w++='='; + if(t->type==REDIR) t->type=DUP; + c=advance(); + if('0'<=c && c<='9'){ + t->rtype=DUPFD; + t->fd1=t->fd0; + t->fd0=0; + do{ + t->fd0=t->fd0*10+c-'0'; + *w++=c; + c=advance(); + }while('0'<=c && c<='9'); + } + else{ + if(t->type==PIPE) goto RedirErr; + t->rtype=CLOSE; + } + } + if(c!=']' + || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)) + goto RedirErr; + *w++=']'; + } + *w='\0'; + yylval.tree=t; + if(t->type==PIPE) skipnl(); + return t->type; + case '\'': + lastdol=0; + lastword=1; + inquote=1; + for(;;){ + c=advance(); + if(c==EOF) break; + if(c=='\''){ + if(nextc()!='\'') + break; + advance(); + } + w=addutf(w, c); + } + if(w!=0) *w='\0'; + t=token(tok, WORD); + t->quoted=1; + yylval.tree=t; + return t->type; + } + if(!wordchr(c)){ + lastdol=0; + tok[0]=c; + tok[1]='\0'; + return c; + } + for(;;){ + /* next line should have (char)c==GLOB, but ken's compiler is broken */ + if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) + w=addtok(w, GLOB); + w=addutf(w, c); + c=nextc(); + if(lastdol?!idchr(c):!wordchr(c)) break; + advance(); + } + + lastword=1; + lastdol=0; + if(w!=0) *w='\0'; + t=klook(tok); + if(t->type!=WORD) lastword=0; + t->quoted=0; + yylval.tree=t; + return t->type; +} -- cgit v1.2.3