diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/cmd/9sed.c | 1447 | ||||
-rw-r--r-- | src/lib9/get9root.c | 13 | ||||
-rw-r--r-- | src/lib9/unsharp.c | 44 | ||||
-rw-r--r-- | src/libventi/parsescore.c | 40 |
4 files changed, 1544 insertions, 0 deletions
diff --git a/src/cmd/9sed.c b/src/cmd/9sed.c new file mode 100644 index 00000000..49e78b89 --- /dev/null +++ b/src/cmd/9sed.c @@ -0,0 +1,1447 @@ +/* + * sed -- stream editor + * + * + */ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <regexp.h> + +enum { + DEPTH = 20, /* max nesting depth of {} */ + MAXCMDS = 512, /* max sed commands */ + ADDSIZE = 10000, /* size of add & read buffer */ + MAXADDS = 20, /* max pending adds and reads */ + LBSIZE = 8192, /* input line size */ + LABSIZE = 50, /* max label name size */ + MAXSUB = 10, /* max number of sub reg exp */ + MAXFILES = 120, /* max output files */ +}; + /* An address is a line #, a R.E., "$", a reference to the last + * R.E., or nothing. + */ +typedef struct { + enum { + A_NONE, + A_DOL, + A_LINE, + A_RE, + A_LAST, + }type; + union { + long line; /* Line # */ + Reprog *rp; /* Compiled R.E. */ + }; +} Addr; + +typedef struct SEDCOM { + Addr ad1; /* optional start address */ + Addr ad2; /* optional end address */ + union { + Reprog *re1; /* compiled R.E. */ + Rune *text; /* added text or file name */ + struct SEDCOM *lb1; /* destination command of branch */ + }; + Rune *rhs; /* Right-hand side of substitution */ + Biobuf* fcode; /* File ID for read and write */ + char command; /* command code -see below */ + char gfl; /* 'Global' flag for substitutions */ + char pfl; /* 'print' flag for substitutions */ + char active; /* 1 => data between start and end */ + char negfl; /* negation flag */ +} SedCom; + + /* Command Codes for field SedCom.command */ +#define ACOM 01 +#define BCOM 020 +#define CCOM 02 +#define CDCOM 025 +#define CNCOM 022 +#define COCOM 017 +#define CPCOM 023 +#define DCOM 03 +#define ECOM 015 +#define EQCOM 013 +#define FCOM 016 +#define GCOM 027 +#define CGCOM 030 +#define HCOM 031 +#define CHCOM 032 +#define ICOM 04 +#define LCOM 05 +#define NCOM 012 +#define PCOM 010 +#define QCOM 011 +#define RCOM 06 +#define SCOM 07 +#define TCOM 021 +#define WCOM 014 +#define CWCOM 024 +#define YCOM 026 +#define XCOM 033 + + +typedef struct label { /* Label symbol table */ + Rune asc[9]; /* Label name */ + SedCom *chain; + SedCom *address; /* Command associated with label */ +} Label; + +typedef struct FILE_CACHE { /* Data file control block */ + struct FILE_CACHE *next; /* Forward Link */ + char *name; /* Name of file */ +} FileCache; + +SedCom pspace[MAXCMDS]; /* Command storage */ +SedCom *pend = pspace+MAXCMDS; /* End of command storage */ +SedCom *rep = pspace; /* Current fill point */ + +Reprog *lastre = 0; /* Last regular expression */ +Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ + +Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ +Rune *addend = addspace+ADDSIZE; + +SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ +SedCom **aptr = abuf; + +struct { /* Sed program input control block */ + enum PTYPE /* Either on command line or in file */ + { P_ARG, + P_FILE + } type; + union PCTL { /* Pointer to data */ + Biobuf *bp; + char *curr; + } pctl; +} prog; + +Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ + +FileCache *fhead = 0; /* Head of File Cache Chain */ +FileCache *ftail = 0; /* Tail of File Cache Chain */ + +Rune *loc1; /* Start of pattern match */ +Rune *loc2; /* End of pattern match */ +Rune seof; /* Pattern delimiter char */ + +Rune linebuf[LBSIZE+1]; /* Input data buffer */ +Rune *lbend = linebuf+LBSIZE; /* End of buffer */ +Rune *spend = linebuf; /* End of input data */ +Rune *cp; /* Current scan point in linebuf */ + +Rune holdsp[LBSIZE+1]; /* Hold buffer */ +Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ +Rune *hspend = holdsp; /* End of hold data */ + +int nflag; /* Command line flags */ +int gflag; + +int dolflag; /* Set when at true EOF */ +int sflag; /* Set when substitution done */ +int jflag; /* Set when jump required */ +int delflag; /* Delete current line when set */ + +long lnum = 0; /* Input line count */ + +char fname[MAXFILES][40]; /* File name cache */ +Biobuf *fcode[MAXFILES]; /* File ID cache */ +int nfiles = 0; /* Cache fill point */ + +Biobuf fout; /* Output stream */ +Biobuf bstdin; /* Default input */ +Biobuf* f = 0; /* Input data */ + +Label ltab[LABSIZE]; /* Label name symbol table */ +Label *labend = ltab+LABSIZE; /* End of label table */ +Label *lab = ltab+1; /* Current Fill point */ + +int depth = 0; /* {} stack pointer */ + +Rune bad; /* Dummy err ptr reference */ +Rune *badp = &bad; + + +char CGMES[] = "Command garbled: %S"; +char TMMES[] = "Too much text: %S"; +char LTL[] = "Label too long: %S"; +char AD0MES[] = "No addresses allowed: %S"; +char AD1MES[] = "Only one address allowed: %S"; + +void address(Addr *); +void arout(void); +int cmp(char *, char *); +int rcmp(Rune *, Rune *); +void command(SedCom *); +Reprog *compile(void); +Rune *compsub(Rune *, Rune *); +void dechain(void); +void dosub(Rune *); +int ecmp(Rune *, Rune *, int); +void enroll(char *); +void errexit(void); +int executable(SedCom *); +void execute(void); +void fcomp(void); +long getrune(void); +Rune *gline(Rune *); +int match(Reprog *, Rune *); +void newfile(enum PTYPE, char *); +int opendata(void); +Biobuf *open_file(char *); +Rune *place(Rune *, Rune *, Rune *); +void quit(char *, char *); +int rline(Rune *, Rune *); +Label *search(Label *); +int substitute(SedCom *); +char *text(char *); +Rune *stext(Rune *, Rune *); +int ycomp(SedCom *); +char * trans(int c); +void putline(Biobuf *bp, Rune *buf, int n); + +void +main(int argc, char **argv) +{ + int compfl; + + lnum = 0; + Binit(&fout, 1, OWRITE); + fcode[nfiles++] = &fout; + compfl = 0; + + if(argc == 1) + exits(0); + ARGBEGIN{ + case 'n': + nflag++; + continue; + case 'f': + if(argc <= 1) + quit("no pattern-file", 0); + newfile(P_FILE, ARGF()); + fcomp(); + compfl = 1; + continue; + case 'e': + if (argc <= 1) + quit("missing pattern", 0); + newfile(P_ARG, ARGF()); + fcomp(); + compfl = 1; + continue; + case 'g': + gflag++; + continue; + default: + fprint(2, "sed: Unknown flag: %c\n", ARGC()); + continue; + } ARGEND + + if(compfl == 0) { + if (--argc < 0) + quit("missing pattern", 0); + newfile(P_ARG, *argv++); + fcomp(); + } + + if(depth) + quit("Too many {'s", 0); + + ltab[0].address = rep; + + dechain(); + + if(argc <= 0) + enroll(0); /* Add stdin to cache */ + else while(--argc >= 0) { + enroll(*argv++); + } + execute(); + exits(0); +} +void +fcomp(void) +{ + Rune *tp; + SedCom *pt, *pt1; + int i; + Label *lpt; + + static Rune *p = addspace; + static SedCom **cmpend[DEPTH]; /* stack of {} operations */ + + while (rline(linebuf, lbend) >= 0) { + cp = linebuf; +comploop: + while(*cp == ' ' || *cp == '\t') + cp++; + if(*cp == '\0' || *cp == '#') + continue; + if(*cp == ';') { + cp++; + goto comploop; + } + + address(&rep->ad1); + if (rep->ad1.type != A_NONE) { + if (rep->ad1.type == A_LAST) { + if (!lastre) + quit("First RE may not be null", 0); + rep->ad1.type = A_RE; + rep->ad1.rp = lastre; + } + if(*cp == ',' || *cp == ';') { + cp++; + address(&rep->ad2); + if (rep->ad2.type == A_LAST) { + rep->ad1.type = A_RE; + rep->ad2.rp = lastre; + } + } else + rep->ad2.type = A_NONE; + } + while(*cp == ' ' || *cp == '\t') + cp++; + +swit: + switch(*cp++) { + + default: + quit("Unrecognized command: %S", (char *)linebuf); + + case '!': + rep->negfl = 1; + goto swit; + + case '{': + rep->command = BCOM; + rep->negfl = !(rep->negfl); + cmpend[depth++] = &rep->lb1; + if(++rep >= pend) + quit("Too many commands: %S", (char *) linebuf); + if(*cp == '\0') continue; + goto comploop; + + case '}': + if(rep->ad1.type != A_NONE) + quit(AD0MES, (char *) linebuf); + if(--depth < 0) + quit("Too many }'s", 0); + *cmpend[depth] = rep; + if(*cp == 0) continue; + goto comploop; + + case '=': + rep->command = EQCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + break; + + case ':': + if(rep->ad1.type != A_NONE) + quit(AD0MES, (char *) linebuf); + + while(*cp == ' ') + cp++; + tp = lab->asc; + while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { + *tp++ = *cp++; + if(tp >= &(lab->asc[8])) + quit(LTL, (char *) linebuf); + } + *tp = '\0'; + + if(lpt = search(lab)) { + if(lpt->address) + quit("Duplicate labels: %S", (char *) linebuf); + } else { + lab->chain = 0; + lpt = lab; + if(++lab >= labend) + quit("Too many labels: %S", (char *) linebuf); + } + lpt->address = rep; + if (*cp == '#') + continue; + rep--; /* reuse this slot */ + break; + + case 'a': + rep->command = ACOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->text = p; + p = stext(p, addend); + break; + case 'c': + rep->command = CCOM; + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->text = p; + p = stext(p, addend); + break; + case 'i': + rep->command = ICOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->text = p; + p = stext(p, addend); + break; + + case 'g': + rep->command = GCOM; + break; + + case 'G': + rep->command = CGCOM; + break; + + case 'h': + rep->command = HCOM; + break; + + case 'H': + rep->command = CHCOM; + break; + + case 't': + rep->command = TCOM; + goto jtcommon; + + case 'b': + rep->command = BCOM; +jtcommon: + while(*cp == ' ')cp++; + if(*cp == '\0') { + if(pt = ltab[0].chain) { + while(pt1 = pt->lb1) + pt = pt1; + pt->lb1 = rep; + } else + ltab[0].chain = rep; + break; + } + tp = lab->asc; + while((*tp++ = *cp++)) + if(tp >= &(lab->asc[8])) + quit(LTL, (char *) linebuf); + cp--; + tp[-1] = '\0'; + + if(lpt = search(lab)) { + if(lpt->address) { + rep->lb1 = lpt->address; + } else { + pt = lpt->chain; + while(pt1 = pt->lb1) + pt = pt1; + pt->lb1 = rep; + } + } else { + lab->chain = rep; + lab->address = 0; + if(++lab >= labend) + quit("Too many labels: %S", + (char *) linebuf); + } + break; + + case 'n': + rep->command = NCOM; + break; + + case 'N': + rep->command = CNCOM; + break; + + case 'p': + rep->command = PCOM; + break; + + case 'P': + rep->command = CPCOM; + break; + + case 'r': + rep->command = RCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + rep->text = p; + p = stext(p, addend); + break; + + case 'd': + rep->command = DCOM; + break; + + case 'D': + rep->command = CDCOM; + rep->lb1 = pspace; + break; + + case 'q': + rep->command = QCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + break; + + case 'l': + rep->command = LCOM; + break; + + case 's': + rep->command = SCOM; + seof = *cp++; + if ((rep->re1 = compile()) == 0) { + if(!lastre) + quit("First RE may not be null.", 0); + rep->re1 = lastre; + } + rep->rhs = p; + if((p = compsub(p, addend)) == 0) + quit(CGMES, (char *) linebuf); + if(*cp == 'g') { + cp++; + rep->gfl++; + } else if(gflag) + rep->gfl++; + + if(*cp == 'p') { + cp++; + rep->pfl = 1; + } + + if(*cp == 'P') { + cp++; + rep->pfl = 2; + } + + if(*cp == 'w') { + cp++; + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + text(fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(cmp(fname[nfiles],fname[i]) == 0) { + rep->fcode = fcode[i]; + goto done; + } + if(nfiles >= MAXFILES) + quit("Too many files in w commands 1", 0); + rep->fcode = open_file(fname[nfiles]); + } + break; + + case 'w': + rep->command = WCOM; + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + text(fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(cmp(fname[nfiles], fname[i]) == 0) { + rep->fcode = fcode[i]; + goto done; + } + if(nfiles >= MAXFILES){ + fprint(2, "sed: Too many files in w commands 2 \n"); + fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); + errexit(); + } + rep->fcode = open_file(fname[nfiles]); + break; + + case 'x': + rep->command = XCOM; + break; + + case 'y': + rep->command = YCOM; + seof = *cp++; + if (ycomp(rep) == 0) + quit(CGMES, (char *) linebuf); + break; + + } +done: + if(++rep >= pend) + quit("Too many commands, last: %S", (char *) linebuf); + + if(*cp++ != '\0') { + if(cp[-1] == ';') + goto comploop; + quit(CGMES, (char *) linebuf); + } + + } +} + +Biobuf * +open_file(char *name) +{ + Biobuf *bp; + int fd; + + if ((bp = malloc(sizeof(Biobuf))) == 0) + quit("Out of memory", 0); + if ((fd = open(name, OWRITE)) < 0 && + (fd = create(name, OWRITE, 0666)) < 0) + quit("Cannot create %s", name); + Binit(bp, fd, OWRITE); + Bseek(bp, 0, 2); + fcode[nfiles++] = bp; + return bp; +} + +Rune * +compsub(Rune *rhs, Rune *end) +{ + Rune r; + + while ((r = *cp++) != '\0') { + if(r == '\\') { + if (rhs < end) + *rhs++ = 0xFFFF; + else + return 0; + r = *cp++; + if(r == 'n') + r = '\n'; + } else { + if(r == seof) { + if (rhs < end) + *rhs++ = '\0'; + else + return 0; + return rhs; + } + } + if (rhs < end) + *rhs++ = r; + else + return 0; + + } + return 0; +} + +Reprog * +compile(void) +{ + Rune c; + char *ep; + char expbuf[512]; + + if((c = *cp++) == seof) /* '//' */ + return 0; + ep = expbuf; + do { + if (c == 0 || c == '\n') + quit(TMMES, (char *) linebuf); + if (c == '\\') { + if (ep >= expbuf+sizeof(expbuf)) + quit(TMMES, (char *) linebuf); + ep += runetochar(ep, &c); + if ((c = *cp++) == 'n') + c = '\n'; + } + if (ep >= expbuf+sizeof(expbuf)) + quit(TMMES, (char *) linebuf); + ep += runetochar(ep, &c); + } while ((c = *cp++) != seof); + *ep = 0; + return lastre = regcomp(expbuf); +} + +void +regerror(char *s) +{ + USED(s); + quit(CGMES, (char *) linebuf); +} + +void +newfile(enum PTYPE type, char *name) +{ + if (type == P_ARG) + prog.pctl.curr = name; + else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) + quit("Cannot open pattern-file: %s\n", name); + prog.type = type; +} + +int +rline(Rune *buf, Rune *end) +{ + long c; + Rune r; + + while ((c = getrune()) >= 0) { + r = c; + if (r == '\\') { + if (buf <= end) + *buf++ = r; + if ((c = getrune()) < 0) + break; + r = c; + } else if (r == '\n') { + *buf = '\0'; + return(1); + } + if (buf <= end) + *buf++ = r; + } + *buf = '\0'; + return(-1); +} + +long +getrune(void) +{ + char *p; + long c; + Rune r; + + if (prog.type == P_ARG) { + if ((p = prog.pctl.curr) != 0) { + if (*p) { + prog.pctl.curr += chartorune(&r, p); + c = r; + } else { + c = '\n'; /* fake an end-of-line */ + prog.pctl.curr = 0; + } + } else + c = -1; + } else if ((c = Bgetrune(prog.pctl.bp)) < 0) + Bterm(prog.pctl.bp); + return c; +} + +void +address(Addr *ap) +{ + int c; + long lno; + + if((c = *cp++) == '$') + ap->type = A_DOL; + else if(c == '/') { + seof = c; + if (ap->rp = compile()) + ap->type = A_RE; + else + ap->type = A_LAST; + } + else if (c >= '0' && c <= '9') { + lno = c-'0'; + while ((c = *cp) >= '0' && c <= '9') + lno = lno*10 + *cp++-'0'; + if(!lno) + quit("line number 0 is illegal",0); + ap->type = A_LINE; + ap->line = lno; + } + else { + cp--; + ap->type = A_NONE; + } +} + +int +cmp(char *a, char *b) /* compare characters */ +{ + while(*a == *b++) + if (*a == '\0') + return(0); + else a++; + return(1); +} + +int +rcmp(Rune *a, Rune *b) /* compare runes */ +{ + while(*a == *b++) + if (*a == '\0') + return(0); + else a++; + return(1); +} + +char * +text(char *p) /* extract character string */ +{ + Rune r; + + while(*cp == '\t' || *cp == ' ') + cp++; + while (*cp) { + if ((r = *cp++) == '\\') + if ((r = *cp++) == 0) + break;; + if (r == '\n') + while (*cp == '\t' || *cp == ' ') + cp++; + p += runetochar(p, &r); + } + *p++ = '\0'; + return p; +} + +Rune * +stext(Rune *p, Rune *end) /* extract rune string */ +{ + while(*cp == '\t' || *cp == ' ') + cp++; + while (*cp) { + if (*cp == '\\') + if (*++cp == 0) + break; + if (p >= end-1) + quit(TMMES, (char *) linebuf); + if ((*p++ = *cp++) == '\n') + while(*cp == '\t' || *cp == ' ') + cp++; + } + *p++ = 0; + return p; +} + + +Label * +search (Label *ptr) +{ + Label *rp; + + for (rp = ltab; rp < ptr; rp++) + if(rcmp(rp->asc, ptr->asc) == 0) + return(rp); + return(0); +} + +void +dechain(void) +{ + Label *lptr; + SedCom *rptr, *trptr; + + for(lptr = ltab; lptr < lab; lptr++) { + + if(lptr->address == 0) + quit("Undefined label: %S", (char *) lptr->asc); + + if(lptr->chain) { + rptr = lptr->chain; + while(trptr = rptr->lb1) { + rptr->lb1 = lptr->address; + rptr = trptr; + } + rptr->lb1 = lptr->address; + } + } +} + +int +ycomp(SedCom *r) +{ + int i; + Rune *rp; + Rune c, *tsp, highc; + Rune *sp; + + highc = 0; + for(tsp = cp; *tsp != seof; tsp++) { + if(*tsp == '\\') + tsp++; + if(*tsp == '\n' || *tsp == '\0') + return(0); + if (*tsp > highc) highc = *tsp; + } + tsp++; + if ((rp = r->text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) + quit("Out of memory", 0); + *rp++ = highc; /* save upper bound */ + for (i = 0; i <= highc; i++) + rp[i] = i; + sp = cp; + while((c = *sp++) != seof) { + if(c == '\\' && *sp == 'n') { + sp++; + c = '\n'; + } + if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { + rp[c] = '\n'; + tsp++; + } + if(rp[c] == seof || rp[c] == '\0') { + free(r->re1); + r->re1 = 0; + return(0); + } + } + if(*tsp != seof) { + free(r->re1); + r->re1 = 0; + return(0); + } + cp = tsp+1; + return(1); +} + +void +execute(void) +{ + SedCom *ipc; + + while (spend = gline(linebuf)){ + for(ipc = pspace; ipc->command; ) { + if (!executable(ipc)) { + ipc++; + continue; + } + command(ipc); + + if(delflag) + break; + if(jflag) { + jflag = 0; + if((ipc = ipc->lb1) == 0) + break; + } else + ipc++; + + } + if(!nflag && !delflag) + putline(&fout, linebuf, spend-linebuf); + if(aptr > abuf) { + arout(); + } + delflag = 0; + } +} + /* determine if a statement should be applied to an input line */ +int +executable(SedCom *ipc) +{ + if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ + if (ipc->active == 1) /* Second line */ + ipc->active = 2; + switch(ipc->ad2.type) { + case A_NONE: /* No second addr; use first */ + ipc->active = 0; + break; + case A_DOL: /* Accept everything */ + return !ipc->negfl; + case A_LINE: /* Line at end of range? */ + if (lnum <= ipc->ad2.line) { + if (ipc->ad2.line == lnum) + ipc->active = 0; + return !ipc->negfl; + } + ipc->active = 0; /* out of range */ + return ipc->negfl; + case A_RE: /* Check for matching R.E. */ + if (match(ipc->ad2.rp, linebuf)) + ipc->active = 0; + return !ipc->negfl; + default: /* internal error */ + quit("Internal error", 0); + } + } + switch (ipc->ad1.type) { /* Check first address */ + case A_NONE: /* Everything matches */ + return !ipc->negfl; + case A_DOL: /* Only last line */ + if (dolflag) + return !ipc->negfl; + break; + case A_LINE: /* Check line number */ + if (ipc->ad1.line == lnum) { + ipc->active = 1; /* In range */ + return !ipc->negfl; + } + break; + case A_RE: /* Check R.E. */ + if (match(ipc->ad1.rp, linebuf)) { + ipc->active = 1; /* In range */ + return !ipc->negfl; + } + break; + default: + quit("Internal error", 0); + } + return ipc->negfl; +} + +int +match(Reprog *pattern, Rune *buf) +{ + if (!pattern) + return 0; + subexp[0].s.rsp = buf; + subexp[0].e.rep = 0; + if (rregexec(pattern, linebuf, subexp, MAXSUB)) { + loc1 = subexp[0].s.rsp; + loc2 = subexp[0].e.rep; + return 1; + } + loc1 = loc2 = 0; + return 0; +} + +int +substitute(SedCom *ipc) +{ + int len; + + if(!match(ipc->re1, linebuf)) + return 0; + + /* + * we have at least one match. some patterns, e.g. '$' or '^', can + * produce zero-length matches, so during a global substitute we + * must bump to the character after a zero-length match to keep from looping. + */ + sflag = 1; + if(ipc->gfl == 0) /* single substitution */ + dosub(ipc->rhs); + else + do{ /* global substitution */ + len = loc2-loc1; /* length of match */ + dosub(ipc->rhs); /* dosub moves loc2 */ + if(*loc2 == 0) /* end of string */ + break; + if(len == 0) /* zero-length R.E. match */ + loc2++; /* bump over zero-length match */ + if(*loc2 == 0) /* end of string */ + break; + } while(match(ipc->re1, loc2)); + return 1; +} + +void +dosub(Rune *rhsbuf) +{ + Rune *lp, *sp; + Rune *rp; + int c, n; + + lp = linebuf; + sp = genbuf; + rp = rhsbuf; + while (lp < loc1) + *sp++ = *lp++; + while(c = *rp++) { + if (c == '&') { + sp = place(sp, loc1, loc2); + continue; + } + if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { + n = c-'0'; + if (subexp[n].s.rsp && subexp[n].e.rep) { + sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); + continue; + } + else { + fprint(2, "sed: Invalid back reference \\%d\n",n); + errexit(); + } + } + *sp++ = c; + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + } + lp = loc2; + loc2 = sp - genbuf + linebuf; + while (*sp++ = *lp++) + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + lp = linebuf; + sp = genbuf; + while (*lp++ = *sp++) + ; + spend = lp-1; +} + +Rune * +place(Rune *sp, Rune *l1, Rune *l2) +{ + while (l1 < l2) { + *sp++ = *l1++; + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + } + return(sp); +} + +char * +trans(int c) +{ + static char buf[] = "\\x0000"; + static char hex[] = "0123456789abcdef"; + + switch(c) { + case '\b': + return "\\b"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case '\\': + return "\\\\"; + } + buf[2] = hex[(c>>12)&0xF]; + buf[3] = hex[(c>>8)&0xF]; + buf[4] = hex[(c>>4)&0xF]; + buf[5] = hex[c&0xF]; + return buf; +} + +void +command(SedCom *ipc) +{ + int i, c; + Rune *p1, *p2; + char *ucp; + Rune *rp; + Rune *execp; + + switch(ipc->command) { + + case ACOM: + *aptr++ = ipc; + if(aptr >= abuf+MAXADDS) { + quit("sed: Too many appends after line %ld\n", + (char *) lnum); + } + *aptr = 0; + break; + case CCOM: + delflag = 1; + if(ipc->active == 1) { + for(rp = ipc->text; *rp; rp++) + Bputrune(&fout, *rp); + Bputc(&fout, '\n'); + } + break; + case DCOM: + delflag++; + break; + case CDCOM: + p1 = p2 = linebuf; + while(*p1 != '\n') { + if(*p1++ == 0) { + delflag++; + return; + } + } + p1++; + while(*p2++ = *p1++) + ; + spend = p2-1; + jflag++; + break; + case EQCOM: + Bprint(&fout, "%ld\n", lnum); + break; + case GCOM: + p1 = linebuf; + p2 = holdsp; + while(*p1++ = *p2++) + ; + spend = p1-1; + break; + case CGCOM: + *spend++ = '\n'; + p1 = spend; + p2 = holdsp; + while(*p1++ = *p2++) + if(p1 >= lbend) + break; + spend = p1-1; + break; + case HCOM: + p1 = holdsp; + p2 = linebuf; + while(*p1++ = *p2++); + hspend = p1-1; + break; + case CHCOM: + *hspend++ = '\n'; + p1 = hspend; + p2 = linebuf; + while(*p1++ = *p2++) + if(p1 >= hend) + break; + hspend = p1-1; + break; + case ICOM: + for(rp = ipc->text; *rp; rp++) + Bputrune(&fout, *rp); + Bputc(&fout, '\n'); + break; + case BCOM: + jflag = 1; + break; + case LCOM: + c = 0; + for (i = 0, rp = linebuf; *rp; rp++) { + c = *rp; + if(c >= 0x20 && c < 0x7F && c != '\\') { + Bputc(&fout, c); + if(i++ > 71) { + Bprint(&fout, "\\\n"); + i = 0; + } + } else { + for (ucp = trans(*rp); *ucp; ucp++){ + c = *ucp; + Bputc(&fout, c); + if(i++ > 71) { + Bprint(&fout, "\\\n"); + i = 0; + } + } + } + } + if(c == ' ') + Bprint(&fout, "\\n"); + Bputc(&fout, '\n'); + break; + case NCOM: + if(!nflag) + putline(&fout, linebuf, spend-linebuf); + + if(aptr > abuf) + arout(); + if((execp = gline(linebuf)) == 0) { + delflag = 1; + break; + } + spend = execp; + break; + case CNCOM: + if(aptr > abuf) + arout(); + *spend++ = '\n'; + if((execp = gline(spend)) == 0) { + delflag = 1; + break; + } + spend = execp; + break; + case PCOM: + putline(&fout, linebuf, spend-linebuf); + break; + case CPCOM: + cpcom: + for(rp = linebuf; *rp && *rp != '\n'; rp++) + Bputc(&fout, *rp); + Bputc(&fout, '\n'); + break; + case QCOM: + if(!nflag) + putline(&fout, linebuf, spend-linebuf); + if(aptr > abuf) + arout(); + exits(0); + case RCOM: + *aptr++ = ipc; + if(aptr >= &abuf[MAXADDS]) + quit("sed: Too many reads after line %ld\n", + (char *) lnum); + *aptr = 0; + break; + case SCOM: + i = substitute(ipc); + if(i && ipc->pfl) + if(ipc->pfl == 1) + putline(&fout, linebuf, spend-linebuf); + else + goto cpcom; + if(i && ipc->fcode) + goto wcom; + break; + + case TCOM: + if(sflag == 0) break; + sflag = 0; + jflag = 1; + break; + + wcom: + case WCOM: + putline(ipc->fcode,linebuf, spend-linebuf); + break; + case XCOM: + p1 = linebuf; + p2 = genbuf; + while(*p2++ = *p1++); + p1 = holdsp; + p2 = linebuf; + while(*p2++ = *p1++); + spend = p2 - 1; + p1 = genbuf; + p2 = holdsp; + while(*p2++ = *p1++); + hspend = p2 - 1; + break; + case YCOM: + p1 = linebuf; + p2 = ipc->text; + for (i = *p2++; *p1; p1++){ + if (*p1 <= i) *p1 = p2[*p1]; + } + break; + } + +} + +void +putline(Biobuf *bp, Rune *buf, int n) +{ + while (n--) + Bputrune(bp, *buf++); + Bputc(bp, '\n'); +} + +int +ecmp(Rune *a, Rune *b, int count) +{ + while(count--) + if(*a++ != *b++) return(0); + return(1); +} + +void +arout(void) +{ + Rune *p1; + Biobuf *fi; + int c; + char *s; + char buf[128]; + + for (aptr = abuf; *aptr; aptr++) { + if((*aptr)->command == ACOM) { + for(p1 = (*aptr)->text; *p1; p1++ ) + Bputrune(&fout, *p1); + Bputc(&fout, '\n'); + } else { + for(s = buf, p1= (*aptr)->text; *p1; p1++) + s += runetochar(s, p1); + *s = '\0'; + if((fi = Bopen(buf, OREAD)) == 0) + continue; + while((c = Bgetc(fi)) >= 0) + Bputc(&fout, c); + Bterm(fi); + } + } + aptr = abuf; + *aptr = 0; +} + +void +errexit(void) +{ + exits("error"); +} + +void +quit (char *msg, char *arg) +{ + fprint(2, "sed: "); + fprint(2, msg, arg); + fprint(2, "\n"); + errexit(); +} + +Rune * +gline(Rune *addr) +{ + long c; + Rune *p; + + static long peekc = 0; + + if (f == 0 && opendata() < 0) + return 0; + sflag = 0; + lnum++; +/* Bflush(&fout);********* dumped 4/30/92 - bobf****/ + do { + p = addr; + for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { + if (c == '\n') { + if ((peekc = Bgetrune(f)) < 0) { + if (fhead == 0) + dolflag = 1; + } + *p = '\0'; + return p; + } + if (c && p < lbend) + *p++ = c; + } + /* return partial final line, adding implicit newline */ + if(p != addr) { + *p = '\0'; + peekc = -1; + if (fhead == 0) + dolflag = 1; + return p; + } + peekc = 0; + Bterm(f); + } while (opendata() > 0); /* Switch to next stream */ + f = 0; + return 0; +} + + /* Data file input section - the intent is to transparently + * catenate all data input streams. + */ +void +enroll(char *filename) /* Add a file to the input file cache */ +{ + FileCache *fp; + + if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) + quit("Out of memory", 0); + if (ftail == 0) + fhead = fp; + else + ftail->next = fp; + ftail = fp; + fp->next = 0; + fp->name = filename; /* 0 => stdin */ +} + +int +opendata(void) +{ + if (fhead == 0) + return -1; + if (fhead->name) { + if ((f = Bopen(fhead->name, OREAD)) == 0) + quit("Can't open %s", fhead->name); + } else { + Binit(&bstdin, 0, OREAD); + f = &bstdin; + } + fhead = fhead->next; + return 1; +} diff --git a/src/lib9/get9root.c b/src/lib9/get9root.c new file mode 100644 index 00000000..858b4e74 --- /dev/null +++ b/src/lib9/get9root.c @@ -0,0 +1,13 @@ +#include <u.h> +#include <libc.h> + +char* +get9root(void) +{ + char *s; + + if((s = getenv("PLAN9")) != 0) + return s; + return "/usr/local/plan9"; +} + diff --git a/src/lib9/unsharp.c b/src/lib9/unsharp.c new file mode 100644 index 00000000..b7db700c --- /dev/null +++ b/src/lib9/unsharp.c @@ -0,0 +1,44 @@ +#include <u.h> +#include <libc.h> + +/* + * I don't want too many of these, + * but the ones we have are just too useful. + */ +static struct { + char *old; + char *new; +} replace[] = { + "#9", nil, /* must be first */ + "#d", "/dev/fd", +}; + +char* +unsharp(char *old) +{ + char *new; + int i, olen, nlen, len; + + if(replace[0].new == nil) + replace[0].new = get9root(); + + for(i=0; i<nelem(replace); i++){ + if(!replace[i].new) + continue; + olen = strlen(replace[i].old); + if(strncmp(old, replace[i].old, olen) != 0 + || (old[olen] != '\0' && old[olen] != '/')) + continue; + nlen = strlen(replace[i].new); + len = strlen(old)+nlen-olen; + new = malloc(len+1); + if(new == nil) + /* Most callers won't check the return value... */ + sysfatal("out of memory translating %s to %s%s", old, replace[i].new, old+olen); + strcpy(new, replace[i].new); + strcpy(new+nlen, old+olen); + assert(strlen(new) == len); + return new; + } + return old; +} diff --git a/src/libventi/parsescore.c b/src/libventi/parsescore.c new file mode 100644 index 00000000..2c38808b --- /dev/null +++ b/src/libventi/parsescore.c @@ -0,0 +1,40 @@ +#include <u.h> +#include <libc.h> +#include <venti.h> + +int +vtparsescore(char *s, char **prefix, uchar score[VtScoreSize]) +{ + int i, c; + char *buf, *colon; + + if((colon = strchr(s, ':')) != nil) + buf = colon+1; + else + buf = s; + + if(strlen(buf) != 2*VtScoreSize) + return -1; + + memset(score, 0, VtScoreSize); + for(i=0; i<2*VtScoreSize; i++){ + if(buf[i] >= '0' && buf[i] <= '9') + c = buf[i] - '0'; + else if(buf[i] >= 'a' && buf[i] <= 'z') + c = buf[i] - 'a' + 10; + else if(buf[i] >= 'A' && buf[i] <= 'Z') + c = buf[i] - 'A' + 10; + else + return -1; + + if((i & 1) == 0) + c <<= 4; + score[i>>1] |= c; + } + if(colon){ + *colon = 0; + *prefix = s; + }else + *prefix = nil; + return 0; +} |