diff options
author | rsc <devnull@localhost> | 2003-11-23 18:04:47 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2003-11-23 18:04:47 +0000 |
commit | bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d (patch) | |
tree | 8ca0fe4e2418e6aa18dc74a236c577a719f6c6ed /src/cmd/join.c | |
parent | f08fdedcee12c06e3ce9ac9bec363915978e8289 (diff) | |
download | plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.gz plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.bz2 plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.zip |
new utilities.
the .C files compile but are renamed to avoid building automatically.
Diffstat (limited to 'src/cmd/join.c')
-rw-r--r-- | src/cmd/join.c | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/src/cmd/join.c b/src/cmd/join.c new file mode 100644 index 00000000..7efff976 --- /dev/null +++ b/src/cmd/join.c @@ -0,0 +1,369 @@ +/* join F1 F2 on stuff */ +#include <u.h> +#include <libc.h> +#include <stdio.h> +#include <ctype.h> +#define F1 0 +#define F2 1 +#define F0 3 +#define NFLD 100 /* max field per line */ +#define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) +FILE *f[2]; +Rune buf[2][BUFSIZ]; /*input lines */ +Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ +Rune *s1,*s2; +#define j1 joinj1 +#define j2 joinj2 + +int j1 = 1; /* join of this field of file 1 */ +int j2 = 1; /* join of this field of file 2 */ +int olist[2*NFLD]; /* output these fields */ +int olistf[2*NFLD]; /* from these files */ +int no; /* number of entries in olist */ +Rune sep1 = ' '; /* default field separator */ +Rune sep2 = '\t'; +char *sepstr=" "; +int discard; /* count of truncated lines */ +Rune null[BUFSIZ]/* = L""*/; +int a1; +int a2; + +char *getoptarg(int*, char***); +void output(int, int); +int input(int); +void oparse(char*); +void error(char*, char*); +void seek1(void), seek2(void); +Rune *strtorune(Rune *, char *); + + +void +main(int argc, char **argv) +{ + int i; + + while (argc > 1 && argv[1][0] == '-') { + if (argv[1][1] == '\0') + break; + switch (argv[1][1]) { + case '-': + argc--; + argv++; + goto proceed; + case 'a': + switch(*getoptarg(&argc, &argv)) { + case '1': + a1++; + break; + case '2': + a2++; + break; + default: + error("incomplete option -a",""); + } + break; + case 'e': + strtorune(null, getoptarg(&argc, &argv)); + break; + case 't': + sepstr=getoptarg(&argc, &argv); + chartorune(&sep1, sepstr); + sep2 = sep1; + break; + case 'o': + if(argv[1][2]!=0 || + argc>2 && strchr(argv[2],',')!=0) + oparse(getoptarg(&argc, &argv)); + else for (no = 0; no<2*NFLD && argc>2; no++){ + if (argv[2][0] == '1' && argv[2][1] == '.') { + olistf[no] = F1; + olist[no] = atoi(&argv[2][2]); + } else if (argv[2][0] == '2' && argv[2][1] == '.') { + olist[no] = atoi(&argv[2][2]); + olistf[no] = F2; + } else if (argv[2][0] == '0') + olistf[no] = F0; + else + break; + argc--; + argv++; + } + break; + case 'j': + if(argc <= 2) + break; + if (argv[1][2] == '1') + j1 = atoi(argv[2]); + else if (argv[1][2] == '2') + j2 = atoi(argv[2]); + else + j1 = j2 = atoi(argv[2]); + argc--; + argv++; + break; + case '1': + j1 = atoi(getoptarg(&argc, &argv)); + break; + case '2': + j2 = atoi(getoptarg(&argc, &argv)); + break; + } + argc--; + argv++; + } +proceed: + for (i = 0; i < no; i++) + if (olist[i]-- > NFLD) /* 0 origin */ + error("field number too big in -o",""); + if (argc != 3) + error("usage: join [-1 x -2 y] [-o list] file1 file2",""); + j1--; + j2--; /* everyone else believes in 0 origin */ + s1 = ppi[F1][j1]; + s2 = ppi[F2][j2]; + if (strcmp(argv[1], "-") == 0) + f[F1] = stdin; + else if ((f[F1] = fopen(argv[1], "r")) == 0) + error("can't open %s", argv[1]); + if(strcmp(argv[2], "-") == 0) { + f[F2] = stdin; + } else if ((f[F2] = fopen(argv[2], "r")) == 0) + error("can't open %s", argv[2]); + + if(ftell(f[F2]) >= 0) + seek2(); + else if(ftell(f[F1]) >= 0) + seek1(); + else + error("neither file is randomly accessible",""); + if (discard) + error("some input line was truncated", ""); + exits(""); +} +int runecmp(Rune *a, Rune *b){ + while(*a==*b){ + if(*a=='\0') return 0; + a++; + b++; + } + if(*a<*b) return -1; + return 1; +} +char *runetostr(char *buf, Rune *r){ + char *s; + for(s=buf;*r;r++) s+=runetochar(s, r); + *s='\0'; + return buf; +} +Rune *strtorune(Rune *buf, char *s){ + Rune *r; + for(r=buf;*s;r++) s+=chartorune(r, s); + *r='\0'; + return buf; +} +/* lazy. there ought to be a clean way to combine seek1 & seek2 */ +#define get1() n1=input(F1) +#define get2() n2=input(F2) +void +seek2() +{ + int n1, n2; + int top2=0; + int bot2 = ftell(f[F2]); + get1(); + get2(); + while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { + if(n1>0 && n2>0 && comp()>0 || n1==0) { + if(a2) output(0, n2); + bot2 = ftell(f[F2]); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + if(a1) output(n1, 0); + get1(); + } else /*(n1>0 && n2>0 && comp()==0)*/ { + while(n2>0 && comp()==0) { + output(n1, n2); + top2 = ftell(f[F2]); + get2(); + } + fseek(f[F2], bot2, 0); + get2(); + get1(); + for(;;) { + if(n1>0 && n2>0 && comp()==0) { + output(n1, n2); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + fseek(f[F2], bot2, 0); + get2(); + get1(); + } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ + fseek(f[F2], top2, 0); + bot2 = top2; + get2(); + break; + } + } + } + } +} +void +seek1() +{ + int n1, n2; + int top1=0; + int bot1 = ftell(f[F1]); + get1(); + get2(); + while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { + if(n1>0 && n2>0 && comp()>0 || n1==0) { + if(a2) output(0, n2); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + if(a1) output(n1, 0); + bot1 = ftell(f[F1]); + get1(); + } else /*(n1>0 && n2>0 && comp()==0)*/ { + while(n2>0 && comp()==0) { + output(n1, n2); + top1 = ftell(f[F1]); + get1(); + } + fseek(f[F1], bot1, 0); + get2(); + get1(); + for(;;) { + if(n1>0 && n2>0 && comp()==0) { + output(n1, n2); + get1(); + } else if(n1>0 && n2>0 && comp()>0 || n1==0) { + fseek(f[F1], bot1, 0); + get2(); + get1(); + } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ + fseek(f[F1], top1, 0); + bot1 = top1; + get1(); + break; + } + } + } + } +} + +int +input(int n) /* get input line and split into fields */ +{ + register int i, c; + Rune *bp; + Rune **pp; + char line[BUFSIZ]; + + bp = buf[n]; + pp = ppi[n]; + if (fgets(line, BUFSIZ, f[n]) == 0) + return(0); + strtorune(bp, line); + i = 0; + do { + i++; + if (sep1 == ' ') /* strip multiples */ + while ((c = *bp) == sep1 || c == sep2) + bp++; /* skip blanks */ + *pp++ = bp; /* record beginning */ + while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') + bp++; + *bp++ = '\0'; /* mark end by overwriting blank */ + } while (c != '\n' && c != '\0' && i < NFLD-1); + if (c != '\n') + discard++; + + *pp = 0; + return(i); +} + +void +output(int on1, int on2) /* print items from olist */ +{ + int i; + Rune *temp; + char buf[BUFSIZ]; + + if (no <= 0) { /* default case */ + printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); + for (i = 0; i < on1; i++) + if (i != j1) + printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); + for (i = 0; i < on2; i++) + if (i != j2) + printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); + printf("\n"); + } else { + for (i = 0; i < no; i++) { + if (olistf[i]==F0 && on1>j1) + temp = ppi[F1][j1]; + else if (olistf[i]==F0 && on2>j2) + temp = ppi[F2][j2]; + else { + temp = ppi[olistf[i]][olist[i]]; + if(olistf[i]==F1 && on1<=olist[i] || + olistf[i]==F2 && on2<=olist[i] || + *temp==0) + temp = null; + } + printf("%s", runetostr(buf, temp)); + if (i == no - 1) + printf("\n"); + else + printf("%s", sepstr); + } + } +} + +void +error(char *s1, char *s2) +{ + fprintf(stderr, "join: "); + fprintf(stderr, s1, s2); + fprintf(stderr, "\n"); + exits(s1); +} + +char * +getoptarg(int *argcp, char ***argvp) +{ + int argc = *argcp; + char **argv = *argvp; + if(argv[1][2] != 0) + return &argv[1][2]; + if(argc<=2 || argv[2][0]=='-') + error("incomplete option %s", argv[1]); + *argcp = argc-1; + *argvp = ++argv; + return argv[1]; +} + +void +oparse(char *s) +{ + for (no = 0; no<2*NFLD && *s; no++, s++) { + switch(*s) { + case 0: + return; + case '0': + olistf[no] = F0; + break; + case '1': + case '2': + if(s[1] == '.' && isdigit(s[2])) { + olistf[no] = *s=='1'? F1: F2; + olist[no] = atoi(s += 2); + break; + } /* fall thru */ + default: + error("invalid -o list", ""); + } + if(s[1] == ',') + s++; + } +} |