aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/join.c
diff options
context:
space:
mode:
author rsc <devnull@localhost> 2003-11-23 18:04:47 +0000
committer rsc <devnull@localhost> 2003-11-23 18:04:47 +0000
commit bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d (patch)
tree 8ca0fe4e2418e6aa18dc74a236c577a719f6c6ed /src/cmd/join.c
parent f08fdedcee12c06e3ce9ac9bec363915978e8289 (diff)
download plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.gz
plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.bz2
plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.zip
new utilities.
the .C files compile but are renamed to avoid building automatically.
Diffstat (limited to 'src/cmd/join.c')
-rw-r--r-- src/cmd/join.c 369
1 files changed, 369 insertions, 0 deletions
diff --git a/src/cmd/join.c b/src/cmd/join.c
new file mode 100644
index 00000000..7efff976
--- /dev/null
+++ b/src/cmd/join.c
@@ -0,0 +1,369 @@
+/* join F1 F2 on stuff */
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include <ctype.h>
+#define F1 0 /* index of the first input file in f[], buf[] and ppi[] */
+#define F2 1 /* index of the second input file in f[], buf[] and ppi[] */
+#define F0 3 /* olistf[] marker for a "0" entry in the -o list: output() prints the join field */
+#define NFLD 100 /* max field per line */
+#define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) /* compare the join fields of the two current lines */
+FILE *f[2]; /* the two input streams, indexed by F1/F2 */
+Rune buf[2][BUFSIZ]; /* current input line of each file, converted to runes */
+Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
+Rune *s1,*s2; /* assigned once in main(); not read elsewhere in the visible source */
+#define j1 joinj1 /* NOTE(review): presumably renamed to avoid clashing with libm's j1() - confirm */
+#define j2 joinj2 /* NOTE(review): same rename as j1 above - confirm */
+
+int j1 = 1; /* join of this field of file 1 (1-origin until main() decrements it) */
+int j2 = 1; /* join of this field of file 2 (1-origin until main() decrements it) */
+int olist[2*NFLD]; /* output these fields */
+int olistf[2*NFLD]; /* from these files (F1, F2, or F0 for the join field) */
+int no; /* number of entries in olist */
+Rune sep1 = ' '; /* default field separator */
+Rune sep2 = '\t'; /* second input separator; -t sets it equal to sep1 */
+char *sepstr=" "; /* string printed between output fields; replaced by the -t argument */
+int discard; /* count of truncated lines */
+Rune null[BUFSIZ]/* = L""*/; /* text printed in place of missing or empty -o fields; set by -e */
+int a1; /* nonzero after -a1: also print unpaired lines of file 1 */
+int a2; /* nonzero after -a2: also print unpaired lines of file 2 */
+
+char *getoptarg(int*, char***); /* fetch an option's argument (attached or next word) */
+void output(int, int); /* print one joined line; args are the field counts of each side */
+int input(int); /* read and split the next line of file F1 or F2 */
+void oparse(char*); /* parse a comma-separated -o list */
+void error(char*, char*); /* print a message and exit */
+void seek1(void), seek2(void); /* join drivers that rewind file 1 or file 2 respectively */
+Rune *strtorune(Rune *, char *); /* convert a UTF string to a rune string */
+
+
+/*
+ * join [-an] [-e str] [-t c] [-o list] [-1 x] [-2 y] [-j[12] x] file1 file2
+ *
+ * Parses the options into the file-level globals, validates the field
+ * numbers, opens the two inputs ("-" means standard input) and runs
+ * whichever join driver can rewind one of the files.
+ *
+ * Field numbers are given 1-origin on the command line and converted
+ * to 0-origin here; every one of them is range-checked first so that
+ * they can safely index ppi[][] later.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+	char *arg;
+
+	while (argc > 1 && argv[1][0] == '-') {
+		if (argv[1][1] == '\0')
+			break;		/* a lone "-" is a file name (stdin) */
+		switch (argv[1][1]) {
+		case '-':
+			argc--;
+			argv++;
+			goto proceed;
+		case 'a':
+			switch(*getoptarg(&argc, &argv)) {
+			case '1':
+				a1++;
+				break;
+			case '2':
+				a2++;
+				break;
+			default:
+				error("incomplete option -a","");
+			}
+			break;
+		case 'e':
+			arg = getoptarg(&argc, &argv);
+			/* strtorune writes at most one rune per byte plus a terminator */
+			if (strlen(arg) >= sizeof null / sizeof null[0])
+				error("string too long in -e","");
+			strtorune(null, arg);
+			break;
+		case 't':
+			sepstr=getoptarg(&argc, &argv);
+			chartorune(&sep1, sepstr);
+			sep2 = sep1;
+			break;
+		case 'o':
+			/* "-olist" or "-o a,b,c": one comma-separated word */
+			if(argv[1][2]!=0 ||
+				(argc>2 && strchr(argv[2],',')!=0))
+				oparse(getoptarg(&argc, &argv));
+			/* otherwise: each following word that looks like 0, 1.n or 2.n */
+			else for (no = 0; no<2*NFLD && argc>2; no++){
+				if (argv[2][0] == '1' && argv[2][1] == '.') {
+					olistf[no] = F1;
+					olist[no] = atoi(&argv[2][2]);
+				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
+					olist[no] = atoi(&argv[2][2]);
+					olistf[no] = F2;
+				} else if (argv[2][0] == '0')
+					olistf[no] = F0;
+				else
+					break;
+				argc--;
+				argv++;
+			}
+			break;
+		case 'j':
+			if(argc <= 2)
+				break;
+			if (argv[1][2] == '1')
+				j1 = atoi(argv[2]);
+			else if (argv[1][2] == '2')
+				j2 = atoi(argv[2]);
+			else
+				j1 = j2 = atoi(argv[2]);
+			argc--;
+			argv++;
+			break;
+		case '1':
+			j1 = atoi(getoptarg(&argc, &argv));
+			break;
+		case '2':
+			j2 = atoi(getoptarg(&argc, &argv));
+			break;
+		}
+		argc--;
+		argv++;
+	}
+proceed:
+	for (i = 0; i < no; i++) {
+		/* F0 entries carry no field number, so only check the others */
+		if (olistf[i] != F0) {
+			if (olist[i] < 1)
+				error("field number too small in -o","");
+			if (olist[i] > NFLD)
+				error("field number too big in -o","");
+		}
+		olist[i]--;	/* 0 origin */
+	}
+	if (argc != 3)
+		error("usage: join [-1 x -2 y] [-o list] file1 file2","");
+	/* reject join fields that would index outside ppi[][] */
+	if (j1 < 1 || j1 > NFLD || j2 < 1 || j2 > NFLD)
+		error("invalid join field number","");
+	j1--;
+	j2--;	/* everyone else believes in 0 origin */
+	s1 = ppi[F1][j1];
+	s2 = ppi[F2][j2];
+	if (strcmp(argv[1], "-") == 0)
+		f[F1] = stdin;
+	else if ((f[F1] = fopen(argv[1], "r")) == 0)
+		error("can't open %s", argv[1]);
+	if(strcmp(argv[2], "-") == 0) {
+		f[F2] = stdin;
+	} else if ((f[F2] = fopen(argv[2], "r")) == 0)
+		error("can't open %s", argv[2]);
+
+	/* the join needs to re-read one file; ftell() failing means "not seekable" */
+	if(ftell(f[F2]) >= 0)
+		seek2();
+	else if(ftell(f[F1]) >= 0)
+		seek1();
+	else
+		error("neither file is randomly accessible","");
+	if (discard)
+		error("some input line was truncated", "");
+	exits("");
+}
+/*
+ * Compare two NUL-terminated rune strings.
+ * Returns 0 when they are identical, -1 when the first differing rune
+ * of a is smaller than that of b, and 1 otherwise.
+ */
+int runecmp(Rune *a, Rune *b){
+	for(; *a==*b; a++, b++)
+		if(*a=='\0')
+			return 0;	/* reached the end of both strings together */
+	return *a<*b ? -1 : 1;
+}
+/*
+ * Convert the NUL-terminated rune string r to a NUL-terminated UTF
+ * string in buf and return buf.  The caller supplies a buffer that is
+ * large enough; no bound is checked here.
+ */
+char *runetostr(char *buf, Rune *r){
+	char *out = buf;
+
+	while(*r != 0){
+		out += runetochar(out, r);	/* advance by the bytes just written */
+		r++;
+	}
+	*out = '\0';
+	return buf;
+}
+/*
+ * Convert the NUL-terminated UTF string s to a NUL-terminated rune
+ * string in buf and return buf.  The caller supplies a buffer that is
+ * large enough; no bound is checked here.
+ */
+Rune *strtorune(Rune *buf, char *s){
+	Rune *out = buf;
+
+	while(*s != '\0'){
+		s += chartorune(out, s);	/* advance by the bytes just consumed */
+		out++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* lazy. there ought to be a clean way to combine seek1 & seek2 */
+#define get1() n1=input(F1)
+#define get2() n2=input(F2)
+/*
+ * Join driver that rewinds file 2; main() picks it when ftell() works
+ * on file 2.
+ *
+ * n1/n2 are the field counts of the current line of each file (0 at
+ * end of file).  bot2 is the offset of the start of the current file-2
+ * line; top2 is the offset of the first file-2 line after the group of
+ * lines whose join field equals the current file-1 line.  For every
+ * further file-1 line with the same key, file 2 is repositioned to
+ * bot2 and the group is replayed.
+ *
+ * Offsets are kept as long, the type ftell() returns and fseek()
+ * takes, so they are not truncated on large files.
+ */
+void
+seek2(void)
+{
+	int n1, n2;
+	long top2 = 0;
+	long bot2 = ftell(f[F2]);
+
+	get1();
+	get2();
+	while((n1>0 && n2>0) || ((a1||a2) && n1+n2>0)) {
+		if((n1>0 && n2>0 && comp()>0) || n1==0) {
+			/* file-2 line has no partner: maybe print it, then advance file 2 */
+			if(a2) output(0, n2);
+			bot2 = ftell(f[F2]);
+			get2();
+		} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
+			/* file-1 line has no partner: maybe print it, then advance file 1 */
+			if(a1) output(n1, 0);
+			get1();
+		} else /*(n1>0 && n2>0 && comp()==0)*/ {
+			/* pair the current file-1 line with the whole file-2 group */
+			while(n2>0 && comp()==0) {
+				output(n1, n2);
+				top2 = ftell(f[F2]);
+				get2();
+			}
+			/* back to the start of the group, and on to the next file-1 line */
+			fseek(f[F2], bot2, 0);
+			get2();
+			get1();
+			for(;;) {
+				if(n1>0 && n2>0 && comp()==0) {
+					output(n1, n2);
+					get2();
+				} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
+					/* group exhausted for this file-1 line: replay it for the next one */
+					fseek(f[F2], bot2, 0);
+					get2();
+					get1();
+				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
+					/* file 1 has left the group: resume file 2 just past it */
+					fseek(f[F2], top2, 0);
+					bot2 = top2;
+					get2();
+					break;
+				}
+			}
+		}
+	}
+}
+/*
+ * Join driver that rewinds file 1; main() picks it when file 2 is not
+ * seekable but file 1 is.  It mirrors seek2() with the roles of the
+ * two files exchanged.
+ *
+ * n1/n2 are the field counts of the current line of each file (0 at
+ * end of file).  bot1 is the offset of the start of the current file-1
+ * line; top1 is the offset of the first file-1 line after the group of
+ * lines whose join field equals the current file-2 line.
+ *
+ * The group loop advances file 1, so it must stop when file 1 hits end
+ * of file (n1 == 0).  input() leaves the old line in place at EOF, so
+ * testing n2 instead would keep comparing the stale line as equal and
+ * never terminate.
+ *
+ * Offsets are kept as long, the type ftell() returns and fseek()
+ * takes, so they are not truncated on large files.
+ */
+void
+seek1(void)
+{
+	int n1, n2;
+	long top1 = 0;
+	long bot1 = ftell(f[F1]);
+
+	get1();
+	get2();
+	while((n1>0 && n2>0) || ((a1||a2) && n1+n2>0)) {
+		if((n1>0 && n2>0 && comp()>0) || n1==0) {
+			/* file-2 line has no partner: maybe print it, then advance file 2 */
+			if(a2) output(0, n2);
+			get2();
+		} else if((n1>0 && n2>0 && comp()<0) || n2==0) {
+			/* file-1 line has no partner: maybe print it, then advance file 1 */
+			if(a1) output(n1, 0);
+			bot1 = ftell(f[F1]);
+			get1();
+		} else /*(n1>0 && n2>0 && comp()==0)*/ {
+			/* pair the current file-2 line with the whole file-1 group */
+			while(n1>0 && comp()==0) {
+				output(n1, n2);
+				top1 = ftell(f[F1]);
+				get1();
+			}
+			/* back to the start of the group, and on to the next file-2 line */
+			fseek(f[F1], bot1, 0);
+			get2();
+			get1();
+			for(;;) {
+				if(n1>0 && n2>0 && comp()==0) {
+					output(n1, n2);
+					get1();
+				} else if((n1>0 && n2>0 && comp()>0) || n1==0) {
+					/* group exhausted for this file-2 line: replay it for the next one */
+					fseek(f[F1], bot1, 0);
+					get2();
+					get1();
+				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
+					/* file 2 has left the group: resume file 1 just past it */
+					fseek(f[F1], top1, 0);
+					bot1 = top1;
+					get1();
+					break;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Read the next line of file n (F1 or F2) into buf[n] as runes and
+ * split it in place into fields, storing a pointer to each field in
+ * ppi[n][].
+ *
+ * Returns the number of fields found, or 0 at end of file (in which
+ * case buf[n] and ppi[n] are left untouched).  At most NFLD-1 fields
+ * are split; a line that hits that limit, or that does not end in a
+ * newline, is counted in discard.
+ *
+ * Every unused slot of ppi[n] is pointed at an empty string, so that
+ * comp() and output() can look at a field the line does not have
+ * without dereferencing a null or stale pointer.
+ */
+int
+input(int n)
+{
+	int i, c;
+	Rune *bp;
+	Rune **pp;
+	char line[BUFSIZ];
+
+	bp = buf[n];
+	pp = ppi[n];
+	if (fgets(line, BUFSIZ, f[n]) == 0)
+		return(0);
+	strtorune(bp, line);
+	i = 0;
+	do {
+		i++;
+		if (sep1 == ' ')	/* strip multiples */
+			while ((c = *bp) == sep1 || c == sep2)
+				bp++;	/* skip blanks */
+		*pp++ = bp;	/* record beginning */
+		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
+			bp++;
+		*bp++ = '\0';	/* mark end by overwriting blank */
+	} while (c != '\n' && c != '\0' && i < NFLD-1);
+	if (c != '\n')
+		discard++;
+
+	/* bp-1 is the NUL just written above, i.e. an empty rune string */
+	for (bp--; pp <= &ppi[n][NFLD]; pp++)
+		*pp = bp;
+	return(i);
+}
+
+/*
+ * Print one output line.  on1 and on2 are the field counts of the
+ * current line of file 1 and file 2; 0 means that side is absent
+ * (an unpaired line printed because of -a).
+ *
+ * Without -o: the join field, then the remaining fields of file 1,
+ * then the remaining fields of file 2, separated by sepstr.
+ * With -o: exactly the listed fields; a field the line does not have,
+ * or that is empty, is replaced by the -e string (null).
+ */
+void
+output(int on1, int on2)	/* print items from olist */
+{
+	/*
+	 * One input byte can decode to a rune that re-encodes to several
+	 * bytes (an invalid byte becomes the multi-byte error rune), so
+	 * the conversion buffer is sized for the worst case per rune.
+	 */
+	enum { Maxutf = 4 };
+	int i, fld, have;
+	Rune *temp;
+	char out[Maxutf*BUFSIZ];
+
+	if (no <= 0) {	/* default case */
+		/* take the join field from whichever side actually has it */
+		if (on1 > j1)
+			temp = ppi[F1][j1];
+		else if (on2 > j2)
+			temp = ppi[F2][j2];
+		else
+			temp = 0;
+		if (temp != 0)
+			printf("%s", runetostr(out, temp));
+		for (i = 0; i < on1; i++)
+			if (i != j1)
+				printf("%s%s", sepstr, runetostr(out, ppi[F1][i]));
+		for (i = 0; i < on2; i++)
+			if (i != j2)
+				printf("%s%s", sepstr, runetostr(out, ppi[F2][i]));
+		printf("\n");
+	} else {
+		for (i = 0; i < no; i++) {
+			if (olistf[i] == F0) {
+				/* F0 is only a marker, never an index into ppi[] */
+				if (on1 > j1)
+					temp = ppi[F1][j1];
+				else if (on2 > j2)
+					temp = ppi[F2][j2];
+				else
+					temp = null;
+			} else {
+				fld = olist[i];
+				have = olistf[i]==F1 ? on1 : on2;
+				if (fld < 0 || fld >= have)
+					temp = null;	/* line has no such field */
+				else {
+					temp = ppi[olistf[i]][fld];
+					if (*temp == 0)
+						temp = null;	/* empty field */
+				}
+			}
+			printf("%s", runetostr(out, temp));
+			if (i == no - 1)
+				printf("\n");
+			else
+				printf("%s", sepstr);
+		}
+	}
+}
+
+/*
+ * Report a fatal error on standard error as "join: <message>" and
+ * exit.  s1 is a printf-style format that may contain one %s, which
+ * is filled from s2; s1 itself is also passed to exits() as the exit
+ * status string.
+ */
+void
+error(char *s1, char *s2)
+{
+	fputs("join: ", stderr);
+	fprintf(stderr, s1, s2);
+	fputc('\n', stderr);
+	exits(s1);
+}
+
+/*
+ * Return the argument of the option in argv[1].
+ *
+ * If text follows the option letter in the same word ("-tX"), that
+ * text is returned and the argument vector is left alone.  Otherwise
+ * the next word is the argument: *argcp and *argvp are stepped past
+ * the option word and the next word is returned.  A missing next
+ * word, or one that starts with '-', is reported through error().
+ */
+char *
+getoptarg(int *argcp, char ***argvp)
+{
+	char **av = *argvp;
+	char *opt = av[1];
+
+	/* attached form: the argument shares the option's word */
+	if(opt[2] != '\0')
+		return opt + 2;
+
+	/* detached form: the following word must exist and not be an option */
+	if(*argcp <= 2 || av[2][0] == '-')
+		error("incomplete option %s", opt);
+	*argcp -= 1;
+	*argvp = av + 1;
+	return av[2];
+}
+
+/*
+ * Parse a -o list given as one word, e.g. "1.2,2.13,0", into
+ * olistf[]/olist[] and set no to the number of entries.
+ *
+ * Each entry is either "0" (the join field, stored as F0) or
+ * "<file>.<field>" with file 1 or 2 and a decimal field number of any
+ * length.  Entries may be followed by a comma.  Field numbers are
+ * stored 1-origin exactly as written; numbers above NFLD are stored
+ * as NFLD+1 so that a later range check still sees them as too big
+ * without the value overflowing an int.  Anything else is reported as
+ * "invalid -o list".  At most 2*NFLD entries are stored.
+ */
+void
+oparse(char *s)
+{
+	long fld;
+	char *end;
+
+	for (no = 0; no<2*NFLD && *s != '\0'; no++) {
+		if (*s == '0') {
+			olistf[no] = F0;
+			olist[no] = 0;	/* unused for F0; keep it well defined */
+			s++;
+		} else if ((*s == '1' || *s == '2') && s[1] == '.'
+		    && isdigit((unsigned char)s[2])) {
+			olistf[no] = *s=='1'? F1: F2;
+			/* s[2] is a digit, so strtol consumes the digits only */
+			fld = strtol(s + 2, &end, 10);
+			if (fld > NFLD)
+				fld = NFLD + 1;
+			olist[no] = (int)fld;
+			s = end;	/* step over the whole number, not just one digit */
+		} else
+			error("invalid -o list", "");
+		if (*s == ',')
+			s++;
+	}
+}