From bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d Mon Sep 17 00:00:00 2001 From: rsc Date: Sun, 23 Nov 2003 18:04:47 +0000 Subject: new utilities. the .C files compile but are renamed to avoid building automatically. --- src/cmd/uniq.c | 169 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 src/cmd/uniq.c (limited to 'src/cmd/uniq.c') diff --git a/src/cmd/uniq.c b/src/cmd/uniq.c new file mode 100644 index 00000000..122fb5ed --- /dev/null +++ b/src/cmd/uniq.c @@ -0,0 +1,169 @@ +/* + * Deal with duplicated lines in a file + */ +#include +#include +#include +#include + +#define SIZE 8000 + +int fields = 0; +int letters = 0; +int linec = 0; +char mode; +int uniq; +char *b1, *b2; +long bsize; +Biobuf fin; +Biobuf fout; + +int gline(char *buf); +void pline(char *buf); +int equal(char *b1, char *b2); +char* skip(char *s); + +void +main(int argc, char *argv[]) +{ + int f; + + bsize = SIZE; + b1 = malloc(bsize); + b2 = malloc(bsize); + f = 0; + while(argc > 1) { + if(*argv[1] == '-') { + if(isdigit(argv[1][1])) + fields = atoi(&argv[1][1]); + else + mode = argv[1][1]; + argc--; + argv++; + continue; + } + if(*argv[1] == '+') { + letters = atoi(&argv[1][1]); + argc--; + argv++; + continue; + } + f = open(argv[1], 0); + if(f < 0) { + fprint(2, "cannot open %s\n", argv[1]); + exits("open"); + } + break; + } + if(argc > 2) { + fprint(2, "unexpected argument %s\n", argv[2]); + exits("arg"); + } + Binit(&fin, f, OREAD); + Binit(&fout, 1, OWRITE); + + if(gline(b1)) + exits(0); + for(;;) { + linec++; + if(gline(b2)) { + pline(b1); + exits(0); + } + if(!equal(b1, b2)) { + pline(b1); + linec = 0; + do { + linec++; + if(gline(b1)) { + pline(b2); + exits(0); + } + } while(equal(b2, b1)); + pline(b2); + linec = 0; + } + } +} + +int +gline(char *buf) +{ + char *p; + + p = Brdline(&fin, '\n'); + if(p == 0) + return 1; + if(fin.rdline >= bsize-1) { + fprint(2, "line too long\n"); + exits("too long"); + } + memmove(buf, p, fin.rdline); + buf[fin.rdline-1] = 0; + return 0; +} + +void +pline(char *buf) +{ + + switch(mode) { + + case 'u': + if(uniq) { + uniq = 0; + return; + } + break; + + case 'd': + if(uniq) + break; + return; + + case 'c': + Bprint(&fout, "%4d ", linec); + } + uniq = 0; + Bprint(&fout, "%s\n", buf); +} + +int +equal(char *b1, char *b2) +{ + char c; + + if(fields || letters) { + b1 = skip(b1); + b2 = skip(b2); + } + for(;;) { + c = *b1++; + if(c != *b2++) { + if(c == 0 && mode == 's') + return 1; + return 0; + } + if(c == 0) { + uniq++; + return 1; + } + } +} + +char* +skip(char *s) +{ + int nf, nl; + + nf = nl = 0; + while(nf++ < fields) { + while(*s == ' ' || *s == '\t') + s++; + while(!(*s == ' ' || *s == '\t' || *s == 0) ) + s++; + } + while(nl++ < letters && *s != 0) + s++; + return s; +} -- cgit v1.2.3