about summary refs log tree commit diff
path: root/src/cmd/uniq.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/uniq.c')
-rw-r--r-- src/cmd/uniq.c 169
1 files changed, 169 insertions, 0 deletions
diff --git a/src/cmd/uniq.c b/src/cmd/uniq.c
new file mode 100644
index 00000000..122fb5ed
--- /dev/null
+++ b/src/cmd/uniq.c
@@ -0,0 +1,169 @@
+/*
+ * Deal with duplicated lines in a file
+ */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+
+enum
+{
+	SIZE = 8000	/* bytes allocated for each of the two line buffers */
+};
+
+/* comparison options, set from the command line in main */
+int fields;		/* -N: leading fields skipped by skip() */
+int letters;		/* +N: characters skipped by skip() after the fields */
+char mode;		/* option letter; pline acts on 'u', 'd', 'c', equal on 's' */
+
+/* state of the run of equal lines currently being collected */
+int linec;		/* lines counted in the run; printed in mode 'c' */
+int uniq;		/* bumped by equal() on an exact match, cleared by pline() */
+
+/* line buffers, each bsize bytes, allocated in main */
+long bsize;
+char *b1;
+char *b2;
+
+/* buffered input and output streams, initialised in main */
+Biobuf fin;
+Biobuf fout;
+
+int gline(char *buf);
+void pline(char *buf);
+int equal(char *b1, char *b2);
+char* skip(char *s);
+
+/*
+ * Parse the command line, then copy the input to standard output with
+ * each run of adjacent equal lines handed to pline() once.
+ *
+ * Arguments, examined left to right:
+ *	-N	N starting with a digit: number of fields to skip (fields)
+ *	-x	any other character after '-' is stored in mode
+ *	+N	number of characters to skip (letters)
+ *	file	opened for reading; argument parsing stops here
+ * With no file argument, file descriptor 0 is read.  Any argument
+ * following the file name is rejected.
+ */
+void
+main(int argc, char *argv[])
+{
+	int f;
+
+	bsize = SIZE;
+	b1 = malloc(bsize);
+	b2 = malloc(bsize);
+	/* gline copies into these buffers unconditionally, so fail early */
+	if(b1 == 0 || b2 == 0) {
+		fprint(2, "out of memory\n");
+		exits("malloc");
+	}
+	f = 0;
+	while(argc > 1) {
+		if(*argv[1] == '-') {
+			/* cast: a plain char may be negative, which isdigit does not accept */
+			if(isdigit((uchar)argv[1][1]))
+				fields = atoi(&argv[1][1]);
+			else
+				mode = argv[1][1];
+			argc--;
+			argv++;
+			continue;
+		}
+		if(*argv[1] == '+') {
+			letters = atoi(&argv[1][1]);
+			argc--;
+			argv++;
+			continue;
+		}
+		f = open(argv[1], 0);
+		if(f < 0) {
+			fprint(2, "cannot open %s\n", argv[1]);
+			exits("open");
+		}
+		break;
+	}
+	if(argc > 2) {
+		fprint(2, "unexpected argument %s\n", argv[2]);
+		exits("arg");
+	}
+	Binit(&fin, f, OREAD);
+	Binit(&fout, 1, OWRITE);
+
+	/* empty input: nothing to print */
+	if(gline(b1))
+		exits(0);
+	/*
+	 * b1 holds the first line of the current run and b2 receives the
+	 * lines that follow it.  When a different line arrives in b2 the
+	 * inner loop swaps the roles of the two buffers, so a line is
+	 * never copied from one buffer to the other.
+	 */
+	for(;;) {
+		linec++;
+		if(gline(b2)) {
+			pline(b1);
+			exits(0);
+		}
+		if(!equal(b1, b2)) {
+			pline(b1);
+			linec = 0;
+			/* b2 now starts a run; read its followers into b1 */
+			do {
+				linec++;
+				if(gline(b1)) {
+					pline(b2);
+					exits(0);
+				}
+			} while(equal(b2, b1));
+			pline(b2);
+			linec = 0;
+		}
+	}
+}
+
+/*
+ * Read the next input line from fin into buf as a NUL-terminated
+ * string without its trailing newline.
+ *
+ * buf must have room for bsize bytes.
+ * Returns 0 when a line was stored and 1 at end of input.
+ * Prints "line too long" and exits with status "too long" when the
+ * line does not fit.
+ */
+int
+gline(char *buf)
+{
+	char *p;
+	int n;
+
+	p = Brdline(&fin, '\n');
+	n = Blinelen(&fin);
+	if(n >= bsize-1) {
+		fprint(2, "line too long\n");
+		exits("too long");
+	}
+	if(p == 0) {
+		/*
+		 * Brdline also returns 0 when it finds no newline, either
+		 * because the line overflows its buffer or because the last
+		 * line of the file is unterminated; Blinelen is zero only at
+		 * a true end of file.  Over-long lines were rejected above,
+		 * so what remains here is an unterminated final line.
+		 */
+		if(n <= 0)
+			return 1;
+		if(Bread(&fin, buf, n) != n)
+			return 1;
+		buf[n] = 0;
+		return 0;
+	}
+	memmove(buf, p, n);
+	/* n counts the newline; overwrite it with the terminator */
+	buf[n-1] = 0;
+	return 0;
+}
+
+/*
+ * Write buf, followed by a newline, to fout, subject to mode:
+ *	'u'	suppress the line if equal() matched it (uniq nonzero)
+ *	'd'	suppress the line unless equal() matched it
+ *	'c'	precede the line with the count in linec
+ * Any other mode prints the line unconditionally.
+ * uniq is cleared except when mode 'd' suppresses the line.
+ */
+void
+pline(char *buf)
+{
+	if(mode == 'u' && uniq) {
+		uniq = 0;
+		return;
+	}
+	if(mode == 'd' && !uniq)
+		return;
+	if(mode == 'c')
+		Bprint(&fout, "%4d ", linec);
+
+	uniq = 0;
+	Bprint(&fout, "%s\n", buf);
+}
+
+/*
+ * Compare two lines, ignoring the leading part selected by fields and
+ * letters when either is set.
+ *
+ * Returns 1 if the compared parts are identical, and in that case also
+ * increments uniq.  In mode 's' it additionally returns 1, without
+ * touching uniq, when b1 ends where b2 still has characters.
+ * Returns 0 otherwise.
+ */
+int
+equal(char *b1, char *b2)
+{
+	if(fields != 0 || letters != 0) {
+		b1 = skip(b1);
+		b2 = skip(b2);
+	}
+	/* walk both strings while they agree */
+	while(*b1 == *b2) {
+		if(*b1 == 0) {
+			uniq++;
+			return 1;
+		}
+		b1++;
+		b2++;
+	}
+	/* first difference: only a b1 that ran out counts, and only in mode 's' */
+	if(*b1 == 0 && mode == 's')
+		return 1;
+	return 0;
+}
+
+/*
+ * Return a pointer into s past the part that comparison ignores:
+ * first `fields` fields, each being a run of blanks or tabs followed
+ * by a run of other characters, then up to `letters` further
+ * characters.  The pointer never moves beyond the terminating NUL.
+ */
+char*
+skip(char *s)
+{
+	int i;
+
+	for(i = 0; i < fields; i++) {
+		/* leading white space of the field */
+		while(*s == ' ' || *s == '\t')
+			s++;
+		/* the field itself */
+		while(*s != ' ' && *s != '\t' && *s != 0)
+			s++;
+	}
+	for(i = 0; i < letters && *s != 0; i++)
+		s++;
+	return s;
+}