aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/tr.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/tr.c')
-rw-r--r--src/cmd/tr.c356
1 files changed, 356 insertions, 0 deletions
diff --git a/src/cmd/tr.c b/src/cmd/tr.c
new file mode 100644
index 00000000..39ba747c
--- /dev/null
+++ b/src/cmd/tr.c
@@ -0,0 +1,356 @@
+#include <u.h>
+#include <libc.h>
+
+typedef struct PCB /* Parse control block: tracks progress through one set specification */
+{
+ char *base; /* start of specification; Prewind() resets current to this */
+ char *current; /* current parse point (next unread byte of the specification) */
+ long last; /* last Rune returned by canon(); -1 before any Rune has been returned */
+ long final; /* final Rune of the range being expanded; -1 when not inside a range */
+} Pcb;
+
+uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; /* bits[k] is the mask for bit k of a byte */
+
+#define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07]) /* set bit c of bitmap a; c is evaluated twice */
+#define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07]) /* clear bit c of bitmap a; c is evaluated twice */
+#define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07]) /* non-zero iff bit c of bitmap a is set */
+
+#define MAXRUNE 0xFFFF /* largest rune value the bitmaps below have a bit for */
+
+uchar f[(MAXRUNE+1)/8]; /* bitmap built from the first (from) specification */
+uchar t[(MAXRUNE+1)/8]; /* bitmap built from the second (to) specification; consulted when squeezing */
+char wbuf[4096]; /* output buffer filled by writerune(), emptied by wflush() */
+char *wptr; /* next free byte in wbuf; 0 until the first writerune() or wflush() */
+
+Pcb pfrom, pto; /* parse state for argv[0] and argv[1], set up in main() */
+
+int cflag; /* -c given */
+int dflag; /* -d given */
+int sflag; /* -s given */
+
+void complement(void); /* run when -c is given without -d */
+void delete(void); /* run when -d is given */
+void squeeze(void); /* declared only; no definition is visible in this file */
+void translit(void); /* run when neither -c nor -d is given */
+void error(char*); /* print message on fd 2 and exit */
+long canon(Pcb*); /* next rune of a specification, ranges expanded; -1 at end */
+char *getrune(char*, Rune*); /* decode one rune, handling \ escapes; returns the advanced pointer */
+void Pinit(Pcb*, char*); /* start parsing a specification string */
+void Prewind(Pcb *p); /* restart parsing from the beginning of the specification */
+int readrune(int, long*); /* 1 with a rune stored through the pointer, 0 at end of input */
+void wflush(int); /* write out whatever writerune() has buffered */
+void writerune(int, Rune); /* buffer one rune for output */
+
+/*
+ * Entry point: parse the -c, -d and -s flags, set up the parse control
+ * blocks for the one or two set specifications, check the argument
+ * count for the selected mode and dispatch to delete(), complement()
+ * or translit().
+ */
+void
+main(int argc, char **argv)
+{
+	int want;
+
+	ARGBEGIN{
+	case 'c':
+		cflag++;
+		break;
+	case 'd':
+		dflag++;
+		break;
+	case 's':
+		sflag++;
+		break;
+	default:
+		error("bad option");
+	}ARGEND
+
+	if(argc >= 1)
+		Pinit(&pfrom, argv[0]);
+	if(argc >= 2)
+		Pinit(&pto, argv[1]);
+
+	/* -d without -s takes exactly one specification; every other mode takes two */
+	want = (dflag && !sflag) ? 1 : 2;
+	if(argc != want)
+		error("arg count");
+
+	if(dflag)
+		delete();
+	else if(cflag)
+		complement();
+	else
+		translit();
+	exits(0);
+}
+
+/*
+ * -d mode: copy standard input to standard output, dropping every rune
+ * whose bit is set in f.  f holds the from-set, or its complement when
+ * -c is given.  With -s, a rune that survives and belongs to the to-set
+ * is also dropped when it repeats the rune written immediately before it.
+ */
+void
+delete(void)
+{
+	long c, prev;
+
+	/* with -c start from "delete everything" and knock out the named runes */
+	if (cflag)
+		memset((char *) f, 0xff, sizeof f);
+	while ((c = canon(&pfrom)) >= 0) {
+		if (cflag)
+			CLEARBIT(f, c);
+		else
+			SETBIT(f, c);
+	}
+	if (sflag)
+		while ((c = canon(&pto)) >= 0)
+			SETBIT(t, c);
+
+	prev = 0x10000;		/* one above MAXRUNE: matches no input rune */
+	while (readrune(0, &c) > 0) {
+		if (BITSET(f, c))
+			continue;		/* deleted */
+		if (c == prev && BITSET(t, c))
+			continue;		/* squeezed repeat */
+		prev = c;
+		writerune(1, (Rune) c);
+	}
+	wflush(1);
+}
+
+/*
+ * -c mode (without -d): translate every rune that is NOT in the from-set.
+ *
+ * A table p[0..high] is built, where high is the largest rune named in
+ * either specification.  Runes in the from-set map to themselves; the
+ * remaining runes, in increasing order, take successive runes of the
+ * to-set, the last to-set rune being repeated once the to-set runs out.
+ * Input runes above high all map to one fallback rune, dflt.
+ * With -s, a translated rune that belongs to the to-set is dropped when
+ * it repeats the rune written immediately before it.
+ */
+void
+complement(void)
+{
+	Rune *p;
+	int i;
+	long from, to, lastc, high, dflt;
+
+	lastc = 0;
+	high = 0;
+	while ((from = canon(&pfrom)) >= 0) {
+		if (from > high)
+			high = from;
+		SETBIT(f, from);
+	}
+	/* >= 0, not > 0: a \000 inside the to-set must not end the scan early */
+	while ((to = canon(&pto)) >= 0) {
+		if (to > high)
+			high = to;
+		SETBIT(t, to);
+	}
+	Prewind(&pto);
+	if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
+		error("can't allocate memory");
+
+	dflt = -1;
+	for (i = 0; i <= high; i++) {
+		if (BITSET(f, i)) {
+			p[i] = i;
+			continue;
+		}
+		if ((to = canon(&pto)) < 0)
+			to = lastc;
+		else
+			lastc = to;
+		p[i] = to;
+		dflt = to;	/* runes above high reuse the last target handed out */
+	}
+	/*
+	 * If every rune up to high was in the from-set, no target has been
+	 * handed out yet; take the first one so that runes above high are
+	 * not written as (Rune)-1.
+	 */
+	if (dflt < 0) {
+		if ((dflt = canon(&pto)) < 0)
+			dflt = lastc;
+	}
+
+	lastc = 0x10000;	/* one above MAXRUNE: matches no output rune */
+	while (readrune(0, &from) > 0) {
+		if (from > high)
+			from = dflt;
+		else
+			from = p[from];
+		if (sflag && from == lastc && BITSET(t, from))
+			continue;	/* squeezed repeat */
+		lastc = from;
+		writerune(1, (Rune) from);
+	}
+	wflush(1);
+}
+
+/*
+ * Default mode: translate each rune of the from-set to the rune at the
+ * same position in the to-set; when the to-set is shorter, its last rune
+ * (or 0 if it is empty) is used for the remaining from-set runes.
+ * A from-set rune given two different targets is an error.
+ * With -s, a translated rune that belongs to the to-set is dropped when
+ * it repeats the rune written immediately before it.
+ */
+void
+translit(void)
+{
+	Rune *map;
+	int k;
+	long in, out, prev, top;
+
+	/* first pass over the from-set: find the table size */
+	top = 0;
+	while ((in = canon(&pfrom)) >= 0)
+		if (in > top)
+			top = in;
+	Prewind(&pfrom);
+
+	map = (Rune *) malloc((top+1)*sizeof(Rune));
+	if (map == 0)
+		error("can't allocate memory");
+	for (k = 0; k <= top; k++)
+		map[k] = k;
+
+	/* second pass: pair from-set runes with to-set runes */
+	prev = 0;
+	while ((in = canon(&pfrom)) >= 0) {
+		out = canon(&pto);
+		if (out < 0)
+			out = prev;
+		else
+			prev = out;
+		if (BITSET(f,in) && map[in] != out)
+			error("ambiguous translation");
+		SETBIT(f,in);
+		map[in] = out;
+		SETBIT(t,out);
+	}
+	/* any to-set runes left over still take part in squeezing */
+	while ((out = canon(&pto)) >= 0)
+		SETBIT(t,out);
+
+	prev = 0x10000;		/* one above MAXRUNE: matches no output rune */
+	while (readrune(0, &in) > 0) {
+		if (in <= top)
+			in = map[in];
+		if (sflag && in == prev && BITSET(t,in))
+			continue;	/* squeezed repeat */
+		prev = in;
+		writerune(1, (Rune) in);
+	}
+	wflush(1);
+}
+
+/*
+ * Read the next rune from fd through a private 4096-byte buffer.
+ *
+ * Returns 1 with the rune stored in *rp, or 0 when read() reports end
+ * of input; a read failure is reported through error().  Bytes of an
+ * incomplete sequence still pending at end of input are discarded.
+ *
+ * The buffer state (i = next unread byte, n = bytes held) is static, so
+ * the function serves a single input stream.
+ */
+int
+readrune(int fd, long *rp)
+{
+	Rune r;
+	int j;
+	static int i, n;
+	static char buf[4096];
+
+	j = i;		/* start of the rune being assembled */
+	for (;;) {
+		if (i >= n) {
+			/* buffered output is written out before each read */
+			wflush(1);
+			/*
+			 * Carry the partial rune to the front of the buffer.
+			 * Source and destination can overlap (e.g. j == 1 with
+			 * two bytes pending), so memmove, not memcpy.
+			 */
+			if (j != i)
+				memmove(buf, buf+j, n-j);
+			i = n-j;
+			n = read(fd, &buf[i], sizeof(buf)-i);
+			if (n < 0)
+				error("read error");
+			if (n == 0)
+				return 0;
+			j = 0;
+			n += i;
+		}
+		i++;
+		if (fullrune(&buf[j], i-j))
+			break;
+	}
+	chartorune(&r, &buf[j]);
+	*rp = r;
+	return 1;
+}
+
+/*
+ * Append the UTF encoding of r to the output buffer wbuf.  If the
+ * encoded bytes would reach the end of the buffer, the buffer is first
+ * written to fd via wflush().
+ */
+void
+writerune(int fd, Rune r)
+{
+	char enc[UTFmax];
+	int len;
+
+	len = runetochar(enc, &r);
+	if (wptr == 0)
+		wptr = wbuf;		/* first use: start at the head of the buffer */
+	if ((wbuf + sizeof(wbuf)) - wptr <= len)
+		wflush(fd);
+	memcpy(wptr, enc, len);
+	wptr += len;
+}
+
+/*
+ * Write any bytes pending in wbuf to fd and reset the buffer to empty.
+ * A short or failed write is reported through error().
+ */
+void
+wflush(int fd)
+{
+	long pending;
+
+	if (wptr != 0) {
+		pending = wptr - wbuf;
+		if (pending > 0 && write(fd, wbuf, pending) != pending)
+			error("write error");
+	}
+	wptr = wbuf;
+}
+
+/*
+ * Decode one rune of a specification starting at s, store it in *rp and
+ * return a pointer to the first byte after it.
+ *
+ * A backslash followed by at least one more byte introduces an escape:
+ *	\x plus up to four hex digits	the rune with that value
+ *	\x with no hex digit		the letter 'x'
+ *	\ plus up to three octal digits	the rune with that value
+ *					(a three-digit value above 0377 is an error)
+ *	\ plus any other rune		that rune itself
+ * A backslash that ends the string stands for itself.
+ */
+char *
+getrune(char *s, Rune *rp)
+{
+	Rune r;
+	char *mark;
+	int ndig, val, d;
+
+	s += chartorune(rp, s);
+	if (*rp != '\\' || *s == '\0')
+		return s;
+
+	val = 0;
+	if (*s == 'x') {
+		s++;
+		for (ndig = 0; ndig < 4; ndig++) {
+			mark = s;	/* where to resume if this rune is not a digit */
+			s += chartorune(&r, s);
+			if ('0' <= r && r <= '9')
+				d = r - '0';
+			else if ('a' <= r && r <= 'f')
+				d = r - 'a' + 10;
+			else if ('A' <= r && r <= 'F')
+				d = r - 'A' + 10;
+			else {
+				if (ndig == 0)
+					*rp = 'x';
+				else
+					*rp = val;
+				return mark;
+			}
+			val = 16*val + d;
+		}
+		*rp = val;
+		return s;
+	}
+
+	for (ndig = 0; ndig < 3; ndig++) {
+		mark = s;
+		s += chartorune(&r, s);
+		if (r < '0' || r > '7') {
+			if (ndig == 0) {
+				/* not an octal escape: the escaped rune stands for itself */
+				*rp = r;
+				return s;
+			}
+			*rp = val;
+			return mark;
+		}
+		val = 8*val + r - '0';
+	}
+	if (val > 0377)
+		error("char>0377");
+	*rp = val;
+	return s;
+}
+
+/*
+ * Return the next rune of the specification tracked by p, expanding
+ * ranges of the form a-b one rune per call, or -1 when the
+ * specification is exhausted.
+ *
+ * A '-' is a range operator only when a rune has already been returned
+ * and another byte follows it; otherwise it is an ordinary rune.
+ * A range whose end is below its start is reported through error().
+ */
+long
+canon(Pcb *p)
+{
+	Rune r;
+
+	/* in the middle of a range: hand out the next member */
+	if (p->final >= 0) {
+		if (p->last < p->final)
+			return ++p->last;
+		p->final = -1;
+	}
+	if (*p->current == '\0')
+		return -1;
+	if(*p->current == '-' && p->last >= 0 && p->current[1]){
+		p->current = getrune(p->current+1, &r);
+		if (r < p->last)
+			error ("Invalid range specification");
+		if (r > p->last) {
+			p->final = r;
+			return ++p->last;
+		}
+		/*
+		 * Degenerate range x-x: x has already been returned.  If it
+		 * ended the specification, stop here; decoding the terminating
+		 * NUL would yield a spurious rune 0 and move current past the
+		 * end of the string.
+		 */
+		if (*p->current == '\0')
+			return -1;
+	}
+	p->current = getrune(p->current, &r);
+	p->last = r;
+	return p->last;
+}
+
+/*
+ * Point the parse control block p at the specification string cp and
+ * mark it as having returned no rune and being inside no range.
+ */
+void
+Pinit(Pcb *p, char *cp)
+{
+	p->base = cp;
+	p->current = cp;
+	p->final = -1;
+	p->last = -1;
+}
+/*
+ * Restart parsing of p's specification from its beginning, with the
+ * same state a fresh Pinit() on that string would give.
+ */
+void
+Prewind(Pcb *p)
+{
+	Pinit(p, p->base);
+}
+void /* report a fatal error and terminate; does not return to the caller */
+error(char *s)
+{
+ fprint(2, "%s: %s\n", argv0, s); /* "argv0: message" on file descriptor 2 */
+ exits(s); /* the message is also passed to exits() as the exit status */
+}