#include <u.h>
#include <libc.h>

/*
 * tr: copy standard input to standard output, translating, deleting
 * (-d), complementing (-c) and/or squeezing (-s) runes according to
 * one or two set specifications given as arguments.
 */

typedef struct PCB	/* Control block controlling specification parse */
{
	char	*base;		/* start of specification */
	char	*current;	/* current parse point */
	long	last;		/* last Rune returned */
	long	final;		/* final Rune in a span */
} Pcb;

uchar	bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

/* Bit-set helpers: one bit per rune value, eight runes per byte. */
#define	SETBIT(a, c)	((a)[(c)/8] |= bits[(c)&07])
#define	CLEARBIT(a,c)	((a)[(c)/8] &= ~bits[(c)&07])
#define	BITSET(a,c)	((a)[(c)/8] & bits[(c)&07])

/*
 * Largest rune value the bit sets below can index.
 * NOTE(review): if Rune is wider than 16 bits in the target libc,
 * runes above MAXRUNE would index past f[] and t[] -- confirm.
 */
#define	MAXRUNE	0xFFFF

uchar	f[(MAXRUNE+1)/8];	/* runes named by the first (from) specification */
uchar	t[(MAXRUNE+1)/8];	/* runes named by the second (to) specification */
char	wbuf[4096];		/* output buffer filled by writerune */
char	*wptr;			/* next free byte in wbuf; 0 until first write */

Pcb	pfrom, pto;		/* parse state for the two specifications */

int	cflag;
int	dflag;
int	sflag;

void	complement(void);
void	delete(void);
void	squeeze(void);
void	translit(void);
void	error(char*);
long	canon(Pcb*);
char	*getrune(char*, Rune*);
void	Pinit(Pcb*, char*);
void	Prewind(Pcb *p);
int	readrune(int, long*);
void	wflush(int);
void	writerune(int, Rune);

/*
 * Parse the -s, -d and -c options, set up the specification parsers
 * and dispatch to delete, complement or translit.  Any other option
 * or a wrong number of arguments is reported through error().
 */
void
main(int argc, char **argv)
{
	ARGBEGIN{
	case 's':
		sflag++;
		break;
	case 'd':
		dflag++;
		break;
	case 'c':
		cflag++;
		break;
	default:
		error("bad option");
	}ARGEND

	if(argc > 0)
		Pinit(&pfrom, argv[0]);
	if(argc > 1)
		Pinit(&pto, argv[1]);
	if(argc > 2)
		error("arg count");

	if(dflag){
		/* -d takes one set, or two when combined with -s */
		if((sflag && argc != 2) || (!sflag && argc != 1))
			error("arg count");
		delete();
	}else{
		if(argc != 2)
			error("arg count");
		if(cflag)
			complement();
		else
			translit();
	}
	exits(0);
}

/*
 * -d: drop every input rune whose bit is set in f.  With -c the set is
 * inverted first (all bits set, then the listed runes cleared).  With
 * -s, a surviving rune that is in the second set and equals the rune
 * written just before it is dropped as well.
 */
void
delete(void)
{
	long c, last;

	if(cflag){
		memset((char *) f, 0xff, sizeof f);
		while((c = canon(&pfrom)) >= 0)
			CLEARBIT(f, c);
	}else{
		while((c = canon(&pfrom)) >= 0)
			SETBIT(f, c);
	}
	if(sflag){
		while((c = canon(&pto)) >= 0)
			SETBIT(t, c);
	}

	last = 0x10000;		/* MAXRUNE+1: matches no rune the sets can hold */
	while(readrune(0, &c) > 0){
		if(!BITSET(f, c) && (c != last || !BITSET(t,c))){
			last = c;
			writerune(1, (Rune) c);
		}
	}
	wflush(1);
}

/*
 * -c without -d: runes listed in the first set pass through unchanged;
 * every other rune, taken in increasing order, is mapped to successive
 * runes of the second set, the last of which is repeated once the
 * second set is exhausted.  With -s, repeated output runes that belong
 * to the second set are squeezed to one.
 */
void
complement(void)
{
	Rune *p;
	int i;
	long from, to, lastc, high;

	lastc = 0;
	high = 0;
	while((from = canon(&pfrom)) >= 0){
		if(from > high)
			high = from;
		SETBIT(f, from);
	}
	/*
	 * Scan to end of specification (canon returns -1 there).  A test
	 * of > 0 would stop early at an explicit NUL rune (\000) and
	 * leave high and the squeeze set incomplete.
	 */
	while((to = canon(&pto)) >= 0){
		if(to > high)
			high = to;
		SETBIT(t,to);
	}
	Prewind(&pto);

	if((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
		error("can't allocate memory");
	for(i = 0; i <= high; i++){
		if(!BITSET(f,i)){
			if((to = canon(&pto)) < 0)
				to = lastc;	/* second set used up: repeat its last rune */
			else
				lastc = to;
			p[i] = to;
		}else
			p[i] = i;
	}

	if(sflag){
		lastc = 0x10000;	/* MAXRUNE+1: matches no rune the sets can hold */
		while(readrune(0, &from) > 0){
			/* runes beyond the table get whatever value was left in to */
			if(from > high)
				from = to;
			else
				from = p[from];
			if(from != lastc || !BITSET(t,from)){
				lastc = from;
				writerune(1, (Rune) from);
			}
		}
	}else{
		while(readrune(0, &from) > 0){
			if(from > high)
				from = to;
			else
				from = p[from];
			writerune(1, (Rune) from);
		}
	}
	wflush(1);
}

/*
 * Plain translation: the n-th rune of the first set maps to the n-th
 * rune of the second, the last rune of the second set being repeated
 * when it is the shorter one.  A rune listed twice in the first set
 * with different targets is reported as "ambiguous translation".
 * With -s, repeated output runes that belong to the second set are
 * squeezed to one.
 */
void
translit(void)
{
	Rune *p;
	int i;
	long from, to, lastc, high;

	lastc = 0;
	high = 0;
	/* first pass: find the largest rune in the first set to size the table */
	while((from = canon(&pfrom)) >= 0)
		if(from > high)
			high = from;
	Prewind(&pfrom);

	if((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
		error("can't allocate memory");
	for(i = 0; i <= high; i++)
		p[i] = i;		/* identity until overridden below */

	while((from = canon(&pfrom)) >= 0){
		if((to = canon(&pto)) < 0)
			to = lastc;	/* second set used up: repeat its last rune */
		else
			lastc = to;
		if(BITSET(f,from) && p[from] != to)
			error("ambiguous translation");
		SETBIT(f,from);
		p[from] = to;
		SETBIT(t,to);
	}
	/* any runes left over in the second set still take part in squeezing */
	while((to = canon(&pto)) >= 0){
		SETBIT(t,to);
	}

	if(sflag){
		lastc = 0x10000;	/* MAXRUNE+1: matches no rune the sets can hold */
		while(readrune(0, &from) > 0){
			if(from <= high)
				from = p[from];
			if(from != lastc || !BITSET(t,from)){
				lastc = from;
				writerune(1, (Rune) from);
			}
		}
	}else{
		while(readrune(0, &from) > 0){
			if(from <= high)
				from = p[from];
			writerune(1, (Rune) from);
		}
	}
	wflush(1);
}

/*
 * Read the next rune from fd into *rp through a private static buffer.
 * Returns 1 when a rune was stored and 0 when read() reports end of
 * file; a read error is fatal.  Pending output is flushed before each
 * read().
 */
int
readrune(int fd, long *rp)
{
	Rune r;
	int j;
	static int i, n;	/* i: next unread byte in buf; n: number of valid bytes */
	static char buf[4096];

	j = i;			/* start of the rune being assembled */
	for(;;){
		if(i >= n){
			wflush(1);
			/*
			 * Slide the bytes of an incomplete rune to the front of
			 * the buffer.  Source and destination can overlap when j
			 * is small, so memmove is required rather than memcpy.
			 */
			if(j != i)
				memmove(buf, buf+j, n-j);
			i = n-j;
			n = read(fd, &buf[i], sizeof(buf)-i);
			if(n < 0)
				error("read error");
			if(n == 0)
				return 0;
			j = 0;
			n += i;
		}
		i++;
		if(fullrune(&buf[j], i-j))
			break;
	}
	chartorune(&r, &buf[j]);
	*rp = r;
	return 1;
}

/*
 * Append the encoding of r to the output buffer, flushing the buffer
 * to fd first when the new bytes would reach its end.
 */
void
writerune(int fd, Rune r)
{
	char buf[UTFmax];
	int n;

	if(!wptr)
		wptr = wbuf;
	n = runetochar(buf, &r);
	if(wptr+n >= wbuf+sizeof(wbuf))
		wflush(fd);
	memcpy(wptr, buf, n);
	wptr += n;
}

/*
 * Write any buffered output to fd and reset the buffer.  A short or
 * failed write is fatal.
 */
void
wflush(int fd)
{
	if(wptr && wptr > wbuf)
		if(write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
			error("write error");
	wptr = wbuf;
}

/*
 * Decode one rune of a specification starting at s, store it in *rp
 * and return the address of the text following it.  A backslash that
 * is not the last character introduces an escape:
 *	\x followed by 1-4 hex digits	the rune with that value
 *	\ followed by 1-3 octal digits	the rune with that value (at most 0377)
 *	\ followed by anything else	that character itself
 * "\x" with no hex digit after it yields a literal 'x'.
 */
char *
getrune(char *s, Rune *rp)
{
	Rune r;
	char *save;
	int i, n;

	s += chartorune(rp, s);
	if((r = *rp) == '\\' && *s){
		n = 0;
		if(*s == 'x'){
			s++;
			for(i = 0; i < 4; i++){
				save = s;
				s += chartorune(&r, s);
				if('0' <= r && r <= '9')
					n = 16*n + r - '0';
				else if('a' <= r && r <= 'f')
					n = 16*n + r - 'a' + 10;
				else if('A' <= r && r <= 'F')
					n = 16*n + r - 'A' + 10;
				else{
					/* first non-digit: leave it unread for the caller */
					if(i == 0)
						*rp = 'x';
					else
						*rp = n;
					return save;
				}
			}
		}else{
			for(i = 0; i < 3; i++){
				save = s;
				s += chartorune(&r, s);
				if('0' <= r && r <= '7')
					n = 8*n + r - '0';
				else{
					if(i == 0){
						/* not an octal escape: the escaped character stands for itself */
						*rp = r;
						return s;
					}
					*rp = n;
					return save;
				}
			}
			if(n > 0377)
				error("char>0377");
		}
		*rp = n;
	}
	return s;
}

/*
 * Return the next rune denoted by the specification being parsed
 * through p, expanding ranges of the form a-b one rune per call, or
 * -1 at the end of the specification.  A range whose upper bound is
 * below its lower bound is a fatal error.
 */
long
canon(Pcb *p)
{
	Rune r;

	/* in the middle of a range: keep counting up to its final rune */
	if(p->final >= 0){
		if(p->last < p->final)
			return ++p->last;
		p->final = -1;
	}
	if(*p->current == '\0')
		return -1;
	/* a '-' is a range operator only after a rune and before another */
	if(*p->current == '-' && p->last >= 0 && p->current[1]){
		p->current = getrune(p->current+1, &r);
		if(r < p->last)
			error ("Invalid range specification");
		if(r > p->last){
			p->final = r;
			return ++p->last;
		}
		/*
		 * Degenerate range (x-x): its only rune has already been
		 * returned.  If that ended the specification, stop here;
		 * decoding another rune would step over the terminating
		 * NUL and run off the end of the string.
		 */
		if(*p->current == '\0')
			return -1;
	}
	p->current = getrune(p->current, &r);
	p->last = r;
	return p->last;
}

/*
 * Start parsing the specification cp through p.
 */
void
Pinit(Pcb *p, char *cp)
{
	p->current = p->base = cp;
	p->last = p->final = -1;
}

/*
 * Restart parsing of p's specification from its beginning.
 */
void
Prewind(Pcb *p)
{
	p->current = p->base;
	p->last = p->final = -1;
}

/*
 * Print "argv0: s" on file descriptor 2 and exit with status s.
 */
void
error(char *s)
{
	fprint(2, "%s: %s\n", argv0, s);
	exits(s);
}