From 5cdb17983ae6e6367ad7a940cb219eab247a9304 Mon Sep 17 00:00:00 2001 From: rsc Date: Sat, 29 Oct 2005 16:26:44 +0000 Subject: Thanks to John Cummings. --- src/cmd/upas/filterkit/list.c | 315 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 315 insertions(+) create mode 100644 src/cmd/upas/filterkit/list.c (limited to 'src/cmd/upas/filterkit/list.c') diff --git a/src/cmd/upas/filterkit/list.c b/src/cmd/upas/filterkit/list.c new file mode 100644 index 00000000..6c23dcc1 --- /dev/null +++ b/src/cmd/upas/filterkit/list.c @@ -0,0 +1,315 @@ +#include +#include +#include +#include +#include +#include +#include "dat.h" + +int debug; + +enum +{ + Tregexp= (1<<0), /* ~ */ + Texact= (1<<1), /* = */ +}; + +typedef struct Pattern Pattern; +struct Pattern +{ + Pattern *next; + int type; + char *arg; + int bang; +}; + +String *patternpath; +Pattern *patterns; +String *mbox; + +static void +usage(void) +{ + fprint(2, "usage: %s 'check|add' patternfile addr [addr*]\n", argv0); + exits("usage"); +} + +/* + * convert string to lower case + */ +static void +mklower(char *p) +{ + int c; + + for(; *p; p++){ + c = *p; + if(c <= 'Z' && c >= 'A') + *p = c - 'A' + 'a'; + } +} + +/* + * simplify an address, reduce to a domain + */ +static String* +simplify(char *addr) +{ + int dots; + char *p, *at; + String *s; + + mklower(addr); + at = strchr(addr, '@'); + if(at == nil){ + /* local address, make it an exact match */ + s = s_copy("="); + s_append(s, addr); + return s; + } + + /* copy up to the '@' sign */ + at++; + s = s_copy("~"); + for(p = addr; p < at; p++){ + if(strchr(".*+?(|)\\[]^$", *p)) + s_putc(s, '\\'); + s_putc(s, *p); + } + + /* just any address matching the two most significant domain elements */ + s_append(s, "(.*\\.)?"); + p = addr+strlen(addr); + dots = 0; + for(; p > at; p--){ + if(*p != '.') + continue; + if(dots++ > 0){ + p++; + break; + } + } + for(; *p; p++){ + if(strchr(".*+?(|)\\[]^$", *p) != 0) + s_putc(s, '\\'); + s_putc(s, *p); + } + s_terminate(s); + + return s; +} + +/* + * link patterns in order + */ +static int +newpattern(int type, char *arg, int bang) +{ + Pattern *p; + static Pattern *last; + + mklower(arg); + + p = mallocz(sizeof *p, 1); + if(p == nil) + return -1; + if(type == Tregexp){ + p->arg = malloc(strlen(arg)+3); + if(p->arg == nil){ + free(p); + return -1; + } + p->arg[0] = 0; + strcat(p->arg, "^"); + strcat(p->arg, arg); + strcat(p->arg, "$"); + } else { + p->arg = strdup(arg); + if(p->arg == nil){ + free(p); + return -1; + } + } + p->type = type; + p->bang = bang; + if(last == nil) + patterns = p; + else + last->next = p; + last = p; + + return 0; +} + +/* + * patterns are either + * ~ regular expression + * = exact match string + * + * all comparisons are case insensitive + */ +static int +readpatterns(char *path) +{ + Biobuf *b; + char *p; + char *token[2]; + int n; + int bang; + + b = Bopen(path, OREAD); + if(b == nil) + return -1; + while((p = Brdline(b, '\n')) != nil){ + p[Blinelen(b)-1] = 0; + n = tokenize(p, token, 2); + if(n == 0) + continue; + + mklower(token[0]); + p = token[0]; + if(*p == '!'){ + p++; + bang = 1; + } else + bang = 0; + + if(*p == '='){ + if(newpattern(Texact, p+1, bang) < 0) + return -1; + } else if(*p == '~'){ + if(newpattern(Tregexp, p+1, bang) < 0) + return -1; + } else if(strcmp(token[0], "#include") == 0 && n == 2) + readpatterns(token[1]); + } + Bterm(b); + return 0; +} + +/* fuck, shit, bugger, damn */ +void regerror(char*) +{ +} + +/* + * check lower case version of address agains patterns + */ +static Pattern* +checkaddr(char *arg) +{ + Pattern *p; + Reprog *rp; + String *s; + + s = s_copy(arg); + mklower(s_to_c(s)); + + for(p = patterns; p != nil; p = p->next) + switch(p->type){ + case Texact: + if(strcmp(p->arg, s_to_c(s)) == 0){ + free(s); + return p; + } + break; + case Tregexp: + rp = regcomp(p->arg); + if(rp == nil) + continue; + if(regexec(rp, s_to_c(s), nil, 0)){ + free(rp); + free(s); + return p; + } + free(rp); + break; + } + s_free(s); + return 0; +} +static char* +check(int argc, char **argv) +{ + int i; + Addr *a; + Pattern *p; + int matchedbang; + + matchedbang = 0; + for(i = 0; i < argc; i++){ + a = readaddrs(argv[i], nil); + for(; a != nil; a = a->next){ + p = checkaddr(a->val); + if(p == nil) + continue; + if(p->bang) + matchedbang = 1; + else + return nil; + } + } + if(matchedbang) + return "!match"; + else + return "no match"; +} + +/* + * add anything that isn't already matched, all matches are lower case + */ +static char* +add(char *pp, int argc, char **argv) +{ + int fd, i; + String *s; + char *cp; + Addr *a; + + a = nil; + for(i = 0; i < argc; i++) + a = readaddrs(argv[i], a); + + fd = open(pp, OWRITE); + seek(fd, 0, 2); + for(; a != nil; a = a->next){ + if(checkaddr(a->val)) + continue; + s = simplify(a->val); + cp = s_to_c(s); + fprint(fd, "%q\t%q\n", cp, a->val); + if(*cp == '=') + newpattern(Texact, cp+1, 0); + else if(*cp == '~') + newpattern(Tregexp, cp+1, 0); + s_free(s); + } + close(fd); + return nil; +} + +void +main(int argc, char **argv) +{ + char *patternpath; + + ARGBEGIN { + case 'd': + debug++; + break; + } ARGEND; + + quotefmtinstall(); + + if(argc < 3) + usage(); + + patternpath = argv[1]; + readpatterns(patternpath); + if(strcmp(argv[0], "add") == 0) + exits(add(patternpath, argc-2, argv+2)); + else if(strcmp(argv[0], "check") == 0) + exits(check(argc-2, argv+2)); + else + usage(); +} -- cgit v1.2.3