aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/upas/filterkit/list.c
diff options
context:
space:
mode:
authorrsc <devnull@localhost>2005-10-29 16:26:44 +0000
committerrsc <devnull@localhost>2005-10-29 16:26:44 +0000
commit5cdb17983ae6e6367ad7a940cb219eab247a9304 (patch)
tree8ca1ef49af2a96e7daebe624d91fdf679814a057 /src/cmd/upas/filterkit/list.c
parentcd3745196389579fb78b9b01ef1daefb5a57aa71 (diff)
downloadplan9port-5cdb17983ae6e6367ad7a940cb219eab247a9304.tar.gz
plan9port-5cdb17983ae6e6367ad7a940cb219eab247a9304.tar.bz2
plan9port-5cdb17983ae6e6367ad7a940cb219eab247a9304.zip
Thanks to John Cummings.
Diffstat (limited to 'src/cmd/upas/filterkit/list.c')
-rw-r--r--src/cmd/upas/filterkit/list.c315
1 files changed, 315 insertions, 0 deletions
diff --git a/src/cmd/upas/filterkit/list.c b/src/cmd/upas/filterkit/list.c
new file mode 100644
index 00000000..6c23dcc1
--- /dev/null
+++ b/src/cmd/upas/filterkit/list.c
@@ -0,0 +1,315 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+#include <libsec.h>
+#include <String.h>
+#include <bio.h>
+#include "dat.h"
+
+int debug;
+
+enum
+{
+ Tregexp= (1<<0), /* ~ */
+ Texact= (1<<1), /* = */
+};
+
+typedef struct Pattern Pattern;
+struct Pattern
+{
+ Pattern *next;
+ int type;
+ char *arg;
+ int bang;
+};
+
+String *patternpath;
+Pattern *patterns;
+String *mbox;
+
+static void
+usage(void)
+{
+ fprint(2, "usage: %s 'check|add' patternfile addr [addr*]\n", argv0);
+ exits("usage");
+}
+
+/*
+ * convert string to lower case
+ */
+static void
+mklower(char *p)
+{
+ int c;
+
+ for(; *p; p++){
+ c = *p;
+ if(c <= 'Z' && c >= 'A')
+ *p = c - 'A' + 'a';
+ }
+}
+
+/*
+ * simplify an address, reduce to a domain
+ */
+static String*
+simplify(char *addr)
+{
+ int dots;
+ char *p, *at;
+ String *s;
+
+ mklower(addr);
+ at = strchr(addr, '@');
+ if(at == nil){
+ /* local address, make it an exact match */
+ s = s_copy("=");
+ s_append(s, addr);
+ return s;
+ }
+
+ /* copy up to the '@' sign */
+ at++;
+ s = s_copy("~");
+ for(p = addr; p < at; p++){
+ if(strchr(".*+?(|)\\[]^$", *p))
+ s_putc(s, '\\');
+ s_putc(s, *p);
+ }
+
+ /* just any address matching the two most significant domain elements */
+ s_append(s, "(.*\\.)?");
+ p = addr+strlen(addr);
+ dots = 0;
+ for(; p > at; p--){
+ if(*p != '.')
+ continue;
+ if(dots++ > 0){
+ p++;
+ break;
+ }
+ }
+ for(; *p; p++){
+ if(strchr(".*+?(|)\\[]^$", *p) != 0)
+ s_putc(s, '\\');
+ s_putc(s, *p);
+ }
+ s_terminate(s);
+
+ return s;
+}
+
+/*
+ * link patterns in order
+ */
+static int
+newpattern(int type, char *arg, int bang)
+{
+ Pattern *p;
+ static Pattern *last;
+
+ mklower(arg);
+
+ p = mallocz(sizeof *p, 1);
+ if(p == nil)
+ return -1;
+ if(type == Tregexp){
+ p->arg = malloc(strlen(arg)+3);
+ if(p->arg == nil){
+ free(p);
+ return -1;
+ }
+ p->arg[0] = 0;
+ strcat(p->arg, "^");
+ strcat(p->arg, arg);
+ strcat(p->arg, "$");
+ } else {
+ p->arg = strdup(arg);
+ if(p->arg == nil){
+ free(p);
+ return -1;
+ }
+ }
+ p->type = type;
+ p->bang = bang;
+ if(last == nil)
+ patterns = p;
+ else
+ last->next = p;
+ last = p;
+
+ return 0;
+}
+
+/*
+ * patterns are either
+ * ~ regular expression
+ * = exact match string
+ *
+ * all comparisons are case insensitive
+ */
+static int
+readpatterns(char *path)
+{
+ Biobuf *b;
+ char *p;
+ char *token[2];
+ int n;
+ int bang;
+
+ b = Bopen(path, OREAD);
+ if(b == nil)
+ return -1;
+ while((p = Brdline(b, '\n')) != nil){
+ p[Blinelen(b)-1] = 0;
+ n = tokenize(p, token, 2);
+ if(n == 0)
+ continue;
+
+ mklower(token[0]);
+ p = token[0];
+ if(*p == '!'){
+ p++;
+ bang = 1;
+ } else
+ bang = 0;
+
+ if(*p == '='){
+ if(newpattern(Texact, p+1, bang) < 0)
+ return -1;
+ } else if(*p == '~'){
+ if(newpattern(Tregexp, p+1, bang) < 0)
+ return -1;
+ } else if(strcmp(token[0], "#include") == 0 && n == 2)
+ readpatterns(token[1]);
+ }
+ Bterm(b);
+ return 0;
+}
+
+/* fuck, shit, bugger, damn */
+void regerror(char*)
+{
+}
+
+/*
+ * check lower case version of address agains patterns
+ */
+static Pattern*
+checkaddr(char *arg)
+{
+ Pattern *p;
+ Reprog *rp;
+ String *s;
+
+ s = s_copy(arg);
+ mklower(s_to_c(s));
+
+ for(p = patterns; p != nil; p = p->next)
+ switch(p->type){
+ case Texact:
+ if(strcmp(p->arg, s_to_c(s)) == 0){
+ free(s);
+ return p;
+ }
+ break;
+ case Tregexp:
+ rp = regcomp(p->arg);
+ if(rp == nil)
+ continue;
+ if(regexec(rp, s_to_c(s), nil, 0)){
+ free(rp);
+ free(s);
+ return p;
+ }
+ free(rp);
+ break;
+ }
+ s_free(s);
+ return 0;
+}
+static char*
+check(int argc, char **argv)
+{
+ int i;
+ Addr *a;
+ Pattern *p;
+ int matchedbang;
+
+ matchedbang = 0;
+ for(i = 0; i < argc; i++){
+ a = readaddrs(argv[i], nil);
+ for(; a != nil; a = a->next){
+ p = checkaddr(a->val);
+ if(p == nil)
+ continue;
+ if(p->bang)
+ matchedbang = 1;
+ else
+ return nil;
+ }
+ }
+ if(matchedbang)
+ return "!match";
+ else
+ return "no match";
+}
+
+/*
+ * add anything that isn't already matched, all matches are lower case
+ */
+static char*
+add(char *pp, int argc, char **argv)
+{
+ int fd, i;
+ String *s;
+ char *cp;
+ Addr *a;
+
+ a = nil;
+ for(i = 0; i < argc; i++)
+ a = readaddrs(argv[i], a);
+
+ fd = open(pp, OWRITE);
+ seek(fd, 0, 2);
+ for(; a != nil; a = a->next){
+ if(checkaddr(a->val))
+ continue;
+ s = simplify(a->val);
+ cp = s_to_c(s);
+ fprint(fd, "%q\t%q\n", cp, a->val);
+ if(*cp == '=')
+ newpattern(Texact, cp+1, 0);
+ else if(*cp == '~')
+ newpattern(Tregexp, cp+1, 0);
+ s_free(s);
+ }
+ close(fd);
+ return nil;
+}
+
+void
+main(int argc, char **argv)
+{
+ char *patternpath;
+
+ ARGBEGIN {
+ case 'd':
+ debug++;
+ break;
+ } ARGEND;
+
+ quotefmtinstall();
+
+ if(argc < 3)
+ usage();
+
+ patternpath = argv[1];
+ readpatterns(patternpath);
+ if(strcmp(argv[0], "add") == 0)
+ exits(add(patternpath, argc-2, argv+2));
+ else if(strcmp(argv[0], "check") == 0)
+ exits(check(argc-2, argv+2));
+ else
+ usage();
+}