diff options
author | rsc <devnull@localhost> | 2003-11-23 18:04:47 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2003-11-23 18:04:47 +0000 |
commit | bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d (patch) | |
tree | 8ca0fe4e2418e6aa18dc74a236c577a719f6c6ed /src/cmd/split.c | |
parent | f08fdedcee12c06e3ce9ac9bec363915978e8289 (diff) | |
download | plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.gz plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.tar.bz2 plan9port-bc7cb1a15a67c859c8c71c4b52bb35fe9425a63d.zip |
new utilities.
the .C files compile but are renamed to avoid building automatically.
Diffstat (limited to 'src/cmd/split.c')
-rw-r--r-- | src/cmd/split.c | 189 |
1 files changed, 189 insertions, 0 deletions
diff --git a/src/cmd/split.c b/src/cmd/split.c new file mode 100644 index 00000000..c4025e20 --- /dev/null +++ b/src/cmd/split.c @@ -0,0 +1,189 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> +#include <regexp.h> + +char digit[] = "0123456789"; +char *suffix = ""; +char *stem = "x"; +char suff[] = "aa"; +char name[200]; +Biobuf bout; +Biobuf *output = &bout; + +extern int nextfile(void); +extern int matchfile(Resub*); +extern void openf(void); +extern char *fold(char*,int); +extern void usage(void); +extern void badexp(void); + +void +main(int argc, char *argv[]) +{ + Reprog *exp; + char *pattern = 0; + int n = 1000; + char *line; + int xflag = 0; + int iflag = 0; + Biobuf bin; + Biobuf *b = &bin; + char buf[256]; + + ARGBEGIN { + case 'l': + case 'n': + n=atoi(EARGF(usage())); + break; + case 'e': + pattern = strdup(EARGF(usage())); + break; + case 'f': + stem = strdup(EARGF(usage())); + break; + case 's': + suffix = strdup(EARGF(usage())); + break; + case 'x': + xflag++; + break; + case 'i': + iflag++; + break; + default: + usage(); + break; + + } ARGEND; + + if(argc < 0 || argc > 1) + usage(); + + if(argc != 0) { + b = Bopen(argv[0], OREAD); + if(b == nil) { + fprint(2, "split: can't open %s: %r\n", argv[0]); + exits("open"); + } + } else + Binit(b, 0, OREAD); + + if(pattern) { + if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern))) + badexp(); + while((line=Brdline(b,'\n')) != 0) { + Resub match[2]; + memset(match, 0, sizeof match); + line[Blinelen(b)-1] = 0; + if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { + if(matchfile(match) && xflag) + continue; + } else if(output == 0) + nextfile(); /* at most once */ + Bwrite(output, line, Blinelen(b)-1); + Bputc(output, '\n'); + } + } else { + int linecnt = n; + + while((line=Brdline(b,'\n')) != 0) { + if(++linecnt > n) { + nextfile(); + linecnt = 1; + } + Bwrite(output, line, Blinelen(b)); + } + + /* + * in case we didn't end with a newline, tack whatever's + * left onto the last file + */ + while((n = Bread(b, buf, sizeof(buf))) > 0) + Bwrite(output, buf, n); + } + if(b != nil) + Bterm(b); + exits(0); +} + +int +nextfile(void) +{ + static int canopen = 1; + if(suff[0] > 'z') { + if(canopen) + fprint(2, "split: file %szz not split\n",stem); + canopen = 0; + } else { + strcpy(name, stem); + strcat(name, suff); + if(++suff[1] > 'z') + suff[1] = 'a', ++suff[0]; + openf(); + } + return canopen; +} + +int +matchfile(Resub *match) +{ + if(match[1].s.sp) { + int len = match[1].e.ep - match[1].s.sp; + strncpy(name, match[1].s.sp, len); + strcpy(name+len, suffix); + openf(); + return 1; + } + return nextfile(); +} + +void +openf(void) +{ + static int fd = 0; + Bflush(output); + Bterm(output); + if(fd > 0) + close(fd); + fd = create(name,OWRITE,0666); + if(fd < 0) { + fprint(2, "grep: can't create %s: %r\n", name); + exits("create"); + } + Binit(output, fd, OWRITE); +} + +char * +fold(char *s, int n) +{ + static char *fline; + static int linesize = 0; + char *t; + + if(linesize < n+1){ + fline = realloc(fline,n+1); + linesize = n+1; + } + for(t=fline; *t++ = tolower(*s++); ) + continue; + /* we assume the 'A'-'Z' only appear as themselves + * in a utf encoding. + */ + return fline; +} + +void +usage(void) +{ + fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); + exits("usage"); +} + +void +badexp(void) +{ + fprint(2, "split: bad regular expression\n"); + exits("bad regular expression"); +} |