diff --git a/src/lib9/LICENSE b/src/lib9/LICENSE new file mode 100644 index 00000000..a5d7d87d --- /dev/null +++ b/src/lib9/LICENSE @@ -0,0 +1,258 @@ +The Plan 9 software is provided under the terms of the +Lucent Public License, Version 1.02, reproduced below, +with the following exceptions: + +1. No right is granted to create derivative works of or + to redistribute (other than with the Plan 9 Operating System) + the screen imprinter fonts identified in subdirectory + /lib/font/bit/lucida and printer fonts (Lucida Sans Unicode, Lucida + Sans Italic, Lucida Sans Demibold, Lucida Typewriter, Lucida Sans + Typewriter83), identified in subdirectory /sys/lib/postscript/font. + These directories contain material copyrights by B&H Inc. and Y&Y Inc. + +2. The printer fonts identified in subdirectory /sys/lib/ghostscript/font + are subject to the GNU GPL, reproduced in the file /LICENSE.gpl. + +3. s;/.*;;; s; ;;g'} + +# this works in bsd make +SYSNAME!=uname +OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g' + +# the gnu rules will mess up bsd but not vice versa, +# hence the gnu rules come first. 
+ +include Make.$(SYSNAME)-$(OBJTYPE) + +PREFIX=/usr/local + +NUKEFILES= + +TGZFILES= + +LIB=lib9.a +VERSION=2.0 +PORTPLACE=devel/lib9 +NAME=lib9 + +OFILES=\ + _exits.$O\ + argv0.$O\ + await.$O\ + encodefmt.$O\ + errstr.$O\ + exits.$O\ + ffork-$(SYSNAME).$O\ + getcallerpc-$(OBJTYPE).$O\ + getfields.$O\ + lock.$O\ + malloctag.$O\ + mallocz.$O\ + nrand.$O\ + qlock.$O\ + readn.$O\ + rendez.$O\ + strecpy.$O\ + sysfatal.$O\ + tas-$(OBJTYPE).$O\ + tokenize.$O\ + u16.$O\ + u32.$O\ + u64.$O\ + wait.$O\ + werrstr.$O\ + +HFILES=\ + lib9.h\ + +all: $(LIB) + +install: $(LIB) # copies the header and the archive under $(PREFIX); mkdir -p also creates a missing $(PREFIX)/man + test -d $(PREFIX)/man/man3 || mkdir -p $(PREFIX)/man/man3 + # install -m 0644 lib9.3 $(PREFIX)/man/man3/lib9.3 + install -m 0644 lib9.h $(PREFIX)/include/lib9.h + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + +test: $(LIB) test.$O + $(CC) -o test test.$O $(LIB) -L$(PREFIX)/lib -lfmt -lutf + +testfork: $(LIB) testfork.$O + $(CC) -o testfork testfork.$O $(LIB) -L$(PREFIX)/lib -lfmt -lutf + +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) $(LIB) $(OFILES) + +NUKEFILES+=$(LIB) +.c.$O: + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + +%.$O: %.c + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + + +$(OFILES): $(HFILES) + +tgz: # stages the sources in $(NAME)-$(VERSION)/ and packs them into $(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION) + cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION) + tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + +clean: + rm -f $(OFILES) $(LIB) + +nuke: + rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES) + +rpm: # $(MAKE) re-runs whichever make program is executing this file, with its flags + $(MAKE) tgz + cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES + rpm -ba rpm.spec + cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm . + cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm . 
+ scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software + +PORTDIR=/usr/ports/$(PORTPLACE) + +ports: # rebuilds the port directory from bundle.ports, checks it with portlint, and uploads the archives + $(MAKE) tgz + rm -rf $(PORTDIR) + mkdir $(PORTDIR) + cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles + cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}') + (cd $(PORTDIR) && make makesum) + (cd $(PORTDIR) && make) + (cd $(PORTDIR) && /usr/local/bin/portlint) + rm -rf $(PORTDIR)/work + shar `find $(PORTDIR)` > ports.shar + (cd $(PORTDIR) && tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz + scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software + +.PHONY: all clean nuke install tgz rpm ports # must be upper case: make treats a lower-case .phony as an ordinary target diff --git a/src/lib9/Makefile.MID b/src/lib9/Makefile.MID new file mode 100644 index 00000000..8b3584cb --- /dev/null +++ b/src/lib9/Makefile.MID @@ -0,0 +1,49 @@ +LIB=lib9.a +VERSION=2.0 +PORTPLACE=devel/lib9 +NAME=lib9 + +OFILES=\ + _exits.$O\ + argv0.$O\ + await.$O\ + encodefmt.$O\ + errstr.$O\ + exits.$O\ + ffork-$(SYSNAME).$O\ + getcallerpc-$(OBJTYPE).$O\ + getfields.$O\ + lock.$O\ + malloctag.$O\ + mallocz.$O\ + nrand.$O\ + qlock.$O\ + readn.$O\ + rendez.$O\ + strecpy.$O\ + sysfatal.$O\ + tas-$(OBJTYPE).$O\ + tokenize.$O\ + u16.$O\ + u32.$O\ + u64.$O\ + wait.$O\ + werrstr.$O\ + +HFILES=\ + lib9.h\ + +all: $(LIB) + +install: $(LIB) # mkdir -p also creates a missing $(PREFIX)/man + test -d $(PREFIX)/man/man3 || mkdir -p $(PREFIX)/man/man3 + # install -m 0644 lib9.3 $(PREFIX)/man/man3/lib9.3 + install -m 0644 lib9.h $(PREFIX)/include/lib9.h + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + +test: $(LIB) test.$O + $(CC) -o test test.$O $(LIB) -L$(PREFIX)/lib -lfmt -lutf + +testfork: $(LIB) testfork.$O + $(CC) -o testfork testfork.$O $(LIB) -L$(PREFIX)/lib -lfmt -lutf + diff --git a/src/lib9/_exits.c b/src/lib9/_exits.c new file mode 100644 index 00000000..35ff4e67 --- /dev/null +++ b/src/lib9/_exits.c @@ -0,0 +1,9 @@ +#include <lib9.h> + +void +_exits(char *s) /* _exit(1) when s is a non-empty string, _exit(0) otherwise */ +{ + if(s && *s) + _exit(1); + _exit(0); +} diff --git a/src/lib9/argv0.c b/src/lib9/argv0.c new file mode 100644 
index 00000000..2c846f4d --- /dev/null +++ b/src/lib9/argv0.c @@ -0,0 +1,4 @@ +#include <lib9.h> + +char *argv0; /* program name; the ARGBEGIN macro in lib9.h sets it from argv[0] when it is still nil */ + diff --git a/src/lib9/await.c b/src/lib9/await.c new file mode 100644 index 00000000..9df7faa5 --- /dev/null +++ b/src/lib9/await.c @@ -0,0 +1,105 @@ +#include <signal.h> +#include <sys/types.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <string.h> +#include <errno.h> +#include <lib9.h> + +static struct { /* maps Unix signal numbers to the message strings reported by await */ + int sig; + char *str; +} tab[] = { + SIGHUP, "hangup", + SIGINT, "interrupt", + SIGQUIT, "quit", + SIGILL, "sys: trap: illegal instruction", + SIGTRAP, "sys: trace trap", + SIGABRT, "sys: abort", +#ifdef SIGEMT + SIGEMT, "sys: emulate instruction executed", +#endif + SIGFPE, "sys: fp: trap", + SIGKILL, "sys: kill", + SIGBUS, "sys: bus error", + SIGSEGV, "sys: segmentation violation", + SIGALRM, "alarm", + SIGTERM, "kill", + SIGURG, "sys: urgent condition on socket", + SIGSTOP, "sys: stop", + SIGTSTP, "sys: tstp", + SIGCONT, "sys: cont", + SIGCHLD, "sys: child", + SIGTTIN, "sys: ttin", + SIGTTOU, "sys: ttou", + SIGIO, "sys: i/o possible on fd", + SIGXCPU, "sys: cpu time limit exceeded", + SIGXFSZ, "sys: file size limit exceeded", + SIGVTALRM, "sys: virtual time alarm", + SIGPROF, "sys: profiling timer alarm", + SIGWINCH, "sys: window size change", +#ifdef SIGINFO + SIGINFO, "sys: status request", +#endif + SIGUSR1, "sys: usr1", + SIGUSR2, "sys: usr2", +}; + +static char* +_p9sigstr(int sig, char *tmp) /* returns the tab string for sig; for a signal not in tab, formats "sys: signal N" into the caller's tmp and returns tmp */ +{ + int i; + + for(i=0; i<nelem(tab); i++) + if(tab[i].sig == sig) + return tab[i].str; + sprint(tmp, "sys: signal %d", sig); + return tmp; +} + +/* +static int +_p9strsig(char *s) +{ + int i; + + for(i=0; i<nelem(tab); i++) + if(strcmp(s, tab[i].str) == 0) + return tab[i].sig; + return 0; +} +*/ + +int +await(char *str, int n) /* waits for a child with wait3 and writes "pid utime stime total status" into str (capacity n); returns the string length, or -1 if wait3 fails */ +{ + int pid, status, cd; + struct rusage ru; + char buf[128], tmp[64]; + ulong u, s; /* user and system CPU time of the child, in milliseconds */ + + for(;;){ + pid = wait3(&status, 0, &ru); + if(pid < 0) + return -1; + u = 
ru.ru_utime.tv_sec*1000+((ru.ru_utime.tv_usec+500)/1000); + s = ru.ru_stime.tv_sec*1000+((ru.ru_stime.tv_usec+500)/1000); + if(WIFEXITED(status)){ + status = WEXITSTATUS(status); + if(status) + snprint(buf, sizeof buf, "%d %lu %lu %lu %d", pid, u, s, u+s, status); + else + snprint(buf, sizeof buf, "%d %lu %lu %lu ''", pid, u, s, u+s); + strecpy(str, str+n, buf); + return strlen(str); + } + if(WIFSIGNALED(status)){ + cd = WCOREDUMP(status); + USED(cd); + snprint(buf, sizeof buf, "%d %lu %lu %lu '%s'", pid, u, s, u+s, _p9sigstr(WTERMSIG(status), tmp)); + strecpy(str, str+n, buf); + return strlen(str); + } + } +} diff --git a/src/lib9/encodefmt.c b/src/lib9/encodefmt.c new file mode 100644 index 00000000..9a5cbfc4 --- /dev/null +++ b/src/lib9/encodefmt.c @@ -0,0 +1,69 @@ +#include <lib9.h> + +int +encodefmt(Fmt *f) +{ + char *out; + char *buf; + int len; + int ilen; + int rv; + uchar *b; + char obuf[64]; // rsc optimization + + if(!(f->flags&FmtPrec) || f->prec < 1) + goto error; + + b = va_arg(f->args, uchar*); + + ilen = f->prec; + f->prec = 0; + f->flags &= ~FmtPrec; + switch(f->r){ + case '<': + len = (8*ilen+4)/5 + 3; + break; + case '[': + len = (8*ilen+5)/6 + 4; + break; + case 'H': + len = 2*ilen + 1; + break; + default: + goto error; + } + + if(len > sizeof(obuf)){ + buf = malloc(len); + if(buf == nil) + goto error; + } else + buf = obuf; + + // convert + out = buf; + switch(f->r){ + case '<': + rv = enc32(out, len, b, ilen); + break; + case '[': + rv = enc64(out, len, b, ilen); + break; + case 'H': + rv = enc16(out, len, b, ilen); + break; + default: + rv = -1; + break; + } + if(rv < 0) + goto error; + + fmtstrcpy(f, buf); + if(buf != obuf) + free(buf); + return 0; + +error: + return fmtstrcpy(f, "<encodefmt>"); +} diff --git a/src/lib9/errstr.c b/src/lib9/errstr.c new file mode 100644 index 00000000..e576b12b --- /dev/null +++ b/src/lib9/errstr.c @@ -0,0 +1,68 @@ +/* + * We assume there's only one error buffer for the whole system. 
+ * If you use ffork, you need to provide a _syserrstr. Since most + * people will use libthread (which provides a _syserrstr), this is + * okay. + */ + +#include <errno.h> +#include <string.h> +#include <lib9.h> + +enum +{ + EPLAN9 = 0x19283745, +}; + +char *(*_syserrstr)(void); +static char xsyserr[ERRMAX]; +static char* +getsyserr(void) +{ + char *s; + + s = nil; + if(_syserrstr) + s = (*_syserrstr)(); + if(s == nil) + s = xsyserr; + return s; +} + +int +errstr(char *err, uint n) +{ + char tmp[ERRMAX]; + char *syserr; + + syserr = getsyserr(); + if(errno != EPLAN9) + strcpy(syserr, strerror(errno)); + + strecpy(tmp, tmp+ERRMAX, syserr); + strecpy(syserr, syserr+ERRMAX, err); + strecpy(err, err+n, tmp); + errno = EPLAN9; + return 0; +} + +void +rerrstr(char *err, uint n) +{ + char *syserr; + + syserr = getsyserr(); + if(errno != EPLAN9) + strcpy(syserr, strerror(errno)); + strecpy(err, err+n, syserr); +} + +/* replaces __errfmt in libfmt */ + +int +__errfmt(Fmt *f) +{ + if(errno == EPLAN9) + return fmtstrcpy(f, getsyserr()); + return fmtstrcpy(f, strerror(errno)); +} diff --git a/src/lib9/exits.c b/src/lib9/exits.c new file mode 100644 index 00000000..a449f68e --- /dev/null +++ b/src/lib9/exits.c @@ -0,0 +1,10 @@ +#include <lib9.h> + +void +exits(char *s) +{ + if(s && *s) + exit(1); + exit(0); +} + diff --git a/src/lib9/ffork-FreeBSD.c b/src/lib9/ffork-FreeBSD.c new file mode 100644 index 00000000..a7c82e64 --- /dev/null +++ b/src/lib9/ffork-FreeBSD.c @@ -0,0 +1,33 @@ +#include <lib9.h> + +extern int __isthreaded; +int +ffork(int flags, void(*fn)(void*), void *arg) +{ + void *p; + + __isthreaded = 1; + p = malloc(16384); + if(p == nil) + return -1; + memset(p, 0xFE, 16384); + return rfork_thread(RFPROC|flags, (char*)p+16000, (int(*)(void*))fn, arg); +} + +/* + * For FreeBSD libc. 
+ */ + +typedef struct { + volatile long access_lock; + volatile long lock_owner; + volatile char *fname; + volatile int lineno; +} spinlock_t; + +void +_spinlock(spinlock_t *lk) +{ + lock((Lock*)&lk->access_lock); +} + diff --git a/src/lib9/ffork-Linux.c b/src/lib9/ffork-Linux.c new file mode 100644 index 00000000..aad80041 --- /dev/null +++ b/src/lib9/ffork-Linux.c @@ -0,0 +1,39 @@ +#include <sched.h> +#include <signal.h> +#include <lib9.h> + +int fforkstacksize = 16384; + +int +ffork(int flags, void (*fn)(void*), void *arg) +{ + char *p; + int cloneflag, pid; + + p = malloc(fforkstacksize); + if(p == nil) + return -1; + cloneflag = 0; + flags &= ~RFPROC; + if(flags&RFMEM){ + cloneflag |= CLONE_VM; + flags &= ~RFMEM; + } + if(!(flags&RFFDG)) + cloneflag |= CLONE_FILES; + else + flags &= ~RFFDG; + if(!(flags&RFNOWAIT)) + cloneflag |= SIGCHLD; + else + flags &= ~RFNOWAIT; + if(flags){ + fprint(2, "unknown rfork flags %x\n", flags); + return -1; + } + pid = clone((int(*)(void*))fn, p+fforkstacksize-16, cloneflag, arg); + if(pid < 0) + free(p); + return pid; +} + diff --git a/src/lib9/getcallerpc-386.c b/src/lib9/getcallerpc-386.c new file mode 100644 index 00000000..1367370e --- /dev/null +++ b/src/lib9/getcallerpc-386.c @@ -0,0 +1,7 @@ +#include <lib9.h> + +ulong +getcallerpc(void *x) +{ + return (((ulong*)(x))[-1]); +} diff --git a/src/lib9/getfields.c b/src/lib9/getfields.c new file mode 100644 index 00000000..79f7abad --- /dev/null +++ b/src/lib9/getfields.c @@ -0,0 +1,36 @@ +#include <lib9.h> + +int +getfields(char *str, char **args, int max, int mflag, char *set) +{ + Rune r; + int nr, intok, narg; + + if(max <= 0) + return 0; + + narg = 0; + args[narg] = str; + if(!mflag) + narg++; + intok = 0; + for(;; str += nr) { + nr = chartorune(&r, str); + if(r == 0) + break; + if(utfrune(set, r)) { + if(narg >= max) + break; + *str = 0; + intok = 0; + args[narg] = str + nr; + if(!mflag) + narg++; + } else { + if(!intok && mflag) + narg++; + intok = 1; + } + } + return 
narg; +} diff --git a/src/lib9/lib9.h b/src/lib9/lib9.h new file mode 100644 index 00000000..bb7e5404 --- /dev/null +++ b/src/lib9/lib9.h @@ -0,0 +1,246 @@ +/* + * Lib9 is miscellany from the Plan 9 C library that doesn't + * fit into libutf or into libfmt, but is still missing from traditional + * Unix C libraries. + */ +#ifndef _LIB9H_ +#define _LIB9H_ 1 + +#if defined(__cplusplus) +extern "C" { +#endif + + +#include <unistd.h> +#include <string.h> +#include <stdlib.h> +#include <stdarg.h> +#include <fcntl.h> +#include <assert.h> + +#ifndef _FMTH_ +# include <fmt.h> +#endif + +#define nil ((void*)0) +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +#define _NEEDUCHAR 1 +#define _NEEDUSHORT 1 +#define _NEEDUINT 1 +#define _NEEDULONG 1 /* each _NEEDxxx flag still defined after the platform checks below enables the matching typedef */ + +#if defined(__linux__) +# include <sys/types.h> +# if defined(__USE_MISC) +# undef _NEEDUSHORT +# undef _NEEDUINT +# undef _NEEDULONG +# endif +#endif +#if defined(__FreeBSD__) +# include <sys/types.h> +# if !defined(_POSIX_SOURCE) +# undef _NEEDUSHORT +# undef _NEEDUINT +# endif +#endif + +typedef signed char schar; +typedef unsigned int u32int; +#ifdef _NEEDUCHAR + typedef unsigned char uchar; +#endif +#ifdef _NEEDUSHORT + typedef unsigned short ushort; +#endif +#ifdef _NEEDUINT + typedef unsigned int uint; +#endif +#ifdef _NEEDULONG + typedef unsigned long ulong; +#endif +typedef unsigned long long uvlong; +typedef long long vlong; + +/* rfork to create new process running fn(arg) */ + +#if defined(__FreeBSD__) +#undef RFFDG +#undef RFNOTEG +#undef RFPROC +#undef RFMEM +#undef RFNOWAIT +#undef RFCFDG +#endif + +enum +{ +/* RFNAMEG = (1<<0), */ +/* RFENVG = (1<<1), */ + RFFDG = (1<<2), + RFNOTEG = (1<<3), + RFPROC = (1<<4), + RFMEM = (1<<5), + RFNOWAIT = (1<<6), +/* RFCNAMEG = (1<<10), */ +/* RFCENVG = (1<<11), */ + RFCFDG = (1<<12), +/* RFREND = (1<<13), */ +/* RFNOMNT = (1<<14) */ +}; +extern int ffork(int, void(*)(void*), void*); /* arguments: RF* flags, fn, arg; implemented per system in ffork-$(SYSNAME).c */ + +/* wait for processes */ +#define wait _p9wait /* NOTE(review): presumably renamed to avoid clashing with the libc wait() - confirm */ +typedef struct Waitmsg Waitmsg; +struct Waitmsg 
+{ + int pid; /* of loved one */ + ulong time[3]; /* of loved one & descendants */ + char *msg; +}; +extern int await(char*, int); +extern Waitmsg* wait(void); + +/* synchronization */ +typedef struct Lock Lock; +struct Lock +{ + int val; /* 0 when free (unlock stores 0); _tas stores a nonzero marker when the lock is taken */ +}; + +extern int _tas(void*); +extern void lock(Lock*); +extern void unlock(Lock*); +extern int canlock(Lock*); + +typedef struct QLp QLp; +struct QLp /* one wait-queue slot, handed out by getqlp in qlock.c */ +{ + int inuse; /* claimed with _tas; cleared by the waiter once it has been woken */ + QLp *next; + int state; /* Queuing, QueuingR, QueuingW or Sleeping (enum in qlock.c) */ +}; + +typedef struct QLock QLock; +struct QLock +{ + Lock lock; /* spin lock protecting the fields below */ + int locked; /* nonzero while the QLock is held */ + QLp *head; + QLp *tail; +}; + +extern void qlock(QLock*); +extern void qunlock(QLock*); +extern int canqlock(QLock*); +extern void _qlockinit(ulong (*)(ulong, ulong)); + +typedef struct RWLock RWLock; +struct RWLock +{ + Lock lock; + int readers; /* number of readers currently holding the lock */ + int writer; /* nonzero while a writer holds the lock */ + QLp *head; + QLp *tail; +}; + +extern void rlock(RWLock*); +extern void runlock(RWLock*); +extern int canrlock(RWLock*); +extern void wlock(RWLock*); +extern void wunlock(RWLock*); +extern int canwlock(RWLock*); + +typedef struct Rendez Rendez; +struct Rendez +{ + QLock *l; /* must be set by the user and held when calling rsleep/rwakeup (both abort otherwise) */ + QLp *head; + QLp *tail; +}; + +extern void rsleep(Rendez*); +extern int rwakeup(Rendez*); +extern int rwakeupall(Rendez*); + +extern ulong rendezvous(ulong, ulong); + +/* one of a kind */ +extern void sysfatal(char*, ...); +extern int nrand(int); +extern void setmalloctag(void*, ulong); +extern void setrealloctag(void*, ulong); +extern void *mallocz(ulong, int); +extern long readn(int, void*, long); +extern void exits(char*); +extern void _exits(char*); +extern ulong getcallerpc(void*); + +/* string routines */ +extern char* strecpy(char*, char*, char*); +extern int tokenize(char*, char**, int); +extern int cistrncmp(char*, char*, int); +extern int cistrcmp(char*, char*); +extern char* cistrstr(char*, char*); +extern int getfields(char*, char**, int, int, char*); +extern int gettokens(char *, char **, int, char *); + +/* formatting helpers */ +extern int dec64(uchar*, int, char*, int); +extern int enc64(char*, int, uchar*, int); 
+extern int dec32(uchar*, int, char*, int); +extern int enc32(char*, int, uchar*, int); +extern int dec16(uchar*, int, char*, int); +extern int enc16(char*, int, uchar*, int); +extern int encodefmt(Fmt*); + +/* error string */ +enum +{ + ERRMAX = 128 /* size in bytes of the error-string buffers used by errstr.c */ +}; +extern void rerrstr(char*, uint); +extern void werrstr(char*, ...); +extern int errstr(char*, uint); + +/* compiler directives on plan 9 */ +#define USED(x) if(x){}else{} +#define SET(x) ((x)=0) + +/* command line */ +extern char *argv0; /* ARGBEGIN stores argv[0] here when it is still nil */ +#define ARGBEGIN for((argv0||(argv0=*argv)),argv++,argc--;\ + argv[0] && argv[0][0]=='-' && argv[0][1];\ + argc--, argv++) {\ + char *_args, *_argt;\ + Rune _argc;\ + _args = &argv[0][1];\ + if(_args[0]=='-' && _args[1]==0){\ + argc--; argv++; break;\ + }\ + _argc = 0;\ + while(*_args && (_args += chartorune(&_argc, _args)))\ + switch(_argc) +#define ARGEND SET(_argt);USED(_argt);USED(_argc);USED(_args);}USED(argv);USED(argc); +#define ARGF() (_argt=_args, _args="",\ + (*_argt? _argt: argv[1]? (argc--, *++argv): 0)) +#define EARGF(x) (_argt=_args, _args="",\ + (*_argt? _argt: argv[1]? 
(argc--, *++argv): ((x), abort(), (char*)0))) + +#define ARGC() _argc + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY +#define AEXIST 0 +#define AREAD 4 +#define AWRITE 2 +#define AEXEC 1 + +#if defined(__cplusplus) +} +#endif + +#endif /* _LIB9H_ */ diff --git a/src/lib9/lock.c b/src/lib9/lock.c new file mode 100644 index 00000000..2da73626 --- /dev/null +++ b/src/lib9/lock.c @@ -0,0 +1,54 @@ +#include <unistd.h> +#include <sched.h> +#include <lib9.h> + +int _ntas; /* number of _xtas calls made so far */ +static int +_xtas(void *v) /* checked _tas: returns the old value, aborting with a diagnostic if it is neither 0 nor 0xCAFEBABE */ +{ + int x; + +_ntas++; + x = _tas(v); + if(x == 0 || x == 0xCAFEBABE) + return x; + fprint(2, "%d: tas %p got %ux\n", getpid(), v, x); + abort(); +} + +int +canlock(Lock *l) /* single attempt: returns nonzero if the lock was acquired */ +{ + return !_xtas(&l->val); +} + +void +unlock(Lock *l) +{ + l->val = 0; +} + +void +lock(Lock *lk) /* acquires lk, backing off from one immediate try to sched_yield, then 100ms sleeps, then 1s sleeps */ +{ + int i; + + /* once fast */ + if(!_xtas(&lk->val)) + return; + /* a thousand times pretty fast */ + for(i=0; i<1000; i++){ + if(!_xtas(&lk->val)) + return; + sched_yield(); + } + /* now nice and slow */ + for(i=0; i<1000; i++){ + if(!_xtas(&lk->val)) + return; + usleep(100*1000); + } + /* take your time */ + while(_xtas(&lk->val)) + usleep(1000*1000); /* NOTE(review): POSIX allows usleep to reject arguments of 1000000 or more - confirm the target systems accept this */ +} diff --git a/src/lib9/malloctag.c b/src/lib9/malloctag.c new file mode 100644 index 00000000..e5682bc7 --- /dev/null +++ b/src/lib9/malloctag.c @@ -0,0 +1,15 @@ +#include <lib9.h> + +void +setmalloctag(void *v, ulong t) /* no-op here: both arguments are ignored */ +{ + USED(v); + USED(t); +} + +void +setrealloctag(void *v, ulong t) /* no-op here: both arguments are ignored */ +{ + USED(v); + USED(t); +} diff --git a/src/lib9/mallocz.c b/src/lib9/mallocz.c new file mode 100644 index 00000000..c6313008 --- /dev/null +++ b/src/lib9/mallocz.c @@ -0,0 +1,14 @@ +#include <unistd.h> +#include <string.h> +#include <lib9.h> + +void* +mallocz(unsigned long n, int clr) /* malloc(n), zero-filled when clr is nonzero; returns nil if malloc fails */ +{ + void *v; + + v = malloc(n); + if(clr && v) + memset(v, 0, n); + return v; +} diff --git a/src/lib9/mkfile b/src/lib9/mkfile new file mode 100644 index 00000000..703f6b06 --- /dev/null +++ b/src/lib9/mkfile @@ -0,0 +1,2 @@ +<../libutf/mkfile + diff --git a/src/lib9/nrand.c 
b/src/lib9/nrand.c new file mode 100644 index 00000000..cf9c17c3 --- /dev/null +++ b/src/lib9/nrand.c @@ -0,0 +1,17 @@ +#include <lib9.h> + +#define MASK 0x7fffffffL + +int +nrand(int n) /* returns a value in [0, n) derived from lrand(); n <= 0 is returned unchanged, which also keeps n == 0 away from the modulo operations below */ +{ + long slop, v; + + if(n <= 0) + return n; + slop = MASK % n; + do + v = lrand(); + while(v <= slop); /* redraw values at or below slop */ + return v % n; +} diff --git a/src/lib9/qlock.c b/src/lib9/qlock.c new file mode 100644 index 00000000..55a18466 --- /dev/null +++ b/src/lib9/qlock.c @@ -0,0 +1,360 @@ +#include <lib9.h> + +static struct { + QLp *p; /* slot handed out most recently; getqlp resumes its scan after it */ + QLp x[1024]; +} ql = { + ql.x +}; + +enum +{ + Queuing, + QueuingR, + QueuingW, + Sleeping, +}; + +static ulong (*_rendezvousp)(ulong, ulong) = rendezvous; + +/* this gets called by the thread library ONLY to get us to use its rendezvous */ +void +_qlockinit(ulong (*r)(ulong, ulong)) +{ + _rendezvousp = r; +} + +/* find a free shared memory location to queue ourselves in */ +static QLp* +getqlp(void) +{ + QLp *p, *op; + + op = ql.p; + for(p = op+1; ; p++){ + if(p == &ql.x[nelem(ql.x)]) + p = ql.x; + if(p == op) /* scanned the whole pool without finding a free slot */ + abort(); + if(_tas(&(p->inuse)) == 0){ + ql.p = p; + p->next = nil; + break; + } + } + return p; +} + +void +qlock(QLock *q) /* acquires q; if it is already held, appends a slot to q's wait list and blocks in rendezvous until qunlock hands the lock over */ +{ + QLp *p, *mp; + + lock(&q->lock); + if(!q->locked){ + q->locked = 1; + unlock(&q->lock); + return; + } + + + /* chain into waiting list */ + mp = getqlp(); + p = q->tail; + if(p == nil) + q->head = mp; + else + p->next = mp; + q->tail = mp; + mp->state = Queuing; + unlock(&q->lock); + + /* wait */ + while((*_rendezvousp)((ulong)mp, 1) == ~0) + ; + mp->inuse = 0; +} + +void +qunlock(QLock *q) /* releases q: wakes the head waiter, leaving q->locked set for it, or clears q->locked when nobody waits */ +{ + QLp *p; + + lock(&q->lock); + p = q->head; + if(p != nil){ + /* wakeup head waiting process */ + q->head = p->next; + if(q->head == nil) + q->tail = nil; + unlock(&q->lock); + while((*_rendezvousp)((ulong)p, 0x12345) == ~0) + ; + return; + } + q->locked = 0; + unlock(&q->lock); +} + +int +canqlock(QLock *q) /* takes q only if both its spin lock and q itself are free right now; returns 1 on success, 0 otherwise */ +{ + if(!canlock(&q->lock)) + return 0; + if(!q->locked){ + q->locked = 1; + unlock(&q->lock); + return 1; + } + unlock(&q->lock); + 
return 0; +} + +void +rlock(RWLock *q) /* acquires q for reading; queues as QueuingR when a writer holds q or anyone is already waiting */ +{ + QLp *p, *mp; + + lock(&q->lock); + if(q->writer == 0 && q->head == nil){ + /* no writer, go for it */ + q->readers++; + unlock(&q->lock); + return; + } + + mp = getqlp(); + p = q->tail; + if(p == 0) + q->head = mp; + else + p->next = mp; + q->tail = mp; + mp->next = nil; + mp->state = QueuingR; + unlock(&q->lock); + + /* wait in kernel */ + while((*_rendezvousp)((ulong)mp, 1) == ~0) + ; + mp->inuse = 0; +} + +int +canrlock(RWLock *q) /* takes a read lock without queuing; returns 1 on success, 0 if a writer holds q or anyone is waiting */ +{ + lock(&q->lock); + if (q->writer == 0 && q->head == nil) { + /* no writer; go for it */ + q->readers++; + unlock(&q->lock); + return 1; + } + unlock(&q->lock); + return 0; +} + +void +runlock(RWLock *q) /* drops a read lock; the last reader out hands q to the writer at the head of the queue, if any */ +{ + QLp *p; + + lock(&q->lock); + if(q->readers <= 0) + abort(); + p = q->head; + if(--(q->readers) > 0 || p == nil){ + unlock(&q->lock); + return; + } + + /* start waiting writer */ + if(p->state != QueuingW) + abort(); + q->head = p->next; + if(q->head == 0) + q->tail = 0; + q->writer = 1; + unlock(&q->lock); + + /* wakeup waiter */ + while((*_rendezvousp)((ulong)p, 0) == ~0) + ; +} + +void +wlock(RWLock *q) /* acquires q for writing; queues as QueuingW while there are readers or a writer */ +{ + QLp *p, *mp; + + lock(&q->lock); + if(q->readers == 0 && q->writer == 0){ + /* no one waiting, go for it */ + q->writer = 1; + unlock(&q->lock); + return; + } + + /* wait */ + p = q->tail; + mp = getqlp(); + if(p == nil) + q->head = mp; + else + p->next = mp; + q->tail = mp; + mp->next = nil; + mp->state = QueuingW; + unlock(&q->lock); + + /* wait in kernel */ + while((*_rendezvousp)((ulong)mp, 1) == ~0) + ; + mp->inuse = 0; +} + +int +canwlock(RWLock *q) /* takes the write lock without queuing; returns 1 on success, 0 if q has readers or a writer */ +{ + lock(&q->lock); + if (q->readers == 0 && q->writer == 0) { + /* no one waiting; go for it */ + q->writer = 1; + unlock(&q->lock); + return 1; + } + unlock(&q->lock); + return 0; +} + +void +wunlock(RWLock *q) /* drops the write lock, passing it to the next queued writer or admitting the run of queued readers at the head */ +{ + QLp *p; + + lock(&q->lock); + if(q->writer == 0) + abort(); + p = q->head; + if(p == nil){ + q->writer = 0; + unlock(&q->lock); + return; + } + if(p->state == QueuingW){ + /* start waiting writer */ + q->head = 
p->next; + if(q->head == nil) + q->tail = nil; + unlock(&q->lock); + while((*_rendezvousp)((ulong)p, 0) == ~0) + ; + return; + } + + if(p->state != QueuingR) + abort(); + + /* wake waiting readers */ + while(q->head != nil && q->head->state == QueuingR){ + p = q->head; + q->head = p->next; + q->readers++; + while((*_rendezvousp)((ulong)p, 0) == ~0) + ; + } + if(q->head == nil) + q->tail = nil; + q->writer = 0; + unlock(&q->lock); +} + +void +rsleep(Rendez *r) /* caller must hold r->l: queues on r, passes r->l to its next waiter (or unlocks it), then blocks until woken; aborts unless r->l is locked again on wakeup */ +{ + QLp *t, *me; + + if(!r->l) + abort(); + lock(&r->l->lock); + /* we should hold the qlock */ + if(!r->l->locked) + abort(); + + /* add ourselves to the wait list */ + me = getqlp(); + me->state = Sleeping; + if(r->head == nil) + r->head = me; + else + r->tail->next = me; + me->next = nil; + r->tail = me; + + /* pass the qlock to the next guy */ + t = r->l->head; + if(t){ + r->l->head = t->next; + if(r->l->head == nil) + r->l->tail = nil; + unlock(&r->l->lock); + while((*_rendezvousp)((ulong)t, 0x12345) == ~0) + ; + }else{ + r->l->locked = 0; + unlock(&r->l->lock); + } + + /* wait for a wakeup */ + while((*_rendezvousp)((ulong)me, 0x23456) == ~0) + ; + me->inuse = 0; + if(!r->l->locked) + abort(); +} + +int +rwakeup(Rendez *r) /* caller must hold r->l: moves the first sleeper on r to the front of r->l's wait queue; returns 1 if one was moved, 0 if nobody was sleeping */ +{ + QLp *t; + + /* + * take off wait and put on front of queue + * put on front so guys that have been waiting will not get starved + */ + + if(!r->l) + abort(); + lock(&r->l->lock); + if(!r->l->locked) + abort(); + + t = r->head; + if(t == nil){ + unlock(&r->l->lock); + return 0; + } + + r->head = t->next; + if(r->head == nil) + r->tail = nil; + + t->next = r->l->head; + r->l->head = t; + if(r->l->tail == nil) + r->l->tail = t; + + t->state = Queuing; + unlock(&r->l->lock); + return 1; +} + +int +rwakeupall(Rendez *r) /* calls rwakeup until it returns 0; returns the number of sleepers moved */ +{ + int i; + + for(i=0; rwakeup(r); i++) + ; + return i; +} diff --git a/src/lib9/rand.c b/src/lib9/rand.c new file mode 100644 index 00000000..34f77eca --- /dev/null +++ b/src/lib9/rand.c @@ -0,0 +1,89 @@ +#include <lib9.h> + +/* + * algorithm by + * D. P. 
Mitchell & J. A. Reeds + */ + +#define LEN 607 +#define TAP 273 +#define MASK 0x7fffffffL +#define A 48271 +#define M 2147483647 +#define Q 44488 +#define R 3399 +#define NORM (1.0/(1.0+MASK)) + +static ulong rng_vec[LEN]; +static ulong* rng_tap = rng_vec; +static ulong* rng_feed = 0; /* 0 until isrand has run; lrand uses this to seed itself on first use */ +static Lock lk; /* guards rng_vec, rng_tap and rng_feed */ + +static void +isrand(long seed) /* fills rng_vec from seed and resets both pointers; callers in this file hold lk */ +{ + long lo, hi, x; + int i; + + rng_tap = rng_vec; + rng_feed = rng_vec+LEN-TAP; + seed = seed%M; + if(seed < 0) + seed += M; + if(seed == 0) + seed = 89482311; + x = seed; + /* + * Initialize by x[n+1] = 48271 * x[n] mod (2**31 - 1) + */ + for(i = -20; i < LEN; i++) { /* the first 20 values are generated but not stored */ + hi = x / Q; + lo = x % Q; + x = A*lo - R*hi; + if(x < 0) + x += M; + if(i >= 0) + rng_vec[i] = x; + } +} + +void +srand(long seed) /* reseeds the generator under lk */ +{ + lock(&lk); + isrand(seed); + unlock(&lk); +} + +long +lrand(void) /* returns the next value, masked with MASK; seeds with isrand(1) if the generator was never seeded */ +{ + ulong x; + + lock(&lk); + + rng_tap--; + if(rng_tap < rng_vec) { + if(rng_feed == 0) { + isrand(1); + rng_tap--; + } + rng_tap += LEN; + } + rng_feed--; + if(rng_feed < rng_vec) + rng_feed += LEN; + x = (*rng_feed + *rng_tap) & MASK; + *rng_feed = x; + + unlock(&lk); + + return x; +} + +int +rand(void) /* low 15 bits of lrand() */ +{ + return lrand() & 0x7fff; +} + diff --git a/src/lib9/readn.c b/src/lib9/readn.c new file mode 100644 index 00000000..e7b9d138 --- /dev/null +++ b/src/lib9/readn.c @@ -0,0 +1,21 @@ +#include <lib9.h> + +long +readn(int f, void *av, long n) /* reads from f until n bytes have arrived or read() returns <= 0; returns the byte count, or read()'s own result when nothing was read */ +{ + char *a; + long m, t; + + a = av; + t = 0; + while(t < n){ + m = read(f, a+t, n-t); + if(m <= 0){ + if(t == 0) + return m; + break; + } + t += m; + } + return t; +} diff --git a/src/lib9/rendez.c b/src/lib9/rendez.c new file mode 100644 index 00000000..320bd11a --- /dev/null +++ b/src/lib9/rendez.c @@ -0,0 +1,180 @@ +/* + NAME + rendezvous - user level process synchronization + + SYNOPSIS + ulong rendezvous(ulong tag, ulong value) + + DESCRIPTION + The rendezvous system call allows two processes to synchro- + nize and exchange a value. 
In conjunction with the shared + memory system calls (see segattach(2) and fork(2)), it + enables parallel programs to control their scheduling. + + Two processes wishing to synchronize call rendezvous with a + common tag, typically an address in memory they share. One + process will arrive at the rendezvous first; it suspends + execution until a second arrives. When a second process + meets the rendezvous the value arguments are exchanged + between the processes and returned as the result of the + respective rendezvous system calls. Both processes are + awakened when the rendezvous succeeds. + + The set of tag values which two processes may use to + rendezvous-their tag space-is inherited when a process + forks, unless RFREND is set in the argument to rfork; see + fork(2). + + If a rendezvous is interrupted the return value is ~0, so + that value should not be used in normal communication. + + * This simulates rendezvous with shared memory, pause, and SIGUSR1. + */ + +#include <signal.h> +#include <lib9.h> + +enum +{ + VOUSHASH = 257, /* number of buckets in voushash */ +}; + +typedef struct Vous Vous; +struct Vous /* one pending rendezvous: the first arrival's pid, tag and value */ +{ + Vous *link; /* next entry in a hash bucket or on the free list */ + Lock lk; /* taken by the creator in findvous; rendezvous releases it just before suspending */ + int pid; + ulong val; + ulong tag; +}; + +static void +ign(int x) /* empty SIGUSR1 handler installed by ignusr1 */ +{ + USED(x); +} + +void /*__attribute__((constructor))*/ +ignusr1(void) +{ + signal(SIGUSR1, ign); +} + +static Vous vouspool[2048]; +static int nvousused; /* entries of vouspool handed out so far */ +static Vous *vousfree; +static Vous *voushash[VOUSHASH]; +static Lock vouslock; /* taken by findvous and putvous around the hash table and free list */ + +static Vous* +getvous(void) /* takes a Vous from the free list, else the next unused pool entry; aborts when the pool is exhausted. Called from findvous with vouslock held. */ +{ + Vous *v; + + if(vousfree){ + v = vousfree; + vousfree = v->link; + }else if(nvousused < nelem(vouspool)) + v = &vouspool[nvousused++]; + else + abort(); + return v; +} + +static void +putvous(Vous *v) /* returns v to the free list */ +{ + lock(&vouslock); + v->link = vousfree; + vousfree = v; + unlock(&vouslock); +} + +static Vous* +findvous(ulong tag, ulong val, int pid) /* looks up tag: an existing entry is unlinked from the table and returned; otherwise a new entry recording pid and val is inserted with its lk held */ +{ + int h; + Vous *v, **l; + + lock(&vouslock); + h = tag%VOUSHASH; + for(l=&voushash[h], v=*l; v; l=&(*l)->link, v=*l){ + if(v->tag == tag){ + *l = v->link; + unlock(&vouslock); + return 
v; + } + } + v = getvous(); + v->pid = pid; + v->link = voushash[h]; + v->val = val; + v->tag = tag; + lock(&v->lk); + voushash[h] = v; + unlock(&vouslock); + return v; +} + +#define DBG 0 +ulong +rendezvous(ulong tag, ulong val) /* exchanges val with another process calling rendezvous on the same tag and returns the partner's value; the first arrival suspends until the second signals it with SIGUSR1 */ +{ + int me, vpid; + ulong rval; + Vous *v; + sigset_t mask; + + me = getpid(); + v = findvous(tag, val, me); + if(v->pid == me){ /* the entry records our own pid, so it is treated as the one findvous just created for us */ + if(DBG)fprint(2, "pid is %d tag %lux, sleeping\n", me, tag); + /* + * No rendezvous partner was found; the next guy + * through will find v and wake us, so we must go + * to sleep. + * + * To go to sleep: + * 1. disable USR1 signals. + * 2. unlock v->lk (tells waker okay to signal us). + * 3. atomically suspend and enable USR1 signals. + * + * The call to ignusr1() could be done once at + * process creation instead of every time through rendezvous. + */ + v->val = val; + ignusr1(); + sigprocmask(SIG_SETMASK, NULL, &mask); + sigaddset(&mask, SIGUSR1); + sigprocmask(SIG_SETMASK, &mask, NULL); + sigdelset(&mask, SIGUSR1); + unlock(&v->lk); + sigsuspend(&mask); + rval = v->val; + if(DBG)fprint(2, "pid is %d, awake\n", me); + putvous(v); + }else{ + /* + * Found someone to meet. Wake him: + * + * A. lock v->lk (waits for him to get to his step 2) + * B. send a USR1 + * + * He won't get the USR1 until he suspends, which + * means it must wake him up (it can't get delivered + * before he sleeps). 
+ */ + vpid = v->pid; + lock(&v->lk); + rval = v->val; + v->val = val; + unlock(&v->lk); + if(kill(vpid, SIGUSR1) < 0){ + if(DBG)fprint(2, "pid is %d, kill %d failed: %r\n", me, vpid); + abort(); + } + } + return rval; +} + diff --git a/src/lib9/strecpy.c b/src/lib9/strecpy.c new file mode 100644 index 00000000..7d2f2277 --- /dev/null +++ b/src/lib9/strecpy.c @@ -0,0 +1,16 @@ +#include <lib9.h> + +char* +strecpy(char *to, char *e, char *from) /* copies from into [to, e), truncating if needed and always NUL-terminating; returns a pointer to the terminating NUL (or to unchanged when to >= e) */ +{ + if(to >= e) + return to; + to = memccpy(to, from, '\0', e - to); + if(to == nil){ /* no NUL was copied within the space available: truncate */ + to = e - 1; + *to = '\0'; + }else{ + to--; /* memccpy returns the byte after the copied NUL */ + } + return to; +} diff --git a/src/lib9/sysfatal.c b/src/lib9/sysfatal.c new file mode 100644 index 00000000..f9ab6985 --- /dev/null +++ b/src/lib9/sysfatal.c @@ -0,0 +1,20 @@ +#include <lib9.h> + +void (*_sysfatal)(char*, ...); /* optional hook; sysfatal calls it first when it is non-nil */ + +void +sysfatal(char *fmt, ...) /* formats the message, prints it to fd 2 prefixed by argv0 (or "<prog>"), then calls exits("fatal") */ +{ + char buf[256]; + va_list arg; + + va_start(arg, fmt); + if(_sysfatal) + (*_sysfatal)(fmt, arg); /* NOTE(review): a va_list is passed through "..." and arg is reused below - presumably the hook does not return; confirm */ + vseprint(buf, buf+sizeof buf, fmt, arg); + va_end(arg); + + fprint(2, "%s; %s\n", argv0 ? 
argv0 : "<prog>", buf); + exits("fatal"); +} + diff --git a/src/lib9/tas-386.s b/src/lib9/tas-386.s new file mode 100644 index 00000000..7a62d2d3 --- /dev/null +++ b/src/lib9/tas-386.s @@ -0,0 +1,6 @@ +.globl _tas +_tas: + movl $0xCAFEBABE, %eax + movl 4(%esp), %ecx + xchgl %eax, 0(%ecx) + ret diff --git a/src/lib9/test.c b/src/lib9/test.c new file mode 100644 index 00000000..3a358c6c --- /dev/null +++ b/src/lib9/test.c @@ -0,0 +1,8 @@ +#include <lib9.h> + +int +main(int argc, char **argv) /* smoke test: sets an error string with werrstr and prints it back through %r */ +{ + werrstr("hello world"); + print("%r\n"); +} diff --git a/src/lib9/testfork.c b/src/lib9/testfork.c new file mode 100644 index 00000000..a5e63718 --- /dev/null +++ b/src/lib9/testfork.c @@ -0,0 +1,21 @@ +#include <lib9.h> + +void +sayhi(void *v) /* child side of the test: meets main on tag 0x1234, offering the value 1234 */ +{ + USED(v); + + print("hello from subproc\n"); + print("rendez got %lu from main\n", rendezvous(0x1234, 1234)); + exits(0); +} + +int +main(int argc, char **argv) +{ + print("hello from main\n"); + ffork(RFMEM|RFPROC, sayhi, nil); /* NOTE(review): the result is not checked; if ffork fails, the rendezvous below has no partner */ + + print("rendez got %lu from subproc\n", rendezvous(0x1234, 0)); + exits(0); +} diff --git a/src/lib9/tokenize.c b/src/lib9/tokenize.c new file mode 100644 index 00000000..6fa9fc73 --- /dev/null +++ b/src/lib9/tokenize.c @@ -0,0 +1,106 @@ +#include <lib9.h> + +static char qsep[] = " \t\r\n"; + +static char* +qtoken(char *s, char *sep) /* rewrites the token starting at s in place with its single quotes removed and NUL-terminates it; returns where scanning should resume */ +{ + int quoting; + char *t; + + quoting = 0; + t = s; /* s is output string, t is input string */ + while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){ + if(*t != '\''){ + *s++ = *t++; + continue; + } + /* *t is a quote */ + if(!quoting){ + quoting = 1; + t++; + continue; + } + /* quoting and we're on a quote */ + if(t[1] != '\''){ + /* end of quoted section; absorb closing quote */ + t++; + quoting = 0; + continue; + } + /* doubled quote; fold two quotes into one */ + t++; + *s++ = *t++; + } + if(*s != '\0'){ + *s = '\0'; + if(t == s) + t++; + } + return t; +} + +static char* +etoken(char *t, char *sep) +{ + int quoting; + + /* move to end of next token */ + quoting = 
0; + while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){ + if(*t != '\''){ + t++; + continue; + } + /* *t is a quote */ + if(!quoting){ + quoting = 1; + t++; + continue; + } + /* quoting and we're on a quote */ + if(t[1] != '\''){ + /* end of quoted section; absorb closing quote */ + t++; + quoting = 0; + continue; + } + /* doubled quote; skip both quotes of the pair */ + t += 2; + } + return t; +} + +int +gettokens(char *s, char **args, int maxargs, char *sep) +{ + int nargs; + + for(nargs=0; nargs<maxargs; nargs++){ + while(*s!='\0' && utfrune(sep, *s)!=nil) + *s++ = '\0'; + if(*s == '\0') + break; + args[nargs] = s; + s = etoken(s, sep); + } + + return nargs; +} + +int +tokenize(char *s, char **args, int maxargs) +{ + int nargs; + + for(nargs=0; nargs<maxargs; nargs++){ + while(*s!='\0' && utfrune(qsep, *s)!=nil) + s++; + if(*s == '\0') + break; + args[nargs] = s; + s = qtoken(s, qsep); + } + + return nargs; +} diff --git a/src/lib9/u16.c b/src/lib9/u16.c new file mode 100644 index 00000000..d9f41e46 --- /dev/null +++ b/src/lib9/u16.c @@ -0,0 +1,52 @@ +#include <lib9.h> +static char t16e[] = "0123456789ABCDEF"; + +int /* dec16: decode the n characters at in as hex text into at most lim bytes at out; characters that are not hex digits are skipped; returns the number of bytes written */ +dec16(uchar *out, int lim, char *in, int n) +{ + int c, w = 0, i = 0; + uchar *start = out; + uchar *eout = out + lim; + + while(n-- > 0){ + c = *in++; + if('0' <= c && c <= '9') + c = c - '0'; + else if('a' <= c && c <= 'f') /* only a-f are digits; g-z would yield 16..35 and corrupt the high nibble */ + c = c - 'a' + 10; + else if('A' <= c && c <= 'F') + c = c - 'A' + 10; + else + continue; + w = (w<<4) + c; + i++; + if(i == 2){ + if(out + 1 > eout) + goto exhausted; + *out++ = w; + w = 0; + i = 0; + } + } +exhausted: + return out - start; +} + +int /* enc16: write the n bytes at in as upper-case hex into out (lim bytes); stops early unless at least three bytes remain, so the terminating NUL always fits; returns the number of characters written, not counting the NUL */ +enc16(char *out, int lim, uchar *in, int n) +{ + uint c; + char *eout = out + lim; + char *start = out; + + while(n-- > 0){ + c = *in++; + if(out + 2 >= eout) + goto exhausted; + *out++ = t16e[c>>4]; + *out++ = t16e[c&0xf]; + } +exhausted: + *out = 0; + return out - start; +} diff --git a/src/lib9/u32.c b/src/lib9/u32.c new file mode 100644 index 00000000..1eb0c6e0 --- /dev/null 
+++ b/src/lib9/u32.c @@ -0,0 +1,109 @@ +#include <lib9.h> + +int +dec32(uchar *dest, int ndest, char *src, int nsrc) +{ + char *s, *tab; + uchar *start; + int i, u[8]; + + if(ndest+1 < (5*nsrc+7)/8) + return -1; + start = dest; + tab = "23456789abcdefghijkmnpqrstuvwxyz"; + while(nsrc>=8){ + for(i=0; i<8; i++){ + s = strchr(tab,(int)src[i]); + u[i] = s ? s-tab : 0; + } + *dest++ = (u[0]<<3) | (0x7 & (u[1]>>2)); + *dest++ = ((0x3 & u[1])<<6) | (u[2]<<1) | (0x1 & (u[3]>>4)); + *dest++ = ((0xf & u[3])<<4) | (0xf & (u[4]>>1)); + *dest++ = ((0x1 & u[4])<<7) | (u[5]<<2) | (0x3 & (u[6]>>3)); + *dest++ = ((0x7 & u[6])<<5) | u[7]; + src += 8; + nsrc -= 8; + } + if(nsrc > 0){ + if(nsrc == 1 || nsrc == 3 || nsrc == 6) + return -1; + for(i=0; i<nsrc; i++){ + s = strchr(tab,(int)src[i]); + u[i] = s ? s-tab : 0; + } + *dest++ = (u[0]<<3) | (0x7 & (u[1]>>2)); + if(nsrc == 2) + goto out; + *dest++ = ((0x3 & u[1])<<6) | (u[2]<<1) | (0x1 & (u[3]>>4)); + if(nsrc == 4) + goto out; + *dest++ = ((0xf & u[3])<<4) | (0xf & (u[4]>>1)); + if(nsrc == 5) + goto out; + *dest++ = ((0x1 & u[4])<<7) | (u[5]<<2) | (0x3 & (u[6]>>3)); + } +out: + return dest-start; +} + +int +enc32(char *dest, int ndest, uchar *src, int nsrc) +{ + char *tab, *start; + int j; + + if(ndest <= (8*nsrc+4)/5 ) + return -1; + start = dest; + tab = "23456789abcdefghijkmnpqrstuvwxyz"; + while(nsrc>=5){ + j = (0x1f & (src[0]>>3)); + *dest++ = tab[j]; + j = (0x1c & (src[0]<<2)) | (0x03 & (src[1]>>6)); + *dest++ = tab[j]; + j = (0x1f & (src[1]>>1)); + *dest++ = tab[j]; + j = (0x10 & (src[1]<<4)) | (0x0f & (src[2]>>4)); + *dest++ = tab[j]; + j = (0x1e & (src[2]<<1)) | (0x01 & (src[3]>>7)); + *dest++ = tab[j]; + j = (0x1f & (src[3]>>2)); + *dest++ = tab[j]; + j = (0x18 & (src[3]<<3)) | (0x07 & (src[4]>>5)); + *dest++ = tab[j]; + j = (0x1f & (src[4])); + *dest++ = tab[j]; + src += 5; + nsrc -= 5; + } + if(nsrc){ + j = (0x1f & (src[0]>>3)); + *dest++ = tab[j]; + j = (0x1c & (src[0]<<2)); + if(nsrc == 1) + goto out; + j |= (0x03 & 
(src[1]>>6)); + *dest++ = tab[j]; + j = (0x1f & (src[1]>>1)); + if(nsrc == 2) + goto out; + *dest++ = tab[j]; + j = (0x10 & (src[1]<<4)); + if(nsrc == 3) + goto out; + j |= (0x0f & (src[2]>>4)); + *dest++ = tab[j]; + j = (0x1e & (src[2]<<1)); + if(nsrc == 4) + goto out; + j |= (0x01 & (src[3]>>7)); + *dest++ = tab[j]; + j = (0x1f & (src[3]>>2)); + *dest++ = tab[j]; + j = (0x18 & (src[3]<<3)); +out: + *dest++ = tab[j]; + } + *dest = 0; + return dest-start; +} diff --git a/src/lib9/u64.c b/src/lib9/u64.c new file mode 100644 index 00000000..a17bdf1d --- /dev/null +++ b/src/lib9/u64.c @@ -0,0 +1,126 @@ +#include <lib9.h> + +enum { + INVAL= 255 +}; + +static uchar t64d[256] = { + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, 62,INVAL,INVAL,INVAL, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + 
INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL, + INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL,INVAL +}; +static char t64e[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +int +dec64(uchar *out, int lim, char *in, int n) +{ + ulong b24; + uchar *start = out; + uchar *e = out + lim; + int i, c; + + b24 = 0; + i = 0; + while(n-- > 0){ + + c = t64d[*(uchar*)in++]; + if(c == INVAL) + continue; + switch(i){ + case 0: + b24 = c<<18; + break; + case 1: + b24 |= c<<12; + break; + case 2: + b24 |= c<<6; + break; + case 3: + if(out + 3 > e) + goto exhausted; + + b24 |= c; + *out++ = b24>>16; + *out++ = b24>>8; + *out++ = b24; + i = -1; + break; + } + i++; + } + switch(i){ + case 2: + if(out + 1 > e) + goto exhausted; + *out++ = b24>>16; + break; + case 3: + if(out + 2 > e) + goto exhausted; + *out++ = b24>>16; + *out++ = b24>>8; + break; + } +exhausted: + return out - start; +} + +int +enc64(char *out, int lim, uchar *in, int n) +{ + int i; + ulong b24; + char *start = out; + char *e = out + lim; + + for(i = n/3; i > 0; i--){ + b24 = (*in++)<<16; + b24 |= (*in++)<<8; + b24 |= *in++; + if(out + 4 >= e) + goto exhausted; + *out++ = t64e[(b24>>18)]; + *out++ = t64e[(b24>>12)&0x3f]; + *out++ = t64e[(b24>>6)&0x3f]; + *out++ = t64e[(b24)&0x3f]; + } + + switch(n%3){ + case 2: + b24 = (*in++)<<16; + b24 |= (*in)<<8; + if(out + 4 >= e) + goto exhausted; + *out++ = t64e[(b24>>18)]; + *out++ = t64e[(b24>>12)&0x3f]; + *out++ = t64e[(b24>>6)&0x3f]; + *out++ = '='; + break; + case 1: + b24 = (*in)<<16; + if(out + 4 >= e) + goto exhausted; + *out++ = t64e[(b24>>18)]; + *out++ = t64e[(b24>>12)&0x3f]; + *out++ = '='; + *out++ = '='; + break; + } +exhausted: + *out = 0; + return out - start; +} diff --git a/src/lib9/wait.c b/src/lib9/wait.c new file mode 100644 index 00000000..14af7156 --- /dev/null +++ b/src/lib9/wait.c @@ -0,0 +1,30 @@ +#include <lib9.h> + +Waitmsg* 
+wait(void) +{ + int n, l; + char buf[512], *fld[5]; + Waitmsg *w; + + n = await(buf, sizeof buf-1); + if(n < 0) + return nil; + buf[n] = '\0'; + if(tokenize(buf, fld, nelem(fld)) != nelem(fld)){ + werrstr("couldn't parse wait message"); + return nil; + } + l = strlen(fld[4])+1; + w = malloc(sizeof(Waitmsg)+l); + if(w == nil) + return nil; + w->pid = atoi(fld[0]); + w->time[0] = atoi(fld[1]); + w->time[1] = atoi(fld[2]); + w->time[2] = atoi(fld[3]); + w->msg = (char*)&w[1]; + memmove(w->msg, fld[4], l); + return w; +} + diff --git a/src/lib9/werrstr.c b/src/lib9/werrstr.c new file mode 100644 index 00000000..7fa1f2ea --- /dev/null +++ b/src/lib9/werrstr.c @@ -0,0 +1,13 @@ +#include <lib9.h> + +void +werrstr(char *fmt, ...) +{ + va_list arg; + char buf[ERRMAX]; + + va_start(arg, fmt); + vseprint(buf, buf+ERRMAX, fmt, arg); + va_end(arg); + errstr(buf, ERRMAX); +} diff --git a/src/libbio/LICENSE b/src/libbio/LICENSE new file mode 100644 index 00000000..a5d7d87d --- /dev/null +++ b/src/libbio/LICENSE @@ -0,0 +1,258 @@ +The Plan 9 software is provided under the terms of the +Lucent Public License, Version 1.02, reproduced below, +with the following exceptions: + +1. 
Send comments about packaging to +Russ Cox <rsc@post.harvard.edu> + diff --git a/src/libbio/bbuffered.c b/src/libbio/bbuffered.c new file mode 100644 index 00000000..dfc0cf53 --- /dev/null +++ b/src/libbio/bbuffered.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <bio.h> + +int +Bbuffered(Biobuf *bp) +{ + switch(bp->state) { + case Bracteof: + case Bractive: + return -bp->icount; + + case Bwactive: + return bp->bsize + bp->ocount; + + case Binactive: + return 0; + } + fprint(2, "Bbuffered: unknown state %d\n", bp->state); + return 0; +} diff --git a/src/libbio/bcat.c b/src/libbio/bcat.c new file mode 100644 index 00000000..dea346a5 --- /dev/null +++ b/src/libbio/bcat.c @@ -0,0 +1,42 @@ +#include <fmt.h> +#include "bio.h" + +Biobuf bout; + +void +bcat(Biobuf *b, char *name) +{ + char buf[1000]; + int n; + + while((n = Bread(b, buf, sizeof buf)) > 0){ + if(Bwrite(&bout, buf, n) < 0) + fprint(2, "writing during %s: %r\n", name); + } + if(n < 0) + fprint(2, "reading %s: %r\n", name); +} + +int +main(int argc, char **argv) +{ + int i; + Biobuf b, *bp; + + Binit(&bout, 1, O_WRONLY); + + if(argc == 1){ + Binit(&b, 0, O_RDONLY); + bcat(&b, "<stdin>"); + }else{ + for(i=1; i<argc; i++){ + if((bp = Bopen(argv[i], O_RDONLY)) == 0){ + fprint(2, "Bopen %s: %r\n", argv[i]); + continue; + } + bcat(bp, argv[i]); + Bterm(bp); + } + } + exit(0); +} diff --git a/src/libbio/bfildes.c b/src/libbio/bfildes.c new file mode 100644 index 00000000..5188180f --- /dev/null +++ b/src/libbio/bfildes.c @@ -0,0 +1,9 @@ +#include "lib9.h" +#include <bio.h> + +int +Bfildes(Biobuf *bp) +{ + + return bp->fid; +} diff --git a/src/libbio/bflush.c b/src/libbio/bflush.c new file mode 100644 index 00000000..0ab81267 --- /dev/null +++ b/src/libbio/bflush.c @@ -0,0 +1,33 @@ +#include "lib9.h" +#include <bio.h> + +int +Bflush(Biobuf *bp) +{ + int n, c; + + switch(bp->state) { + case Bwactive: + n = bp->bsize+bp->ocount; + if(n == 0) + return 0; + c = write(bp->fid, bp->bbuf, n); + if(n == c) { + bp->offset += 
n; + bp->ocount = -bp->bsize; + return 0; + } + bp->state = Binactive; + bp->ocount = 0; + break; + + case Bracteof: + bp->state = Bractive; + + case Bractive: + bp->icount = 0; + bp->gbuf = bp->ebuf; + return 0; + } + return Beof; +} diff --git a/src/libbio/bgetc.c b/src/libbio/bgetc.c new file mode 100644 index 00000000..4c8ae90c --- /dev/null +++ b/src/libbio/bgetc.c @@ -0,0 +1,52 @@ +#include "lib9.h" +#include <bio.h> + +int +Bgetc(Biobuf *bp) +{ + int i; + +loop: + i = bp->icount; + if(i != 0) { + bp->icount = i+1; + return bp->ebuf[i]; + } + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + return Beof; + } + /* + * get next buffer, try to keep Bungetsize + * characters pre-catenated from the previous + * buffer to allow that many ungets. + */ + memmove(bp->bbuf-Bungetsize, bp->ebuf-Bungetsize, Bungetsize); + i = read(bp->fid, bp->bbuf, bp->bsize); + bp->gbuf = bp->bbuf; + if(i <= 0) { + if(i < 0) + bp->state = Binactive; + return Beof; + } + if(i < bp->bsize) { + memmove(bp->ebuf-i-Bungetsize, bp->bbuf-Bungetsize, i+Bungetsize); + bp->gbuf = bp->ebuf-i; + } + bp->icount = -i; + bp->offset += i; + goto loop; +} + +int +Bungetc(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount--; + return 1; +} diff --git a/src/libbio/bgetd.c b/src/libbio/bgetd.c new file mode 100644 index 00000000..e7dd305d --- /dev/null +++ b/src/libbio/bgetd.c @@ -0,0 +1,36 @@ +#include "lib9.h" +#include <bio.h> + +struct bgetd +{ + Biobuf* b; + int eof; +}; + +static int +Bgetdf(void *vp) +{ + int c; + struct bgetd *bg = vp; + + c = Bgetc(bg->b); + if(c == Beof) + bg->eof = 1; + return c; +} + +int +Bgetd(Biobuf *bp, double *dp) +{ + double d; + struct bgetd b; + + b.b = bp; + b.eof = 0; + d = fmtcharstod(Bgetdf, &b); + if(b.eof) + return -1; + Bungetc(bp); + *dp = d; + return 1; +} diff --git a/src/libbio/bgetrune.c b/src/libbio/bgetrune.c new file mode 100644 index 
00000000..f1b065bc --- /dev/null +++ b/src/libbio/bgetrune.c @@ -0,0 +1,47 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +long +Bgetrune(Biobuf *bp) +{ + int c, i; + Rune rune; + char str[4]; + + c = Bgetc(bp); + if(c < Runeself) { /* one char */ + bp->runesize = 1; + return c; + } + str[0] = c; + + for(i=1;;) { + c = Bgetc(bp); + if(c < 0) + return c; + str[i++] = c; + + if(fullrune(str, i)) { + bp->runesize = chartorune(&rune, str); + while(i > bp->runesize) { + Bungetc(bp); + i--; + } + return rune; + } + } +} + +int +Bungetrune(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount -= bp->runesize; + bp->runesize = 0; + return 1; +} diff --git a/src/libbio/binit.c b/src/libbio/binit.c new file mode 100644 index 00000000..d76168b7 --- /dev/null +++ b/src/libbio/binit.c @@ -0,0 +1,144 @@ +#include "lib9.h" +#include <bio.h> + +enum +{ + MAXBUFS = 20 +}; + +static Biobuf* wbufs[MAXBUFS]; +static int atexitflag; + +static +void +batexit(void) +{ + Biobuf *bp; + int i; + + for(i=0; i<MAXBUFS; i++) { + bp = wbufs[i]; + if(bp != 0) { + wbufs[i] = 0; + Bflush(bp); + } + } +} + +static +void +deinstall(Biobuf *bp) +{ + int i; + + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == bp) + wbufs[i] = 0; +} + +static +void +install(Biobuf *bp) +{ + int i; + + deinstall(bp); + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == 0) { + wbufs[i] = bp; + break; + } + if(atexitflag == 0) { + atexitflag = 1; + atexit(batexit); + } +} + +int +Binits(Biobuf *bp, int f, int mode, unsigned char *p, int size) +{ + + p += Bungetsize; /* make room for Bungets */ + size -= Bungetsize; + + switch(mode) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return Beof; + + case OREAD: + bp->state = Bractive; + bp->ocount = 0; + break; + + case OWRITE: + install(bp); + bp->state = Bwactive; + bp->ocount = -size; + break; + } + bp->bbuf = p; + bp->ebuf = p+size; + bp->bsize = size; + bp->icount = 0; + bp->gbuf = 
bp->ebuf; + bp->fid = f; + bp->flag = 0; + bp->rdline = 0; + bp->offset = 0; +/* bp->runesize = 0; */ + return 0; +} + + +int +Binit(Biobuf *bp, int f, int mode) +{ + return Binits(bp, f, mode, bp->b, sizeof(bp->b)); +} + +Biobuf* /* Bopen: open name (OREAD) or create it (OWRITE) and return a malloc'ed Biobuf set up on the new descriptor; returns 0 on an unknown mode, open/creat failure, or allocation failure */ +Bopen(char *name, int mode) +{ + Biobuf *bp; + int f; + + switch(mode) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return 0; + + case OREAD: + f = open(name, OREAD); + if(f < 0) + return 0; + break; + + case OWRITE: + f = creat(name, 0666); + if(f < 0) + return 0; + } + bp = malloc(sizeof(Biobuf)); + if(bp == 0){ + close(f); /* don't leak the descriptor opened above */ + return 0; + } + Binits(bp, f, mode, bp->b, sizeof(bp->b)); + bp->flag = Bmagic; /* marks bp as ours, so Bterm closes fid and frees it */ + return bp; +} + +int +Bterm(Biobuf *bp) +{ + + deinstall(bp); + Bflush(bp); + if(bp->flag == Bmagic) { + bp->flag = 0; + close(bp->fid); + free(bp); + } + return 0; +} diff --git a/src/libbio/bio.3 b/src/libbio/bio.3 new file mode 100644 index 00000000..cf504631 --- /dev/null +++ b/src/libbio/bio.3 @@ -0,0 +1,336 @@ +.TH BIO 3 +.SH NAME +Bopen, Binit, Binits, Brdline, Brdstr, Bgetc, Bgetd, Bungetc, Bread, Bseek, Boffset, Bfildes, Blinelen, Bputc, Bprint, Bvprint, Bwrite, Bflush, Bterm, Bbuffered \- buffered input/output +.SH SYNOPSIS +.ta \w'Biobuf* 'u +.B #include <fmt.h> +.B #include <bio.h> +.PP +.B +Biobuf* Bopen(char *file, int mode) +.PP +.B +int Binit(Biobuf *bp, int fd, int mode) +.PP +.B +int Bterm(Biobuf *bp) +.PP +.B +int Bprint(Biobuf *bp, char *format, ...) 
+.PP +.B +int Bvprint(Biobuf *bp, char *format, va_list arglist); +.PP +.B +void* Brdline(Biobuf *bp, int delim) +.PP +.B +char* Brdstr(Biobuf *bp, int delim, int nulldelim) +.PP +.B +int Blinelen(Biobuf *bp) +.PP +.B +off_t Boffset(Biobuf *bp) +.PP +.B +int Bfildes(Biobuf *bp) +.PP +.B +int Bgetc(Biobuf *bp) +.PP +.B +long Bgetrune(Biobuf *bp) +.PP +.B +int Bgetd(Biobuf *bp, double *d) +.PP +.B +int Bungetc(Biobuf *bp) +.PP +.B +int Bungetrune(Biobuf *bp) +.PP +.B +off_t Bseek(Biobuf *bp, off_t n, int type) +.PP +.B +int Bputc(Biobuf *bp, int c) +.PP +.B +int Bputrune(Biobuf *bp, long c) +.PP +.B +long Bread(Biobuf *bp, void *addr, long nbytes) +.PP +.B +long Bwrite(Biobuf *bp, void *addr, long nbytes) +.PP +.B +int Bflush(Biobuf *bp) +.PP +.B +int Bbuffered(Biobuf *bp) +.PP +.SH DESCRIPTION +These routines implement fast buffered I/O. +I/O on different file descriptors is independent. +.PP +.I Bopen +opens +.I file +for mode +.B O_RDONLY +or creates for mode +.BR O_WRONLY . +It calls +.IR malloc (3) +to allocate a buffer. +.PP +.I Binit +initializes a buffer +with the open file descriptor passed in +by the user. +.PP +Arguments +of type pointer to Biobuf +identify the buffer to operate on in the following routines. +.PP +.IR Bopen , +.IR Binit , +or +.I Binits +should be called before any of the +other routines on that buffer. +.I Bfildes +returns the integer file descriptor of the associated open file. +.PP +.I Bterm +flushes the buffer for +.IR bp . +If the buffer was allocated by +.IR Bopen , +the buffer is +.I freed +and the file is closed. +.PP +.I Brdline +reads a string from the file associated with +.I bp +up to and including the first +.I delim +character. +The delimiter character at the end of the line is +not altered. +.I Brdline +returns a pointer to the start of the line or +.L 0 +on end-of-file or read error. +.I Blinelen +returns the length (including the delimiter) +of the most recent string returned by +.IR Brdline . 
+.PP +.I Brdstr +returns a +.IR malloc (3)-allocated +buffer containing the next line of input delimited by +.IR delim , +terminated by a NUL (0) byte. +Unlike +.IR Brdline , +which returns when its buffer is full even if no delimiter has been found, +.I Brdstr +will return an arbitrarily long line in a single call. +If +.I nulldelim +is set, the terminal delimiter will be overwritten with a NUL. +After a successful call to +.IR Brdstr , +the return value of +.I Blinelen +will be the length of the returned buffer, excluding the NUL. +.PP +.I Bgetc +returns the next byte from +.IR bp , +or a negative value +at end of file. +.I Bungetc +may be called immediately after +.I Bgetc +to allow the same byte to be reread. +.PP +.I Bgetrune +calls +.I Bgetc +to read the bytes of the next +.SM UTF +sequence in the input stream and returns the value of the rune +represented by the sequence. +It returns a negative value +at end of file. +.I Bungetrune +may be called immediately after +.I Bgetrune +to allow the same +.SM UTF +sequence to be reread as either bytes or a rune. +.I Bungetc +and +.I Bungetrune +may back up a maximum of five bytes. +.PP +.I Bgetd +uses +.I fmtcharstod +(undocumented) +and +.I Bgetc +to read the formatted +floating-point number in the input stream, +skipping initial blanks and tabs. +The value is stored in +.BR *d . +.PP +.I Bread +reads +.I nbytes +of data from +.I bp +into memory starting at +.IR addr . +The number of bytes read is returned on success +and a negative value is returned if a read error occurred. +.PP +.I Bseek +applies +.IR lseek (2) +to +.IR bp . +It returns the new file offset. +.I Boffset +returns the file offset of the next character to be processed. +.PP +.I Bputc +outputs the low order 8 bits of +.I c +on +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, a zero is returned. 
+.PP +.I Bputrune +calls +.I Bputc +to output the low order +16 bits of +.I c +as a rune +in +.SM UTF +format +on the output stream. +.PP +.I Bprint +is a buffered interface to +.IR print (3). +If this causes a +.IR write +to occur and there is an error, +a negative value +.RB ( Beof ) +is returned. +Otherwise, the number of bytes output is returned. +.I Bvprint +does the same except it takes as argument a +.B va_list +parameter, so it can be called within a variadic function. +.PP +.I Bwrite +outputs +.I nbytes +of data starting at +.I addr +to +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, the number of bytes written is returned. +.PP +.I Bflush +causes any buffered output associated with +.I bp +to be written. +The return is as for +.IR Bputc . +.I Bflush +is called on +exit for every buffer still open +for writing. +.PP +.I Bbuffered +returns the number of bytes in the buffer. +When reading, this is the number of bytes still available from the last +read on the file; when writing, it is the number of bytes ready to be +written. +.PP +This library uses +.IR fmt (3) +for diagnostic messages about internal errors, +as well as for the implementation of +.I Bprint +and +.IR Bvprint . +It uses +.IR utf (3) +for the implementation of +.I Bgetrune +and +.IR Bputrune . +.SH SEE ALSO +.IR atexit (3), +.IR open (2), +.IR print (3), +.IR utf (7) +.SH DIAGNOSTICS +.I Bio +routines that return integers yield +.B Beof +if +.I bp +is not the descriptor of an open file. +.I Bopen +returns zero if the file cannot be opened in the given mode. +.SH HISTORY +The +.IR bio (3) +library originally appeared in Plan 9. +This is a port of the Plan 9 bio library. +.SH BUGS +.I Brdline +returns an error on strings longer than the buffer associated +with the file +and also if the end-of-file is encountered +before a delimiter. +.I Blinelen +will tell how many characters are available +in these cases. 
+In the case of a true end-of-file, +.I Blinelen +will return zero. +At the cost of allocating a buffer, +.I Brdstr +sidesteps these issues. +.PP +The data returned by +.I Brdline +may be overwritten by calls to any other +.I bio +routine on the same +.IR bp . diff --git a/src/libbio/bio.h b/src/libbio/bio.h new file mode 100644 index 00000000..c4812392 --- /dev/null +++ b/src/libbio/bio.h @@ -0,0 +1,79 @@ +#ifndef _BIOH_ +#define _BIOH_ 1 + +#include <sys/types.h> /* for off_t */ +#include <fcntl.h> /* for O_RDONLY, O_WRONLY */ + +typedef struct Biobuf Biobuf; + +enum +{ + Bsize = 8*1024, + Bungetsize = 4, /* space for ungetc */ + Bmagic = 0x314159, + Beof = -1, + Bbad = -2, + + Binactive = 0, /* states */ + Bractive, + Bwactive, + Bracteof, + + Bend +}; + +struct Biobuf +{ + int icount; /* neg num of bytes at eob */ + int ocount; /* num of bytes at bob */ + int rdline; /* num of bytes after rdline */ + int runesize; /* num of bytes of last getrune */ + int state; /* r/w/inactive */ + int fid; /* open file */ + int flag; /* magic if malloc'ed */ + off_t offset; /* offset of buffer in file */ + int bsize; /* size of buffer */ + unsigned char* bbuf; /* pointer to beginning of buffer */ + unsigned char* ebuf; /* pointer to end of buffer */ + unsigned char* gbuf; /* pointer to good data in buf */ + unsigned char b[Bungetsize+Bsize]; +}; + +#define BGETC(bp)\ + ((bp)->icount?(bp)->bbuf[(bp)->bsize+(bp)->icount++]:Bgetc((bp))) +#define BPUTC(bp,c)\ + ((bp)->ocount?(bp)->bbuf[(bp)->bsize+(bp)->ocount++]=(c),0:Bputc((bp),(c))) +#define BOFFSET(bp)\ + (((bp)->state==Bractive)?\ + (bp)->offset + (bp)->icount:\ + (((bp)->state==Bwactive)?\ + (bp)->offset + ((bp)->bsize + (bp)->ocount):\ + -1)) +#define BLINELEN(bp)\ + (bp)->rdline +#define BFILDES(bp)\ + (bp)->fid + +int Bbuffered(Biobuf*); +int Bfildes(Biobuf*); +int Bflush(Biobuf*); +int Bgetc(Biobuf*); +int Bgetd(Biobuf*, double*); +int Binit(Biobuf*, int, int); +int Binits(Biobuf*, int, int, unsigned char*, int); +int 
Blinelen(Biobuf*); +off_t Boffset(Biobuf*); +Biobuf* Bopen(char*, int); +int Bprint(Biobuf*, char*, ...); +int Bputc(Biobuf*, int); +void* Brdline(Biobuf*, int); +long Bread(Biobuf*, void*, long); +off_t Bseek(Biobuf*, off_t, int); +int Bterm(Biobuf*); +int Bungetc(Biobuf*); +long Bwrite(Biobuf*, void*, long); +char* Brdstr(Biobuf*, int, int); +long Bgetrune(Biobuf*); +int Bputrune(Biobuf*, long); + +#endif diff --git a/src/libbio/boffset.c b/src/libbio/boffset.c new file mode 100644 index 00000000..df28aaf7 --- /dev/null +++ b/src/libbio/boffset.c @@ -0,0 +1,25 @@ +#include "lib9.h" +#include <bio.h> + +off_t +Boffset(Biobuf *bp) +{ + off_t n; + + switch(bp->state) { + default: + fprint(2, "Boffset: unknown state %d\n", bp->state); + n = Beof; + break; + + case Bracteof: + case Bractive: + n = bp->offset + bp->icount; + break; + + case Bwactive: + n = bp->offset + (bp->bsize + bp->ocount); + break; + } + return n; +} diff --git a/src/libbio/bprint.c b/src/libbio/bprint.c new file mode 100644 index 00000000..81e71e5e --- /dev/null +++ b/src/libbio/bprint.c @@ -0,0 +1,28 @@ +#include "lib9.h" +#include <bio.h> + +int +Bprint(Biobuf *bp, char *fmt, ...) 
+{ + va_list ap; + char *ip, *ep, *out; + int n; + + ep = (char*)bp->ebuf; + ip = ep + bp->ocount; + va_start(ap, fmt); + out = vseprint(ip, ep, fmt, ap); + va_end(ap); + if(out == 0 || out >= ep-5) { + Bflush(bp); + ip = ep + bp->ocount; + va_start(ap, fmt); + out = vseprint(ip, ep, fmt, ap); + va_end(ap); + if(out >= ep-5) + return Beof; + } + n = out-ip; + bp->ocount += n; + return n; +} diff --git a/src/libbio/bputc.c b/src/libbio/bputc.c new file mode 100644 index 00000000..5f0fba59 --- /dev/null +++ b/src/libbio/bputc.c @@ -0,0 +1,29 @@ +#include "lib9.h" +#include <bio.h> + +int +Bputc(Biobuf *bp, int c) +{ + int i, j; + +loop: + i = bp->ocount; + j = i+1; + if(i != 0) { + bp->ocount = j; + bp->ebuf[i] = c; + return 0; + } + if(bp->state != Bwactive) + return Beof; + j = write(bp->fid, bp->bbuf, bp->bsize); + if(j == bp->bsize) { + bp->ocount = -bp->bsize; + bp->offset += j; + goto loop; + } + fprint(2, "Bputc: write error\n"); + bp->state = Binactive; + bp->ocount = 0; + return Beof; +} diff --git a/src/libbio/bputrune.c b/src/libbio/bputrune.c new file mode 100644 index 00000000..a2eaa83e --- /dev/null +++ b/src/libbio/bputrune.c @@ -0,0 +1,23 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +int +Bputrune(Biobuf *bp, long c) +{ + Rune rune; + char str[4]; + int n; + + rune = c; + if(rune < Runeself) { + Bputc(bp, rune); + return 1; + } + n = runetochar(str, &rune); + if(n == 0) + return Bbad; + if(Bwrite(bp, str, n) != n) + return Beof; + return n; +} diff --git a/src/libbio/brdline.c b/src/libbio/brdline.c new file mode 100644 index 00000000..4ac6316a --- /dev/null +++ b/src/libbio/brdline.c @@ -0,0 +1,94 @@ +#include "lib9.h" +#include <bio.h> + +void* +Brdline(Biobuf *bp, int delim) +{ + char *ip, *ep; + int i, j; + + i = -bp->icount; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->rdline = 0; + bp->gbuf = bp->ebuf; + return 0; + } + } + + /* + * first 
try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->rdline = j; + bp->icount += j; + return ip; + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0) { + /* + * end of file with no delim + */ + memmove(bp->ebuf-i, bp->bbuf, i); + bp->rdline = i; + bp->icount = -i; + bp->gbuf = bp->ebuf-i; + return 0; + } + bp->offset += j; + i += j; + ep = memchr(ip, delim, j); + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->rdline = j; + bp->icount = j - i; + return ip; + } + ip += j; + } + + /* + * full buffer without finding + */ + bp->rdline = bp->bsize; + bp->icount = -bp->bsize; + bp->gbuf = bp->bbuf; + return 0; +} + +int +Blinelen(Biobuf *bp) +{ + + return bp->rdline; +} diff --git a/src/libbio/brdstr.c b/src/libbio/brdstr.c new file mode 100644 index 00000000..b3612a53 --- /dev/null +++ b/src/libbio/brdstr.c @@ -0,0 +1,112 @@ +#include "lib9.h" +#include <bio.h> + +static char* +badd(char *p, int *np, char *data, int ndata, int delim, int nulldelim) +{ + int n; + + n = *np; + p = realloc(p, n+ndata+1); + if(p){ + memmove(p+n, data, ndata); + n += ndata; + if(n>0 && nulldelim && p[n-1]==delim) + p[--n] = '\0'; + else + p[n] = '\0'; + *np = n; + } + return p; +} + +char* +Brdstr(Biobuf *bp, int delim, int nulldelim) +{ + char *ip, *ep, *p; + int i, j; + + i = -bp->icount; + bp->rdline = 0; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->gbuf = bp->ebuf; + return nil; + } + } + + /* + 
* first try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->icount += j; + return badd(nil, &bp->rdline, ip, j, delim, nulldelim); + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + p = nil; + for(;;){ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0 && i == 0) + return p; + if(j <= 0 && i > 0){ + /* + * end of file but no delim. pretend we got a delim + * by making the delim \0 and smashing it with nulldelim. + */ + j = 1; + ep = ip; + delim = '\0'; + nulldelim = 1; + *ep = delim; /* there will be room for this */ + }else{ + bp->offset += j; + ep = memchr(ip, delim, j); + } + i += j; + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->icount = j - i; + return badd(p, &bp->rdline, ip, j, delim, nulldelim); + } + ip += j; + } + + /* + * full buffer without finding; add to user string and continue + */ + p = badd(p, &bp->rdline, (char*)bp->bbuf, bp->bsize, 0, 0); + i = 0; + bp->icount = 0; + bp->gbuf = bp->ebuf; + } + return 0; /* never happens */ +} diff --git a/src/libbio/bread.c b/src/libbio/bread.c new file mode 100644 index 00000000..0254d017 --- /dev/null +++ b/src/libbio/bread.c @@ -0,0 +1,45 @@ +#include "lib9.h" +#include <bio.h> + +long +Bread(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, ic; + + p = ap; + c = count; + ic = bp->icount; + + while(c > 0) { + n = -ic; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bractive) + break; + i = read(bp->fid, bp->bbuf, bp->bsize); + if(i <= 0) { + bp->state = Bracteof; + if(i < 0) + bp->state = Binactive; + break; + } + 
bp->gbuf = bp->bbuf; + bp->offset += i; + if(i < bp->bsize) { + memmove(bp->ebuf-i, bp->bbuf, i); + bp->gbuf = bp->ebuf-i; + } + ic = -i; + continue; + } + memmove(p, bp->ebuf+ic, n); + c -= n; + ic += n; + p += n; + } + bp->icount = ic; + return count-c; +} diff --git a/src/libbio/bseek.c b/src/libbio/bseek.c new file mode 100644 index 00000000..f4325fb8 --- /dev/null +++ b/src/libbio/bseek.c @@ -0,0 +1,56 @@ +#include "lib9.h" +#include <bio.h> + +off_t +Bseek(Biobuf *bp, off_t offset, int base) +{ + off_t n, d; + + switch(bp->state) { + default: + fprint(2, "Bseek: unknown state %d\n", bp->state); + return Beof; + + case Bracteof: + bp->state = Bractive; + bp->icount = 0; + bp->gbuf = bp->ebuf; + + case Bractive: + n = offset; + if(base == 1) { + n += Boffset(bp); + base = 0; + } + + /* + * try to seek within buffer + */ + if(base == 0) { + d = n - Boffset(bp); + bp->icount += d; + if(d >= 0) { + if(bp->icount <= 0) + return n; + } else { + if(bp->ebuf - bp->gbuf >= -bp->icount) + return n; + } + } + + /* + * reset the buffer + */ + n = lseek(bp->fid, n, base); + bp->icount = 0; + bp->gbuf = bp->ebuf; + break; + + case Bwactive: + Bflush(bp); + n = lseek(bp->fid, offset, base); + break; + } + bp->offset = n; + return n; +} diff --git a/src/libbio/bundle.ports b/src/libbio/bundle.ports new file mode 100644 index 00000000..bea32d34 --- /dev/null +++ b/src/libbio/bundle.ports @@ -0,0 +1,45 @@ +--- Makefile --- +# New ports collection makefile for: libbio +# Date Created: 11 Feb 2003 +# Whom: rsc +# +# THIS LINE NEEDS REPLACING. 
IT'S HERE TO GET BY PORTLINT +# $FreeBSD: ports/devel/libbio/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $ + +PORTNAME= libbio +PORTVERSION= 2.0 +CATEGORIES= devel +MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/ +EXTRACT_SUFX= .tgz + +MAINTAINER= rsc@post.harvard.edu + +DEPENDS= ${PORTSDIR}/devel/libfmt ${PORTSDIR}/devel/libutf + +MAN3= bio.3 +USE_REINPLACE= yes + +.include <bsd.port.pre.mk> + +post-patch: + ${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile + +.include <bsd.port.post.mk> +--- pkg-comment --- +Simple buffered I/O library from Plan 9 +--- pkg-descr --- +Libbio is a port of Plan 9's buffered I/O library. +It provides most of the same functionality as stdio or sfio, +but with a simpler interface and smaller footprint. + +WWW: http://pdos.lcs.mit.edu/~rsc/software/#libbio +http://plan9.bell-labs.com/magic/man2html/2/bio + +Russ Cox +rsc@post.harvard.edu +--- pkg-plist --- +lib/libbio.a +include/bio.h +--- /dev/null --- +This is just a way to make sure blank lines don't +creep into pkg-plist.
diff --git a/src/libbio/bwrite.c b/src/libbio/bwrite.c new file mode 100644 index 00000000..2dfeaaad --- /dev/null +++ b/src/libbio/bwrite.c @@ -0,0 +1,38 @@ +#include "lib9.h" +#include <bio.h> + +long +Bwrite(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, oc; + + p = ap; + c = count; + oc = bp->ocount; + + while(c > 0) { + n = -oc; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bwactive) + return Beof; + i = write(bp->fid, bp->bbuf, bp->bsize); + if(i != bp->bsize) { + bp->state = Binactive; + return Beof; + } + bp->offset += i; + oc = -bp->bsize; + continue; + } + memmove(bp->ebuf+oc, p, n); + oc += n; + c -= n; + p += n; + } + bp->ocount = oc; + return count-c; +} diff --git a/src/libbio/lib9.h b/src/libbio/lib9.h new file mode 100644 index 00000000..843f7558 --- /dev/null +++ b/src/libbio/lib9.h @@ -0,0 +1,12 @@ +#include <fmt.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY + +#include <utf.h> + +#define nil ((void*)0) diff --git a/src/libbio/mkfile b/src/libbio/mkfile new file mode 100644 index 00000000..bb99a25a --- /dev/null +++ b/src/libbio/mkfile @@ -0,0 +1 @@ +<../libutf/mkfile diff --git a/src/libbio/rpm.spec b/src/libbio/rpm.spec new file mode 100644 index 00000000..5cb9830c --- /dev/null +++ b/src/libbio/rpm.spec @@ -0,0 +1,30 @@ +Summary: Simple buffered I/O library from Plan 9 +Name: libbio +Version: 2.0 +Release: 1 +Group: Development/C +Copyright: LGPL +Packager: Russ Cox <rsc@post.harvard.edu> +Source: http://pdos.lcs.mit.edu/~rsc/software/libbio-2.0.tgz +URL: http://pdos.lcs.mit.edu/~rsc/software/#libbio +Requires: libfmt libutf + +%description +Libbio is a port of Plan 9's buffered I/O library. +It provides most of the same functionality as stdio or sfio, +but with a simpler interface and smaller footprint.
+ +http://plan9.bell-labs.com/magic/man2html/2/bio +%prep +%setup + +%build +make + +%install +make install + +%files +/usr/local/include/bio.h +/usr/local/lib/libbio.a +/usr/local/man/man3/bio.3 diff --git a/src/libfmt/LICENSE b/src/libfmt/LICENSE new file mode 100644 index 00000000..5dc21cb5 --- /dev/null +++ b/src/libfmt/LICENSE @@ -0,0 +1,19 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@post.harvard.edu>. + diff --git a/src/libfmt/Make.Darwin-PowerMacintosh b/src/libfmt/Make.Darwin-PowerMacintosh new file mode 100644 index 00000000..14b8d4e7 --- /dev/null +++ b/src/libfmt/Make.Darwin-PowerMacintosh @@ -0,0 +1,6 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I${PREFIX}/include +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libfmt/Make.FreeBSD-386 b/src/libfmt/Make.FreeBSD-386 new file mode 100644 index 00000000..9799dcbb --- /dev/null +++ b/src/libfmt/Make.FreeBSD-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. 
-I$(PREFIX)/include -pg +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libfmt/Make.HP-UX-9000 b/src/libfmt/Make.HP-UX-9000 new file mode 100644 index 00000000..edbdc111 --- /dev/null +++ b/src/libfmt/Make.HP-UX-9000 @@ -0,0 +1,6 @@ +CC=cc +CFLAGS=-O -c -Ae -I. +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libfmt/Make.Linux-386 b/src/libfmt/Make.Linux-386 new file mode 100644 index 00000000..20432828 --- /dev/null +++ b/src/libfmt/Make.Linux-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -DNEEDLL +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libfmt/Make.NetBSD-386 b/src/libfmt/Make.NetBSD-386 new file mode 100644 index 00000000..087ed3ab --- /dev/null +++ b/src/libfmt/Make.NetBSD-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overridden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libfmt/Make.OSF1-alpha b/src/libfmt/Make.OSF1-alpha new file mode 100644 index 00000000..3d45279b --- /dev/null +++ b/src/libfmt/Make.OSF1-alpha @@ -0,0 +1,6 @@ +CC=cc +CFLAGS+=-g -c -I. +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libfmt/Make.SunOS-sun4u b/src/libfmt/Make.SunOS-sun4u new file mode 100644 index 00000000..c5fe67b8 --- /dev/null +++ b/src/libfmt/Make.SunOS-sun4u @@ -0,0 +1,2 @@ +include Make.SunOS-sun4u-$(CC) +NAN=nan64.$O diff --git a/src/libfmt/Make.SunOS-sun4u-cc b/src/libfmt/Make.SunOS-sun4u-cc new file mode 100644 index 00000000..829301de --- /dev/null +++ b/src/libfmt/Make.SunOS-sun4u-cc @@ -0,0 +1,6 @@ +CC=cc +CFLAGS+=-g -c -I.
-O +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libfmt/Make.SunOS-sun4u-gcc b/src/libfmt/Make.SunOS-sun4u-gcc new file mode 100644 index 00000000..5c415948 --- /dev/null +++ b/src/libfmt/Make.SunOS-sun4u-gcc @@ -0,0 +1,6 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libfmt/Makefile b/src/libfmt/Makefile new file mode 100644 index 00000000..4b8ff604 --- /dev/null +++ b/src/libfmt/Makefile @@ -0,0 +1,134 @@ + +# this works in gnu make +SYSNAME:=${shell uname} +OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'} + +# this works in bsd make +SYSNAME!=uname +OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g' + +# the gnu rules will mess up bsd but not vice versa, +# hence the gnu rules come first. + +include Make.$(SYSNAME)-$(OBJTYPE) + +PREFIX=/usr/local + +NUKEFILES= + +TGZFILES= + +LIB=libfmt.a +VERSION=2.0 +PORTPLACE=devel/libfmt +NAME=libfmt + +NUM=\ + charstod.$O\ + pow10.$O\ + +OFILES=\ + dofmt.$O\ + errfmt.$O\ + fltfmt.$O\ + fmt.$O\ + fmtfd.$O\ + fmtfdflush.$O\ + fmtlock.$O\ + fmtprint.$O\ + fmtquote.$O\ + fmtrune.$O\ + fmtstr.$O\ + fmtvprint.$O\ + fprint.$O\ + print.$O\ + runefmtstr.$O\ + runeseprint.$O\ + runesmprint.$O\ + runesnprint.$O\ + runesprint.$O\ + runevseprint.$O\ + runevsmprint.$O\ + runevsnprint.$O\ + seprint.$O\ + smprint.$O\ + snprint.$O\ + sprint.$O\ + strtod.$O\ + vfprint.$O\ + vseprint.$O\ + vsmprint.$O\ + vsnprint.$O\ + $(NUM)\ + $(NAN)\ + +HFILES=\ + fmtdef.h\ + fmt.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + install -m 0644 print.3 $(PREFIX)/man/man3/print.3 + install -m 0644 fmtinstall.3 $(PREFIX)/man/man3/fmtinstall.3 + install -m 0644 fmt.h $(PREFIX)/include/fmt.h + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + +$(NAN).$O: nan.h +strtod.$O: nan.h + +test: $(LIB) test.$O + $(CC) -o test test.$O $(LIB) -L$(PREFIX)/lib -lutf + +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) 
$(LIB) $(OFILES) + +NUKEFILES+=$(LIB) +.c.$O: + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + +%.$O: %.c + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + + +$(OFILES): $(HFILES) + +tgz: + rm -rf $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION) + cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION) + tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + +clean: + rm -f $(OFILES) $(LIB) + +nuke: + rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES) + +rpm: + make tgz + cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES + rpm -ba rpm.spec + cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm . + cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm . + scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software + +PORTDIR=/usr/ports/$(PORTPLACE) + +ports: + make tgz + rm -rf $(PORTDIR) + mkdir $(PORTDIR) + cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles + cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}') + (cd $(PORTDIR); make makesum) + (cd $(PORTDIR); make) + (cd $(PORTDIR); /usr/local/bin/portlint) + rm -rf $(PORTDIR)/work + shar `find $(PORTDIR)` > ports.shar + (cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz + scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software + +.phony: all clean nuke install tgz rpm ports diff --git a/src/libfmt/Makefile.MID b/src/libfmt/Makefile.MID new file mode 100644 index 00000000..8302c281 --- /dev/null +++ b/src/libfmt/Makefile.MID @@ -0,0 +1,63 @@ +LIB=libfmt.a +VERSION=2.0 +PORTPLACE=devel/libfmt +NAME=libfmt + +NUM=\ + charstod.$O\ + pow10.$O\ + +OFILES=\ + dofmt.$O\ + errfmt.$O\ + fltfmt.$O\ + fmt.$O\ + fmtfd.$O\ + fmtfdflush.$O\ + fmtlock.$O\ + fmtprint.$O\ + fmtquote.$O\ + fmtrune.$O\ + fmtstr.$O\ + fmtvprint.$O\ + fprint.$O\ + print.$O\ + runefmtstr.$O\ + runeseprint.$O\ + runesmprint.$O\ + runesnprint.$O\ + runesprint.$O\ + runevseprint.$O\ + runevsmprint.$O\ + runevsnprint.$O\ + 
seprint.$O\ + smprint.$O\ + snprint.$O\ + sprint.$O\ + strtod.$O\ + vfprint.$O\ + vseprint.$O\ + vsmprint.$O\ + vsnprint.$O\ + $(NUM)\ + $(NAN)\ + +HFILES=\ + fmtdef.h\ + fmt.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + install -m 0644 print.3 $(PREFIX)/man/man3/print.3 + install -m 0644 fmtinstall.3 $(PREFIX)/man/man3/fmtinstall.3 + install -m 0644 fmt.h $(PREFIX)/include/fmt.h + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + +$(NAN).$O: nan.h +strtod.$O: nan.h + +test: $(LIB) test.$O + $(CC) -o test test.$O $(LIB) -L$(PREFIX)/lib -lutf + diff --git a/src/libfmt/NOTICE b/src/libfmt/NOTICE new file mode 100644 index 00000000..5dc21cb5 --- /dev/null +++ b/src/libfmt/NOTICE @@ -0,0 +1,19 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@post.harvard.edu>. + diff --git a/src/libfmt/README b/src/libfmt/README new file mode 100644 index 00000000..5dc21cb5 --- /dev/null +++ b/src/libfmt/README @@ -0,0 +1,19 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@post.harvard.edu>. + diff --git a/src/libfmt/bundle.ports b/src/libfmt/bundle.ports new file mode 100644 index 00000000..9ecf6a24 --- /dev/null +++ b/src/libfmt/bundle.ports @@ -0,0 +1,51 @@ +--- Makefile --- +# New ports collection makefile for: libfmt +# Date Created: 11 Feb 2003 +# Whom: rsc +# +# THIS LINE NEEDS REPLACING. IT'S HERE TO GET BY PORTLINT +# $FreeBSD: ports/devel/libfmt/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $ + +PORTNAME= libfmt +PORTVERSION= 2.0 +CATEGORIES= devel +MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/ +EXTRACT_SUFX= .tgz + +MAINTAINER= rsc@post.harvard.edu + +DEPENDS= ${PORTSDIR}/devel/libutf + +MAN3= print.3 fmtinstall.3 + +USE_REINPLACE=yes + +.include <bsd.port.pre.mk> + +post-patch: + ${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile + +.include <bsd.port.post.mk> +--- pkg-comment --- +Extensible formatted print C library (printf with user-defined verbs) +--- pkg-descr --- +Libfmt is a port of Plan 9's formatted print library. +As a base it provides all the syntax of ANSI printf +but adds the ability for client programs to install +new print verbs. One such print verb (installed by +default) is %r, which prints the system error string. 
+Instead of perror("foo"), you can write fprint(2, "foo: %r\n"). +This is especially nice when you write verbs to format +the data structures used by your particular program. + +WWW: http://pdos.lcs.mit.edu/~rsc/software/#libfmt +http://plan9.bell-labs.com/magic/man2html/2/print + +Russ Cox +rsc@post.harvard.edu +--- pkg-plist --- +lib/libfmt.a +include/fmt.h +--- /dev/null --- +This is just a way to make sure blank lines don't +creep into pkg-plist. diff --git a/src/libfmt/charstod.c b/src/libfmt/charstod.c new file mode 100644 index 00000000..ec403b11 --- /dev/null +++ b/src/libfmt/charstod.c @@ -0,0 +1,85 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Reads a floating-point number by interpreting successive characters + * returned by (*f)(vp). The last call it makes to f terminates the + * scan, so is not a character in the number. It may therefore be + * necessary to back up the input stream by one byte after calling charstod.
+ */ + +double +fmtcharstod(int(*f)(void*), void *vp) +{ + double num, dem; + int neg, eneg, dig, exp, c; + + num = 0; + neg = 0; + dig = 0; + exp = 0; + eneg = 0; + + c = (*f)(vp); + while(c == ' ' || c == '\t') + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-') + neg = 1; + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + c = (*f)(vp); + } + if(c == '.') + c = (*f)(vp); + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + dig++; + c = (*f)(vp); + } + if(c == 'e' || c == 'E'){ + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-'){ + dig = -dig; + eneg = 1; + } + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + exp = exp*10 + c-'0'; + c = (*f)(vp); + } + } + exp -= dig; + if(exp < 0){ + exp = -exp; + eneg = !eneg; + } + dem = __fmtpow10(exp); + if(eneg) + num /= dem; + else + num *= dem; + if(neg) + return -num; + return num; +} diff --git a/src/libfmt/dofmt.c b/src/libfmt/dofmt.c new file mode 100644 index 00000000..d26f7158 --- /dev/null +++ b/src/libfmt/dofmt.c @@ -0,0 +1,558 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ +int +dofmt(Fmt *f, char *fmt) +{ + Rune rune, *rt, *rs; + int r; + char *t, *s; + int n, nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself) + fmt++; + else{ + fmt += chartorune(&rune, fmt); + r = rune; + } + FMTRCHAR(f, rt, rs, r); + } + fmt++; + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = (char*)f->to; + s = (char*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself){ + FMTCHAR(f, t, s, r); + fmt++; + }else{ + n = chartorune(&rune, fmt); + if(t + n > s){ + t = (char*)__fmtflush(f, t, n); + if(t != nil) + s = (char*)f->stop; + else + return -1; + } + while(n--) + *t++ = *fmt++; + } + } + fmt++; + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = (char*)__fmtdispatch(f, fmt, 0); + if(fmt == nil) + return -1; + } + return 0; /* not reached */ +} + +void * +__fmtflush(Fmt *f, void *t, int len) +{ + if(f->runes) + f->nfmt += (Rune*)t - (Rune*)f->to; + else + f->nfmt += (char*)t - (char *)f->to; + f->to = t; + if(f->flush == 0 || (*f->flush)(f) == 0 || (char*)f->to + len > (char*)f->stop){ + f->stop = f->to; + return nil; + } + return f->to; +} + +/* + * put a formatted block of memory sz bytes long of n runes into the output buffer, + * left/right justified in a field of at least f->width charactes + */ +int +__fmtpad(Fmt *f, int n) +{ + char *t, *s; + int i; + + t = (char*)f->to; + s = (char*)f->stop; + for(i = 0; i < n; i++) + FMTCHAR(f, t, s, ' '); + f->nfmt += t - (char *)f->to; + f->to = t; + return 0; +} + +int +__rfmtpad(Fmt *f, int n) +{ + Rune *t, *s; + int i; + + t = (Rune*)f->to; + s = (Rune*)f->stop; + for(i = 0; i < n; i++) + 
FMTRCHAR(f, t, s, ' '); + f->nfmt += t - (Rune *)f->to; + f->to = t; + return 0; +} + +int +__fmtcpy(Fmt *f, const void *vm, int n, int sz) +{ + Rune *rt, *rs, r; + char *t, *s, *m, *me; + ulong fl; + int nc, w; + + m = (char*)vm; + me = m + sz; + w = f->width; + fl = f->flags; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(m < me) + return -1; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + } + return 0; +} + +int +__fmtrcpy(Fmt *f, const void *vm, int n) +{ + Rune r, *m, *me, *rt, *rs; + char *t, *s; + ulong fl; + int w; + + m = (Rune*)vm; + w = f->width; + fl = f->flags; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(me = m + n; m < me; m++) + FMTRCHAR(f, rt, rs, *m); + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(me = m + n; m < me; m++){ + r = *m; + FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + 
} + return 0; +} + +/* fmt out one character */ +int +__charfmt(Fmt *f) +{ + char x[1]; + + x[0] = va_arg(f->args, int); + f->prec = 1; + return __fmtcpy(f, (const char*)x, 1, 1); +} + +/* fmt out one rune */ +int +__runefmt(Fmt *f) +{ + Rune x[1]; + + x[0] = va_arg(f->args, int); + return __fmtrcpy(f, (const void*)x, 1); +} + +/* public helper routine: fmt out a null terminated string already in hand */ +int +fmtstrcpy(Fmt *f, char *s) +{ + int p, i; + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ + p = f->prec; + for(i = 0; i < p; i++) + if(s[i] == 0) + break; + return __fmtcpy(f, s, utfnlen(s, i), i); /* BUG?: won't print a partial rune at end */ + } + + return __fmtcpy(f, s, utflen(s), strlen(s)); +} + +/* fmt out a null terminated utf string */ +int +__strfmt(Fmt *f) +{ + char *s; + + s = va_arg(f->args, char *); + return fmtstrcpy(f, s); +} + +/* public helper routine: fmt out a null terminated rune string already in hand */ +int +fmtrunestrcpy(Fmt *f, Rune *s) +{ + Rune *e; + int n, p; + + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ + p = f->prec; + for(n = 0; n < p; n++) + if(s[n] == 0) + break; + }else{ + for(e = s; *e; e++) + ; + n = e - s; + } + return __fmtrcpy(f, s, n); +} + +/* fmt out a null terminated rune string */ +int +__runesfmt(Fmt *f) +{ + Rune *s; + + s = va_arg(f->args, Rune *); + return fmtrunestrcpy(f, s); +} + +/* fmt a % */ +int +__percentfmt(Fmt *f) +{ + Rune x[1]; + + x[0] = f->r; + f->prec = 1; + return __fmtrcpy(f, (const void*)x, 1); +} + +/* fmt an integer */ +int +__ifmt(Fmt *f) +{ + char buf[70], *p, *conv; + uvlong vu; + ulong u; + int neg, base, i, n, fl, w, isv; + + neg = 0; + fl = f->flags; + isv = 0; + vu = 0; + u = 0; + /* + * Unsigned verbs + */ + switch(f->r){ + case 'o': + case 'u': + case 'x': + case 'X': + fl |= FmtUnsigned; + 
break; + } + if(f->r == 'p'){ + u = (ulong)va_arg(f->args, void*); + f->r = 'x'; + fl |= FmtUnsigned; + }else if(fl & FmtVLong){ + isv = 1; + if(fl & FmtUnsigned) + vu = va_arg(f->args, uvlong); + else + vu = va_arg(f->args, vlong); + }else if(fl & FmtLong){ + if(fl & FmtUnsigned) + u = va_arg(f->args, ulong); + else + u = va_arg(f->args, long); + }else if(fl & FmtByte){ + if(fl & FmtUnsigned) + u = (uchar)va_arg(f->args, int); + else + u = (char)va_arg(f->args, int); + }else if(fl & FmtShort){ + if(fl & FmtUnsigned) + u = (ushort)va_arg(f->args, int); + else + u = (short)va_arg(f->args, int); + }else{ + if(fl & FmtUnsigned) + u = va_arg(f->args, uint); + else + u = va_arg(f->args, int); + } + conv = "0123456789abcdef"; + switch(f->r){ + case 'd': + case 'i': + base = 10; + break; + case 'u': + base = 10; + break; + case 'x': + base = 16; + break; + case 'X': + base = 16; + conv = "0123456789ABCDEF"; + break; + case 'b': + base = 2; + break; + case 'o': + base = 8; + break; + default: + return -1; + } + if(!(fl & FmtUnsigned)){ + if(isv && (vlong)vu < 0){ + vu = -(vlong)vu; + neg = 1; + }else if(!isv && (long)u < 0){ + u = -(long)u; + neg = 1; + } + }else{ + fl &= ~(FmtSign|FmtSpace); /* no + for unsigned conversions */ + } + p = buf + sizeof buf - 1; + n = 0; + if(isv){ + while(vu){ + i = vu % base; + vu /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + *p-- = conv[i]; + n++; + } + }else{ + while(u){ + i = u % base; + u /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + *p-- = conv[i]; + n++; + } + } + if(n == 0){ + if(!(fl & FmtPrec) || f->prec != 0){ + *p-- = '0'; + n = 1; + } + fl &= ~FmtSharp; + } + for(w = f->prec; n < w && p > buf+3; n++) + *p-- = '0'; + if(neg || (fl & (FmtSign|FmtSpace))) + n++; + if(fl & FmtSharp){ + if(base == 16) + n += 2; + else if(base == 8){ + if(p[1] == '0') + fl &= ~FmtSharp; + else + n++; + } + } + if((fl & FmtZero) && !(fl & (FmtLeft|FmtPrec))){ + for(w = f->width; n < w && p > buf+3; 
n++) + *p-- = '0'; + f->width = 0; + } + if(fl & FmtSharp){ + if(base == 16) + *p-- = f->r; + if(base == 16 || base == 8) + *p-- = '0'; + } + if(neg) + *p-- = '-'; + else if(fl & FmtSign) + *p-- = '+'; + else if(fl & FmtSpace) + *p-- = ' '; + f->flags &= ~FmtPrec; + return __fmtcpy(f, p + 1, n, n); +} + +int +__countfmt(Fmt *f) +{ + void *p; + ulong fl; + + fl = f->flags; + p = va_arg(f->args, void*); + if(fl & FmtVLong){ + *(vlong*)p = f->nfmt; + }else if(fl & FmtLong){ + *(long*)p = f->nfmt; + }else if(fl & FmtByte){ + *(char*)p = f->nfmt; + }else if(fl & FmtShort){ + *(short*)p = f->nfmt; + }else{ + *(int*)p = f->nfmt; + } + return 0; +} + +int +__flagfmt(Fmt *f) +{ + switch(f->r){ + case ',': + f->flags |= FmtComma; + break; + case '-': + f->flags |= FmtLeft; + break; + case '+': + f->flags |= FmtSign; + break; + case '#': + f->flags |= FmtSharp; + break; + case ' ': + f->flags |= FmtSpace; + break; + case 'u': + f->flags |= FmtUnsigned; + break; + case 'h': + if(f->flags & FmtShort) + f->flags |= FmtByte; + f->flags |= FmtShort; + break; + case 'L': + f->flags |= FmtLDouble; + break; + case 'l': + if(f->flags & FmtLong) + f->flags |= FmtVLong; + f->flags |= FmtLong; + break; + } + return 1; +} + +/* default error format */ +int +__badfmt(Fmt *f) +{ + char x[3]; + + x[0] = '%'; + x[1] = f->r; + x[2] = '%'; + f->prec = 3; + __fmtcpy(f, (const void*)x, 3, 3); + return 0; +} diff --git a/src/libfmt/dorfmt.c b/src/libfmt/dorfmt.c new file mode 100644 index 00000000..cdaee8a5 --- /dev/null +++ b/src/libfmt/dorfmt.c @@ -0,0 +1,61 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ + +int +dorfmt(Fmt *f, const Rune *fmt) +{ + Rune *rt, *rs; + int r; + char *t, *s; + int nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = f->to; + rs = f->stop; + while((r = *fmt++) && r != '%'){ + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = f->to; + s = f->stop; + while((r = *fmt++) && r != '%'){ + FMTRUNE(f, t, f->stop, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = __fmtdispatch(f, fmt, 1); + if(fmt == nil) + return -1; + } + return 0; /* not reached */ +} diff --git a/src/libfmt/errfmt.c b/src/libfmt/errfmt.c new file mode 100644 index 00000000..21847054 --- /dev/null +++ b/src/libfmt/errfmt.c @@ -0,0 +1,28 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <errno.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +int +__errfmt(Fmt *f) +{ + char *s; + + s = strerror(errno); + return fmtstrcpy(f, s); +} diff --git a/src/libfmt/fltfmt.c b/src/libfmt/fltfmt.c new file mode 100644 index 00000000..234f03d9 --- /dev/null +++ b/src/libfmt/fltfmt.c @@ -0,0 +1,610 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdio.h> +#include <math.h> +#include <float.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include "fmt.h" +#include "fmtdef.h" +#include "nan.h" + +enum +{ + FDEFLT = 6, + NSIGNIF = 17 +}; + +/* + * first few powers of 10, enough for about 1/2 of the + * total space for doubles. + */ +static double pows10[] = +{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, +}; + +static double +pow10(int n) +{ + double d; + int neg; + + neg = 0; + if(n < 0){ + if(n < DBL_MIN_10_EXP){ + return 0.; + } + neg = 1; + n = -n; + }else if(n > DBL_MAX_10_EXP){ + return HUGE_VAL; + } + if(n < (int)(sizeof(pows10)/sizeof(pows10[0]))) + d = pows10[n]; + else{ + d = pows10[sizeof(pows10)/sizeof(pows10[0]) - 1]; + for(;;){ + n -= sizeof(pows10)/sizeof(pows10[0]) - 1; + if(n < (int)(sizeof(pows10)/sizeof(pows10[0]))){ + d *= pows10[n]; + break; + } + d *= pows10[sizeof(pows10)/sizeof(pows10[0]) - 1]; + } + } + if(neg){ + return 1./d; + } + return d; +} + 
+static int +xadd(char *a, int n, int v) +{ + char *b; + int c; + + if(n < 0 || n >= NSIGNIF) + return 0; + for(b = a+n; b >= a; b--) { + c = *b + v; + if(c <= '9') { + *b = c; + return 0; + } + *b = '0'; + v = 1; + } + *a = '1'; /* overflow adding */ + return 1; +} + +static int +xsub(char *a, int n, int v) +{ + char *b; + int c; + + for(b = a+n; b >= a; b--) { + c = *b - v; + if(c >= '0') { + *b = c; + return 0; + } + *b = '9'; + v = 1; + } + *a = '9'; /* underflow subtracting */ + return 1; +} + +static void +xaddexp(char *p, int e) +{ + char se[9]; + int i; + + *p++ = 'e'; + if(e < 0) { + *p++ = '-'; + e = -e; + } + i = 0; + while(e) { + se[i++] = e % 10 + '0'; + e /= 10; + } + if(i == 0) { + *p++ = '0'; + } else { + while(i > 0) + *p++ = se[--i]; + } + *p++ = '\0'; +} + +static char* +xdodtoa(char *s1, double f, int chr, int prec, int *decpt, int *rsign) +{ + char s2[NSIGNIF+10]; + double g, h; + int e, d, i; + int c2, sign, oerr; + + if(chr == 'F') + chr = 'f'; + if(prec > NSIGNIF) + prec = NSIGNIF; + if(prec < 0) + prec = 0; + if(__isNaN(f)) { + *decpt = 9999; + *rsign = 0; + strcpy(s1, "nan"); + return &s1[3]; + } + sign = 0; + if(f < 0) { + f = -f; + sign++; + } + *rsign = sign; + if(__isInf(f, 1) || __isInf(f, -1)) { + *decpt = 9999; + strcpy(s1, "inf"); + return &s1[3]; + } + + e = 0; + g = f; + if(g != 0) { + frexp(f, &e); + e = (int)(e * .301029995664); + if(e >= -150 && e <= +150) { + d = 0; + h = f; + } else { + d = e/2; + h = f * pow10(-d); + } + g = h * pow10(d-e); + while(g < 1) { + e--; + g = h * pow10(d-e); + } + while(g >= 10) { + e++; + g = h * pow10(d-e); + } + } + + /* + * convert NSIGNIF digits and convert + * back to get accuracy. 
+ */ + for(i=0; i<NSIGNIF; i++) { + d = (int)g; + s1[i] = d + '0'; + g = (g - d) * 10; + } + s1[i] = 0; + + /* + * try decimal rounding to eliminate 9s + */ + c2 = prec + 1; + if(chr == 'f') + c2 += e; + oerr = errno; + if(c2 >= NSIGNIF-2) { + strcpy(s2, s1); + d = e; + s1[NSIGNIF-2] = '0'; + s1[NSIGNIF-1] = '0'; + xaddexp(s1+NSIGNIF, e-NSIGNIF+1); + g = fmtstrtod(s1, nil); + if(g == f) + goto found; + if(xadd(s1, NSIGNIF-3, 1)) { + e++; + xaddexp(s1+NSIGNIF, e-NSIGNIF+1); + } + g = fmtstrtod(s1, nil); + if(g == f) + goto found; + strcpy(s1, s2); + e = d; + } + + /* + * convert back so s1 gets exact answer + */ + for(d = 0; d < 10; d++) { + xaddexp(s1+NSIGNIF, e-NSIGNIF+1); + g = fmtstrtod(s1, nil); + if(f > g) { + if(xadd(s1, NSIGNIF-1, 1)) + e--; + continue; + } + if(f < g) { + if(xsub(s1, NSIGNIF-1, 1)) + e++; + continue; + } + break; + } + +found: + errno = oerr; + + /* + * sign + */ + d = 0; + i = 0; + + /* + * round & adjust 'f' digits + */ + c2 = prec + 1; + if(chr == 'f'){ + if(xadd(s1, c2+e, 5)) + e++; + c2 += e; + if(c2 < 0){ + c2 = 0; + e = -prec - 1; + } + }else{ + if(xadd(s1, c2, 5)) + e++; + } + if(c2 > NSIGNIF){ + c2 = NSIGNIF; + } + + *decpt = e + 1; + + /* + * terminate the converted digits + */ + s1[c2] = '\0'; + return &s1[c2]; +} + +/* + * this function works like the standard dtoa, if you want it. 
+ */
+#if 0
+static char*
+__dtoa(double f, int mode, int ndigits, int *decpt, int *rsign, char **rve)
+{
+	static char s2[NSIGNIF + 10];
+	char *es;
+	int chr, prec;
+
+	switch(mode) {
+	/* like 'e' */
+	case 2:
+	case 4:
+	case 6:
+	case 8:
+		chr = 'e';
+		break;
+	/* like 'g' */
+	case 0:
+	case 1:
+	default:
+		chr = 'g';
+		break;
+	/* like 'f' */
+	case 3:
+	case 5:
+	case 7:
+	case 9:
+		chr = 'f';
+		break;
+	}
+
+	if(chr != 'f' && ndigits){
+		ndigits--;
+	}
+	prec = ndigits;
+	if(prec > NSIGNIF)
+		prec = NSIGNIF;
+	if(ndigits == 0)
+		prec = NSIGNIF;
+	es = xdodtoa(s2, f, chr, prec, decpt, rsign);
+
+	/*
+	 * strip trailing 0
+	 */
+	for(; es > s2 + 1; es--){
+		if(es[-1] != '0'){
+			break;
+		}
+	}
+	*es = '\0';
+	if(rve != NULL)
+		*rve = es;
+	return s2;
+}
+#endif
+/* emit n '0' characters, with a '.' placed before the one at index pt (when 0 <= pt < n) */
+static int
+fmtzdotpad(Fmt *f, int n, int pt)
+{
+	char *t, *s;
+	int i;
+	Rune *rt, *rs;
+
+	if(f->runes){
+		rt = (Rune*)f->to;
+		rs = (Rune*)f->stop;
+		for(i = 0; i < n; i++){
+			if(i == pt){
+				FMTRCHAR(f, rt, rs, '.');
+			}
+			FMTRCHAR(f, rt, rs, '0');
+		}
+		f->nfmt += rt - (Rune*)f->to;
+		f->to = rt;
+	}else{
+		t = (char*)f->to;
+		s = (char*)f->stop;
+		for(i = 0; i < n; i++){
+			if(i == pt){
+				FMTCHAR(f, t, s, '.');
+			}
+			FMTCHAR(f, t, s, '0');
+		}
+		f->nfmt += t - (char *)f->to;
+		f->to = t;
+	}
+	return 0;
+}
+/* %e %f %g and %E %F %G: format the next double argument; returns 0, or -1 if output fails */
+int
+__efgfmt(Fmt *fmt)
+{
+	double f;
+	char s1[NSIGNIF+10];
+	int e, d, n;
+	int c1, c2, c3, c4, ucase, sign, chr, prec, fl;
+
+	f = va_arg(fmt->args, double);
+	prec = FDEFLT;
+	fl = fmt->flags;
+	fmt->flags = 0;
+	if(fl & FmtPrec)
+		prec = fmt->prec;
+	chr = fmt->r;
+	ucase = 0;
+	if(chr == 'E'){
+		chr = 'e';
+		ucase = 1;
+	}else if(chr == 'F'){
+		chr = 'f';
+		ucase = 1;
+	}else if(chr == 'G'){
+		chr = 'g';
+		ucase = 1;
+	}
+	if(prec > 0 && chr == 'g')
+		prec--;
+	if(prec < 0)
+		prec = 0;
+
+	xdodtoa(s1, f, chr, prec, &e, &sign);
+	e--;
+	if(*s1 == 'i' || *s1 == 'n'){
+		if(ucase){
+			if(*s1 == 'i'){
+				strcpy(s1, "INF");
+			}else{
+				strcpy(s1, "NAN");
+			}
+		}
+		fmt->flags = fl & (FmtWidth|FmtLeft);	/* inf/nan honour only width and justification */
+		return __fmtcpy(fmt, (const void*)s1, 3, 3);	/* NOTE(review): sign is not printed for -inf -- intended? */
+	}
+
+	/*
+	 * copy into final place
+	 * c1 digits of leading '0'
+	 * c2 digits from conversion
+	 * c3 digits of trailing '0'
+	 * c4 digits after '.'
+	 */
+	c1 = 0;
+	c2 = prec + 1;
+	c3 = 0;
+	c4 = prec;
+	switch(chr) {
+	default:
+		chr = 'e';
+		break;
+	case 'g':
+		/*
+		 * decide on 'e' or 'f' style conversion
+		 */
+		if(e >= -4 && e <= prec) {
+			c1 = -e;
+			c4 = prec - e;
+			chr = 'h';	/* flag for 'f' style */
+		}
+		break;
+	case 'f':
+		c1 = -e;
+		if(c1 > prec)
+			c1 = prec + 1;
+		c2 += e;
+		break;
+	}
+
+	/*
+	 * clean up c1 c2 and c3
+	 */
+	if(c1 < 0)
+		c1 = 0;
+	if(c2 < 0)
+		c2 = 0;
+	if(c2 > NSIGNIF) {
+		c3 = c2-NSIGNIF;
+		c2 = NSIGNIF;
+	}
+
+	/*
+	 * trim trailing zeros for %g
+	 */
+	if(!(fl & FmtSharp)
+	&& (chr == 'g' || chr == 'h')){
+		if(c4 >= c3){
+			c4 -= c3;
+			c3 = 0;
+		}else{
+			c3 -= c4;
+			c4 = 0;
+		}
+		while(c4 && c2 > 1 && s1[c2 - 1] == '0'){
+			c4--;
+			c2--;
+		}
+	}
+
+	/*
+	 * calculate the total length
+	 */
+	n = c1 + c2 + c3;
+	if(sign || (fl & (FmtSign|FmtSpace)))
+		n++;
+	if(c4 || (fl & FmtSharp)){
+		n++;
+	}
+	if(chr == 'e' || chr == 'g'){
+		n += 4;				/* 'e', exponent sign, two exponent digits */
+		if(e >= 100 || e <= -100)	/* a third digit is emitted below whenever |e| >= 100 */
+			n++;
+	}
+
+	/*
+	 * pad to width if right justified
+	 */
+	if((fl & (FmtWidth|FmtLeft)) == FmtWidth && n < fmt->width){
+		if(fl & FmtZero){
+			c1 += fmt->width - n;
+		}else{
+			if(__fmtpad(fmt, fmt->width - n) < 0){
+				return -1;
+			}
+		}
+	}
+
+	/*
+	 * sign
+	 */
+	d = 0;
+	if(sign)
+		d = '-';
+	else if(fl & FmtSign)
+		d = '+';
+	else if(fl & FmtSpace)
+		d = ' ';
+	if(d && fmtrune(fmt, d) < 0){
+		return -1;
+	}
+
+	/*
+	 * copy digits
+	 */
+	c4 = c1 + c2 + c3 - c4;	/* from here on c4 is the position of '.' in the digit stream */
+	if(c1 > 0){
+		if(fmtzdotpad(fmt, c1, c4) < 0){
+			return -1;
+		}
+		c4 -= c1;
+	}
+	d = 0;
+	if(c4 >= 0 && c4 < c2){
+		if(__fmtcpy(fmt, s1, c4, c4) < 0 || fmtrune(fmt, '.') < 0)
+			return -1;
+		d = c4;
+		c2 -= c4;
+		c4 = -1;
+	}
+	if(__fmtcpy(fmt, (const void*)(s1 + d), c2, c2) < 0){
+		return -1;
+	}
+	c4 -= c2;
+	if(c3 > 0){
+		if(fmtzdotpad(fmt, c3, c4) < 0){
+			return -1;
+		}
+		c4 -= c3;
+	}
+
+	/*
+	 * with '#', emit the '.' even when no digit follows it
+	 */
+	if((fl & FmtSharp) && c4 == 0 && fmtrune(fmt, '.') < 0){
+		return -1;
+	}
+	if(chr == 'e' || chr == 'g') {
+		d = 0;
+		if(ucase)
+			s1[d++] = 'E';
+		else
+			s1[d++] = 'e';
+		c1 = e;
+		if(c1 < 0) {
+			s1[d++] = '-';
+			c1 = -c1;
+		} else
+			s1[d++] = '+';
+		if(c1 >= 100) {
+			s1[d++] = c1/100 + '0';
+			c1 = c1%100;
+		}
+		s1[d++] = c1/10 + '0';
+		s1[d++] = c1%10 + '0';
+		if(__fmtcpy(fmt, s1, d, d) < 0){
+			return -1;
+		}
+	}
+	if((fl & (FmtWidth|FmtLeft)) == (FmtWidth|FmtLeft) && n < fmt->width){
+		if(__fmtpad(fmt, fmt->width - n) < 0){
+			return -1;
+		}
+	}
+	return 0;
+}
diff --git a/src/libfmt/fmt.c b/src/libfmt/fmt.c
new file mode 100644
index 00000000..06f6c950
--- /dev/null
+++ b/src/libfmt/fmt.c
@@ -0,0 +1,221 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +enum +{ + Maxfmt = 64 +}; + +typedef struct Convfmt Convfmt; +struct Convfmt +{ + int c; + volatile Fmts fmt; /* for spin lock in fmtfmt; avoids race due to write order */ +}; + +struct +{ + /* lock by calling __fmtlock, __fmtunlock */ + int nfmt; + Convfmt fmt[Maxfmt]; +} fmtalloc; + +static Convfmt knownfmt[] = { + ' ', __flagfmt, + '#', __flagfmt, + '%', __percentfmt, + '+', __flagfmt, + ',', __flagfmt, + '-', __flagfmt, + 'C', __runefmt, /* Plan 9 addition */ + 'E', __efgfmt, + 'F', __efgfmt, /* ANSI only */ + 'G', __efgfmt, + 'L', __flagfmt, /* ANSI only */ + 'S', __runesfmt, /* Plan 9 addition */ + 'X', __ifmt, + 'b', __ifmt, /* Plan 9 addition */ + 'c', __charfmt, + 'd', __ifmt, + 'e', __efgfmt, + 'f', __efgfmt, + 'g', __efgfmt, + 'h', __flagfmt, + 'i', __ifmt, /* ANSI only */ + 'l', __flagfmt, + 'n', __countfmt, + 'o', __ifmt, + 'p', __ifmt, + 'r', __errfmt, + 's', __strfmt, + 'u', __ifmt, /* in Plan 9, __flagfmt */ + 'x', __ifmt, + 0, nil, +}; + + +int (*fmtdoquote)(int); + +/* + * __fmtlock() must be set + */ +static int +__fmtinstall(int c, Fmts f) +{ + Convfmt *p, *ep; + + if(c<=0 || c>=65536) + return -1; + if(!f) + f = __badfmt; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c) + break; + + if(p == &fmtalloc.fmt[Maxfmt]) + return -1; + + p->fmt = f; + if(p == ep){ /* installing a new format character */ + fmtalloc.nfmt++; + p->c = c; + } + + return 0; +} + +int +fmtinstall(int c, Fmts f) +{ + int ret; + + __fmtlock(); + ret = __fmtinstall(c, f); + __fmtunlock(); + return ret; +} + +static Fmts +fmtfmt(int c) +{ + Convfmt *p, *ep; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c){ + while(p->fmt == nil) /* loop until value is updated */ + ; + return p->fmt; + } + + /* is this a predefined format char? 
*/ + __fmtlock(); + for(p=knownfmt; p->c; p++) + if(p->c == c){ + __fmtinstall(p->c, p->fmt); + __fmtunlock(); + return p->fmt; + } + __fmtunlock(); + + return __badfmt; +} + +void* +__fmtdispatch(Fmt *f, void *fmt, int isrunes) +{ + Rune rune, r; + int i, n; + + f->flags = 0; + f->width = f->prec = 0; + + for(;;){ + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + fmt = (char*)fmt + chartorune(&rune, (char*)fmt); + r = rune; + } + f->r = r; + switch(r){ + case '\0': + return nil; + case '.': + f->flags |= FmtWidth|FmtPrec; + continue; + case '0': + if(!(f->flags & FmtWidth)){ + f->flags |= FmtZero; + continue; + } + /* fall through */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + i = 0; + while(r >= '0' && r <= '9'){ + i = i * 10 + r - '0'; + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + r = *(char*)fmt; + fmt = (char*)fmt + 1; + } + } + if(isrunes) + fmt = (Rune*)fmt - 1; + else + fmt = (char*)fmt - 1; + numflag: + if(f->flags & FmtWidth){ + f->flags |= FmtPrec; + f->prec = i; + }else{ + f->flags |= FmtWidth; + f->width = i; + } + continue; + case '*': + i = va_arg(f->args, int); + if(i < 0){ + /* + * negative precision => + * ignore the precision. + */ + if(f->flags & FmtPrec){ + f->flags &= ~FmtPrec; + f->prec = 0; + continue; + } + i = -i; + f->flags |= FmtLeft; + } + goto numflag; + } + n = (*fmtfmt(r))(f); + if(n < 0) + return nil; + if(n == 0) + return fmt; + } +} diff --git a/src/libfmt/fmt.h b/src/libfmt/fmt.h new file mode 100644 index 00000000..c913e14a --- /dev/null +++ b/src/libfmt/fmt.h @@ -0,0 +1,100 @@ + +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ + +#ifndef _FMTH_ +#define _FMTH_ 1 + +#include <stdarg.h> + +#ifndef _UTFH_ +#include <utf.h> +#endif + +typedef struct Fmt Fmt; +struct Fmt{ + unsigned char runes; /* output buffer is runes or chars? */ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt *); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format Rune */ + int width; + int prec; + unsigned long flags; +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtZero = FmtSign << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + FmtByte = FmtComma << 1, + FmtLDouble = FmtByte << 1, + + FmtFlag = FmtLDouble << 1 +}; + +extern int print(char*, ...); +extern char* seprint(char*, char*, char*, ...); +extern char* vseprint(char*, char*, char*, va_list); +extern int snprint(char*, int, char*, ...); +extern int vsnprint(char*, int, char*, va_list); +extern char* smprint(char*, ...); +extern char* vsmprint(char*, va_list); +extern int sprint(char*, 
char*, ...); +extern int fprint(int, char*, ...); +extern int vfprint(int, char*, va_list); + +extern int runesprint(Rune*, char*, ...); +extern int runesnprint(Rune*, int, char*, ...); +extern int runevsnprint(Rune*, int, char*, va_list); +extern Rune* runeseprint(Rune*, Rune*, char*, ...); +extern Rune* runevseprint(Rune*, Rune*, char*, va_list); +extern Rune* runesmprint(char*, ...); +extern Rune* runevsmprint(char*, va_list); + +extern int fmtfdinit(Fmt*, int, char*, int); +extern int fmtfdflush(Fmt*); +extern int fmtstrinit(Fmt*); +extern char* fmtstrflush(Fmt*); +extern int runefmtstrinit(Fmt*); + +extern int quotestrfmt(Fmt *f); +extern void quotefmtinstall(void); +extern int (*fmtdoquote)(int); + + +extern int fmtinstall(int, int (*)(Fmt*)); +extern int dofmt(Fmt*, char*); +extern int fmtprint(Fmt*, char*, ...); +extern int fmtvprint(Fmt*, char*, va_list); +extern int fmtrune(Fmt*, int); +extern int fmtstrcpy(Fmt*, char*); + +extern double fmtstrtod(const char *, char **); +extern double fmtcharstod(int(*)(void*), void*); + +#endif diff --git a/src/libfmt/fmtdef.h b/src/libfmt/fmtdef.h new file mode 100644 index 00000000..ca2010ab --- /dev/null +++ b/src/libfmt/fmtdef.h @@ -0,0 +1,121 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +/* + * dofmt -- format to a buffer + * the number of characters formatted is returned, + * or -1 if there was an error. + * if the buffer is ever filled, flush is called. + * it should reset the buffer and return whether formatting should continue. + */ +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong + +#define USED(x) if(x);else + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#ifndef NOVLONGS +typedef unsigned long long uvlong; +typedef long long vlong; +#endif + +#define nil 0 /* cannot be ((void*)0) because used for function pointers */ + +typedef int (*Fmts)(Fmt*); + +typedef struct Quoteinfo Quoteinfo; +struct Quoteinfo +{ + int quoted; /* if set, string must be quoted */ + int nrunesin; /* number of input runes that can be accepted */ + int nbytesin; /* number of input bytes that can be accepted */ + int nrunesout; /* number of runes that will be generated */ + int nbytesout; /* number of bytes that will be generated */ +}; + +void *__fmtflush(Fmt*, void*, int); +void *__fmtdispatch(Fmt*, void*, int); +int __floatfmt(Fmt*, double); +int __fmtpad(Fmt*, int); +int __rfmtpad(Fmt*, int); +int __fmtFdFlush(Fmt*); + +int __efgfmt(Fmt*); +int __charfmt(Fmt*); +int __runefmt(Fmt*); +int __runesfmt(Fmt*); +int __countfmt(Fmt*); +int __flagfmt(Fmt*); +int __percentfmt(Fmt*); +int __ifmt(Fmt*); +int __strfmt(Fmt*); +int __badfmt(Fmt*); +int __fmtcpy(Fmt*, const void*, int, int); +int __fmtrcpy(Fmt*, const void*, int n); +int __errfmt(Fmt *f); + +double __fmtpow10(int); + +void __fmtlock(void); +void __fmtunlock(void); + +#define FMTCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (char*)s){\ + t = __fmtflush(f, t, 1);\ + if(t != nil)\ + s = f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (Rune*)s){\ + t = __fmtflush(f, t, 
sizeof(Rune));\ + if(t != nil)\ + s = f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRUNE(f, t, s, r)\ + do{\ + Rune _rune;\ + int _runelen;\ + if(t + UTFmax > (char*)s && t + (_runelen = runelen(r)) > (char*)s){\ + t = __fmtflush(f, t, _runelen);\ + if(t != nil)\ + s = f->stop;\ + else\ + return -1;\ + }\ + if(r < Runeself)\ + *t++ = r;\ + else{\ + _rune = r;\ + t += runetochar(t, &_rune);\ + }\ + }while(0) diff --git a/src/libfmt/fmtfd.c b/src/libfmt/fmtfd.c new file mode 100644 index 00000000..d4251402 --- /dev/null +++ b/src/libfmt/fmtfd.c @@ -0,0 +1,46 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * public routine for final flush of a formatting buffer + * to a file descriptor; returns total char count. 
+ */
+int
+fmtfdflush(Fmt *f)
+{
+	if(__fmtFdFlush(f) <= 0)
+		return -1;
+	return f->nfmt;
+}
+
+/*
+ * initialize f for buffered printing to fd through buf[0..size); always returns 0
+ */
+int
+fmtfdinit(Fmt *f, int fd, char *buf, int size)
+{
+	f->runes = 0;
+	f->start = buf;
+	f->to = buf;
+	f->stop = buf + size;
+	f->flush = __fmtFdFlush;
+	f->farg = (void*)(long)fd;	/* fd rides in farg; widened via long (assumed pointer-sized, as on ILP32/LP64) */
+	f->nfmt = 0;
+	return 0;
+}
diff --git a/src/libfmt/fmtfdflush.c b/src/libfmt/fmtfdflush.c
new file mode 100644
index 00000000..796feab2
--- /dev/null
+++ b/src/libfmt/fmtfdflush.c
@@ -0,0 +1,33 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+#include <stdarg.h>
+#include <unistd.h>
+#include "fmt.h"
+#include "fmtdef.h"
+
+/*
+ * flush routine for fd output: write the bytes in [start, to) to the fd kept
+ * in farg; returns 1 and empties the buffer, or 0 on a failed or short write
+ */
+int
+__fmtFdFlush(Fmt *f)
+{
+	int n;
+
+	n = (char*)f->to - (char*)f->start;
+	if(n && write((int)(long)f->farg, f->start, n) != n)	/* undo the widening done in fmtfdinit */
+		return 0;
+	f->to = f->start;
+	return 1;
+}
diff --git a/src/libfmt/fmtinstall.3 b/src/libfmt/fmtinstall.3
new file mode 100644
index 00000000..2a0e55bf
--- /dev/null
+++ b/src/libfmt/fmtinstall.3
@@ -0,0 +1,346 @@
+.TH FMTINSTALL 3
+.de EX
+.nf
+.ft B
+..
+.de EE
+.fi
+.ft R
+..
+.SH NAME +fmtinstall, dofmt, fmtprint, fmtvprint, fmtstrcpy, fmtfdinit, fmtfdflush, fmtstrinit, fmtstrflush \- support for user-defined print formats and output routines +.SH SYNOPSIS +.B #include <fmt.h> +.PP +.ft L +.nf +.ta \w' 'u +\w' 'u +\w' 'u +\w' 'u +\w' 'u +typedef struct Fmt Fmt; +struct Fmt{ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt*); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format character */ + int width; + int prec; + unsigned long flags; +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtZero = FmtSign << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + FmtByte = FmtComma << 1, + FmtLDouble = FmtByte << 1, + + FmtFlag = FmtLDouble << 1 +}; +.fi +.PP +.B +.ta \w'\fLchar* 'u + +.PP +.B +int fmtfdinit(Fmt *f, int fd, char *buf, int nbuf); +.PP +.B +int fmtfdflush(Fmt *f); +.PP +.B +int fmtstrinit(Fmt *f); +.PP +.B +char* fmtstrflush(Fmt *f); +.PP +.B +int fmtinstall(int c, int (*fn)(Fmt*)); +.PP +.B +int dofmt(Fmt *f, char *fmt); +.PP +.B +int fmtprint(Fmt *f, char *fmt, ...); +.PP +.B +int fmtvprint(Fmt *f, char *fmt, va_list v); +.PP +.B +int fmtrune(Fmt *f, int r); +.PP +.B +int fmtstrcpy(Fmt *f, char *s); +.SH DESCRIPTION +The interface described here allows the construction of custom +.IR print (3) +verbs and output routines. +In essence, they provide access to the workings of the formatted print code. +.PP +The +.IR print (3) +suite maintains its state with a data structure called +.BR Fmt . 
+A typical call to +.IR print (3) +or its relatives initializes a +.B Fmt +structure, passes it to subsidiary routines to process the output, +and finishes by emitting any saved state recorded in the +.BR Fmt . +The details of the +.B Fmt +are unimportant to outside users, except insofar as the general +design influences the interface. +The +.B Fmt +records +the verb being processed, its precision and width, +and buffering parameters. +Most important, it also records a +.I flush +routine that the library will call if a buffer overflows. +When printing to a file descriptor, the flush routine will +emit saved characters and reset the buffer; when printing +to an allocated string, it will resize the string to receive more output. +The flush routine is nil when printing to fixed-size buffers. +User code need never provide a flush routine; this is done internally +by the library. +.SS Custom output routines +To write a custom output routine, such as an error handler that +formats and prints custom error messages, the output sequence can be run +from outside the library using the routines described here. +There are two main cases: output to an open file descriptor +and output to a string. +.PP +To write to a file descriptor, call +.I fmtfdinit +to initialize the local +.B Fmt +structure +.IR f , +giving the file descriptor +.IR fd , +the buffer +.IR buf , +and its size +.IR nbuf . +Then call +.IR fmtprint +or +.IR fmtvprint +to generate the output. +These behave just like +.B fprint +(see +.IR print (3)) +or +.B vfprint +except that the characters are buffered until +.I fmtfdflush +is called. +A typical example of this sequence appears in the Examples section. +.PP +The same basic sequence applies when outputting to an allocated string: +call +.I fmtstrinit +to initialize the +.BR Fmt , +then call +.I fmtprint +and +.I fmtvprint +to generate the output. +Finally, +.I fmtstrflush +will return the allocated string, which should be freed after use. 
+Regardless of the output style or type,
+.I fmtprint
+or
+.I fmtvprint
+generates the characters.
+.SS Custom format verbs
+.I Fmtinstall
+is used to install custom verbs and flags labeled by character
+.IR c ,
+which may be any non-zero Unicode character.
+.I Fn
+should be declared as
+.IP
+.EX
+int fn(Fmt*)
+.EE
+.PP
+.IB Fp ->r
+is the flag or verb character to cause
+.I fn
+to be called.
+In
+.IR fn ,
+.IB fp ->width ,
+.IB fp ->prec
+are the width and precision, and
+.IB fp ->flags
+the decoded flags for the verb (see
+.IR print (3)
+for a description of these items).
+The standard flag values are:
+.B FmtSign
+.RB ( + ),
+.B FmtLeft
+.RB ( - ),
+.B FmtSpace
+.RB ( '\ ' ),
+.B FmtSharp
+.RB ( # ),
+.B FmtComma
+.RB ( , ),
+.B FmtLong
+.RB ( l ),
+.B FmtShort
+.RB ( h ),
+.B FmtByte
+.RB ( hh ),
+.B FmtUnsigned
+.RB ( u ),
+.B FmtLDouble
+.RB ( L ),
+and
+.B FmtVLong
+.RB ( ll ).
+The flag bits
+.B FmtWidth
+and
+.B FmtPrec
+identify whether a width and precision were specified.
+.PP
+.I Fn
+is passed a pointer to the
+.B Fmt
+structure recording the state of the output.
+If
+.IB fp ->r
+is a verb (rather than a flag),
+.I fn
+should use
+.IB fp ->args
+to fetch its argument from the list,
+then format it, and return zero.
+If
+.IB fp ->r
+is a flag,
+.I fn
+should record the flag (one of the above flag values, or some otherwise unused power of two)
+and return a positive value; a negative return value stops processing of the format.
+All interpretation of
+.IB fp ->width\f1,
+.IB fp ->prec\f1,
+and
+.IB fp ->flags
+is left up to the conversion routine.
+.I Fmtinstall
+returns 0 if the installation succeeds, \-1 if it fails.
+.PP
+.IR Fmtprint
+and
+.IR fmtvprint
+may be called to
+help prepare output in custom conversion routines.
+However, these functions clear the width, precision, and flags.
+The function
+.I dofmt
+is the underlying formatter; it
+uses the existing contents of
+.B Fmt
+and should be called only by sophisticated conversion routines.
+All these routines return the number of characters
+produced.
+.PP +Some internal functions may be useful to format primitive types. +They honor the width, precision and flags as described in +.IR print (3). +.I Fmtrune +formats a single character +.BR r . +.I Fmtstrcpy +formats a string +.BR s . +All these routines return zero for successful execution. +.SH EXAMPLES +This function prints an error message with a variable +number of arguments and then quits. +Compared to the corresponding example in +.IR print (3), +this version uses a smaller buffer, will never truncate +the output message, but might generate multiple +.B write +system calls to produce its output. +.IP +.EX +.ta 6n +6n +6n +6n +6n +6n +6n +6n +6n + +void fatal(char *fmt, ...) +{ + Fmt f; + char buf[64]; + va_list arg; + + fmtfdinit(&f, 1, buf, sizeof buf); + fmtprint(&f, "fatal: "); + va_start(arg, fmt); + fmtvprint(&f, fmt, arg); + va_end(arg); + fmtprint(&f, "\en"); + fmtfdflush(&f); + exits("fatal error"); +} +.EE +.PP +This example adds a verb to print complex numbers. +.IP +.EX +typedef +struct { + double r, i; +} Complex; + +int +Xfmt(Fmt *f) +{ + Complex c; + + c = va_arg(f->args, Complex); + return fmtprint(f, "(%g,%g)", c.r, c.i); +} + +main(...) +{ + Complex x; + + x.r = 1.5; + x.i = -2.3; + + fmtinstall('X', Xfmt); + print("x = %X\en", x); +} +.EE +.SH SEE ALSO +.IR print (3) +.SH HISTORY +This formatted print library originally +appeared as part of the Plan 9 C library. +.SH BUGS +The Plan 9 version supports Unicode strings and produces UTF output. +This version assumes that characters are always represented by 1-byte values. diff --git a/src/libfmt/fmtlock.c b/src/libfmt/fmtlock.c new file mode 100644 index 00000000..fffe81cf --- /dev/null +++ b/src/libfmt/fmtlock.c @@ -0,0 +1,28 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include "fmt.h" +#include "fmtdef.h" + +void +__fmtlock(void) +{ + ; +} + +void +__fmtunlock(void) +{ + ; +} diff --git a/src/libfmt/fmtprint.c b/src/libfmt/fmtprint.c new file mode 100644 index 00000000..fe2ad3cc --- /dev/null +++ b/src/libfmt/fmtprint.c @@ -0,0 +1,47 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. How much of the input will be consumed? + * The parameter q is filled in by __quotesetup. + * The string may be UTF or Runes (s or r). + * Return count does not include NUL. + * Terminate the scan at the first of: + * NUL in input + * count exceeded in input + * count exceeded on output + * *ninp is set to number of input bytes accepted. + * nin may be <0 initially, to avoid checking input by count. 
+ */
+void
+__quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout)
+{
+	int w;
+	Rune c;
+
+	q->quoted = 0;
+	q->nbytesout = 0;
+	q->nrunesout = 0;
+	q->nbytesin = 0;
+	q->nrunesin = 0;
+	if(sharp || nin==0 || (s && *s=='\0') || (r && *r=='\0')){
+		if(nout < 2)
+			return;
+		q->quoted = 1;
+		q->nbytesout = 2;
+		q->nrunesout = 2;
+	}
+	for(; nin!=0; nin-=w){
+		if(s)
+			w = chartorune(&c, s);
+		else{
+			c = *r;
+			w = runelen(c);
+		}
+
+		if(c == '\0')
+			break;
+		if(runesout){
+			if(q->nrunesout+1 > nout)
+				break;
+		}else{
+			if(q->nbytesout+w > nout)
+				break;
+		}
+
+		if((c <= L' ') || (c == L'\'') || (fmtdoquote!=nil && fmtdoquote(c))){
+			if(!q->quoted){
+				if(runesout){
+					if(1+q->nrunesout+1+1 > nout)	/* no room for quotes */
+						break;
+				}else{
+					if(1+q->nbytesout+w+1 > nout)	/* no room for quotes */
+						break;
+				}
+				q->nrunesout += 2;	/* include quotes */
+				q->nbytesout += 2;	/* include quotes */
+				q->quoted = 1;
+			}
+			if(c == '\'') {
+				if(runesout){
+					if(1+q->nrunesout+1 > nout)	/* no room for quotes */
+						break;
+				}else{
+					if(1+q->nbytesout+w > nout)	/* no room for quotes */
+						break;
+				}
+				q->nbytesout++;
+				q->nrunesout++;	/* quotes reproduce as two characters */
+			}
+		}
+
+		/* advance input */
+		if(s)
+			s += w;
+		else
+			r++;
+		q->nbytesin += w;
+		q->nrunesin++;
+
+		/* advance output */
+		q->nbytesout += w;
+		q->nrunesout++;
+	}
+}
+/* emit the first q->nrunesin characters of sin (or rin) in single quotes, doubling each ', padded to f->width */
+static int
+qstrfmt(char *sin, Rune *rin, Quoteinfo *q, Fmt *f)
+{
+	Rune r, *rm, *rme;
+	char *t, *s, *m, *me;
+	Rune *rt, *rs;
+	ulong fl;
+	int nc, w;
+
+	m = sin;
+	me = m + q->nbytesin;
+	rm = rin;
+	rme = rm + q->nrunesin;
+
+	w = f->width;
+	fl = f->flags;
+	if(f->runes){
+		if(!(fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0)
+			return -1;
+	}else{
+		if(!(fl & FmtLeft) && __fmtpad(f, w - q->nbytesout) < 0)
+			return -1;
+	}
+	t = (char*)f->to;
+	s = (char*)f->stop;
+	rt = (Rune*)f->to;
+	rs = (Rune*)f->stop;
+	if(f->runes)
+		FMTRCHAR(f, rt, rs, '\'');
+	else
+		FMTRUNE(f, t, s, '\'');
+	for(nc = q->nrunesin; nc > 0; nc--){
+		if(sin){
+			r = *(uchar*)m;
+			if(r < Runeself)
+				m++;
+			else if((me - m) >= UTFmax || fullrune(m, me-m))
+				m += chartorune(&r, m);
+			else
+				break;
+		}else{
+			if(rm >= rme)
+				break;
+			r = *rm++;	/* take the whole Rune; a (uchar*) cast here would keep only one of its bytes */
+		}
+		if(f->runes){
+			FMTRCHAR(f, rt, rs, r);
+			if(r == '\'')
+				FMTRCHAR(f, rt, rs, r);
+		}else{
+			FMTRUNE(f, t, s, r);
+			if(r == '\'')
+				FMTRUNE(f, t, s, r);
+		}
+	}
+
+	if(f->runes){
+		FMTRCHAR(f, rt, rs, '\'');
+		USED(rs);
+		f->nfmt += rt - (Rune *)f->to;
+		f->to = rt;
+		if((fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0)
+			return -1;
+	}else{
+		FMTRUNE(f, t, s, '\'');
+		USED(s);
+		f->nfmt += t - (char *)f->to;
+		f->to = t;
+		if((fl & FmtLeft) && __fmtpad(f, w - q->nbytesout) < 0)
+			return -1;
+	}
+	return 0;
+}
+/* shared body of quotestrfmt (char* argument, runesin == 0) and quoterunestrfmt (Rune* argument) */
+int
+__quotestrfmt(int runesin, Fmt *f)
+{
+	int outlen;
+	Rune *r;
+	char *s;
+	Quoteinfo q;
+
+	f->flags &= ~FmtPrec;	/* ignored for %q %Q, so disable for %s %S in easy case */
+	if(runesin){
+		r = va_arg(f->args, Rune *);
+		s = nil;
+	}else{
+		s = va_arg(f->args, char *);
+		r = nil;
+	}
+	if(!s && !r)
+		return __fmtcpy(f, (void*)"<nil>", 5, 5);
+
+	if(f->flush)
+		outlen = 0x7FFFFFFF;	/* if we can flush, no output limit */
+	else if(f->runes)
+		outlen = (Rune*)f->stop - (Rune*)f->to;
+	else
+		outlen = (char*)f->stop - (char*)f->to;
+
+	__quotesetup(s, r, -1, outlen, &q, f->flags&FmtSharp, f->runes);
+	/* q now says whether quoting is needed and how much input fits in outlen */
+
+	if(runesin){
+		if(!q.quoted)
+			return __fmtrcpy(f, r, q.nrunesin);
+		return qstrfmt(nil, r, &q, f);
+	}
+
+	if(!q.quoted)
+		return __fmtcpy(f, s, q.nrunesin, q.nbytesin);
+	return qstrfmt(s, nil, &q, f);
+}
+
+int
+quotestrfmt(Fmt *f)
+{
+	return __quotestrfmt(0, f);
+}
+
+int
+quoterunestrfmt(Fmt *f)
+{
+	return __quotestrfmt(1, f);
+}
+
+void
+quotefmtinstall(void)
+{
+	fmtinstall('q', quotestrfmt); IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND
CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +static int +fmtStrFlush(Fmt *f) +{ + char *s; + int n; + + n = (int)f->farg; + n += 256; + f->farg = (void*)n; + s = (char*)f->start; + f->start = realloc(s, n); + if(f->start == nil){ + f->start = s; + return 0; + } + f->to = (char*)f->start + ((char*)f->to - s); + f->stop = (char*)f->start + n - 1; + return 1; +} + +int +fmtstrinit(Fmt *f) +{ + int n; + + f->runes = 0; + n = 32; + f->start = malloc(n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (char*)f->start + n - 1; + f->flush = fmtStrFlush; + f->farg = (void*)n; + f->nfmt = 0; + return 0; +} + +char* +fmtstrflush(Fmt *f) +{ + *(char*)f->to = '\0'; + f->to = f->start; + return (char*)f->start; +} diff --git a/src/libfmt/fmtvprint.c b/src/libfmt/fmtvprint.c new file mode 100644 index 00000000..6aed013d --- /dev/null +++ b/src/libfmt/fmtvprint.c @@ -0,0 +1,46 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
+ +double +__fmtpow10(int n) +{ + int m; + + if(n < 0) { + n = -n; + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return 1/tab[n]; + m = n/2; + return __fmtpow10(-m) * __fmtpow10(m-n); + } + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return tab[n]; + m = n/2; + return __fmtpow10(m) * __fmtpow10(n-m); +} diff --git a/src/libfmt/print.3 b/src/libfmt/print.3 new file mode 100644 index 00000000..1fab0ad8 --- /dev/null +++ b/src/libfmt/print.3 @@ -0,0 +1,469 @@ +.TH PRINT 3 +.de EX +.nf +.ft B +.. +.de EE +.fi +.ft R +.. +.SH NAME +print, fprint, sprint, snprint, seprint, smprint, vfprint, vsnprint, vseprint, vsmprint \- print formatted output +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B #include <fmt.h> +.PP +.ta \w'\fLchar* 'u +.B +int print(char *format, ...) +.PP +.B +int fprint(int fd, char *format, ...) +.PP +.B +int sprint(char *s, char *format, ...) +.PP +.B +int snprint(char *s, int len, char *format, ...) +.PP +.B +char* seprint(char *s, char *e, char *format, ...) +.PP +.B +char* smprint(char *format, ...) +.PP +.B +int runesprint(Rune *s, char *format, ...) +.PP +.B +int runesnprint(Rune *s, int len, char *format, ...) +.PP +.B +Rune* runeseprint(Rune *s, Rune *e, char *format, ...) +.PP +.B +Rune* runesmprint(char *format, ...) +.PP +.B +int vfprint(int fd, char *format, va_list v) +.PP +.B +int vsnprint(char *s, int len, char *format, va_list v) +.PP +.B +char* vseprint(char *s, char *e, char *format, va_list v) +.PP +.B +char* vsmprint(char *format, va_list v) +.PP +.B +int runevsnprint(Rune *s, int len, char *format, va_list v) +.PP +.B +Rune* runevseprint(Rune *s, Rune *e, char *format, va_list v) +.PP +.B +Rune* runevsmprint(char *format, va_list v) +.PP +.B +.SH DESCRIPTION +.I Print +writes text to the standard output. +.I Fprint +writes to the named output +file descriptor. 
+.I Sprint +places text +followed by the NUL character +.RB ( \e0 ) +in consecutive bytes starting at +.IR s ; +it is the user's responsibility to ensure that +enough storage is available. +Each function returns the number of bytes +transmitted (not including the NUL +in the case of +.IR sprint ), +or +a negative value if an output error was encountered. +.PP +.I Snprint +is like +.IR sprint , +but will not place more than +.I len +bytes in +.IR s . +Its result is always NUL-terminated and holds the maximal +number of characters that can fit. +.I Seprint +is like +.IR snprint , +except that the end is indicated by a pointer +.I e +rather than a count and the return value points to the terminating NUL of the +resulting string. +.I Smprint +is like +.IR sprint , +except that it prints into and returns a string of the required length, which is +allocated by +.IR malloc (3). +.PP +The routines +.IR runesprint , +.IR runesnprint , +.IR runeseprint , +and +.I runesmprint +are the same as +.IR sprint , +.IR snprint , +.IR seprint +and +.I smprint +except that their output is rune strings instead of byte strings. +.PP +Finally, the routines +.IR vfprint , +.IR vsnprint , +.IR vseprint , +.IR vsmprint , +.IR runevsnprint , +.IR runevseprint , +and +.I runevsmprint +are like their +.BR v-less +relatives except they take as arguments a +.B va_list +parameter, so they can be called within a variadic function. +The Example section shows a representative usage. +.PP +Each of these functions +converts, formats, and prints its +trailing arguments +under control of a +.IR format +string. +The +format +contains two types of objects: +plain characters, which are simply copied to the +output stream, +and conversion specifications, +each of which results in fetching of +zero or more +arguments. +The results are undefined if there are arguments of the +wrong type or too few +arguments for the format. +If the format is exhausted while +arguments remain, the excess +is ignored. 
+.PP +Each conversion specification has the following format: +.IP +.B "% [flags] verb +.PP +The verb is a single character and each flag is a single character or a +(decimal) numeric string. +Up to two numeric strings may be used; +the first is called +.IR width , +the second +.IR precision . +A period can be used to separate them, and if the period is +present then +.I width +and +.I precision +are taken to be zero if missing, otherwise they are `omitted'. +Either or both of the numbers may be replaced with the character +.BR * , +meaning that the actual number will be obtained from the argument list +as an integer. +The flags and numbers are arguments to +the +.I verb +described below. +.PP +The numeric verbs +.BR d , +.BR i , +.BR u , +.BR o , +.BR b , +.BR x , +and +.B X +format their arguments in decimal, decimal, +unsigned decimal, octal, binary, hexadecimal, and upper case hexadecimal. +Each interprets the flags +.BR 0 , +.BR h , +.BR hh , +.BR l , +.BR + , +.BR - , +.BR , , +and +.B # +to mean pad with zeros, +short, byte, long, always print a sign, left justified, commas every three digits, +and alternate format. +Also, a space character in the flag +position is like +.BR + , +but prints a space instead of a plus sign for non-negative values. +If neither +short nor long is specified, +then the argument is an +.BR int . +If an unsigned verb is specified, +then the argument is interpreted as a +positive number and no sign is output; +space and +.B + +flags are ignored for unsigned verbs. +If two +.B l +flags are given, +then the argument is interpreted as a +.B vlong +(usually an 8-byte, sometimes a 4-byte integer). +If +.I precision +is not omitted, the number is padded on the left with zeros +until at least +.I precision +digits appear. +If +.I precision +is explicitly 0, and the number is 0, +no digits are generated, and alternate formatting +does not apply. 
+Then, if alternate format is specified, +for +.B o +conversion, the number is preceded by a +.B 0 +if it doesn't already begin with one. +For non-zero numbers and +.B x +conversion, the number is preceded by +.BR 0x ; +for +.B X +conversion, the number is preceded by +.BR 0X . +Finally, if +.I width +is not omitted, the number is padded on the left (or right, if +left justification is specified) with enough blanks to +make the field at least +.I width +characters long. +.PP +The floating point verbs +.BR f , +.BR e , +.BR E , +.BR g , +and +.B G +take a +.B double +argument. +Each interprets the flags +.BR 0 , +.BR L , +.BR + , +.BR - , +and +.B # +to mean pad with zeros, +long double argument, +always print a sign, +left justified, +and +alternate format. +.I Width +is the minimum field width and, +if the converted value takes up less than +.I width +characters, it is padded on the left (or right, if `left justified') +with spaces. +.I Precision +is the number of digits that are converted after the decimal place for +.BR e , +.BR E , +and +.B f +conversions, +and +.I precision +is the maximum number of significant digits for +.B g +and +.B G +conversions. +The +.B f +verb produces output of the form +.RB [ - ] digits [ .digits\fR]. +.B E +conversion appends an exponent +.BR E [ - ] digits , +and +.B e +conversion appends an exponent +.BR e [ - ] digits . +The +.B g +verb will output the argument in either +.B e +or +.B f +with the goal of producing the smallest output. +Also, trailing zeros are omitted from the fraction part of +the output, and a trailing decimal point appears only if it is followed +by a digit. +The +.B G +verb is similar, but uses +.B E +format instead of +.BR e . +When alternate format is specified, the result will always contain a decimal point, +and for +.B g +and +.B G +conversions, trailing zeros are not removed. +.PP +The +.B s +verb copies a string +(pointer to +.BR char ) +to the output. 
+The number of characters copied +.RI ( n ) +is the minimum +of the size of the string and +.IR precision . +These +.I n +characters are justified within a field of +.I width +characters as described above. +If a +.I precision +is given, it is safe for the string not to be nul-terminated +as long as it is at least +.I precision +characters (not bytes!) long. +The +.B S +verb is similar, but it interprets its pointer as an array +of runes (see +.IR utf (7)); +the runes are converted to +.SM UTF +before output. +.PP +The +.B c +verb copies a single +.B char +(promoted to +.BR int ) +justified within a field of +.I width +characters as described above. +The +.B C +verb is similar, but works on runes. +.PP +The +.B p +verb formats a pointer value. +At the moment, it is a synonym for +.BR x , +but that will change if pointers and integers are different sizes. +.PP +The +.B r +verb takes no arguments; it copies the error string returned by a call to +.IR strerror (3) +with an argument of +.IR errno . +.PP +Custom verbs may be installed using +.IR fmtinstall (3). +.SH EXAMPLE +This function prints an error message with a variable +number of arguments and then quits. +.IP +.EX +.ta 6n +6n +6n +void fatal(char *msg, ...) +{ + char buf[1024], *out; + va_list arg; + + out = seprint(buf, buf+sizeof buf, "Fatal error: "); + va_start(arg, msg); + out = vseprint(out, buf+sizeof buf, msg, arg); + va_end(arg); + write(2, buf, out-buf); + exit(1); +} +.EE +.SH SEE ALSO +.IR fmtinstall (3), +.IR fprintf (3), +.IR utf (7) +.SH DIAGNOSTICS +Routines that write to a file descriptor or call +.IR malloc +set +.IR errstr . +.SH BUGS +The formatting is close to that specified for ANSI +.IR fprintf (3); (Printf with user-defined verbs.) 
+Name: libfmt +Version: 2.0 +Release: 1 +Group: Development/C +Copyright: BSD-like +Packager: Russ Cox <rsc@post.harvard.edu> +Source: http://pdos.lcs.mit.edu/~rsc/software/libfmt-2.0.tgz +URL: http://pdos.lcs.mit.edu/~rsc/software/#libfmt +Requires: libutf + +%description +Libfmt is a port of Plan 9's formatted print library. +As a base it provides all the syntax of ANSI printf +but adds the ability for client programs to install +new print verbs. One such print verb (installed by +default) is %r, which prints the system error string. +Instead of perror("foo"), you can write fprint(2, "foo: %r\n"). +This is especially nice when you write verbs to format +the data structures used by your particular program. +%prep +%setup + +%build +make + +%install +make install + +%files +/usr/local/include/fmt.h +/usr/local/lib/libfmt.a +/usr/local/man/man3/print.3 +/usr/local/man/man3/fmtinstall.3 diff --git a/src/libfmt/runefmtstr.c b/src/libfmt/runefmtstr.c new file mode 100644 index 00000000..a2ec6cb4 --- /dev/null +++ b/src/libfmt/runefmtstr.c @@ -0,0 +1,65 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include "utf.h" +#include "fmt.h" +#include "fmtdef.h" + +static int +runeFmtStrFlush(Fmt *f) +{ + Rune *s; + int n; + + n = (int)f->farg; + n += 256; + f->farg = (void*)n; + s = (Rune*)f->start; + f->start = realloc(s, sizeof(Rune)*n); + if(f->start == nil){ + f->start = s; + return 0; + } + f->to = (Rune*)f->start + ((Rune*)f->to - s); + f->stop = (Rune*)f->start + n - 1; + return 1; +} + +int +runefmtstrinit(Fmt *f) +{ + int n; + + f->runes = 1; + n = 32; + f->start = malloc(sizeof(Rune)*n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (Rune*)f->start + n - 1; + f->flush = runeFmtStrFlush; + f->farg = (void*)n; + f->nfmt = 0; + return 0; +} + +Rune* +runefmtstrflush(Fmt *f) +{ + *(Rune*)f->to = '\0'; + f->to = f->start; + return f->start; +} diff --git a/src/libfmt/runeseprint.c b/src/libfmt/runeseprint.c new file mode 100644 index 00000000..7829a439 --- /dev/null +++ b/src/libfmt/runeseprint.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdlib.h> +#include <math.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "fmt.h" +#include "nan.h" + +#ifndef nelem +#define nelem(x) (sizeof(x)/sizeof *(x)) +#endif +#define nil ((void*)0) +#define ulong _fmtulong +typedef unsigned long ulong; + +static ulong +umuldiv(ulong a, ulong b, ulong c) +{ + double d; + + d = ((double)a * (double)b) / (double)c; + if(d >= 4294967295.) + d = 4294967295.; + return (ulong)d; +} + +/* + * This routine will convert to arbitrary precision + * floating point entirely in multi-precision fixed. + * The answer is the closest floating point number to + * the given decimal number. Exactly half way are + * rounded ala ieee rules. + * Method is to scale input decimal between .500 and .999... + * with external power of 2, then binary search for the + * closest mantissa to this decimal number. + * Nmant is is the required precision. (53 for ieee dp) + * Nbits is the max number of bits/word. (must be <= 28) + * Prec is calculated - the number of words of fixed mantissa. + */ +enum +{ + Nbits = 28, /* bits safely represented in a ulong */ + Nmant = 53, /* bits of precision required */ + Prec = (Nmant+Nbits+1)/Nbits, /* words of Nbits each to represent mantissa */ + Sigbit = 1<<(Prec*Nbits-Nmant), /* first significant bit of Prec-th word */ + Ndig = 1500, + One = (ulong)(1<<Nbits), + Half = (ulong)(One>>1), + Maxe = 310, + + Fsign = 1<<0, /* found - */ + Fesign = 1<<1, /* found e- */ + Fdpoint = 1<<2, /* found . */ + + S0 = 0, /* _ _S0 +S1 #S2 .S3 */ + S1, /* _+ #S2 .S3 */ + S2, /* _+# #S2 .S4 eS5 */ + S3, /* _+. 
#S4 */ + S4, /* _+#.# #S4 eS5 */ + S5, /* _+#.#e +S6 #S7 */ + S6, /* _+#.#e+ #S7 */ + S7, /* _+#.#e+# #S7 */ +}; + +static int xcmp(char*, char*); +static int fpcmp(char*, ulong*); +static void frnorm(ulong*); +static void divascii(char*, int*, int*, int*); +static void mulascii(char*, int*, int*, int*); + +typedef struct Tab Tab; +struct Tab +{ + int bp; + int siz; + char* cmp; +}; + +double +fmtstrtod(const char *as, char **aas) +{ + int na, ex, dp, bp, c, i, flag, state; + ulong low[Prec], hig[Prec], mid[Prec]; + double d; + char *s, a[Ndig]; + + flag = 0; /* Fsign, Fesign, Fdpoint */ + na = 0; /* number of digits of a[] */ + dp = 0; /* na of decimal point */ + ex = 0; /* exonent */ + + state = S0; + for(s=(char*)as;; s++) { + c = *s; + if(c >= '0' && c <= '9') { + switch(state) { + case S0: + case S1: + case S2: + state = S2; + break; + case S3: + case S4: + state = S4; + break; + + case S5: + case S6: + case S7: + state = S7; + ex = ex*10 + (c-'0'); + continue; + } + if(na == 0 && c == '0') { + dp--; + continue; + } + if(na < Ndig-50) + a[na++] = c; + continue; + } + switch(c) { + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': + if(state == S0) + continue; + break; + case '-': + if(state == S0) + flag |= Fsign; + else + flag |= Fesign; + case '+': + if(state == S0) + state = S1; + else + if(state == S5) + state = S6; + else + break; /* syntax */ + continue; + case '.': + flag |= Fdpoint; + dp = na; + if(state == S0 || state == S1) { + state = S3; + continue; + } + if(state == S2) { + state = S4; + continue; + } + break; + case 'e': + case 'E': + if(state == S2 || state == S4) { + state = S5; + continue; + } + break; + } + break; + } + + /* + * clean up return char-pointer + */ + switch(state) { + case S0: + if(xcmp(s, "nan") == 0) { + if(aas != nil) + *aas = s+3; + goto retnan; + } + case S1: + if(xcmp(s, "infinity") == 0) { + if(aas != nil) + *aas = s+8; + goto retinf; + } + if(xcmp(s, "inf") == 0) { + if(aas != nil) + *aas = s+3; 
+ goto retinf; + } + case S3: + if(aas != nil) + *aas = (char*)as; + goto ret0; /* no digits found */ + case S6: + s--; /* back over +- */ + case S5: + s--; /* back over e */ + break; + } + if(aas != nil) + *aas = s; + + if(flag & Fdpoint) + while(na > 0 && a[na-1] == '0') + na--; + if(na == 0) + goto ret0; /* zero */ + a[na] = 0; + if(!(flag & Fdpoint)) + dp = na; + if(flag & Fesign) + ex = -ex; + dp += ex; + if(dp < -Maxe){ + errno = ERANGE; + goto ret0; /* underflow by exp */ + } else + if(dp > +Maxe) + goto retinf; /* overflow by exp */ + + /* + * normalize the decimal ascii number + * to range .[5-9][0-9]* e0 + */ + bp = 0; /* binary exponent */ + while(dp > 0) + divascii(a, &na, &dp, &bp); + while(dp < 0 || a[0] < '5') + mulascii(a, &na, &dp, &bp); + + /* close approx by naive conversion */ + mid[0] = 0; + mid[1] = 1; + for(i=0; c=a[i]; i++) { + mid[0] = mid[0]*10 + (c-'0'); + mid[1] = mid[1]*10; + if(i >= 8) + break; + } + low[0] = umuldiv(mid[0], One, mid[1]); + hig[0] = umuldiv(mid[0]+1, One, mid[1]); + for(i=1; i<Prec; i++) { + low[i] = 0; + hig[i] = One-1; + } + + /* binary search for closest mantissa */ + for(;;) { + /* mid = (hig + low) / 2 */ + c = 0; + for(i=0; i<Prec; i++) { + mid[i] = hig[i] + low[i]; + if(c) + mid[i] += One; + c = mid[i] & 1; + mid[i] >>= 1; + } + frnorm(mid); + + /* compare */ + c = fpcmp(a, mid); + if(c > 0) { + c = 1; + for(i=0; i<Prec; i++) + if(low[i] != mid[i]) { + c = 0; + low[i] = mid[i]; + } + if(c) + break; /* between mid and hig */ + continue; + } + if(c < 0) { + for(i=0; i<Prec; i++) + hig[i] = mid[i]; + continue; + } + + /* only hard part is if even/odd roundings wants to go up */ + c = mid[Prec-1] & (Sigbit-1); + if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0) + mid[Prec-1] -= c; + break; /* exactly mid */ + } + + /* normal rounding applies */ + c = mid[Prec-1] & (Sigbit-1); + mid[Prec-1] -= c; + if(c >= Sigbit/2) { + mid[Prec-1] += Sigbit; + frnorm(mid); + } + goto out; + +ret0: + return 0; + +retnan: + return 
__NaN(); + +retinf: + /* + * Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */ + errno = ERANGE; + if(flag & Fsign) + return -HUGE_VAL; + return HUGE_VAL; + +out: + d = 0; + for(i=0; i<Prec; i++) + d = d*One + mid[i]; + if(flag & Fsign) + d = -d; + d = ldexp(d, bp - Prec*Nbits); + if(d == 0){ /* underflow */ + errno = ERANGE; + } + return d; +} + +static void +frnorm(ulong *f) +{ + int i, c; + + c = 0; + for(i=Prec-1; i>0; i--) { + f[i] += c; + c = f[i] >> Nbits; + f[i] &= One-1; + } + f[0] += c; +} + +static int +fpcmp(char *a, ulong* f) +{ + ulong tf[Prec]; + int i, d, c; + + for(i=0; i<Prec; i++) + tf[i] = f[i]; + + for(;;) { + /* tf *= 10 */ + for(i=0; i<Prec; i++) + tf[i] = tf[i]*10; + frnorm(tf); + d = (tf[0] >> Nbits) + '0'; + tf[0] &= One-1; + + /* compare next digit */ + c = *a; + if(c == 0) { + if('0' < d) + return -1; + if(tf[0] != 0) + goto cont; + for(i=1; i<Prec; i++) + if(tf[i] != 0) + goto cont; + return 0; + } + if(c > d) + return +1; + if(c < d) + return -1; + a++; + cont:; + } + return 0; +} + +static void +divby(char *a, int *na, int b) +{ + int n, c; + char *p; + + p = a; + n = 0; + while(n>>b == 0) { + c = *a++; + if(c == 0) { + while(n) { + c = n*10; + if(c>>b) + break; + n = c; + } + goto xx; + } + n = n*10 + c-'0'; + (*na)--; + } + for(;;) { + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + c = *a++; + if(c == 0) + break; + n = n*10 + c-'0'; + } + (*na)++; +xx: + while(n) { + n = n*10; + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + (*na)++; + } + *p = 0; +} + +static Tab tab1[] = +{ + 1, 0, "", + 3, 1, "7", + 6, 2, "63", + 9, 3, "511", + 13, 4, "8191", + 16, 5, "65535", + 19, 6, "524287", + 23, 7, "8388607", + 26, 8, "67108863", + 27, 9, "134217727", +}; + +static void +divascii(char *a, int *na, int *dp, int *bp) +{ + int b, d; + Tab *t; + + d = *dp; + if(d >= (int)(nelem(tab1))) + d = (int)(nelem(tab1))-1; + t = tab1 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) > 0) + d--; + *dp -= d; + *bp += b; + divby(a, na, b); +} + 
+static void +mulby(char *a, char *p, char *q, int b) +{ + int n, c; + + n = 0; + *p = 0; + for(;;) { + q--; + if(q < a) + break; + c = *q - '0'; + c = (c<<b) + n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } + while(n) { + c = n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } +} + +static Tab tab2[] = +{ + 1, 1, "", /* dp = 0-0 */ + 3, 3, "125", + 6, 5, "15625", + 9, 7, "1953125", + 13, 10, "1220703125", + 16, 12, "152587890625", + 19, 14, "19073486328125", + 23, 17, "11920928955078125", + 26, 19, "1490116119384765625", + 27, 19, "7450580596923828125", /* dp 8-9 */ +}; + +static void +mulascii(char *a, int *na, int *dp, int *bp) +{ + char *p; + int d, b; + Tab *t; + + d = -*dp; + if(d >= (int)(nelem(tab2))) + d = (int)(nelem(tab2))-1; + t = tab2 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) < 0) + d--; + p = a + *na; + *bp -= b; + *dp += d; + *na += d; + mulby(a, p+d, p, b); +} + +static int +xcmp(char *a, char *b) +{ + int c1, c2; + + while(c1 = *b++) { + c2 = *a++; + if(isupper(c2)) + c2 = tolower(c2); + if(c1 != c2) + return 1; + } + return 0; +} diff --git a/src/libfmt/strtod.h b/src/libfmt/strtod.h new file mode 100644 index 00000000..82c3d46e --- /dev/null +++ b/src/libfmt/strtod.h @@ -0,0 +1,4 @@ +extern double __NaN(void); +extern double __Inf(int); +extern double __isNaN(double); +extern double __isInf(double, int); diff --git a/src/libfmt/test.c b/src/libfmt/test.c new file mode 100644 index 00000000..a1a1d5ed --- /dev/null +++ b/src/libfmt/test.c @@ -0,0 +1,39 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. 
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. s;/.*;;; s; ;;g'} + +# this works in bsd make +SYSNAME!=uname +OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g' + +# the gnu rules will mess up bsd but not vice versa, +# hence the gnu rules come first. + +include Make.$(SYSNAME)-$(OBJTYPE) + +PREFIX=/usr/local + +NUKEFILES= + +TGZFILES= + +LIB=libregexp9.a +VERSION=2.0 +PORTPLACE=devel/libregexp9 +NAME=libregexp9 + +OFILES=\ + regcomp.$O\ + regerror.$O\ + regexec.$O\ + regsub.$O\ + regaux.$O\ + rregsub.$O\ + rregaux.$O\ + rregexec.$O\ + +HFILES=\ + regexp9.h\ + regcomp.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + test -d $(PREFIX)/man/man7 || mkdir $(PREFIX)/man/man7 + install -m 0644 regexp9.3 $(PREFIX)/man/man3/regexp9.3 + install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7 + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h + +test: test.$O $(LIB) + $(CC) -o test test.$O $(LIB) -L/usr/local/lib -lfmt -lutf + +test2: test2.$O $(LIB) + $(CC) -o test2 test2.$O $(LIB) -L/usr/local/lib -lfmt -lutf + +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) $(LIB) $(OFILES) + +NUKEFILES+=$(LIB) +.c.$O: + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + +%.$O: %.c + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + + +$(OFILES): $(HFILES) + +tgz: + rm -rf $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION) + cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION) + tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + +clean: + rm -f $(OFILES) $(LIB) + +nuke: + rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES) + +rpm: + make tgz + cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES + rpm -ba rpm.spec + cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm . + cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm . 
+ scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software + +PORTDIR=/usr/ports/$(PORTPLACE) + +ports: + make tgz + rm -rf $(PORTDIR) + mkdir $(PORTDIR) + cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles + cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}') + (cd $(PORTDIR); make makesum) + (cd $(PORTDIR); make) + (cd $(PORTDIR); /usr/local/bin/portlint) + rm -rf $(PORTDIR)/work + shar `find $(PORTDIR)` > ports.shar + (cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz + scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software + +.phony: all clean nuke install tgz rpm ports diff --git a/src/libregexp/Makefile.MID b/src/libregexp/Makefile.MID new file mode 100644 index 00000000..fa8a3a93 --- /dev/null +++ b/src/libregexp/Makefile.MID @@ -0,0 +1,34 @@ +LIB=libregexp9.a +VERSION=2.0 +PORTPLACE=devel/libregexp9 +NAME=libregexp9 + +OFILES=\ + regcomp.$O\ + regerror.$O\ + regexec.$O\ + regsub.$O\ + regaux.$O\ + rregsub.$O\ + rregaux.$O\ + +HFILES=\ + regexp9.h\ + regcomp.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + test -d $(PREFIX)/man/man7 || mkdir $(PREFIX)/man/man7 + install -m 0644 regexp9.3 $(PREFIX)/man/man3/regexp9.3 + install -m 0644 regexp9.7 $(PREFIX)/man/man7/regexp9.7 + install -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + install -m 0644 regexp9.h $(PREFIX)/include/regexp9.h + +test: test.$O $(LIB) + $(CC) -o test test.$O $(LIB) -L/usr/local/lib -lfmt -lutf + +test2: test2.$O $(LIB) + $(CC) -o test2 test2.$O $(LIB) -L/usr/local/lib -lfmt -lutf + diff --git a/src/libregexp/NOTICE b/src/libregexp/NOTICE new file mode 100644 index 00000000..784ee1d0 --- /dev/null +++ b/src/libregexp/NOTICE @@ -0,0 +1,25 @@ +Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +Portions Copyright © 2000-2002 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. + +Under a licence agreement with Lucent Technologies Inc. 
effective 1st March 2000, +Vita Nuova Holdings Limited has the right to determine (within a specified scope) +the form and content of sublicences for this software. + +Vita Nuova Holdings Limited now makes this software available as Free +Software under the terms of the `GNU Lesser Public License, Version 2.1' +(see the file LICENCE or http://www.fsf.org/copyleft/lesser.html for +the full terms and conditions). One of the conditions of that licence +is that you must keep intact all notices that refer to that licence and to the absence of +of any warranty: for this software, note that includes this NOTICE file in particular. + +This suite of programs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +`GNU General Public License' for more details. + +This copyright NOTICE applies to all files in this directory and +subdirectories, unless another copyright notice appears in a given +file or subdirectory. If you take code from this software to use in +other programs, you must somehow include with it an appropriate +copyright notice that includes the copyright notice and the other +notices above. diff --git a/src/libregexp/README b/src/libregexp/README new file mode 100644 index 00000000..1f625112 --- /dev/null +++ b/src/libregexp/README @@ -0,0 +1,7 @@ +This is a Unix port of the Plan 9 regular expression library, +originally done for the Inferno operating system. + +Russ Cox repackaged this to build as a standalone +Unix library. Send comments about packaging to +Russ Cox <rsc@post.harvard.edu> + diff --git a/src/libregexp/bundle.ports b/src/libregexp/bundle.ports new file mode 100644 index 00000000..2205e4b9 --- /dev/null +++ b/src/libregexp/bundle.ports @@ -0,0 +1,51 @@ +--- Makefile --- +# New ports collection makefile for: libbio +# Date Created: 11 Feb 2003 +# Whom: rsc +# +# THIS LINE NEEDS REPLACING. 
IT'S HERE TO GET BY PORTLINT +# $FreeBSD: ports/devel/libbio/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $ + +PORTNAME= libregexp9 +PORTVERSION= 2.0 +CATEGORIES= devel +MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/ +EXTRACT_SUFX= .tgz + +MAINTAINER= rsc@post.harvard.edu + +DEPENDS= ${PORTSDIR}/devel/libfmt ${PORTSDIR}/devel/libutf + +MAN3= regexp9.3 +MAN7= regexp9.7 +USE_REINPLACE= yes + +.include <bsd.port.pre.mk> + +post-patch: + ${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile + +.include <bsd.port.post.mk> +--- pkg-comment --- +Simple regular expression library from Plan 9 +--- pkg-descr --- +Libregexp9 is a port of Plan 9's regexp library. +It is small and simple and provides the traditional +extended regular expressions (as opposed to the +current extended regular expressions, which add {} +and various \x character classes, among other +complications). + +It handles Unicode in wide character or UTF8 format! + +WWW: http://pdos.lcs.mit.edu/~rsc/software/ +http://plan9.bell-labs.com/magic/man2html/2/regexp + +Russ Cox +rsc@post.harvard.edu +--- pkg-plist --- +lib/libregexp9.a +include/regexp9.h +--- /dev/null --- +This is just a way to make sure blank lines don't +creep into pkg-plist. 
diff --git a/src/libregexp/lib9.h b/src/libregexp/lib9.h new file mode 100644 index 00000000..d022656d --- /dev/null +++ b/src/libregexp/lib9.h @@ -0,0 +1,6 @@ +#include <fmt.h> +#include <setjmp.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + diff --git a/src/libregexp/mkfile b/src/libregexp/mkfile new file mode 100644 index 00000000..bb99a25a --- /dev/null +++ b/src/libregexp/mkfile @@ -0,0 +1 @@ +<../libutf/mkfile diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c new file mode 100644 index 00000000..956c1eb0 --- /dev/null +++ b/src/libregexp/regaux.c @@ -0,0 +1,76 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + + +/* + * save a new match in mp + */ +extern void +_renewmatch(Resub *mp, int ms, Resublist *sp) +{ + int i; + + if(mp==0 || ms<=0) + return; + if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp || + (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ + for(i=0; i<ms && i<NSUBEXP; i++) + mp[i] = sp->m[i]; + for(; i<ms; i++) + mp[i].s.sp = mp[i].e.ep = 0; + } +} + +/* + * Note optimization in _renewthread: + * *lp must be pending when _renewthread called; if *l has been looked + * at already, the optimization is a bug. + */ +extern Relist* +_renewthread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + Resublist *sep) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if((sep)->m[0].s.sp < p->se.m[0].s.sp) + p->se = *sep; + return 0; + } + } + p->inst = ip; + p->se = *sep; + (++p)->inst = 0; + return p; +} + +/* + * same as renewthread, but called with + * initial empty start pointer. 
+ */ +extern Relist* +_renewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + char *sp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sp < p->se.m[0].s.sp) { + memset((void *)&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + } + return 0; + } + } + p->inst = ip; + memset((void *)&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + (++p)->inst = 0; + return p; +} + diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c new file mode 100644 index 00000000..6c6939c6 --- /dev/null +++ b/src/libregexp/regcomp.c @@ -0,0 +1,557 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + +#define TRUE 1 +#define FALSE 0 + +/* + * Parser Information + */ +typedef +struct Node +{ + Reinst* first; + Reinst* last; +}Node; + +Reprog RePrOg; + +#define NSTACK 20 +static Node andstack[NSTACK]; +static Node *andp; +static int atorstack[NSTACK]; +static int* atorp; +static int cursubid; /* id of current subexpression */ +static int subidstack[NSTACK]; /* parallel to atorstack */ +static int* subidp; +static int lastwasand; /* Last token was operand */ +static int nbra; +static char* exprp; /* pointer to next character in source expression */ +static int lexdone; +static int nclass; +static Reclass*classp; +static Reinst* freep; +static int errors; +static Rune yyrune; /* last lex'd rune */ +static Reclass*yyclassp; /* last lex'd class */ + +/* predeclared crap */ +static void operator(int); +static void pushand(Reinst*, Reinst*); +static void pushator(int); +static void evaluntil(int); +static int bldcclass(void); + +static jmp_buf regkaboom; + +static void +rcerror(char *s) +{ + errors++; + regerror(s); + longjmp(regkaboom, 1); +} + +static Reinst* +newinst(int t) +{ + freep->type = t; + freep->u2.left = 0; + freep->u1.right = 0; + return freep++; +} + +static void +operand(int t) +{ + Reinst *i; + + if(lastwasand) + operator(CAT); /* catenate is implicit */ + i = 
newinst(t); + + if(t == CCLASS || t == NCCLASS) + i->u1.cp = yyclassp; + if(t == RUNE) + i->u1.r = yyrune; + + pushand(i, i); + lastwasand = TRUE; +} + +static void +operator(int t) +{ + if(t==RBRA && --nbra<0) + rcerror("unmatched right paren"); + if(t==LBRA){ + if(++cursubid >= NSUBEXP) + rcerror ("too many subexpressions"); + nbra++; + if(lastwasand) + operator(CAT); + } else + evaluntil(t); + if(t != RBRA) + pushator(t); + lastwasand = FALSE; + if(t==STAR || t==QUEST || t==PLUS || t==RBRA) + lastwasand = TRUE; /* these look like operands */ +} + +static void +regerr2(char *s, int c) +{ + char buf[100]; + char *cp = buf; + while(*s) + *cp++ = *s++; + *cp++ = c; + *cp = '\0'; + rcerror(buf); +} + +static void +cant(char *s) +{ + char buf[100]; + strcpy(buf, "can't happen: "); + strcat(buf, s); + rcerror(buf); +} + +static void +pushand(Reinst *f, Reinst *l) +{ + if(andp >= &andstack[NSTACK]) + cant("operand stack overflow"); + andp->first = f; + andp->last = l; + andp++; +} + +static void +pushator(int t) +{ + if(atorp >= &atorstack[NSTACK]) + cant("operator stack overflow"); + *atorp++ = t; + *subidp++ = cursubid; +} + +static Node* +popand(int op) +{ + Reinst *inst; + + if(andp <= &andstack[0]){ + regerr2("missing operand for ", op); + inst = newinst(NOP); + pushand(inst,inst); + } + return --andp; +} + +static int +popator(void) +{ + if(atorp <= &atorstack[0]) + cant("operator stack underflow"); + --subidp; + return *--atorp; +} + +static void +evaluntil(int pri) +{ + Node *op1, *op2; + Reinst *inst1, *inst2; + + while(pri==RBRA || atorp[-1]>=pri){ + switch(popator()){ + default: + rcerror("unknown operator in evaluntil"); + break; + case LBRA: /* must have been RBRA */ + op1 = popand('('); + inst2 = newinst(RBRA); + inst2->u1.subid = *subidp; + op1->last->u2.next = inst2; + inst1 = newinst(LBRA); + inst1->u1.subid = *subidp; + inst1->u2.next = op1->first; + pushand(inst1, inst2); + return; + case OR: + op2 = popand('|'); + op1 = popand('|'); + inst2 = 
newinst(NOP); + op2->last->u2.next = inst2; + op1->last->u2.next = inst2; + inst1 = newinst(OR); + inst1->u1.right = op1->first; + inst1->u2.left = op2->first; + pushand(inst1, inst2); + break; + case CAT: + op2 = popand(0); + op1 = popand(0); + op1->last->u2.next = op2->first; + pushand(op1->first, op2->last); + break; + case STAR: + op2 = popand('*'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(inst1, inst1); + break; + case PLUS: + op2 = popand('+'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(op2->first, inst1); + break; + case QUEST: + op2 = popand('?'); + inst1 = newinst(OR); + inst2 = newinst(NOP); + inst1->u2.left = inst2; + inst1->u1.right = op2->first; + op2->last->u2.next = inst2; + pushand(inst1, inst2); + break; + } + } +} + +static Reprog* +optimize(Reprog *pp) +{ + Reinst *inst, *target; + int size; + Reprog *npp; + Reclass *cl; + int diff; + + /* + * get rid of NOOP chains + */ + for(inst=pp->firstinst; inst->type!=END; inst++){ + target = inst->u2.next; + while(target->type == NOP) + target = target->u2.next; + inst->u2.next = target; + } + + /* + * The original allocation is for an area larger than + * necessary. Reallocate to the actual space used + * and then relocate the code. 
+ */ + size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); + npp = (Reprog *)realloc(pp, size); + if(npp==0 || npp==pp) + return pp; + diff = (char *)npp - (char *)pp; + freep = (Reinst *)((char *)freep + diff); + for(inst=npp->firstinst; inst<freep; inst++){ + switch(inst->type){ + case OR: + case STAR: + case PLUS: + case QUEST: + *(char **)&inst->u1.right += diff; + break; + case CCLASS: + case NCCLASS: + *(char **)&inst->u1.right += diff; + cl = inst->u1.cp; + *(char **)&cl->end += diff; + break; + } + *(char **)&inst->u2.left += diff; + } + *(char **)&npp->startinst += diff; + return npp; +} + +#ifdef DEBUG +static void +dumpstack(void){ + Node *stk; + int *ip; + + print("operators\n"); + for(ip=atorstack; ip<atorp; ip++) + print("0%o\n", *ip); + print("operands\n"); + for(stk=andstack; stk<andp; stk++) + print("0%o\t0%o\n", stk->first->type, stk->last->type); +} + +static void +dump(Reprog *pp) +{ + Reinst *l; + Rune *p; + + l = pp->firstinst; + do{ + print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, + l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); + if(l->type == RUNE) + print("\t%C\n", l->r); + else if(l->type == CCLASS || l->type == NCCLASS){ + print("\t["); + if(l->type == NCCLASS) + print("^"); + for(p = l->cp->spans; p < l->cp->end; p += 2) + if(p[0] == p[1]) + print("%C", p[0]); + else + print("%C-%C", p[0], p[1]); + print("]\n"); + } else + print("\n"); + }while(l++->type); +} +#endif + +static Reclass* +newclass(void) +{ + if(nclass >= NCLASS) + regerr2("too many character classes; limit", NCLASS+'0'); + return &(classp[nclass++]); +} + +static int +nextc(Rune *rp) +{ + if(lexdone){ + *rp = 0; + return 1; + } + exprp += chartorune(rp, exprp); + if(*rp == L'\\'){ + exprp += chartorune(rp, exprp); + return 1; + } + if(*rp == 0) + lexdone = 1; + return 0; +} + +static int +lex(int literal, int dot_type) +{ + int quoted; + + quoted = nextc(&yyrune); + if(literal || quoted){ + if(yyrune == 0) + return END; + return RUNE; + } + + 
switch(yyrune){ + case 0: + return END; + case L'*': + return STAR; + case L'?': + return QUEST; + case L'+': + return PLUS; + case L'|': + return OR; + case L'.': + return dot_type; + case L'(': + return LBRA; + case L')': + return RBRA; + case L'^': + return BOL; + case L'$': + return EOL; + case L'[': + return bldcclass(); + } + return RUNE; +} + +static int +bldcclass(void) +{ + int type; + Rune r[NCCRUNE]; + Rune *p, *ep, *np; + Rune rune; + int quoted; + + /* we have already seen the '[' */ + type = CCLASS; + yyclassp = newclass(); + + /* look ahead for negation */ + /* SPECIAL CASE!!! negated classes don't match \n */ + ep = r; + quoted = nextc(&rune); + if(!quoted && rune == L'^'){ + type = NCCLASS; + quoted = nextc(&rune); + *ep++ = L'\n'; + *ep++ = L'\n'; + } + + /* parse class into a set of spans */ + for(; ep<&r[NCCRUNE];){ + if(rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + if(!quoted && rune == L']') + break; + if(!quoted && rune == L'-'){ + if(ep == r){ + rcerror("malformed '[]'"); + return 0; + } + quoted = nextc(&rune); + if((!quoted && rune == L']') || rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + *(ep-1) = rune; + } else { + *ep++ = rune; + *ep++ = rune; + } + quoted = nextc(&rune); + } + + /* sort on span start */ + for(p = r; p < ep; p += 2){ + for(np = p; np < ep; np += 2) + if(*np < *p){ + rune = np[0]; + np[0] = p[0]; + p[0] = rune; + rune = np[1]; + np[1] = p[1]; + p[1] = rune; + } + } + + /* merge spans */ + np = yyclassp->spans; + p = r; + if(r == ep) + yyclassp->end = np; + else { + np[0] = *p++; + np[1] = *p++; + for(; p < ep; p += 2) + if(p[0] <= np[1]){ + if(p[1] > np[1]) + np[1] = p[1]; + } else { + np += 2; + np[0] = p[0]; + np[1] = p[1]; + } + yyclassp->end = np+2; + } + + return type; +} + +static Reprog* +regcomp1(char *s, int literal, int dot_type) +{ + int token; + Reprog *pp; + + /* get memory for the program */ + pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); + if(pp == 0){ + 
regerror("out of memory"); + return 0; + } + freep = pp->firstinst; + classp = pp->class; + errors = 0; + + if(setjmp(regkaboom)) + goto out; + + /* go compile the sucker */ + lexdone = 0; + exprp = s; + nclass = 0; + nbra = 0; + atorp = atorstack; + andp = andstack; + subidp = subidstack; + lastwasand = FALSE; + cursubid = 0; + + /* Start with a low priority operator to prime parser */ + pushator(START-1); + while((token = lex(literal, dot_type)) != END){ + if((token&0300) == OPERATOR) + operator(token); + else + operand(token); + } + + /* Close with a low priority operator */ + evaluntil(START); + + /* Force END */ + operand(END); + evaluntil(START); +#ifdef DEBUG + dumpstack(); +#endif + if(nbra) + rcerror("unmatched left paren"); + --andp; /* points to first and only operand */ + pp->startinst = andp->first; +#ifdef DEBUG + dump(pp); +#endif + pp = optimize(pp); +#ifdef DEBUG + print("start: %d\n", andp->first-pp->firstinst); + dump(pp); +#endif +out: + if(errors){ + free(pp); + pp = 0; + } + return pp; +} + +extern Reprog* +regcomp(char *s) +{ + return regcomp1(s, 0, ANY); +} + +extern Reprog* +regcomplit(char *s) +{ + return regcomp1(s, 1, ANY); +} + +extern Reprog* +regcompnl(char *s) +{ + return regcomp1(s, 0, ANYNL); +} diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h new file mode 100644 index 00000000..a6728b0e --- /dev/null +++ b/src/libregexp/regcomp.h @@ -0,0 +1,74 @@ +/* + * substitution list + */ +#define uchar __reuchar +typedef unsigned char uchar; +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* max character classes per program */ +extern Reprog RePrOg; +#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) + +/* max rune ranges per character class */ +#define NCCRUNE (sizeof(Reclass)/sizeof(Rune)) + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. 
operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatenation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (10*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Instruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*); diff --git a/src/libregexp/regerror.c b/src/libregexp/regerror.c new file mode 100644 index 00000000..2cd1e3e2 --- /dev/null +++ b/src/libregexp/regerror.c @@ -0,0 +1,18 @@ +#include "lib9.h" +#include "regexp9.h" + +/* + * default error handler: write "regerror: " followed by s and a newline + * to file descriptor 2, then exit(1); s is truncated if it would overflow buf + */ +void +regerror(char *s) +{ + char buf[132]; + + strcpy(buf, "regerror: "); + strncat(buf, s, sizeof(buf)-strlen(buf)-2); /* keep room for "\n" and the NUL */ + strcat(buf, "\n"); + write(2, buf, strlen(buf)); + exit(1); +} diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c 
new file mode 100644 index 00000000..c9f1eba2 --- /dev/null +++ b/src/libregexp/regexec.c @@ -0,0 +1,229 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +regexec1(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + char *s; + int i, checkstart; + Rune r, *rp, *ep; + int n; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + char *p; + + match = 0; + checkstart = j->starttype; + if(mp) + for(i=0; i<ms; i++) { + mp[i].s.sp = 0; + mp[i].e.ep = 0; + } + j->relist[0][0].inst = 0; + j->relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = j->starts; + do{ + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = utfrune(s, j->startchar); + if(p == 0) + return match; + s = p; + break; + case BOL: + /* already at the start of a line: let the machine try here */ + if(s == bol || *(s-1) == '\n') + break; + p = utfrune(s, '\n'); + if(p == 0) + return match; + s = p+1; /* first character of the next line */ + break; + } + } + r = *(uchar*)s; + if(r < (Rune)Runeself) + n = 1; + else + n = chartorune(&r, s); + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + if(match == 0) + _renewemptythread(tl, progp->startinst, s); + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ + for(inst = tlp->inst; ; inst = inst->u2.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->u1.r == r){ + if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + return -1; + } + break; + case LBRA: + tlp->se.m[inst->u1.subid].s.sp = s; + continue; + case 
RBRA: + tlp->se.m[inst->u1.subid].e.ep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->eol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->u2.next, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! 
*/ + match = 1; + tlp->se.m[0].e.ep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->eol) + break; + checkstart = j->starttype && nl->inst==0; + s += n; + }while(r); + return match; +} + +/* + * rerun regexec1 with run lists of BIGLISTSIZE entries; + * regexec falls back to this when the LISTSIZE lists overflow + */ +static int +regexec2(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE]; + + /* mark space */ + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + nelem(relist0) - 2; + j->reliste[1] = relist1 + nelem(relist1) - 2; + + return regexec1(progp, bol, mp, ms, j); +} + +/* + * match the compiled program progp against the string bol. + * returns 1 on a match, 0 on no match, -1 if both the small + * and the big run lists overflowed. + */ +extern int +regexec(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ +{ + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.starts = bol; + j.eol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.starts = mp->s.sp; + if(mp->e.ep) + j.eol = mp->e.ep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->u1.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; + + rv = regexec1(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + rv = regexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; +} diff --git a/src/libregexp/regexp9.3 b/src/libregexp/regexp9.3 new file mode 100644 index 00000000..f2603562 --- /dev/null +++ b/src/libregexp/regexp9.3 @@ -0,0 +1,227 @@ +.TH REGEXP9 3 +.de EX +.nf +.ft B +.. +.de EE +.fi +.ft R +.. 
+.de LR +.if t .BR \\$1 \\$2 +.if n .RB ` \\$1 '\\$2 +.. +.de L +.nh +.if t .B \\$1 +.if n .RB ` \\$1 ' +.. +.SH NAME +regcomp, regcomplit, regcompnl, regexec, regsub, regerror \- Plan 9 regular expression library +.SH SYNOPSIS +.B #include <regexp9.h> +.PP +.ta \w'\fLRegprog 'u +.B +Reprog *regcomp(char *exp) +.PP +.B +Reprog *regcomplit(char *exp) +.PP +.B +Reprog *regcompnl(char *exp) +.PP +.nf +.B +int regexec(Reprog *prog, char *string, Resub *match, int msize) +.PP +.nf +.B +void regsub(char *source, char *dest, int dlen, Resub *match, int msize) +.PP +.nf +.B +int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) +.PP +.nf +.B +void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) +.PP +.B +void regerror(char *msg) +.SH DESCRIPTION +.I Regcomp +compiles a +regular expression and returns +a pointer to the generated description. +The space is allocated by +.IR malloc (3) +and may be released by +.IR free . +Regular expressions are exactly as in +.IR regexp9 (7). +.PP +.I Regcomplit +is like +.I regcomp +except that all characters are treated literally. +.I Regcompnl +is like +.I regcomp +except that the +.B . +metacharacter matches all characters, including newlines. +.PP +.I Regexec +matches a null-terminated +.I string +against the compiled regular expression in +.IR prog . +If it matches, +.I regexec +returns +.B 1 +and fills in the array +.I match +with character pointers to the substrings of +.I string +that correspond to the +parenthesized subexpressions of +.IR exp : +.BI match[ i ].sp +points to the beginning and +.BI match[ i ].ep +points just beyond +the end of the +.IR i th +substring. +(Subexpression +.I i +begins at the +.IR i th +left parenthesis, counting from 1.) +Pointers in +.B match[0] +pick out the substring that corresponds to +the whole regular expression. +Unused elements of +.I match +are filled with zeros. +Matches involving +.LR * , +.LR + , +and +.L ? +are extended as far as possible. 
+The number of array elements in +.I match +is given by +.IR msize . +The structure of elements of +.I match +is: +.IP +.EX +typedef struct { + union { + char *sp; + Rune *rsp; + } s; + union { + char *ep; + Rune *rep; + } e; +} Resub; +.EE +.LP +If +.B match[0].s.sp +is nonzero on entry, +.I regexec +starts matching at that point within +.IR string . +If +.B match[0].e.ep +is nonzero on entry, +the last character matched is the one +preceding that point. +.PP +.I Regsub +places in +.I dest +a substitution instance of +.I source +in the context of the last +.I regexec +performed using +.IR match . +Each instance of +.BI \e n\f1, +where +.I n +is a digit, is replaced by the +string delimited by +.BI match[ n ].s.sp +and +.BI match[ n ].e.ep\f1. +Each instance of +.L & +is replaced by the string delimited by +.B match[0].s.sp +and +.BR match[0].e.ep . +The substitution will always be null terminated and +trimmed to fit into dlen bytes. +.PP +.IR Regerror , +called whenever an error is detected in +.IR regcomp , +writes the string +.I msg +on the standard error file and exits. +.I Regerror +can be replaced to perform +special error processing. +If the user supplied +.I regerror +returns rather than exits, +.I regcomp +will return 0. +.PP +.I Rregexec +and +.I rregsub +are variants of +.I regexec +and +.I regsub +that use strings of +.B Runes +instead of strings of +.BR chars . +With these routines, the +.I rsp +and +.I rep +fields of the +.I match +array elements should be used. +.SH "SEE ALSO" +.IR grep (1), +.IR regexp9 (7) +.SH DIAGNOSTICS +.I Regcomp +returns +.B 0 +for an illegal expression +or other failure. +.I Regexec +returns 0 +if +.I string +is not matched. +.SH HISTORY +This particular regular expression was first written by Rob Pike for Plan 9. +It has also appeared as part of the Inferno operating system. +.SH BUGS +There is no way to specify or match a NUL character; NULs terminate patterns and strings. 
diff --git a/src/libregexp/regexp9.7 b/src/libregexp/regexp9.7 new file mode 100644 index 00000000..14a90d0f --- /dev/null +++ b/src/libregexp/regexp9.7 @@ -0,0 +1,150 @@ +.TH REGEXP9 7 +.de EX +.nf +.ft B +.. +.de EE +.fi +.ft R +.. +.de LR +.if t .BR \\$1 \\$2 +.if n .RB ` \\$1 '\\$2 +.. +.de L +.nh +.if t .B \\$1 +.if n .RB ` \\$1 ' +.. +.SH NAME +regexp9 \- Plan 9 regular expression notation +.SH DESCRIPTION +This manual page describes the regular expression +syntax used by the Plan 9 regular expression library +.IR regexp9 (3). +It is the form used by +.IR egrep (1) +before +.I egrep +got complicated. +.PP +A +.I "regular expression" +specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. In many applications +a delimiter character, commonly +.LR / , +bounds a regular expression. +In the following specification for regular expressions +the word `character' means any character (rune) but newline. +.PP +The syntax for a regular expression +.B e0 +is +.IP +.EX +e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')' + +e2: e3 + | e2 REP + +REP: '*' | '+' | '?' + +e1: e2 + | e1 e2 + +e0: e1 + | e0 '|' e1 +.EE +.PP +A +.B literal +is any non-metacharacter, or a metacharacter +(one of +.BR .*+?[]()|\e^$ ), +or the delimiter +preceded by +.LR \e . +.PP +A +.B charclass +is a nonempty string +.I s +bracketed +.BI [ \|s\| ] +(or +.BI [^ s\| ]\fR); +it matches any character in (or not in) +.IR s . +A negated character class never +matches newline. +A substring +.IB a - b\f1, +with +.I a +and +.I b +in ascending +order, stands for the inclusive +range of +characters between +.I a +and +.IR b . +In +.IR s , +the metacharacters +.LR - , +.LR ] , +an initial +.LR ^ , +and the regular expression delimiter +must be preceded by a +.LR \e ; +other metacharacters +have no special meaning and +may appear unescaped. +.PP +A +.L . +matches any character. 
+.PP +A +.L ^ +matches the beginning of a line; +.L $ +matches the end of the line. +.PP +The +.B REP +operators match zero or more +.RB ( * ), +one or more +.RB ( + ), +zero or one +.RB ( ? ), +instances respectively of the preceding regular expression +.BR e2 . +.PP +A concatenated regular expression, +.BR "e1\|e2" , +matches a match to +.B e1 +followed by a match to +.BR e2 . +.PP +An alternative regular expression, +.BR "e0\||\|e1" , +matches either a match to +.B e0 +or a match to +.BR e1 . +.PP +A match to any part of a regular expression +extends as far as possible without preventing +a match to the remainder of the regular expression. +.SH "SEE ALSO" +.IR regexp9 (3) diff --git a/src/libregexp/regexp9.h b/src/libregexp/regexp9.h new file mode 100644 index 00000000..e25658a3 --- /dev/null +++ b/src/libregexp/regexp9.h @@ -0,0 +1,71 @@ +#ifndef _REGEXP9H_ + +#define _REGEXP9H_ 1 +#include <utf.h> + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }s; + union + { + char *ep; + Rune *rep; + }e; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }u1; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }u2; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp(char*); +extern Reprog *regcomplit(char*); +extern Reprog *regcompnl(char*); +extern void 
regerror(char*); +extern int regexec(Reprog*, char*, Resub*, int); +extern void regsub(char*, char*, int, Resub*, int); + +extern int rregexec(Reprog*, Rune*, Resub*, int); +extern void rregsub(Rune*, Rune*, Resub*, int); + +#endif diff --git a/src/libregexp/regsub.c b/src/libregexp/regsub.c new file mode 100644 index 00000000..6de2c957 --- /dev/null +++ b/src/libregexp/regsub.c @@ -0,0 +1,69 @@ +#include "lib9.h" +#include "regexp9.h" + +/* + * substitute into one string using the matches from the last regexec(). + * \0 through \9 expand to subexpression n, & expands to the whole match (mp[0]), + * \\ yields a backslash, any other \c yields c, and a trailing backslash is dropped. + * A reference expands to nothing when mp is 0, the index is not below ms, + * or the element's start pointer is 0. + * At most dlen-1 bytes are stored in dp, followed by a terminating NUL. + */ +extern void +regsub(char *sp, /* source string */ + char *dp, /* destination string */ + int dlen, /* size of dp in bytes, NUL included */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ +{ + char *ssp, *ep; + int i; + + ep = dp+dlen-1; /* last byte of dp, kept for the NUL */ + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + /* test mp and ms first: mp[i] must not be read when it does not exist */ + if(mp!=0 && ms>i && mp[i].s.sp != 0) + for(ssp = mp[i].s.sp; + ssp < mp[i].e.ep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; /* step back so the loop test sees the NUL */ + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && ms>0 && mp[0].s.sp != 0) + for(ssp = mp[0].s.sp; + ssp < mp[0].e.ep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else + if(dp < ep) + *dp++ = *sp; + sp++; + } + *dp = '\0'; +} diff --git a/src/libregexp/rpm.spec b/src/libregexp/rpm.spec new file mode 100644 index 00000000..f4c92d66 --- /dev/null +++ b/src/libregexp/rpm.spec @@ -0,0 +1,34 @@ +Summary: Simple regular expression library from Plan 9 +Name: libregexp9 +Version: 2.0 +Release: 1 +Group: Development/C +Copyright: Public Domain +Packager: Russ Cox <rsc@post.harvard.edu> +Source: http://pdos.lcs.mit.edu/~rsc/software/libregexp9-2.0.tgz +URL: http://pdos.lcs.mit.edu/~rsc/software/#libregexp9 +Requires: libfmt libutf + +%description +Libregexp9 is a port of 
Plan 9's regexp library.
+It is small and simple and provides the traditional
+extended regular expressions (as opposed to the
+current extended regular expressions, which add {}
+and various \x character classes, among other
+complications).
+
+http://plan9.bell-labs.com/magic/man2html/2/regexp
+%prep
+%setup
+
+%build
+make
+
+%install
+make install
+
+%files
+/usr/local/include/regexp9.h
+/usr/local/lib/libregexp9.a
+/usr/local/man/man3/regexp9.3
+/usr/local/man/man7/regexp9.7
diff --git a/src/libregexp/rregaux.c b/src/libregexp/rregaux.c
new file mode 100644
index 00000000..f4cb0068
--- /dev/null
+++ b/src/libregexp/rregaux.c
@@ -0,0 +1,26 @@
+#include "lib9.h"
+#include "regexp9.h"
+#include "regcomp.h"
+/* Put ip on thread list lp with an empty match set starting at rsp; returns the new end of the list, or 0 if ip was already on it. */
+extern Relist*
+_rrenewemptythread(Relist *lp,	/* _relist to add to */
+	Reinst *ip,		/* instruction to add */
+	Rune *rsp)		/* start of match recorded for the new thread */
+{
+	Relist *p;
+
+	for(p=lp; p->inst; p++){
+		if(p->inst == ip){
+			if(rsp < p->se.m[0].s.rsp) {	/* existing thread is reset only for an earlier start */
+				memset((void *)&p->se, 0, sizeof(p->se));
+				p->se.m[0].s.rsp = rsp;
+			}
+			return 0;
+		}
+	}
+	p->inst = ip;	/* p is the terminating entry: reuse it for the new thread */
+	memset((void *)&p->se, 0, sizeof(p->se));
+	p->se.m[0].s.rsp = rsp;
+	(++p)->inst = 0;	/* new end-of-list marker */
+	return p;
+}
diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
new file mode 100644
index 00000000..e96c9721
--- /dev/null
+++ b/src/libregexp/rregexec.c
@@ -0,0 +1,213 @@
+#include "lib9.h"
+#include "regexp9.h"
+#include "regcomp.h"
+
+/*
+ *  return	0 if no match
+ *		>0 if a match
+ *		<0 if we ran out of _relist space
+ */
+static int
+rregexec1(Reprog *progp,	/* program to run */
+	Rune *bol,		/* string to run machine on */
+	Resub *mp,		/* subexpression elements */
+	int ms,			/* number of elements at mp */
+	Reljunk *j)		/* start/end positions, first-char hint and list storage */
+{
+	int flag=0;		/* index into j->relist of the current list */
+	Reinst *inst;
+	Relist *tlp;
+	Rune *s;
+	int i, checkstart;
+	Rune r, *rp, *ep;
+	Relist* tl;		/* This list, next list */
+	Relist* nl;
+	Relist* tle;		/* ends of this and next list */
+	Relist* nle;
+	int match;
+
+	match = 0;
+	checkstart = j->startchar;	/* rregexec sets startchar only for a RUNE start */
+	if(mp)
+		for(i=0; i<ms; i++) {
+			mp[i].s.rsp = 0;
+			mp[i].e.rep = 0;
+		}
+	j->relist[0][0].inst = 0;
+	j->relist[1][0].inst = 0;
+
+	/* Execute machine once for each character, including terminal NUL */
+	s = j->rstarts;
+	do{
+
+		/* fast check for first char */
+		if(checkstart) {
+			switch(j->starttype) {
+			case RUNE:
+				while(*s != j->startchar) {
+					if(*s == 0)
+						return match;
+					s++;
+				}
+				break;
+			case BOL:	/* NOTE(review): rregexec leaves startchar 0 for BOL, so checkstart is 0 and this arm looks unreachable from there */
+				if(s == bol)
+					break;
+				while(*s != '\n') {
+					if(*s == 0)
+						return match;
+					s++;
+				}
+				break;
+			}
+		}
+
+		r = *s;
+
+		/* switch run lists */
+		tl = j->relist[flag];
+		tle = j->reliste[flag];
+		nl = j->relist[flag^=1];	/* flag now names the next list */
+		nle = j->reliste[flag];
+		nl->inst = 0;
+
+		/* Add first instruction to current list */
+		_rrenewemptythread(tl, progp->startinst, s);
+
+		/* Execute machine until current list is empty */
+		for(tlp=tl; tlp->inst; tlp++){
+			for(inst=tlp->inst; ; inst = inst->u2.next){
+				switch(inst->type){
+				case RUNE:	/* regular character */
+					if(inst->u1.r == r)
+						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+							return -1;
+					break;
+				case LBRA:	/* record subexpression start, then follow u2.next */
+					tlp->se.m[inst->u1.subid].s.rsp = s;
+					continue;
+				case RBRA:	/* record subexpression end, then follow u2.next */
+					tlp->se.m[inst->u1.subid].e.rep = s;
+					continue;
+				case ANY:
+					if(r != '\n')
+						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+							return -1;
+					break;
+				case ANYNL:
+					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+							return -1;
+					break;
+				case BOL:
+					if(s == bol || *(s-1) == '\n')
+						continue;
+					break;
+				case EOL:
+					if(s == j->reol || r == 0 || r == '\n')
+						continue;
+					break;
+				case CCLASS:	/* spans holds inclusive [lo, hi] rune pairs */
+					ep = inst->u1.cp->end;
+					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
+						if(r >= rp[0] && r <= rp[1]){
+							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+								return -1;
+							break;
+						}
+					break;
+				case NCCLASS:	/* advance only if r is in none of the spans */
+					ep = inst->u1.cp->end;
+					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
+						if(r >= rp[0] && r <= rp[1])
+							break;
+					if(rp == ep)
+						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+							return -1;
+					break;
+				case OR:
+					/* evaluate right choice later */
+					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
+						return -1;
+					/* efficiency: advance and re-evaluate */
+					continue;
+				case END:	/* Match! */
+					match = 1;
+					tlp->se.m[0].e.rep = s;
+					if(mp != 0)
+						_renewmatch(mp, ms, &tlp->se);
+					break;
+				}
+				break;
+			}
+		}
+		if(s == j->reol)	/* caller-supplied end position reached */
+			break;
+		checkstart = j->startchar && nl->inst==0;	/* fast scan again only when no thread is pending */
+		s++;
+	}while(r);	/* the terminating NUL has been run through the machine too */
+	return match;
+}
+/* Run rregexec1 with thread lists of 5*LISTSIZE entries; rregexec calls this after the LISTSIZE lists overflow. */
+static int
+rregexec2(Reprog *progp,	/* program to run */
+	Rune *bol,	/* string to run machine on */
+	Resub *mp,	/* subexpression elements */
+	int ms,		/* number of elements at mp */
+	Reljunk *j
+)
+{
+	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
+
+	/* mark space */
+	j->relist[0] = relist0;
+	j->relist[1] = relist1;
+	j->reliste[0] = relist0 + nelem(relist0) - 2;	/* end marker sits 2 entries before the array end */
+	j->reliste[1] = relist1 + nelem(relist1) - 2;
+
+	return rregexec1(progp, bol, mp, ms, j);
+}
+/* Run progp over the rune string bol: 0 if no match, >0 if a match, -1 if the enlarged thread lists overflow as well. */
+extern int
+rregexec(Reprog *progp,	/* program to run */
+	Rune *bol,	/* string to run machine on */
+	Resub *mp,	/* subexpression elements */
+	int ms)		/* number of elements at mp */
+{
+	Reljunk j;
+	Relist relist0[LISTSIZE], relist1[LISTSIZE];
+	int rv;
+
+	/*
+	 * use user-specified starting/ending location if specified
+	 */
+	j.rstarts = bol;
+	j.reol = 0;
+	if(mp && ms>0){
+		if(mp->s.sp)
+			j.rstarts = mp->s.rsp;
+		if(mp->e.ep)
+			j.reol = mp->e.rep;
+	}
+	j.starttype = 0;
+	j.startchar = 0;
+	if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
+		j.starttype = RUNE;
+		j.startchar = progp->startinst->u1.r;
+	}
+	if(progp->startinst->type == BOL)
+		j.starttype = BOL;
+
+	/* mark space */
+	j.relist[0] = relist0;
+	j.relist[1] = relist1;
+	j.reliste[0] = relist0 + nelem(relist0) - 2;	/* end marker sits 2 entries before the array end */
+	j.reliste[1] = relist1 + nelem(relist1) - 2;
+
+	rv = rregexec1(progp, bol, mp, ms, &j);
+	if(rv >= 0)	/* rv < 0 means the lists overflowed: retry with bigger ones */
+		return rv;
+	rv = rregexec2(progp, bol, mp, ms, &j);
+	if(rv >= 0)
+		return rv;
+	return -1;
+}
diff --git a/src/libregexp/rregsub.c b/src/libregexp/rregsub.c
new file mode 100644
index 00000000..15f3c174
--- 
/dev/null
+++ b/src/libregexp/rregsub.c
@@ -0,0 +1,68 @@
+#include "lib9.h"
+#include "regexp9.h"
+
+/*
+ * Substitute into one rune string using the matches from the last
+ * rregexec().
+ *
+ * Copies sp to dp, replacing \0 through \9 with the text of the
+ * corresponding element of mp and & with element 0 (the whole match).
+ * \\ yields a single backslash, any other \c yields c, and a lone
+ * backslash at the end of sp is dropped.  Elements that are out of
+ * range (>= ms) or unset expand to nothing.
+ *
+ * There is no length argument: dp must be large enough for the whole
+ * expansion plus the terminating zero rune.
+ */
+extern void
+rregsub(Rune *sp,	/* source string */
+	Rune *dp,	/* destination string */
+	Resub *mp,	/* subexpression elements */
+	int ms)		/* number of elements pointed to by mp */
+{
+	Rune *ssp;
+	int i;
+
+	while(*sp != '\0'){
+		if(*sp == '\\'){
+			switch(*++sp){
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				i = *sp-'0';
+				/* validate mp and i before touching mp[i] */
+				if(mp != 0 && ms > i && mp[i].s.rsp != 0)
+					for(ssp = mp[i].s.rsp;
+					     ssp < mp[i].e.rep;
+					     ssp++)
+						*dp++ = *ssp;
+				break;
+			case '\\':
+				*dp++ = '\\';
+				break;
+			case '\0':
+				/* trailing backslash: back up so the increment below lands on the zero rune */
+				sp--;
+				break;
+			default:
+				*dp++ = *sp;
+				break;
+			}
+		}else if(*sp == '&'){
+			if(mp != 0 && ms > 0 && mp[0].s.rsp != 0)
+				for(ssp = mp[0].s.rsp;
+				     ssp < mp[0].e.rep; ssp++)
+					*dp++ = *ssp;
+		}else
+			*dp++ = *sp;
+		sp++;
+	}
+	*dp = '\0';
+}
diff --git a/src/libregexp/test.c b/src/libregexp/test.c
new file mode 100644
index 00000000..83533ee6
--- /dev/null
+++ b/src/libregexp/test.c
@@ -0,0 +1,60 @@
+#include "lib9.h"
+#include <regexp9.h>
+
+struct x
+{
+	char *re;
+	char *s;
+	Reprog *p;
+};
+
+struct x t[] = {
+	{ "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 },
+	{ "^local!(.*)$", "/mail/box/\\1/mbox", 0 },
+	{ "^plan9!(.*)$", "\\1", 0 },
+	{ "^helix!(.*)$", "\\1", 0 },
+	{ "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 },
+	{ "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 },
+	{ "^[^!]*\\.[^!]*!.*$", "inet!&", 0 },
+	{ "^\xE2\x98\xBA$", "smiley", 0 },
+	{ "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 },
+	{ "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 },
+	{ 0, 0, 0 },
+};
+
+/*
+ * Match av[1] against every pattern in t and, where it matches,
+ * print the substitution that regsub produces from the paired template.
+ */
+int
+main(int ac, char **av)
+{
+	Resub rs[10];
+	char dst[128];
+	struct x *tp;
+
+	if(ac < 2){
+		print("usage: test string\n");
+		exit(1);
+	}
+
+	for(tp = t; tp->re; tp++){
+		tp->p = regcomp(tp->re);
+		if(tp->p == 0){
+			print("regcomp failed: %s\n", tp->re);
+			exit(1);
+		}
+	}
+
+	for(tp = t; tp->re; tp++){
+		print("%s VIA %s", av[1], tp->re);
+		/* clear rs: rregexec (and presumably regexec) reads a nonzero rs[0] as start/end hints */
+		memset(rs, 0, sizeof rs);
+		if(regexec(tp->p, av[1], rs, 10)){
+			regsub(tp->s, dst, sizeof dst, rs, 10);
+			print(" sub %s -> %s", tp->s, dst);
+		}
+		print("\n");
+	}
+	exit(0);
+}
diff --git a/src/libregexp/test2.c b/src/libregexp/test2.c
new file mode 100644
index 00000000..150953e4
--- /dev/null
+++ b/src/libregexp/test2.c
@@ -0,0 +1,34 @@
+#include "lib9.h"
+#include <regexp9.h>
+
+
+/*
+ * Run the negated class [^a-z] over a newline and over "0" and print
+ * the subject address together with the match start and end.
+ */
+int
+main(int ac, char **av)
+{
+	Resub rs[10];
+	Reprog *p;
+	char *s;
+
+	p = regcomp("[^a-z]");
+	if(p == 0){
+		print("regcomp failed\n");
+		exit(1);
+	}
+	s = "\n";
+	/* start from a clean rs so stack garbage is not read as a start/end hint */
+	memset(rs, 0, sizeof rs);
+	if(regexec(p, s, rs, 10))
+		print("%s %lux %lux %lux\n", s, (unsigned long)s,
+			(unsigned long)rs[0].s.sp, (unsigned long)rs[0].e.ep);
+	s = "0";
+	/* the previous match points into the old subject; clear it before reuse */
+	memset(rs, 0, sizeof rs);
+	if(regexec(p, s, rs, 10))
+		print("%s %lux %lux %lux\n", s, (unsigned long)s,
+			(unsigned long)rs[0].s.sp, (unsigned long)rs[0].e.ep);
+	exit(0);
+}
diff --git a/src/libutf/LICENSE b/src/libutf/LICENSE
new file mode 100644
index 00000000..ad76cd52
--- /dev/null
+++ b/src/libutf/LICENSE
@@ -0,0 +1,13 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 1998-2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. s;/.*;;; s; ;;g'}
+
+# this works in bsd make
+SYSNAME!=uname
+OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'
+
+# the gnu rules will mess up bsd but not vice versa,
+# hence the gnu rules come first.
+
+include Make.$(SYSNAME)-$(OBJTYPE)
+
+PREFIX=/usr/local
+
+NUKEFILES=
+
+TGZFILES=
+
+LIB=libutf.a
+VERSION=2.0
+PORTPLACE=devel/libutf
+NAME=libutf
+
+OFILES=\
+	rune.$O\
+	runestrcat.$O\
+	runestrchr.$O\
+	runestrcmp.$O\
+	runestrcpy.$O\
+	runestrdup.$O\
+	runestrlen.$O\
+	runestrecpy.$O\
+	runestrncat.$O\
+	runestrncmp.$O\
+	runestrncpy.$O\
+	runestrrchr.$O\
+	runestrstr.$O\
+	runetype.$O\
+	utfecpy.$O\
+	utflen.$O\
+	utfnlen.$O\
+	utfrrune.$O\
+	utfrune.$O\
+	utfutf.$O\
+
+HFILES=\
+	utf.h\
+
+all: $(LIB)
+
+install: $(LIB)
+	test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3
+	test -d $(PREFIX)/man/man7 || mkdir $(PREFIX)/man/man7
+	install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3
+	install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7
+	install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3
+	install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3
+	install -c -m 0644 utf.h $(PREFIX)/include/utf.h
+	install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
+
+$(LIB): $(OFILES)
+	$(AR) $(ARFLAGS) $(LIB) $(OFILES)
+
+NUKEFILES+=$(LIB)
+.c.$O:
+	$(CC) $(CFLAGS) -I$(PREFIX)/include $*.c
+
+%.$O: %.c
+	$(CC) $(CFLAGS) -I$(PREFIX)/include $*.c
+
+
+$(OFILES): $(HFILES)
+
+tgz:
+	rm -rf $(NAME)-$(VERSION)
+	mkdir $(NAME)-$(VERSION)
+	cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION)
+	tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz
+	rm -rf $(NAME)-$(VERSION)
+
+clean:
+	rm -f $(OFILES) $(LIB)
+
+nuke:
+	rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES)
+
+rpm:
+	make tgz
+	cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES
+	rpm -ba rpm.spec
+	cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm .
+	cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm .
+	scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software
+
+PORTDIR=/usr/ports/$(PORTPLACE)
+
+ports:
+	make tgz
+	rm -rf $(PORTDIR)
+	mkdir $(PORTDIR)
+	cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles
+	cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}')
+	(cd $(PORTDIR); make makesum)
+	(cd $(PORTDIR); make)
+	(cd $(PORTDIR); /usr/local/bin/portlint)
+	rm -rf $(PORTDIR)/work
+	shar `find $(PORTDIR)` > ports.shar
+	(cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz
+	scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software
+
+# these targets name actions, not files; make only honors the upper-case spelling
+.PHONY: all clean nuke install tgz rpm ports
diff --git a/src/libutf/Makefile.BOT b/src/libutf/Makefile.BOT
new file mode 100644
index 00000000..ad852a1f
--- /dev/null
+++ b/src/libutf/Makefile.BOT
@@ -0,0 +1,48 @@
+.c.$O:
+	$(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $*.c
+
+%.$O: %.c
+	$(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $*.c
+
+
+$(OFILES): $(HFILES)
+
+tgz:
+	rm -rf $(NAME)-$(VERSION)
+	mkdir $(NAME)-$(VERSION)
+	cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION)
+	tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz
+	rm -rf $(NAME)-$(VERSION)
+
+clean:
+	rm -f $(OFILES) $(LIB)
+
+nuke:
+	rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES)
+
+rpm:
+	make tgz
+	cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES
+	rpm -ba rpm.spec
+	cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm .
+	cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm .
+	scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software
+
+PORTDIR=/usr/ports/$(PORTPLACE)
+
+ports:
+	make tgz
+	rm -rf $(PORTDIR)
+	mkdir $(PORTDIR)
+	cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles
+	cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}')
+	(cd $(PORTDIR); make makesum)
+	(cd $(PORTDIR); make)
+	(cd $(PORTDIR); /usr/local/bin/portlint)
+	rm -rf $(PORTDIR)/work
+	shar `find $(PORTDIR)` > ports.shar
+	(cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz
+	scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software
+
+# these targets name actions, not files; make only honors the upper-case spelling
+.PHONY: all clean nuke install tgz rpm ports
diff --git a/src/libutf/Makefile.CMD b/src/libutf/Makefile.CMD
new file mode 100644
index 00000000..a817fd3c
--- /dev/null
+++ b/src/libutf/Makefile.CMD
@@ -0,0 +1,5 @@
+
+$(TARG): $(OFILES)
+	$(CC) -o $(TARG) $(OFILES) -L$(PREFIX)/lib -lframe -ldraw -lthread -l9 -lregexp9 -lbio -lfmt -lutf -L/usr/X11R6/lib -lX11 -lm
+
+
diff --git a/src/libutf/Makefile.LIB b/src/libutf/Makefile.LIB
new file mode 100644
index 00000000..9ebf5606
--- /dev/null
+++ b/src/libutf/Makefile.LIB
@@ -0,0 +1,4 @@
+$(LIB): $(OFILES)
+	$(AR) $(ARFLAGS) $(LIB) $(OFILES)
+
+NUKEFILES+=$(LIB)
diff --git a/src/libutf/Makefile.MID b/src/libutf/Makefile.MID
new file mode 100644
index 00000000..9c142d44
--- /dev/null
+++ b/src/libutf/Makefile.MID
@@ -0,0 +1,42 @@
+LIB=libutf.a
+VERSION=2.0
+PORTPLACE=devel/libutf
+NAME=libutf
+
+OFILES=\
+	rune.$O\
+	runestrcat.$O\
+	runestrchr.$O\
+	runestrcmp.$O\
+	runestrcpy.$O\
+	runestrdup.$O\
+	runestrlen.$O\
+	runestrecpy.$O\
+	runestrncat.$O\
+	runestrncmp.$O\
+	runestrncpy.$O\
+	runestrrchr.$O\
+	runestrstr.$O\
+	runetype.$O\
+	utfecpy.$O\
+	utflen.$O\
+	utfnlen.$O\
+	utfrrune.$O\
+	utfrune.$O\
+	utfutf.$O\
+
+HFILES=\
+	utf.h\
+
+all: $(LIB)
+
+install: $(LIB)
+	test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3
+	test -d $(PREFIX)/man/man7 || mkdir $(PREFIX)/man/man7
+	install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3
+	install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7
+	install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3
+	install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3
+	install -c -m 0644 utf.h $(PREFIX)/include/utf.h
+	install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB)
+
diff --git a/src/libutf/Makefile.TOP b/src/libutf/Makefile.TOP
new file mode 100644
index 00000000..f6abc8c1
--- /dev/null
+++ b/src/libutf/Makefile.TOP
@@ -0,0 +1,20 @@
+
+# this works in gnu make
+SYSNAME:=${shell uname}
+OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'}
+
+# this works in bsd make
+SYSNAME!=uname
+OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'
+
+# the gnu rules will mess up bsd but not vice versa,
+# hence the gnu rules come first.
+
+include Make.$(SYSNAME)-$(OBJTYPE)
+
+PREFIX=/usr/local
+
+NUKEFILES=
+
+TGZFILES=
+
diff --git a/src/libutf/NOTICE b/src/libutf/NOTICE
new file mode 100644
index 00000000..ad76cd52
--- /dev/null
+++ b/src/libutf/NOTICE
@@ -0,0 +1,13 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ * Copyright (c) 1998-2002 by Lucent Technologies.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY. 
IT'S HERE TO GET BY PORTLINT +# $FreeBSD: ports/devel/libfmt/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $ + +PORTNAME= libutf +PORTVERSION= 2.0 +CATEGORIES= devel +MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/ +EXTRACT_SUFX= .tgz + +MAINTAINER= rsc@post.harvard.edu + +MAN3= rune.3 runestrcat.3 isalpharune.3 +MAN7= utf.7 + +USE_REINPLACE=yes + +.include <bsd.port.pre.mk> + +post-patch: + ${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile + +.include <bsd.port.post.mk> +--- pkg-comment --- +UTF8 support library from Plan 9 +--- pkg-descr --- +UTF8 support library from Plan 9. + +WWW: http://pdos.lcs.mit.edu/~rsc/software/#libutf +http://plan9.bell-labs.com/magic/man2html/3/rune + +Russ Cox +rsc@post.harvard.edu +--- pkg-plist --- +lib/libutf.a +include/utf.h +--- /dev/null --- +This is just a way to make sure blank lines don't +creep into pkg-plist. diff --git a/src/libutf/isalpharune.3 b/src/libutf/isalpharune.3 new file mode 100644 index 00000000..f9f58cde --- /dev/null +++ b/src/libutf/isalpharune.3 @@ -0,0 +1,47 @@ +.TH ISALPHARUNE 3 +.SH NAME +isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B +int isalpharune(Rune c) +.PP +.B +int islowerrune(Rune c) +.PP +.B +int isspacerune(Rune c) +.PP +.B +int istitlerune(Rune c) +.PP +.B +int isupperrune(Rune c) +.PP +.B +Rune tolowerrune(Rune c) +.PP +.B +Rune totitlerune(Rune c) +.PP +.B +Rune toupperrune(Rune c) +.SH DESCRIPTION +These routines examine and operate on Unicode characters, +in particular a subset of their properties as defined in the Unicode standard. +Unicode defines some characters as alphabetic and specifies three cases: +upper, lower, and title. +Analogously to +.IR ctype (3) +for +.SM ASCII\c +, +these routines +test types and modify cases for Unicode characters. +The names are self-explanatory. 
+.PP +The case-conversion routines return the character unchanged if it has no case. +.SH "SEE ALSO +.IR ctype (3) , +.IR "The Unicode Standard" . diff --git a/src/libutf/lib9.h b/src/libutf/lib9.h new file mode 100644 index 00000000..e6128ae4 --- /dev/null +++ b/src/libutf/lib9.h @@ -0,0 +1,17 @@ +#include <string.h> +#include "utf.h" + +#define nil ((void*)0) + +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + diff --git a/src/libutf/mkfile b/src/libutf/mkfile new file mode 100644 index 00000000..9893a06d --- /dev/null +++ b/src/libutf/mkfile @@ -0,0 +1,9 @@ +all:V: Makefile Make.FreeBSD-386 Make.Linux-386 Make.NetBSD-386 Make.HP-UX-9000 Make.OSF1-alpha \ + Make.SunOS-sun4u Make.SunOS-sun4u-cc Make.SunOS-sun4u-gcc \ + Make.Darwin-PowerMacintosh + +Makefile:D: ../libutf/Makefile.TOP Makefile.MID ../libutf/Makefile.LIB ../libutf/Makefile.BOT + cat $prereq >$target + +Make.%: ../libutf/Make.% + cp $prereq $target diff --git a/src/libutf/rpm.spec b/src/libutf/rpm.spec new file mode 100644 index 00000000..74127833 --- /dev/null +++ b/src/libutf/rpm.spec @@ -0,0 +1,28 @@ +Summary: Port of Plan 9's UTF8 support functions +Name: libutf +Version: 2.0 +Release: 1 +Group: Development/C +Copyright: Public Domain +Packager: Russ Cox <rsc@post.harvard.edu> +Source: http://pdos.lcs.mit.edu/~rsc/software/libutf-2.0.tgz +URL: http://pdos.lcs.mit.edu/~rsc/software/#libutf + +%description +Libutf is a port of Plan 9's UTF8 support functions. 
+%prep +%setup + +%build +make + +%install +make install + +%files +/usr/local/include/utf.h +/usr/local/lib/libutf.a +/usr/local/man/man3/runestrcat.3 +/usr/local/man/man3/isalpharune.3 +/usr/local/man/man3/rune.3 +/usr/local/man/man7/utf.7 diff --git a/src/libutf/rune.3 b/src/libutf/rune.3 new file mode 100644 index 00000000..7ce978ae --- /dev/null +++ b/src/libutf/rune.3 @@ -0,0 +1,187 @@ +.TH RUNE 3 +.SH NAME +runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion +.SH SYNOPSIS +.ta \w'\fLchar*xx'u +.B #include <utf.h> +.PP +.B +int runetochar(char *s, Rune *r) +.PP +.B +int chartorune(Rune *r, char *s) +.PP +.B +int runelen(long r) +.PP +.B +int runenlen(Rune *r, int n) +.PP +.B +int fullrune(char *s, int n) +.PP +.B +char* utfecpy(char *s1, char *es1, char *s2) +.PP +.B +int utflen(char *s) +.PP +.B +int utfnlen(char *s, long n) +.PP +.B +char* utfrune(char *s, long c) +.PP +.B +char* utfrrune(char *s, long c) +.PP +.B +char* utfutf(char *s1, char *s2) +.SH DESCRIPTION +These routines convert to and from a +.SM UTF +byte stream and runes. +.PP +.I Runetochar +copies one rune at +.I r +to at most +.B UTFmax +bytes starting at +.I s +and returns the number of bytes copied. +.BR UTFmax , +defined as +.B 3 +in +.BR <utf.h> , +is the maximum number of bytes required to represent a rune. +.PP +.I Chartorune +copies at most +.B UTFmax +bytes starting at +.I s +to one rune at +.I r +and returns the number of bytes copied. +If the input is not exactly in +.SM UTF +format, +.I chartorune +will convert to 0x80 and return 1. +.PP +.I Runelen +returns the number of bytes +required to convert +.I r +into +.SM UTF. +.PP +.I Runenlen +returns the number of bytes +required to convert the +.I n +runes pointed to by +.I r +into +.SM UTF. +.PP +.I Fullrune +returns 1 if the string +.I s +of length +.I n +is long enough to be decoded by +.I chartorune +and 0 otherwise. 
+This does not guarantee that the string +contains a legal +.SM UTF +encoding. +This routine is used by programs that +obtain input a byte at +a time and need to know when a full rune +has arrived. +.PP +The following routines are analogous to the +corresponding string routines with +.B utf +substituted for +.B str +and +.B rune +substituted for +.BR chr . +.PP +.I Utfecpy +copies UTF sequences until a null sequence has been copied, but writes no +sequences beyond +.IR es1 . +If any sequences are copied, +.I s1 +is terminated by a null sequence, and a pointer to that sequence is returned. +Otherwise, the original +.I s1 +is returned. +.PP +.I Utflen +returns the number of runes that +are represented by the +.SM UTF +string +.IR s . +.PP +.I Utfnlen +returns the number of complete runes that +are represented by the first +.I n +bytes of +.SM UTF +string +.IR s . +If the last few bytes of the string contain an incompletely coded rune, +.I utfnlen +will not count them; in this way, it differs from +.IR utflen , +which includes every byte of the string. +.PP +.I Utfrune +.RI ( utfrrune ) +returns a pointer to the first (last) +occurrence of rune +.I c +in the +.SM UTF +string +.IR s , +or 0 if +.I c +does not occur in the string. +The NUL byte terminating a string is considered to +be part of the string +.IR s . +.PP +.I Utfutf +returns a pointer to the first occurrence of +the +.SM UTF +string +.I s2 +as a +.SM UTF +substring of +.IR s1 , +or 0 if there is none. +If +.I s2 +is the null string, +.I utfutf +returns +.IR s1 . +.SH HISTORY +These routines were written by Rob Pike and Ken Thompson +and first appeared in Plan 9. +.SH SEE ALSO +.IR utf (7), +.IR tcs (1) diff --git a/src/libutf/rune.c b/src/libutf/rune.c new file mode 100644 index 00000000..e1aaa9be --- /dev/null +++ b/src/libutf/rune.c @@ -0,0 +1,177 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +/* + * alpha ranges - + * only covers ranges not in lower||upper + */ +static +Rune __alpha2[] = +{ + 0x00d8, 0x00f6, /* Ø - ö */ + 0x00f8, 0x01f5, /* ø - ǵ */ + 0x0250, 0x02a8, /* ɐ - ʨ */ + 0x038e, 0x03a1, /* Ύ - Ρ */ + 0x03a3, 0x03ce, /* Σ - ώ */ + 0x03d0, 0x03d6, /* ϐ - ϖ */ + 0x03e2, 0x03f3, /* Ϣ - ϳ */ + 0x0490, 0x04c4, /* Ґ - ӄ */ + 0x0561, 0x0587, /* ա - և */ + 0x05d0, 0x05ea, /* א - ת */ + 0x05f0, 0x05f2, /* װ - ײ */ + 0x0621, 0x063a, /* ء - غ */ + 0x0640, 0x064a, /* ـ - ي */ + 0x0671, 0x06b7, /* ٱ - ڷ */ + 0x06ba, 0x06be, /* ں - ھ */ + 0x06c0, 0x06ce, /* ۀ - ێ */ + 0x06d0, 0x06d3, /* ې - ۓ */ + 0x0905, 0x0939, /* अ - ह */ + 0x0958, 0x0961, /* क़ - ॡ */ + 0x0985, 0x098c, /* অ - ঌ */ + 0x098f, 0x0990, /* এ - ঐ */ + 0x0993, 0x09a8, /* ও - ন */ + 0x09aa, 0x09b0, /* প - র */ + 0x09b6, 0x09b9, /* শ - হ */ + 0x09dc, 0x09dd, /* ড় - ঢ় */ + 0x09df, 0x09e1, /* য় - ৡ */ + 0x09f0, 0x09f1, /* ৰ - ৱ */ + 0x0a05, 0x0a0a, /* ਅ - ਊ */ + 0x0a0f, 0x0a10, /* ਏ - ਐ */ + 0x0a13, 0x0a28, /* ਓ - ਨ */ + 0x0a2a, 0x0a30, /* ਪ - ਰ */ + 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ + 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ + 0x0a38, 0x0a39, /* ਸ - ਹ */ + 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ + 0x0a85, 0x0a8b, /* અ - ઋ */ + 0x0a8f, 0x0a91, /* એ - ઑ */ + 0x0a93, 0x0aa8, /* ઓ - ન */ + 0x0aaa, 0x0ab0, /* પ - ર */ + 
0x0ab2, 0x0ab3, /* લ - ળ */ + 0x0ab5, 0x0ab9, /* વ - હ */ + 0x0b05, 0x0b0c, /* ଅ - ଌ */ + 0x0b0f, 0x0b10, /* ଏ - ଐ */ + 0x0b13, 0x0b28, /* ଓ - ନ */ + 0x0b2a, 0x0b30, /* ପ - ର */ + 0x0b32, 0x0b33, /* ଲ - ଳ */ + 0x0b36, 0x0b39, /* ଶ - ହ */ + 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ + 0x0b5f, 0x0b61, /* ୟ - ୡ */ + 0x0b85, 0x0b8a, /* அ - ஊ */ + 0x0b8e, 0x0b90, /* எ - ஐ */ + 0x0b92, 0x0b95, /* ஒ - க */ + 0x0b99, 0x0b9a, /* ங - ச */ + 0x0b9e, 0x0b9f, /* ஞ - ட */ + 0x0ba3, 0x0ba4, /* ண - த */ + 0x0ba8, 0x0baa, /* ந - ப */ + 0x0bae, 0x0bb5, /* ம - வ */ + 0x0bb7, 0x0bb9, /* ஷ - ஹ */ + 0x0c05, 0x0c0c, /* అ - ఌ */ + 0x0c0e, 0x0c10, /* ఎ - ఐ */ + 0x0c12, 0x0c28, /* ఒ - న */ + 0x0c2a, 0x0c33, /* ప - ళ */ + 0x0c35, 0x0c39, /* వ - హ */ + 0x0c60, 0x0c61, /* ౠ - ౡ */ + 0x0c85, 0x0c8c, /* ಅ - ಌ */ + 0x0c8e, 0x0c90, /* ಎ - ಐ */ + 0x0c92, 0x0ca8, /* ಒ - ನ */ + 0x0caa, 0x0cb3, /* ಪ - ಳ */ + 0x0cb5, 0x0cb9, /* ವ - ಹ */ + 0x0ce0, 0x0ce1, /* ೠ - ೡ */ + 0x0d05, 0x0d0c, /* അ - ഌ */ + 0x0d0e, 0x0d10, /* എ - ഐ */ + 0x0d12, 0x0d28, /* ഒ - ന */ + 0x0d2a, 0x0d39, /* പ - ഹ */ + 0x0d60, 0x0d61, /* ൠ - ൡ */ + 0x0e01, 0x0e30, /* ก - ะ */ + 0x0e32, 0x0e33, /* า - ำ */ + 0x0e40, 0x0e46, /* เ - ๆ */ + 0x0e5a, 0x0e5b, /* ๚ - ๛ */ + 0x0e81, 0x0e82, /* ກ - ຂ */ + 0x0e87, 0x0e88, /* ງ - ຈ */ + 0x0e94, 0x0e97, /* ດ - ທ */ + 0x0e99, 0x0e9f, /* ນ - ຟ */ + 0x0ea1, 0x0ea3, /* ມ - ຣ */ + 0x0eaa, 0x0eab, /* ສ - ຫ */ + 0x0ead, 0x0eae, /* ອ - ຮ */ + 0x0eb2, 0x0eb3, /* າ - ຳ */ + 0x0ec0, 0x0ec4, /* ເ - ໄ */ + 0x0edc, 0x0edd, /* ໜ - ໝ */ + 0x0f18, 0x0f19, /* ༘ - ༙ */ + 0x0f40, 0x0f47, /* ཀ - ཇ */ + 0x0f49, 0x0f69, /* ཉ - ཀྵ */ + 0x10d0, 0x10f6, /* ა - ჶ */ + 0x1100, 0x1159, /* ᄀ - ᅙ */ + 0x115f, 0x11a2, /* ᅟ - ᆢ */ + 0x11a8, 0x11f9, /* ᆨ - ᇹ */ + 0x1e00, 0x1e9b, /* Ḁ - ẛ */ + 0x1f50, 0x1f57, /* ὐ - ὗ */ + 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ + 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ + 0x1fc2, 0x1fc4, /* ῂ - ῄ */ + 0x1fc6, 0x1fcc, /* ῆ - ῌ */ + 0x1fd0, 0x1fd3, /* ῐ - ΐ */ + 0x1fd6, 0x1fdb, /* ῖ - Ί */ + 0x1fe0, 0x1fec, /* ῠ - Ῥ */ + 0x1ff2, 0x1ff4, 
/* ῲ - ῴ */ + 0x1ff6, 0x1ffc, /* ῶ - ῼ */ + 0x210a, 0x2113, /* ℊ - ℓ */ + 0x2115, 0x211d, /* ℕ - ℝ */ + 0x2120, 0x2122, /* ℠ - ™ */ + 0x212a, 0x2131, /* K - ℱ */ + 0x2133, 0x2138, /* ℳ - ℸ */ + 0x3041, 0x3094, /* ぁ - ゔ */ + 0x30a1, 0x30fa, /* ァ - ヺ */ + 0x3105, 0x312c, /* ㄅ - ㄬ */ + 0x3131, 0x318e, /* ㄱ - ㆎ */ + 0x3192, 0x319f, /* ㆒ - ㆟ */ + 0x3260, 0x327b, /* ㉠ - ㉻ */ + 0x328a, 0x32b0, /* ㊊ - ㊰ */ + 0x32d0, 0x32fe, /* ㋐ - ㋾ */ + 0x3300, 0x3357, /* ㌀ - ㍗ */ + 0x3371, 0x3376, /* ㍱ - ㍶ */ + 0x337b, 0x3394, /* ㍻ - ㎔ */ + 0x3399, 0x339e, /* ㎙ - ㎞ */ + 0x33a9, 0x33ad, /* ㎩ - ㎭ */ + 0x33b0, 0x33c1, /* ㎰ - ㏁ */ + 0x33c3, 0x33c5, /* ㏃ - ㏅ */ + 0x33c7, 0x33d7, /* ㏇ - ㏗ */ + 0x33d9, 0x33dd, /* ㏙ - ㏝ */ + 0x4e00, 0x9fff, /* 一 - 鿿 */ + 0xac00, 0xd7a3, /* 가 - 힣 */ + 0xf900, 0xfb06, /* 豈 - st */ + 0xfb13, 0xfb17, /* ﬓ - ﬗ */ + 0xfb1f, 0xfb28, /* ײַ - ﬨ */ + 0xfb2a, 0xfb36, /* שׁ - זּ */ + 0xfb38, 0xfb3c, /* טּ - לּ */ + 0xfb40, 0xfb41, /* נּ - סּ */ + 0xfb43, 0xfb44, /* ףּ - פּ */ + 0xfb46, 0xfbb1, /* צּ - ﮱ */ + 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ + 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ + 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ + 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ + 0xfe70, 0xfe72, /* ﹰ - ﹲ */ + 0xfe76, 0xfefc, /* ﹶ - ﻼ */ + 0xff66, 0xff6f, /* ヲ - ッ */ + 0xff71, 0xff9d, /* ア - ン */ + 0xffa0, 0xffbe, /* ᅠ - ᄒ */ + 0xffc2, 0xffc7, /* ᅡ - ᅦ */ + 0xffca, 0xffcf, /* ᅧ - ᅬ */ + 0xffd2, 0xffd7, /* ᅭ - ᅲ */ + 0xffda, 0xffdc, /* ᅳ - ᅵ */ +}; + +/* + * alpha singlets - + * only covers ranges not in lower||upper + */ +static +Rune __alpha1[] = +{ + 0x00aa, /* ª */ + 0x00b5, /* µ */ + 0x00ba, /* º */ + 0x03da, /* Ϛ */ + 0x03dc, /* Ϝ */ + 0x03de, /* Ϟ */ + 0x03e0, /* Ϡ */ + 0x06d5, /* ە */ + 0x09b2, /* ল */ + 0x0a5e, /* ਫ਼ */ + 0x0a8d, /* ઍ */ + 0x0ae0, /* ૠ */ + 0x0b9c, /* ஜ */ + 0x0cde, /* ೞ */ + 0x0e4f, /* ๏ */ + 0x0e84, /* ຄ */ + 0x0e8a, /* ຊ */ + 0x0e8d, /* ຍ */ + 0x0ea5, /* ລ */ + 0x0ea7, /* ວ */ + 0x0eb0, /* ະ */ + 0x0ebd, /* ຽ */ + 0x1fbe, /* ι */ + 0x207f, /* ⁿ */ + 0x20a8, /* ₨ */ + 0x2102, /* ℂ */ + 0x2107, /* ℇ */ 
+ 0x2124, /* ℤ */ + 0x2126, /* Ω */ + 0x2128, /* ℨ */ + 0xfb3e, /* מּ */ + 0xfe74, /* ﹴ */ +}; + +/* + * space ranges + */ +static +Rune __space2[] = +{ + 0x0009, 0x000a, /* tab and newline */ + 0x0020, 0x0020, /* space */ + 0x00a0, 0x00a0, /* */ + 0x2000, 0x200b, /* - */ + 0x2028, 0x2029, /*
*/ + 0x3000, 0x3000, /* */ + 0xfeff, 0xfeff, /* */ +}; + +/* + * lower case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __toupper2[] = +{ + 0x0061, 0x007a, 468, /* a-z A-Z */ + 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ + 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ + 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ + 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ + 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ + 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ + 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ + 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ + 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ + 0x0430, 0x044f, 468, /* а-я А-Я */ + 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ + 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ + 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ + 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ + 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ + 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ + 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ + 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ + 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ + 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ + 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ + 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ + 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ + 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ + 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ + 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ + 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ + 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ + 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ + 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ + 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ + 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ + 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ + 0xff41, 0xff5a, 468, /* a-z A-Z */ +}; + +/* + * lower case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __toupper1[] = +{ + 0x00ff, 621, /* ÿ Ÿ */ + 0x0101, 499, /* ā Ā */ + 0x0103, 499, /* ă Ă */ + 0x0105, 499, /* ą Ą */ + 0x0107, 499, /* ć Ć */ + 0x0109, 499, /* ĉ Ĉ */ + 0x010b, 499, /* ċ Ċ */ + 0x010d, 499, /* č Č */ + 0x010f, 499, /* ď Ď */ + 0x0111, 499, /* đ Đ */ + 0x0113, 499, /* ē Ē */ + 0x0115, 499, /* ĕ Ĕ */ + 0x0117, 499, /* ė Ė */ + 0x0119, 499, /* ę Ę */ + 0x011b, 499, /* ě Ě */ + 0x011d, 499, /* ĝ Ĝ */ + 0x011f, 499, /* ğ Ğ */ + 0x0121, 499, 
/* ġ Ġ */ + 0x0123, 499, /* ģ Ģ */ + 0x0125, 499, /* ĥ Ĥ */ + 0x0127, 499, /* ħ Ħ */ + 0x0129, 499, /* ĩ Ĩ */ + 0x012b, 499, /* ī Ī */ + 0x012d, 499, /* ĭ Ĭ */ + 0x012f, 499, /* į Į */ + 0x0131, 268, /* ı I */ + 0x0133, 499, /* ij IJ */ + 0x0135, 499, /* ĵ Ĵ */ + 0x0137, 499, /* ķ Ķ */ + 0x013a, 499, /* ĺ Ĺ */ + 0x013c, 499, /* ļ Ļ */ + 0x013e, 499, /* ľ Ľ */ + 0x0140, 499, /* ŀ Ŀ */ + 0x0142, 499, /* ł Ł */ + 0x0144, 499, /* ń Ń */ + 0x0146, 499, /* ņ Ņ */ + 0x0148, 499, /* ň Ň */ + 0x014b, 499, /* ŋ Ŋ */ + 0x014d, 499, /* ō Ō */ + 0x014f, 499, /* ŏ Ŏ */ + 0x0151, 499, /* ő Ő */ + 0x0153, 499, /* œ Œ */ + 0x0155, 499, /* ŕ Ŕ */ + 0x0157, 499, /* ŗ Ŗ */ + 0x0159, 499, /* ř Ř */ + 0x015b, 499, /* ś Ś */ + 0x015d, 499, /* ŝ Ŝ */ + 0x015f, 499, /* ş Ş */ + 0x0161, 499, /* š Š */ + 0x0163, 499, /* ţ Ţ */ + 0x0165, 499, /* ť Ť */ + 0x0167, 499, /* ŧ Ŧ */ + 0x0169, 499, /* ũ Ũ */ + 0x016b, 499, /* ū Ū */ + 0x016d, 499, /* ŭ Ŭ */ + 0x016f, 499, /* ů Ů */ + 0x0171, 499, /* ű Ű */ + 0x0173, 499, /* ų Ų */ + 0x0175, 499, /* ŵ Ŵ */ + 0x0177, 499, /* ŷ Ŷ */ + 0x017a, 499, /* ź Ź */ + 0x017c, 499, /* ż Ż */ + 0x017e, 499, /* ž Ž */ + 0x017f, 200, /* ſ S */ + 0x0183, 499, /* ƃ Ƃ */ + 0x0185, 499, /* ƅ Ƅ */ + 0x0188, 499, /* ƈ Ƈ */ + 0x018c, 499, /* ƌ Ƌ */ + 0x0192, 499, /* ƒ Ƒ */ + 0x0199, 499, /* ƙ Ƙ */ + 0x01a1, 499, /* ơ Ơ */ + 0x01a3, 499, /* ƣ Ƣ */ + 0x01a5, 499, /* ƥ Ƥ */ + 0x01a8, 499, /* ƨ Ƨ */ + 0x01ad, 499, /* ƭ Ƭ */ + 0x01b0, 499, /* ư Ư */ + 0x01b4, 499, /* ƴ Ƴ */ + 0x01b6, 499, /* ƶ Ƶ */ + 0x01b9, 499, /* ƹ Ƹ */ + 0x01bd, 499, /* ƽ Ƽ */ + 0x01c5, 499, /* Dž DŽ */ + 0x01c6, 498, /* dž DŽ */ + 0x01c8, 499, /* Lj LJ */ + 0x01c9, 498, /* lj LJ */ + 0x01cb, 499, /* Nj NJ */ + 0x01cc, 498, /* nj NJ */ + 0x01ce, 499, /* ǎ Ǎ */ + 0x01d0, 499, /* ǐ Ǐ */ + 0x01d2, 499, /* ǒ Ǒ */ + 0x01d4, 499, /* ǔ Ǔ */ + 0x01d6, 499, /* ǖ Ǖ */ + 0x01d8, 499, /* ǘ Ǘ */ + 0x01da, 499, /* ǚ Ǚ */ + 0x01dc, 499, /* ǜ Ǜ */ + 0x01df, 499, /* ǟ Ǟ */ + 0x01e1, 499, /* ǡ Ǡ */ + 0x01e3, 499, /* ǣ Ǣ */ 
+ 0x01e5, 499, /* ǥ Ǥ */ + 0x01e7, 499, /* ǧ Ǧ */ + 0x01e9, 499, /* ǩ Ǩ */ + 0x01eb, 499, /* ǫ Ǫ */ + 0x01ed, 499, /* ǭ Ǭ */ + 0x01ef, 499, /* ǯ Ǯ */ + 0x01f2, 499, /* Dz DZ */ + 0x01f3, 498, /* dz DZ */ + 0x01f5, 499, /* ǵ Ǵ */ + 0x01fb, 499, /* ǻ Ǻ */ + 0x01fd, 499, /* ǽ Ǽ */ + 0x01ff, 499, /* ǿ Ǿ */ + 0x0201, 499, /* ȁ Ȁ */ + 0x0203, 499, /* ȃ Ȃ */ + 0x0205, 499, /* ȅ Ȅ */ + 0x0207, 499, /* ȇ Ȇ */ + 0x0209, 499, /* ȉ Ȉ */ + 0x020b, 499, /* ȋ Ȋ */ + 0x020d, 499, /* ȍ Ȍ */ + 0x020f, 499, /* ȏ Ȏ */ + 0x0211, 499, /* ȑ Ȑ */ + 0x0213, 499, /* ȓ Ȓ */ + 0x0215, 499, /* ȕ Ȕ */ + 0x0217, 499, /* ȗ Ȗ */ + 0x0253, 290, /* ɓ Ɓ */ + 0x0254, 294, /* ɔ Ɔ */ + 0x025b, 297, /* ɛ Ɛ */ + 0x0260, 295, /* ɠ Ɠ */ + 0x0263, 293, /* ɣ Ɣ */ + 0x0268, 291, /* ɨ Ɨ */ + 0x0269, 289, /* ɩ Ɩ */ + 0x026f, 289, /* ɯ Ɯ */ + 0x0272, 287, /* ɲ Ɲ */ + 0x0283, 282, /* ʃ Ʃ */ + 0x0288, 282, /* ʈ Ʈ */ + 0x0292, 281, /* ʒ Ʒ */ + 0x03ac, 462, /* ά Ά */ + 0x03cc, 436, /* ό Ό */ + 0x03d0, 438, /* ϐ Β */ + 0x03d1, 443, /* ϑ Θ */ + 0x03d5, 453, /* ϕ Φ */ + 0x03d6, 446, /* ϖ Π */ + 0x03e3, 499, /* ϣ Ϣ */ + 0x03e5, 499, /* ϥ Ϥ */ + 0x03e7, 499, /* ϧ Ϧ */ + 0x03e9, 499, /* ϩ Ϩ */ + 0x03eb, 499, /* ϫ Ϫ */ + 0x03ed, 499, /* ϭ Ϭ */ + 0x03ef, 499, /* ϯ Ϯ */ + 0x03f0, 414, /* ϰ Κ */ + 0x03f1, 420, /* ϱ Ρ */ + 0x0461, 499, /* ѡ Ѡ */ + 0x0463, 499, /* ѣ Ѣ */ + 0x0465, 499, /* ѥ Ѥ */ + 0x0467, 499, /* ѧ Ѧ */ + 0x0469, 499, /* ѩ Ѩ */ + 0x046b, 499, /* ѫ Ѫ */ + 0x046d, 499, /* ѭ Ѭ */ + 0x046f, 499, /* ѯ Ѯ */ + 0x0471, 499, /* ѱ Ѱ */ + 0x0473, 499, /* ѳ Ѳ */ + 0x0475, 499, /* ѵ Ѵ */ + 0x0477, 499, /* ѷ Ѷ */ + 0x0479, 499, /* ѹ Ѹ */ + 0x047b, 499, /* ѻ Ѻ */ + 0x047d, 499, /* ѽ Ѽ */ + 0x047f, 499, /* ѿ Ѿ */ + 0x0481, 499, /* ҁ Ҁ */ + 0x0491, 499, /* ґ Ґ */ + 0x0493, 499, /* ғ Ғ */ + 0x0495, 499, /* ҕ Ҕ */ + 0x0497, 499, /* җ Җ */ + 0x0499, 499, /* ҙ Ҙ */ + 0x049b, 499, /* қ Қ */ + 0x049d, 499, /* ҝ Ҝ */ + 0x049f, 499, /* ҟ Ҟ */ + 0x04a1, 499, /* ҡ Ҡ */ + 0x04a3, 499, /* ң Ң */ + 0x04a5, 499, /* ҥ Ҥ */ + 0x04a7, 499, /* ҧ 
Ҧ */ + 0x04a9, 499, /* ҩ Ҩ */ + 0x04ab, 499, /* ҫ Ҫ */ + 0x04ad, 499, /* ҭ Ҭ */ + 0x04af, 499, /* ү Ү */ + 0x04b1, 499, /* ұ Ұ */ + 0x04b3, 499, /* ҳ Ҳ */ + 0x04b5, 499, /* ҵ Ҵ */ + 0x04b7, 499, /* ҷ Ҷ */ + 0x04b9, 499, /* ҹ Ҹ */ + 0x04bb, 499, /* һ Һ */ + 0x04bd, 499, /* ҽ Ҽ */ + 0x04bf, 499, /* ҿ Ҿ */ + 0x04c2, 499, /* ӂ Ӂ */ + 0x04c4, 499, /* ӄ Ӄ */ + 0x04c8, 499, /* ӈ Ӈ */ + 0x04cc, 499, /* ӌ Ӌ */ + 0x04d1, 499, /* ӑ Ӑ */ + 0x04d3, 499, /* ӓ Ӓ */ + 0x04d5, 499, /* ӕ Ӕ */ + 0x04d7, 499, /* ӗ Ӗ */ + 0x04d9, 499, /* ә Ә */ + 0x04db, 499, /* ӛ Ӛ */ + 0x04dd, 499, /* ӝ Ӝ */ + 0x04df, 499, /* ӟ Ӟ */ + 0x04e1, 499, /* ӡ Ӡ */ + 0x04e3, 499, /* ӣ Ӣ */ + 0x04e5, 499, /* ӥ Ӥ */ + 0x04e7, 499, /* ӧ Ӧ */ + 0x04e9, 499, /* ө Ө */ + 0x04eb, 499, /* ӫ Ӫ */ + 0x04ef, 499, /* ӯ Ӯ */ + 0x04f1, 499, /* ӱ Ӱ */ + 0x04f3, 499, /* ӳ Ӳ */ + 0x04f5, 499, /* ӵ Ӵ */ + 0x04f9, 499, /* ӹ Ӹ */ + 0x1e01, 499, /* ḁ Ḁ */ + 0x1e03, 499, /* ḃ Ḃ */ + 0x1e05, 499, /* ḅ Ḅ */ + 0x1e07, 499, /* ḇ Ḇ */ + 0x1e09, 499, /* ḉ Ḉ */ + 0x1e0b, 499, /* ḋ Ḋ */ + 0x1e0d, 499, /* ḍ Ḍ */ + 0x1e0f, 499, /* ḏ Ḏ */ + 0x1e11, 499, /* ḑ Ḑ */ + 0x1e13, 499, /* ḓ Ḓ */ + 0x1e15, 499, /* ḕ Ḕ */ + 0x1e17, 499, /* ḗ Ḗ */ + 0x1e19, 499, /* ḙ Ḙ */ + 0x1e1b, 499, /* ḛ Ḛ */ + 0x1e1d, 499, /* ḝ Ḝ */ + 0x1e1f, 499, /* ḟ Ḟ */ + 0x1e21, 499, /* ḡ Ḡ */ + 0x1e23, 499, /* ḣ Ḣ */ + 0x1e25, 499, /* ḥ Ḥ */ + 0x1e27, 499, /* ḧ Ḧ */ + 0x1e29, 499, /* ḩ Ḩ */ + 0x1e2b, 499, /* ḫ Ḫ */ + 0x1e2d, 499, /* ḭ Ḭ */ + 0x1e2f, 499, /* ḯ Ḯ */ + 0x1e31, 499, /* ḱ Ḱ */ + 0x1e33, 499, /* ḳ Ḳ */ + 0x1e35, 499, /* ḵ Ḵ */ + 0x1e37, 499, /* ḷ Ḷ */ + 0x1e39, 499, /* ḹ Ḹ */ + 0x1e3b, 499, /* ḻ Ḻ */ + 0x1e3d, 499, /* ḽ Ḽ */ + 0x1e3f, 499, /* ḿ Ḿ */ + 0x1e41, 499, /* ṁ Ṁ */ + 0x1e43, 499, /* ṃ Ṃ */ + 0x1e45, 499, /* ṅ Ṅ */ + 0x1e47, 499, /* ṇ Ṇ */ + 0x1e49, 499, /* ṉ Ṉ */ + 0x1e4b, 499, /* ṋ Ṋ */ + 0x1e4d, 499, /* ṍ Ṍ */ + 0x1e4f, 499, /* ṏ Ṏ */ + 0x1e51, 499, /* ṑ Ṑ */ + 0x1e53, 499, /* ṓ Ṓ */ + 0x1e55, 499, /* ṕ Ṕ */ + 0x1e57, 499, /* ṗ Ṗ */ + 0x1e59, 499, /* ṙ 
Ṙ */ + 0x1e5b, 499, /* ṛ Ṛ */ + 0x1e5d, 499, /* ṝ Ṝ */ + 0x1e5f, 499, /* ṟ Ṟ */ + 0x1e61, 499, /* ṡ Ṡ */ + 0x1e63, 499, /* ṣ Ṣ */ + 0x1e65, 499, /* ṥ Ṥ */ + 0x1e67, 499, /* ṧ Ṧ */ + 0x1e69, 499, /* ṩ Ṩ */ + 0x1e6b, 499, /* ṫ Ṫ */ + 0x1e6d, 499, /* ṭ Ṭ */ + 0x1e6f, 499, /* ṯ Ṯ */ + 0x1e71, 499, /* ṱ Ṱ */ + 0x1e73, 499, /* ṳ Ṳ */ + 0x1e75, 499, /* ṵ Ṵ */ + 0x1e77, 499, /* ṷ Ṷ */ + 0x1e79, 499, /* ṹ Ṹ */ + 0x1e7b, 499, /* ṻ Ṻ */ + 0x1e7d, 499, /* ṽ Ṽ */ + 0x1e7f, 499, /* ṿ Ṿ */ + 0x1e81, 499, /* ẁ Ẁ */ + 0x1e83, 499, /* ẃ Ẃ */ + 0x1e85, 499, /* ẅ Ẅ */ + 0x1e87, 499, /* ẇ Ẇ */ + 0x1e89, 499, /* ẉ Ẉ */ + 0x1e8b, 499, /* ẋ Ẋ */ + 0x1e8d, 499, /* ẍ Ẍ */ + 0x1e8f, 499, /* ẏ Ẏ */ + 0x1e91, 499, /* ẑ Ẑ */ + 0x1e93, 499, /* ẓ Ẓ */ + 0x1e95, 499, /* ẕ Ẕ */ + 0x1ea1, 499, /* ạ Ạ */ + 0x1ea3, 499, /* ả Ả */ + 0x1ea5, 499, /* ấ Ấ */ + 0x1ea7, 499, /* ầ Ầ */ + 0x1ea9, 499, /* ẩ Ẩ */ + 0x1eab, 499, /* ẫ Ẫ */ + 0x1ead, 499, /* ậ Ậ */ + 0x1eaf, 499, /* ắ Ắ */ + 0x1eb1, 499, /* ằ Ằ */ + 0x1eb3, 499, /* ẳ Ẳ */ + 0x1eb5, 499, /* ẵ Ẵ */ + 0x1eb7, 499, /* ặ Ặ */ + 0x1eb9, 499, /* ẹ Ẹ */ + 0x1ebb, 499, /* ẻ Ẻ */ + 0x1ebd, 499, /* ẽ Ẽ */ + 0x1ebf, 499, /* ế Ế */ + 0x1ec1, 499, /* ề Ề */ + 0x1ec3, 499, /* ể Ể */ + 0x1ec5, 499, /* ễ Ễ */ + 0x1ec7, 499, /* ệ Ệ */ + 0x1ec9, 499, /* ỉ Ỉ */ + 0x1ecb, 499, /* ị Ị */ + 0x1ecd, 499, /* ọ Ọ */ + 0x1ecf, 499, /* ỏ Ỏ */ + 0x1ed1, 499, /* ố Ố */ + 0x1ed3, 499, /* ồ Ồ */ + 0x1ed5, 499, /* ổ Ổ */ + 0x1ed7, 499, /* ỗ Ỗ */ + 0x1ed9, 499, /* ộ Ộ */ + 0x1edb, 499, /* ớ Ớ */ + 0x1edd, 499, /* ờ Ờ */ + 0x1edf, 499, /* ở Ở */ + 0x1ee1, 499, /* ỡ Ỡ */ + 0x1ee3, 499, /* ợ Ợ */ + 0x1ee5, 499, /* ụ Ụ */ + 0x1ee7, 499, /* ủ Ủ */ + 0x1ee9, 499, /* ứ Ứ */ + 0x1eeb, 499, /* ừ Ừ */ + 0x1eed, 499, /* ử Ử */ + 0x1eef, 499, /* ữ Ữ */ + 0x1ef1, 499, /* ự Ự */ + 0x1ef3, 499, /* ỳ Ỳ */ + 0x1ef5, 499, /* ỵ Ỵ */ + 0x1ef7, 499, /* ỷ Ỷ */ + 0x1ef9, 499, /* ỹ Ỹ */ + 0x1f51, 508, /* ὑ Ὑ */ + 0x1f53, 508, /* ὓ Ὓ */ + 0x1f55, 508, /* ὕ Ὕ */ + 0x1f57, 508, /* ὗ Ὗ */ + 0x1fb3, 509, /* ᾳ 
ᾼ */ + 0x1fc3, 509, /* ῃ ῌ */ + 0x1fe5, 507, /* ῥ Ῥ */ + 0x1ff3, 509, /* ῳ ῼ */ +}; + +/* + * upper case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __tolower2[] = +{ + 0x0041, 0x005a, 532, /* A-Z a-z */ + 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ + 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ + 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ + 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ + 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ + 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ + 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ + 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ + 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ + 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ + 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ + 0x0410, 0x042f, 532, /* А-Я а-я */ + 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ + 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ + 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ + 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ + 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ + 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ + 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ + 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ + 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ + 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ + 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ + 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ + 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ + 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ + 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ + 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ + 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ + 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ + 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ + 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ + 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ + 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ + 0xff21, 0xff3a, 532, /* A-Z a-z */ +}; + +/* + * upper case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __tolower1[] = +{ + 0x0100, 501, /* Ā ā */ + 0x0102, 501, /* Ă ă */ + 0x0104, 501, /* Ą ą */ + 0x0106, 501, /* Ć ć */ + 0x0108, 501, /* Ĉ ĉ */ + 0x010a, 501, /* Ċ ċ */ + 0x010c, 501, /* Č č */ + 0x010e, 501, /* Ď ď */ + 0x0110, 501, /* Đ đ */ + 0x0112, 501, /* Ē ē */ + 0x0114, 501, /* Ĕ ĕ */ + 0x0116, 501, /* Ė ė */ + 0x0118, 501, /* Ę ę */ + 0x011a, 501, /* Ě ě */ + 0x011c, 501, /* Ĝ ĝ 
*/ + 0x011e, 501, /* Ğ ğ */ + 0x0120, 501, /* Ġ ġ */ + 0x0122, 501, /* Ģ ģ */ + 0x0124, 501, /* Ĥ ĥ */ + 0x0126, 501, /* Ħ ħ */ + 0x0128, 501, /* Ĩ ĩ */ + 0x012a, 501, /* Ī ī */ + 0x012c, 501, /* Ĭ ĭ */ + 0x012e, 501, /* Į į */ + 0x0130, 301, /* İ i */ + 0x0132, 501, /* IJ ij */ + 0x0134, 501, /* Ĵ ĵ */ + 0x0136, 501, /* Ķ ķ */ + 0x0139, 501, /* Ĺ ĺ */ + 0x013b, 501, /* Ļ ļ */ + 0x013d, 501, /* Ľ ľ */ + 0x013f, 501, /* Ŀ ŀ */ + 0x0141, 501, /* Ł ł */ + 0x0143, 501, /* Ń ń */ + 0x0145, 501, /* Ņ ņ */ + 0x0147, 501, /* Ň ň */ + 0x014a, 501, /* Ŋ ŋ */ + 0x014c, 501, /* Ō ō */ + 0x014e, 501, /* Ŏ ŏ */ + 0x0150, 501, /* Ő ő */ + 0x0152, 501, /* Œ œ */ + 0x0154, 501, /* Ŕ ŕ */ + 0x0156, 501, /* Ŗ ŗ */ + 0x0158, 501, /* Ř ř */ + 0x015a, 501, /* Ś ś */ + 0x015c, 501, /* Ŝ ŝ */ + 0x015e, 501, /* Ş ş */ + 0x0160, 501, /* Š š */ + 0x0162, 501, /* Ţ ţ */ + 0x0164, 501, /* Ť ť */ + 0x0166, 501, /* Ŧ ŧ */ + 0x0168, 501, /* Ũ ũ */ + 0x016a, 501, /* Ū ū */ + 0x016c, 501, /* Ŭ ŭ */ + 0x016e, 501, /* Ů ů */ + 0x0170, 501, /* Ű ű */ + 0x0172, 501, /* Ų ų */ + 0x0174, 501, /* Ŵ ŵ */ + 0x0176, 501, /* Ŷ ŷ */ + 0x0178, 379, /* Ÿ ÿ */ + 0x0179, 501, /* Ź ź */ + 0x017b, 501, /* Ż ż */ + 0x017d, 501, /* Ž ž */ + 0x0181, 710, /* Ɓ ɓ */ + 0x0182, 501, /* Ƃ ƃ */ + 0x0184, 501, /* Ƅ ƅ */ + 0x0186, 706, /* Ɔ ɔ */ + 0x0187, 501, /* Ƈ ƈ */ + 0x018b, 501, /* Ƌ ƌ */ + 0x0190, 703, /* Ɛ ɛ */ + 0x0191, 501, /* Ƒ ƒ */ + 0x0193, 705, /* Ɠ ɠ */ + 0x0194, 707, /* Ɣ ɣ */ + 0x0196, 711, /* Ɩ ɩ */ + 0x0197, 709, /* Ɨ ɨ */ + 0x0198, 501, /* Ƙ ƙ */ + 0x019c, 711, /* Ɯ ɯ */ + 0x019d, 713, /* Ɲ ɲ */ + 0x01a0, 501, /* Ơ ơ */ + 0x01a2, 501, /* Ƣ ƣ */ + 0x01a4, 501, /* Ƥ ƥ */ + 0x01a7, 501, /* Ƨ ƨ */ + 0x01a9, 718, /* Ʃ ʃ */ + 0x01ac, 501, /* Ƭ ƭ */ + 0x01ae, 718, /* Ʈ ʈ */ + 0x01af, 501, /* Ư ư */ + 0x01b3, 501, /* Ƴ ƴ */ + 0x01b5, 501, /* Ƶ ƶ */ + 0x01b7, 719, /* Ʒ ʒ */ + 0x01b8, 501, /* Ƹ ƹ */ + 0x01bc, 501, /* Ƽ ƽ */ + 0x01c4, 502, /* DŽ dž */ + 0x01c5, 501, /* Dž dž */ + 0x01c7, 502, /* LJ lj */ + 0x01c8, 
501, /* Lj lj */ + 0x01ca, 502, /* NJ nj */ + 0x01cb, 501, /* Nj nj */ + 0x01cd, 501, /* Ǎ ǎ */ + 0x01cf, 501, /* Ǐ ǐ */ + 0x01d1, 501, /* Ǒ ǒ */ + 0x01d3, 501, /* Ǔ ǔ */ + 0x01d5, 501, /* Ǖ ǖ */ + 0x01d7, 501, /* Ǘ ǘ */ + 0x01d9, 501, /* Ǚ ǚ */ + 0x01db, 501, /* Ǜ ǜ */ + 0x01de, 501, /* Ǟ ǟ */ + 0x01e0, 501, /* Ǡ ǡ */ + 0x01e2, 501, /* Ǣ ǣ */ + 0x01e4, 501, /* Ǥ ǥ */ + 0x01e6, 501, /* Ǧ ǧ */ + 0x01e8, 501, /* Ǩ ǩ */ + 0x01ea, 501, /* Ǫ ǫ */ + 0x01ec, 501, /* Ǭ ǭ */ + 0x01ee, 501, /* Ǯ ǯ */ + 0x01f1, 502, /* DZ dz */ + 0x01f2, 501, /* Dz dz */ + 0x01f4, 501, /* Ǵ ǵ */ + 0x01fa, 501, /* Ǻ ǻ */ + 0x01fc, 501, /* Ǽ ǽ */ + 0x01fe, 501, /* Ǿ ǿ */ + 0x0200, 501, /* Ȁ ȁ */ + 0x0202, 501, /* Ȃ ȃ */ + 0x0204, 501, /* Ȅ ȅ */ + 0x0206, 501, /* Ȇ ȇ */ + 0x0208, 501, /* Ȉ ȉ */ + 0x020a, 501, /* Ȋ ȋ */ + 0x020c, 501, /* Ȍ ȍ */ + 0x020e, 501, /* Ȏ ȏ */ + 0x0210, 501, /* Ȑ ȑ */ + 0x0212, 501, /* Ȓ ȓ */ + 0x0214, 501, /* Ȕ ȕ */ + 0x0216, 501, /* Ȗ ȗ */ + 0x0386, 538, /* Ά ά */ + 0x038c, 564, /* Ό ό */ + 0x03e2, 501, /* Ϣ ϣ */ + 0x03e4, 501, /* Ϥ ϥ */ + 0x03e6, 501, /* Ϧ ϧ */ + 0x03e8, 501, /* Ϩ ϩ */ + 0x03ea, 501, /* Ϫ ϫ */ + 0x03ec, 501, /* Ϭ ϭ */ + 0x03ee, 501, /* Ϯ ϯ */ + 0x0460, 501, /* Ѡ ѡ */ + 0x0462, 501, /* Ѣ ѣ */ + 0x0464, 501, /* Ѥ ѥ */ + 0x0466, 501, /* Ѧ ѧ */ + 0x0468, 501, /* Ѩ ѩ */ + 0x046a, 501, /* Ѫ ѫ */ + 0x046c, 501, /* Ѭ ѭ */ + 0x046e, 501, /* Ѯ ѯ */ + 0x0470, 501, /* Ѱ ѱ */ + 0x0472, 501, /* Ѳ ѳ */ + 0x0474, 501, /* Ѵ ѵ */ + 0x0476, 501, /* Ѷ ѷ */ + 0x0478, 501, /* Ѹ ѹ */ + 0x047a, 501, /* Ѻ ѻ */ + 0x047c, 501, /* Ѽ ѽ */ + 0x047e, 501, /* Ѿ ѿ */ + 0x0480, 501, /* Ҁ ҁ */ + 0x0490, 501, /* Ґ ґ */ + 0x0492, 501, /* Ғ ғ */ + 0x0494, 501, /* Ҕ ҕ */ + 0x0496, 501, /* Җ җ */ + 0x0498, 501, /* Ҙ ҙ */ + 0x049a, 501, /* Қ қ */ + 0x049c, 501, /* Ҝ ҝ */ + 0x049e, 501, /* Ҟ ҟ */ + 0x04a0, 501, /* Ҡ ҡ */ + 0x04a2, 501, /* Ң ң */ + 0x04a4, 501, /* Ҥ ҥ */ + 0x04a6, 501, /* Ҧ ҧ */ + 0x04a8, 501, /* Ҩ ҩ */ + 0x04aa, 501, /* Ҫ ҫ */ + 0x04ac, 501, /* Ҭ ҭ */ + 0x04ae, 501, /* Ү ү */ 
+ 0x04b0, 501, /* Ұ ұ */ + 0x04b2, 501, /* Ҳ ҳ */ + 0x04b4, 501, /* Ҵ ҵ */ + 0x04b6, 501, /* Ҷ ҷ */ + 0x04b8, 501, /* Ҹ ҹ */ + 0x04ba, 501, /* Һ һ */ + 0x04bc, 501, /* Ҽ ҽ */ + 0x04be, 501, /* Ҿ ҿ */ + 0x04c1, 501, /* Ӂ ӂ */ + 0x04c3, 501, /* Ӄ ӄ */ + 0x04c7, 501, /* Ӈ ӈ */ + 0x04cb, 501, /* Ӌ ӌ */ + 0x04d0, 501, /* Ӑ ӑ */ + 0x04d2, 501, /* Ӓ ӓ */ + 0x04d4, 501, /* Ӕ ӕ */ + 0x04d6, 501, /* Ӗ ӗ */ + 0x04d8, 501, /* Ә ә */ + 0x04da, 501, /* Ӛ ӛ */ + 0x04dc, 501, /* Ӝ ӝ */ + 0x04de, 501, /* Ӟ ӟ */ + 0x04e0, 501, /* Ӡ ӡ */ + 0x04e2, 501, /* Ӣ ӣ */ + 0x04e4, 501, /* Ӥ ӥ */ + 0x04e6, 501, /* Ӧ ӧ */ + 0x04e8, 501, /* Ө ө */ + 0x04ea, 501, /* Ӫ ӫ */ + 0x04ee, 501, /* Ӯ ӯ */ + 0x04f0, 501, /* Ӱ ӱ */ + 0x04f2, 501, /* Ӳ ӳ */ + 0x04f4, 501, /* Ӵ ӵ */ + 0x04f8, 501, /* Ӹ ӹ */ + 0x1e00, 501, /* Ḁ ḁ */ + 0x1e02, 501, /* Ḃ ḃ */ + 0x1e04, 501, /* Ḅ ḅ */ + 0x1e06, 501, /* Ḇ ḇ */ + 0x1e08, 501, /* Ḉ ḉ */ + 0x1e0a, 501, /* Ḋ ḋ */ + 0x1e0c, 501, /* Ḍ ḍ */ + 0x1e0e, 501, /* Ḏ ḏ */ + 0x1e10, 501, /* Ḑ ḑ */ + 0x1e12, 501, /* Ḓ ḓ */ + 0x1e14, 501, /* Ḕ ḕ */ + 0x1e16, 501, /* Ḗ ḗ */ + 0x1e18, 501, /* Ḙ ḙ */ + 0x1e1a, 501, /* Ḛ ḛ */ + 0x1e1c, 501, /* Ḝ ḝ */ + 0x1e1e, 501, /* Ḟ ḟ */ + 0x1e20, 501, /* Ḡ ḡ */ + 0x1e22, 501, /* Ḣ ḣ */ + 0x1e24, 501, /* Ḥ ḥ */ + 0x1e26, 501, /* Ḧ ḧ */ + 0x1e28, 501, /* Ḩ ḩ */ + 0x1e2a, 501, /* Ḫ ḫ */ + 0x1e2c, 501, /* Ḭ ḭ */ + 0x1e2e, 501, /* Ḯ ḯ */ + 0x1e30, 501, /* Ḱ ḱ */ + 0x1e32, 501, /* Ḳ ḳ */ + 0x1e34, 501, /* Ḵ ḵ */ + 0x1e36, 501, /* Ḷ ḷ */ + 0x1e38, 501, /* Ḹ ḹ */ + 0x1e3a, 501, /* Ḻ ḻ */ + 0x1e3c, 501, /* Ḽ ḽ */ + 0x1e3e, 501, /* Ḿ ḿ */ + 0x1e40, 501, /* Ṁ ṁ */ + 0x1e42, 501, /* Ṃ ṃ */ + 0x1e44, 501, /* Ṅ ṅ */ + 0x1e46, 501, /* Ṇ ṇ */ + 0x1e48, 501, /* Ṉ ṉ */ + 0x1e4a, 501, /* Ṋ ṋ */ + 0x1e4c, 501, /* Ṍ ṍ */ + 0x1e4e, 501, /* Ṏ ṏ */ + 0x1e50, 501, /* Ṑ ṑ */ + 0x1e52, 501, /* Ṓ ṓ */ + 0x1e54, 501, /* Ṕ ṕ */ + 0x1e56, 501, /* Ṗ ṗ */ + 0x1e58, 501, /* Ṙ ṙ */ + 0x1e5a, 501, /* Ṛ ṛ */ + 0x1e5c, 501, /* Ṝ ṝ */ + 0x1e5e, 501, /* Ṟ ṟ */ + 0x1e60, 501, /* Ṡ ṡ */ 
+ 0x1e62, 501, /* Ṣ ṣ */ + 0x1e64, 501, /* Ṥ ṥ */ + 0x1e66, 501, /* Ṧ ṧ */ + 0x1e68, 501, /* Ṩ ṩ */ + 0x1e6a, 501, /* Ṫ ṫ */ + 0x1e6c, 501, /* Ṭ ṭ */ + 0x1e6e, 501, /* Ṯ ṯ */ + 0x1e70, 501, /* Ṱ ṱ */ + 0x1e72, 501, /* Ṳ ṳ */ + 0x1e74, 501, /* Ṵ ṵ */ + 0x1e76, 501, /* Ṷ ṷ */ + 0x1e78, 501, /* Ṹ ṹ */ + 0x1e7a, 501, /* Ṻ ṻ */ + 0x1e7c, 501, /* Ṽ ṽ */ + 0x1e7e, 501, /* Ṿ ṿ */ + 0x1e80, 501, /* Ẁ ẁ */ + 0x1e82, 501, /* Ẃ ẃ */ + 0x1e84, 501, /* Ẅ ẅ */ + 0x1e86, 501, /* Ẇ ẇ */ + 0x1e88, 501, /* Ẉ ẉ */ + 0x1e8a, 501, /* Ẋ ẋ */ + 0x1e8c, 501, /* Ẍ ẍ */ + 0x1e8e, 501, /* Ẏ ẏ */ + 0x1e90, 501, /* Ẑ ẑ */ + 0x1e92, 501, /* Ẓ ẓ */ + 0x1e94, 501, /* Ẕ ẕ */ + 0x1ea0, 501, /* Ạ ạ */ + 0x1ea2, 501, /* Ả ả */ + 0x1ea4, 501, /* Ấ ấ */ + 0x1ea6, 501, /* Ầ ầ */ + 0x1ea8, 501, /* Ẩ ẩ */ + 0x1eaa, 501, /* Ẫ ẫ */ + 0x1eac, 501, /* Ậ ậ */ + 0x1eae, 501, /* Ắ ắ */ + 0x1eb0, 501, /* Ằ ằ */ + 0x1eb2, 501, /* Ẳ ẳ */ + 0x1eb4, 501, /* Ẵ ẵ */ + 0x1eb6, 501, /* Ặ ặ */ + 0x1eb8, 501, /* Ẹ ẹ */ + 0x1eba, 501, /* Ẻ ẻ */ + 0x1ebc, 501, /* Ẽ ẽ */ + 0x1ebe, 501, /* Ế ế */ + 0x1ec0, 501, /* Ề ề */ + 0x1ec2, 501, /* Ể ể */ + 0x1ec4, 501, /* Ễ ễ */ + 0x1ec6, 501, /* Ệ ệ */ + 0x1ec8, 501, /* Ỉ ỉ */ + 0x1eca, 501, /* Ị ị */ + 0x1ecc, 501, /* Ọ ọ */ + 0x1ece, 501, /* Ỏ ỏ */ + 0x1ed0, 501, /* Ố ố */ + 0x1ed2, 501, /* Ồ ồ */ + 0x1ed4, 501, /* Ổ ổ */ + 0x1ed6, 501, /* Ỗ ỗ */ + 0x1ed8, 501, /* Ộ ộ */ + 0x1eda, 501, /* Ớ ớ */ + 0x1edc, 501, /* Ờ ờ */ + 0x1ede, 501, /* Ở ở */ + 0x1ee0, 501, /* Ỡ ỡ */ + 0x1ee2, 501, /* Ợ ợ */ + 0x1ee4, 501, /* Ụ ụ */ + 0x1ee6, 501, /* Ủ ủ */ + 0x1ee8, 501, /* Ứ ứ */ + 0x1eea, 501, /* Ừ ừ */ + 0x1eec, 501, /* Ử ử */ + 0x1eee, 501, /* Ữ ữ */ + 0x1ef0, 501, /* Ự ự */ + 0x1ef2, 501, /* Ỳ ỳ */ + 0x1ef4, 501, /* Ỵ ỵ */ + 0x1ef6, 501, /* Ỷ ỷ */ + 0x1ef8, 501, /* Ỹ ỹ */ + 0x1f59, 492, /* Ὑ ὑ */ + 0x1f5b, 492, /* Ὓ ὓ */ + 0x1f5d, 492, /* Ὕ ὕ */ + 0x1f5f, 492, /* Ὗ ὗ */ + 0x1fbc, 491, /* ᾼ ᾳ */ + 0x1fcc, 491, /* ῌ ῃ */ + 0x1fec, 493, /* Ῥ ῥ */ + 0x1ffc, 491, /* ῼ ῳ */ +}; + +/* + * title 
characters are those between + * upper and lower case. ie DZ Dz dz + */ +static +Rune __totitle1[] = /* pairs: rune, title-case offset stored with 500 added */ +{ + 0x01c4, 501, /* DŽ Dž */ + 0x01c6, 499, /* dž Dž */ + 0x01c7, 501, /* LJ Lj */ + 0x01c9, 499, /* lj Lj */ + 0x01ca, 501, /* NJ Nj */ + 0x01cc, 499, /* nj Nj */ + 0x01f1, 501, /* DZ Dz */ + 0x01f3, 499, /* dz Dz */ +}; + +static +Rune* +bsearch(Rune c, Rune *t, int n, int ne) /* t: n entries of ne Runes each, ascending by first Rune; returns the last entry whose first Rune is <= c, or 0 if there is none */ +{ + Rune *p; + int m; + + while(n > 1) { + m = n/2; + p = t + m*ne; /* entry at the midpoint of the remaining window */ + if(c >= p[0]) { + t = p; /* c is at or past the midpoint: keep the upper part */ + n = n-m; + } else + n = m; /* c is before the midpoint: keep the lower part */ + } + if(n && c >= t[0]) /* n is 0 here only if the caller passed an empty table */ + return t; + return 0; +} + +Rune +tolowerrune(Rune c) /* lower-case form of c; c itself when neither table lists it */ +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); /* ranges: first, last, offset+500 */ + if(p && c >= p[0] && c <= p[1]) /* bsearch only finds the nearest entry at or below c; confirm c is inside it */ + return c + p[2] - 500; /* remove the 500 bias from the stored offset */ + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); /* singlets: rune, offset+500 */ + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +toupperrune(Rune c) /* upper-case form of c; c itself when neither table lists it */ +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); /* ranges: first, last, offset+500 */ + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; /* remove the 500 bias from the stored offset */ + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); /* singlets: rune, offset+500 */ + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +totitlerune(Rune c) /* title-case form of c; c itself when __totitle1 does not list it */ +{ + Rune *p; + + p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; /* remove the 500 bias from the stored offset */ + return c; +} + +int +islowerrune(Rune c) /* 1 when c has an entry in the to-upper tables, else 0 */ +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isupperrune(Rune c) /* 1 when c has an entry in the to-lower tables, else 0 */ +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isalpharune(Rune c) /* 1 when c is upper or lower case, or is listed in __alpha2 (ranges) or __alpha1 (single runes) */ +{ + Rune *p; + + if(isupperrune(c) || islowerrune(c)) + return 1; + p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); /* ranges: first, last */ + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __alpha1, nelem(__alpha1), 1); /* one Rune per entry */ + 
if(p && c == p[0]) + return 1; + return 0; +} + +int +istitlerune(Rune c) +{ + return isupperrune(c) && islowerrune(c); +} + +int +isspacerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __space2, nelem(__space2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} diff --git a/src/libutf/utf.7 b/src/libutf/utf.7 new file mode 100644 index 00000000..97b7b1e7 --- /dev/null +++ b/src/libutf/utf.7 @@ -0,0 +1,91 @@ +.TH UTF 7 +.SH NAME +UTF, Unicode, ASCII, rune \- character set and format +.SH DESCRIPTION +The Plan 9 character set and representation are +based on the Unicode Standard and on the ISO multibyte +.SM UTF-8 +encoding (Universal Character +Set Transformation Format, 8 bits wide). +The Unicode Standard represents its characters in 16 +bits; +.SM UTF-8 +represents such +values in an 8-bit byte stream. +Throughout this manual, +.SM UTF-8 +is shortened to +.SM UTF. +.PP +In Plan 9, a +.I rune +is a 16-bit quantity representing a Unicode character. +Internally, programs may store characters as runes. +However, any external manifestation of textual information, +in files or at the interface between programs, uses a +machine-independent, byte-stream encoding called +.SM UTF. +.PP +.SM UTF +is designed so the 7-bit +.SM ASCII +set (values hexadecimal 00 to 7F), +appear only as themselves +in the encoding. +Runes with values above 7F appear as sequences of two or more +bytes with values only from 80 to FF. +.PP +The +.SM UTF +encoding of the Unicode Standard is backward compatible with +.SM ASCII\c +: +programs presented only with +.SM ASCII +work on Plan 9 +even if not written to deal with +.SM UTF, +as do +programs that deal with uninterpreted byte streams. +However, programs that perform semantic processing on +.SM ASCII +graphic +characters must convert from +.SM UTF +to runes +in order to work properly with non-\c +.SM ASCII +input. +See +.IR rune (3). 
+.PP +Letting numbers be binary, +a rune x is converted to a multibyte +.SM UTF +sequence +as follows: +.PP +01. x in [00000000.0bbbbbbb] → 0bbbbbbb +.br +10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb +.br +11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb +.br +.PP +Conversion 01 provides a one-byte sequence that spans the +.SM ASCII +character set in a compatible way. +Conversions 10 and 11 represent higher-valued characters +as sequences of two or three bytes with the high bit set. +Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open. +When there are multiple ways to encode a value, for example rune 0, +the shortest encoding is used. +.PP +In the inverse mapping, +any sequence except those described above +is incorrect and is converted to rune hexadecimal 0080. +.SH "SEE ALSO" +.IR ascii (1), +.IR tcs (1), +.IR rune (3), +.IR "The Unicode Standard" . diff --git a/src/libutf/utf.h b/src/libutf/utf.h new file mode 100644 index 00000000..623bfda9 --- /dev/null +++ b/src/libutf/utf.h @@ -0,0 +1,51 @@ +#ifndef _UTFH_ +#define _UTFH_ 1 + +typedef unsigned short Rune; /* 16 bits */ + +enum +{ + UTFmax = 3, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0x80, /* decoding error in UTF */ +}; + +/* + * rune routines + */ +extern int runetochar(char*, Rune*); +extern int chartorune(Rune*, char*); +extern int runelen(long); +extern int runenlen(Rune*, int); +extern int fullrune(char*, int); +extern int utflen(char*); +extern int utfnlen(char*, long); +extern char* utfrune(char*, long); +extern char* utfrrune(char*, long); +extern char* utfutf(char*, char*); +extern char* utfecpy(char*, char*, char*); + +extern Rune* runestrcat(Rune*, Rune*); +extern Rune* runestrchr(Rune*, Rune); +extern int runestrcmp(Rune*, Rune*); +extern Rune* runestrcpy(Rune*, Rune*); +extern Rune* runestrncpy(Rune*, Rune*, long); +extern 
Rune* runestrecpy(Rune*, Rune*, Rune*); +extern Rune* runestrdup(Rune*); +extern Rune* runestrncat(Rune*, Rune*, long); +extern int runestrncmp(Rune*, Rune*, long); +extern Rune* runestrrchr(Rune*, Rune); +extern long runestrlen(Rune*); +extern Rune* runestrstr(Rune*, Rune*); + +extern Rune tolowerrune(Rune); +extern Rune totitlerune(Rune); +extern Rune toupperrune(Rune); +extern int isalpharune(Rune); +extern int islowerrune(Rune); +extern int isspacerune(Rune); +extern int istitlerune(Rune); +extern int isupperrune(Rune); + +#endif diff --git a/src/libutf/utfdef.h b/src/libutf/utfdef.h new file mode 100644 index 00000000..4b58ae87 --- /dev/null +++ b/src/libutf/utfdef.h @@ -0,0 +1,14 @@ +#define uchar _utfuchar +#define ushort _utfushort +#define uint _utfuint +#define ulong _utfulong +#define vlong _utfvlong +#define uvlong _utfuvlong + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define nil ((void*)0) diff --git a/src/libutf/utfecpy.c b/src/libutf/utfecpy.c new file mode 100644 index 00000000..57159350 --- /dev/null +++ b/src/libutf/utfecpy.c @@ -0,0 +1,36 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 