aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/look.c
diff options
context:
space:
mode:
author rsc <devnull@localhost> 2004-04-19 18:13:05 +0000
committer rsc <devnull@localhost> 2004-04-19 18:13:05 +0000
commit c8af1ab17b72f500c27688598dbb893f09f62c53 (patch)
tree e082e537fed510ee98234a328c38787471dd71f0 /src/cmd/look.c
parent ba19f6b5b46c539f9f2821d351837164b5824f04 (diff)
downloadplan9port-c8af1ab17b72f500c27688598dbb893f09f62c53.tar.gz
plan9port-c8af1ab17b72f500c27688598dbb893f09f62c53.tar.bz2
plan9port-c8af1ab17b72f500c27688598dbb893f09f62c53.zip
tweaks
Diffstat (limited to 'src/cmd/look.c')
-rw-r--r-- src/cmd/look.c 342
1 files changed, 342 insertions, 0 deletions
diff --git a/src/cmd/look.c b/src/cmd/look.c
new file mode 100644
index 00000000..5b300868
--- /dev/null
+++ b/src/cmd/look.c
@@ -0,0 +1,342 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+ /*
+  * Macros for Rune support of ctype.h-like functions.
+  * The letter, digit and space tests cover ASCII ranges only;
+  * islatin1 separately covers 0xC0-0xFF.  Several of these macros
+  * evaluate their argument more than once, so the argument must not
+  * have side effects.
+  */
+
+#define isupper(r) (L'A' <= (r) && (r) <= L'Z')
+#define islower(r) (L'a' <= (r) && (r) <= L'z')
+#define isalpha(r) (isupper(r) || islower(r))
+#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)	/* range covered by latin_fold_tab */
+
+#define isdigit(r) (L'0' <= (r) && (r) <= L'9')
+
+#define isalnum(r) (isalpha(r) || isdigit(r))
+
+#define isspace(r) ((r) == L' ' || (r) == L'\t' \
+ || (0x0A <= (r) && (r) <= 0x0D))	/* space, tab, and 0x0A through 0x0D */
+
+#define tolower(r) ((r)-'A'+'a')	/* no range check: only meaningful when isupper(r) holds */
+
+#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))	/* -1, 0 or 1 according to the sign of v */
+
+#define WORDSIZ 4000	/* capacity, in Runes, of entry[] and word[] */
+char *filename = "#9/lib/words";	/* default dictionary; main passes it through unsharp() and replaces it when a file argument is given */
+Biobuf *dfile;	/* the dictionary, opened for reading by main */
+Biobuf bout;	/* buffered output on file descriptor 1 */
+Biobuf bin;	/* buffered input on file descriptor 0; source of keys in -i mode */
+
+int fold;	/* -f: rcanon maps A-Z to lower case; also set when no file argument is given */
+int direc;	/* -d: rcanon keeps only letters, digits, space and tab; also set when no file argument is given */
+int exact;	/* -x: a key that is only a prefix of a line does not count as a match */
+int iflag;	/* -i, or no string argument: keys are read one per line from bin */
+int rev = 1; /*-1 for reverse-ordered file, not implemented*/
+int (*compare)(Rune*, Rune*);	/* ordering used for searching: acomp by default, ncomp with -n */
+Rune tab = '\t';	/* rcanon stops at this Rune; changed by -t */
+Rune entry[WORDSIZ];	/* most recent dictionary line, as read */
+Rune word[WORDSIZ];	/* canonical form of entry, produced by rcanon */
+Rune key[50], orig[50];	/* orig: search string as given; key: its canonical form */
+Rune latin_fold_tab[] =
+{
+/* Table to fold latin 1 characters to ASCII equivalents
+ based at Rune value 0xc0: entry i is the replacement for Rune 0xc0+i.
+ A 0 entry (the slots for × Þ ß ÷ þ) means no replacement is defined;
+ rcanon leaves such a Rune unchanged.
+
+ À Á Â Ã Ä Å Æ Ç
+ È É Ê Ë Ì Í Î Ï
+ Ð Ñ Ò Ó Ô Õ Ö ×
+ Ø Ù Ú Û Ü Ý Þ ß
+ à á â ã ä å æ ç
+ è é ê ë ì í î ï
+ ð ñ ò ó ô õ ö ÷
+ ø ù ú û ü ý þ ÿ
+*/
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
+};
+
+int locate(void);	/* search dfile for key; on success the matching line is left in entry/word */
+int acomp(Rune*, Rune*);	/* string comparison returning -2, -1, 0, 1 or 2 */
+int getword(Biobuf*, Rune *rp, int n);	/* read one line of fewer than n Runes into rp */
+void torune(char*, Rune*);	/* convert a UTF string to a 0-terminated Rune string */
+void rcanon(Rune*, Rune*);	/* produce the canonical (comparison) form of a Rune string */
+int ncomp(Rune*, Rune*);	/* numeric comparison, selected by -n */
+
+/*
+ * look [-dfinx] [-t c] [string] [file]
+ *
+ * Prints every line of the dictionary file whose canonical form
+ * (see rcanon) begins with the canonical form of string, or, with -x,
+ * equals it.  locate() finds the first candidate line; the lines that
+ * follow it are printed for as long as they keep matching.
+ *
+ *	-d	compare only letters, digits, space and tab
+ *	-f	fold upper case to lower case
+ *	-i	read search strings, one per line, from standard input
+ *	-n	compare numerically (ncomp) instead of with acomp
+ *	-t c	canonical forms stop at character c instead of tab
+ *	-x	print only exact matches
+ *
+ * Without a string argument -i is implied; without a file argument the
+ * default dictionary is used and -d and -f are implied.
+ * Exit status is "usage", "no dictionary", "not found", or 0 on success.
+ */
+void
+main(int argc, char *argv[])
+{
+	int n;
+	char *targ;
+
+	filename = unsharp(filename);
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+	compare = acomp;
+	ARGBEGIN{
+	case 'd':
+		direc++;
+		break;
+	case 'f':
+		fold++;
+		break;
+	case 'i':
+		iflag++;
+		break;
+	case 'n':
+		compare = ncomp;
+		break;
+	case 't':
+		/* ARGF() yields 0 when -t is the last argument; never hand that to chartorune */
+		targ = ARGF();
+		if(targ == 0) {
+			fprint(2, "%s: option -t requires an argument\n", argv0);
+			fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
+			exits("usage");
+		}
+		chartorune(&tab, targ);
+		break;
+	case 'x':
+		exact++;
+		break;
+	default:
+		fprint(2, "%s: bad option %c\n", argv0, ARGC());
+		fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
+		exits("usage");
+	} ARGEND
+	if(!iflag){
+		if(argc >= 1) {
+			/*
+			 * orig[] is a fixed-size array and torune() does not
+			 * bound its output, so reject strings that would not
+			 * fit together with the terminating 0.
+			 */
+			if(utflen(argv[0]) >= (int)(sizeof(orig)/sizeof(orig[0]))) {
+				fprint(2, "%s: search string too long\n", argv0);
+				exits("usage");
+			}
+			torune(argv[0], orig);
+			argv++;
+			argc--;
+		} else
+			iflag++;
+	}
+	if(argc < 1) {
+		/* default dictionary: compare in dictionary order, ignoring case */
+		direc++;
+		fold++;
+	} else
+		filename = argv[0];
+	if (!iflag)
+		rcanon(orig, key);
+	dfile = Bopen(filename, OREAD);
+	if(dfile == 0) {
+		fprint(2, "look: can't open %s\n", filename);
+		exits("no dictionary");
+	}
+	if(!iflag)
+		if(!locate())
+			exits("not found");
+	do {
+		if(iflag) {
+			/* make the previous answer visible before waiting for the next key */
+			Bflush(&bout);
+			if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
+				exits(0);
+			rcanon(orig, key);
+			if(!locate())
+				continue;
+		}
+		/* locate() left the first matching line in entry and word */
+		if (!exact || !acomp(word, key))
+			Bprint(&bout, "%S\n", entry);
+		/* print the following lines for as long as they still match */
+		while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+			rcanon(entry, word);
+			n = compare(key, word);
+			switch(n) {
+			case -1:
+				/* key is only a prefix of word */
+				if(exact)
+					break;
+				/* fall through */
+			case 0:
+				/*
+				 * NOTE(review): the first match above is tested
+				 * against key, this one against orig; with -d or
+				 * -f the two can differ.  Behaviour kept as is;
+				 * confirm which one is intended.
+				 */
+				if (!exact || !acomp(word, orig))
+					Bprint(&bout, "%S\n", entry);
+				continue;
+			}
+			break;
+		}
+	} while(iflag);
+	exits(0);
+}
+
+/*
+ * Search dfile for key using the current compare function.
+ *
+ * First a binary search over byte offsets narrows the range: each probe
+ * seeks to the midpoint, skips to the start of the next line, and
+ * compares that line's canonical form with key.  Then a linear scan
+ * from the lower bound looks for the first line that key does not
+ * follow.
+ *
+ * Returns 1 if that line equals key, or has key as a prefix when
+ * exact is not set; the line is then in entry, its canonical form in
+ * word, and dfile is positioned just after it.  Returns 0 otherwise.
+ */
+int
+locate(void)
+{
+	long lo, hi, pos;
+	long r;
+	int cmp;
+
+	lo = 0;
+	hi = Bseek(dfile, 0L, 2);
+	for(;;) {
+		Bseek(dfile, (hi+lo) / 2, 0);
+		/* discard the remainder of the line the probe landed in */
+		for(r = Bgetrune(dfile); r >= 0 && r != '\n'; r = Bgetrune(dfile))
+			;
+		pos = Boffset(dfile);
+		if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
+			break;
+		rcanon(entry, word);
+		cmp = compare(key, word);
+		if(cmp == 1 || cmp == 2) {
+			/* key sorts after this line: raise the lower bound */
+			lo = pos;
+			continue;
+		}
+		/* any other result than -2, -1, 0, or no further progress, ends the search */
+		if(cmp < -2 || cmp > 0 || hi <= pos)
+			break;
+		hi = pos;
+	}
+	Bseek(dfile, lo, 0);
+	while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+		rcanon(entry, word);
+		cmp = compare(key, word);
+		if(cmp == 0)
+			return 1;
+		if(cmp == -1)
+			return !exact;	/* a prefix match counts unless -x was given */
+		if(cmp == -2)
+			return 0;	/* already past where key would be */
+		/* key still sorts after this line: keep scanning */
+	}
+	return 0;
+}
+
+/*
+ * Compare two 0-terminated Rune strings.  acomp(s, t) yields
+ *	-2 when s sorts strictly before t
+ *	-1 when s is a proper prefix of t
+ *	 0 when s and t are identical
+ *	 1 when t is a proper prefix of s
+ *	 2 when t sorts strictly before s
+ * The order is decided by the first position at which the strings
+ * differ, comparing Rune values.
+ */
+
+int
+acomp(Rune *s, Rune *t)
+{
+	int a, b;
+
+	/* walk the common prefix; reaching the terminator means equality */
+	while((a = *s++) == (b = *t++))
+		if(a == 0)
+			return 0;
+	if(a == 0)
+		return -1;
+	if(b == 0)
+		return 1;
+	return a < b ? -2 : 2;
+}
+
+/*
+ * Convert the 0-terminated UTF string old into Runes stored at new,
+ * including the terminating 0.  There is no length parameter, so the
+ * caller must make sure new has room for the whole result.
+ */
+void
+torune(char *old, Rune *new)
+{
+	for(;;) {
+		old += chartorune(new, old);
+		if(*new == 0)
+			break;
+		new++;
+	}
+}
+
+/*
+ * Write the canonical (comparison) form of old to new.
+ * Copying stops at the end of old or at the first occurrence of the
+ * global tab Rune.  Along the way:
+ *	- Runes in 0xC0-0xFF with a nonzero latin_fold_tab entry are
+ *	  replaced by that entry;
+ *	- when direc is set, everything except letters, digits, space
+ *	  and tab is dropped;
+ *	- when fold is set, upper-case letters become lower case.
+ * new is always 0-terminated and never longer than old.
+ */
+void
+rcanon(Rune *old, Rune *new)
+{
+	Rune r;
+
+	for(; (r = *old) != 0 && r != tab; old++) {
+		if(islatin1(r) && latin_fold_tab[r-0xc0] != 0)
+			r = latin_fold_tab[r-0xc0];
+		if(direc && !isalnum(r) && r != L' ' && r != L'\t')
+			continue;
+		if(fold && isupper(r))
+			r = tolower(r);
+		*new++ = r;
+	}
+	*new = 0;
+}
+/*
+ * ncomp(s, t) compares the decimal numbers at the start of s and t;
+ * main installs it as `compare' for the -n option.  Each number is
+ * read as: optional white space, an optional '-', a run of integer
+ * digits, and an optional '.' followed by a run of fraction digits.
+ * Leading zeros of the integer part and trailing zeros of the fraction
+ * do not affect the result, and -0 compares equal to 0.
+ *
+ * With rev == 1 the result is
+ *	-2 if the number in s is less than the number in t
+ *	 0 if the two numbers are equal
+ *	 2 if the number in s is greater than the number in t
+ * so, unlike acomp, it never reports a prefix relation (-1 or 1).
+ */
+int
+ncomp(Rune *s, Rune *t)
+{
+ Rune *is, *it, *js, *jt;
+ int a, b;
+ int ssgn, tsgn;
+
+ while(isspace(*s))
+ s++;
+ while(isspace(*t))
+ t++;
+ ssgn = tsgn = -2*rev;	/* value used for a non-negative number; a leading '-' negates it */
+ if(*s == '-') {
+ s++;
+ ssgn = -ssgn;
+ }
+ if(*t == '-') {
+ t++;
+ tsgn = -tsgn;
+ }
+ for(is = s; isdigit(*is); is++)	/* is: one past the last integer digit of s */
+ ;
+ for(it = t; isdigit(*it); it++)	/* it: likewise for t */
+ ;
+ js = is;	/* remember where the integer parts end, for the fraction pass */
+ jt = it;
+ a = 0;
+ if(ssgn == tsgn)	/* same sign: compare integer digits from least significant upward */
+ while(it>t && is>s)
+ if(b = *--it - *--is)
+ a = b;	/* ends up holding the difference at the most significant differing digit */
+ while(is > s)	/* digits of s not paired with a digit of t */
+ if(*--is != '0')
+ return -ssgn;	/* s has the larger magnitude */
+ while(it > t)	/* digits of t not paired with a digit of s */
+ if(*--it != '0')
+ return tsgn;	/* t has the larger magnitude */
+ if(a)
+ return sgn(a)*ssgn;
+ if(*(s=js) == '.')	/* integer parts are equal: move on to the fractions */
+ s++;
+ if(*(t=jt) == '.')
+ t++;
+ if(ssgn == tsgn)	/* same sign: compare fraction digits from the left */
+ while(isdigit(*s) && isdigit(*t))
+ if(a = *t++ - *s++)
+ return sgn(a)*ssgn;
+ while(isdigit(*s))	/* remaining fraction digits of s matter only if nonzero */
+ if(*s++ != '0')
+ return -ssgn;
+ while(isdigit(*t))	/* likewise for t */
+ if(*t++ != '0')
+ return tsgn;
+ return 0;
+}
+
+/*
+ * Read one line from f into rp as a 0-terminated Rune string; the
+ * newline itself is not stored.  n is the capacity of rp in Runes, so
+ * a line must be shorter than n Runes to fit.
+ *
+ * Returns 1 when a complete line was read.  Returns 0 when Bgetrune
+ * reports end of file or an error before a newline is seen, or when
+ * the line does not fit; in the latter case a message is printed on
+ * file descriptor 2.  On a 0 return rp is not terminated.
+ */
+int
+getword(Biobuf *f, Rune *rp, int n)
+{
+	long c;
+	int i;
+
+	for(i = 0; i < n; i++) {
+		c = Bgetrune(f);
+		if(c < 0)
+			return 0;
+		if(c == '\n') {
+			rp[i] = L'\0';
+			return 1;
+		}
+		rp[i] = c;
+	}
+	fprint(2, "Look: word too long. Bailing out.\n");
+	return 0;
+}