diff options
Diffstat (limited to 'src/cmd')
-rw-r--r-- | src/cmd/file.c | 1252 | ||||
-rw-r--r-- | src/cmd/postscript/text2post/mkfile | 23 | ||||
-rw-r--r-- | src/cmd/postscript/text2post/pjw.char.ps | 142 | ||||
-rw-r--r-- | src/cmd/postscript/text2post/text2post.c | 564 |
4 files changed, 1981 insertions, 0 deletions
diff --git a/src/cmd/file.c b/src/cmd/file.c new file mode 100644 index 00000000..f09977b1 --- /dev/null +++ b/src/cmd/file.c @@ -0,0 +1,1252 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> +#include <mach.h> + +/* + * file - determine type of file + */ +#define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) + +uchar buf[6001]; +short cfreq[140]; +short wfreq[50]; +int nbuf; +Dir* mbuf; +int fd; +char *fname; +char *slash; + +enum +{ + Cword, + Fword, + Aword, + Alword, + Lword, + I1, + I2, + I3, + Clatin = 128, + Cbinary, + Cnull, + Ceascii, + Cutf, +}; +struct +{ + char* word; + int class; +} dict[] = +{ + "PATH", Lword, + "TEXT", Aword, + "adt", Alword, + "aggr", Alword, + "alef", Alword, + "array", Lword, + "block", Fword, + "chan", Alword, + "char", Cword, + "common", Fword, + "con", Lword, + "data", Fword, + "dimension", Fword, + "double", Cword, + "extern", Cword, + "bio", I2, + "float", Cword, + "fn", Lword, + "function", Fword, + "h", I3, + "implement", Lword, + "import", Lword, + "include", I1, + "int", Cword, + "integer", Fword, + "iota", Lword, + "libc", I2, + "long", Cword, + "module", Lword, + "real", Fword, + "ref", Lword, + "register", Cword, + "self", Lword, + "short", Cword, + "static", Cword, + "stdio", I2, + "struct", Cword, + "subroutine", Fword, + "u", I2, + "void", Cword, +}; + +/* codes for 'mode' field in language structure */ +enum { + Normal = 0, + First, /* first entry for language spanning several ranges */ + Multi, /* later entries " " " ... */ + Shared, /* codes used in several languages */ + }; + +struct +{ + int mode; /* see enum above */ + int count; + int low; + int high; + char *name; + +} language[] = +{ + Normal, 0, 0x0080, 0x0080, "Extended Latin", + Normal, 0, 0x0100, 0x01FF, "Extended Latin", + Normal, 0, 0x0370, 0x03FF, "Greek", + Normal, 0, 0x0400, 0x04FF, "Cyrillic", + Normal, 0, 0x0530, 0x058F, "Armenian", + Normal, 0, 0x0590, 0x05FF, "Hebrew", + Normal, 0, 0x0600, 0x06FF, "Arabic", + Normal, 0, 0x0900, 0x097F, "Devanagari", + Normal, 0, 0x0980, 0x09FF, "Bengali", + Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", + Normal, 0, 0x0A80, 0x0AFF, "Gujarati", + Normal, 0, 0x0B00, 0x0B7F, "Oriya", + Normal, 0, 0x0B80, 0x0BFF, "Tamil", + Normal, 0, 0x0C00, 0x0C7F, "Telugu", + Normal, 0, 0x0C80, 0x0CFF, "Kannada", + Normal, 0, 0x0D00, 0x0D7F, "Malayalam", + Normal, 0, 0x0E00, 0x0E7F, "Thai", + Normal, 0, 0x0E80, 0x0EFF, "Lao", + Normal, 0, 0x1000, 0x105F, "Tibetan", + Normal, 0, 0x10A0, 0x10FF, "Georgian", + Normal, 0, 0x3040, 0x30FF, "Japanese", + Normal, 0, 0x3100, 0x312F, "Chinese", + First, 0, 0x3130, 0x318F, "Korean", + Multi, 0, 0x3400, 0x3D2F, "Korean", + Shared, 0, 0x4e00, 0x9fff, "CJK", + Normal, 0, 0, 0, 0, /* terminal entry */ +}; + + +enum +{ + Fascii, /* printable ascii */ + Flatin, /* latin 1*/ + Futf, /* UTf character set */ + Fbinary, /* binary */ + Feascii, /* ASCII with control chars */ + Fnull, /* NULL in file */ +} guess; + +void bump_utf_count(Rune); +int cistrncmp(char*, char*, int); +void filetype(int); +int getfontnum(uchar*, uchar**); +int isas(void); +int isc(void); +int isenglish(void); +int ishp(void); +int ishtml(void); +int isrfc822(void); +int ismbox(void); +int islimbo(void); +int ismung(void); +int isp9bit(void); +int isp9font(void); +int isrtf(void); +int ismsdos(void); +int iself(void); +int istring(void); +int iff(void); +int long0(void); +int istar(void); +int p9bitnum(uchar*); +int p9subfont(uchar*); +void print_utf(void); +void type(char*, int); +int utf_count(void); +void wordfreq(void); + +int (*call[])(void) = +{ + long0, /* recognizable by first 4 bytes */ + istring, /* recognizable by first string */ + iff, /* interchange file format (strings) */ + isrfc822, /* email file */ + ismbox, /* mail box */ + istar, /* recognizable by tar checksum */ + ishtml, /* html keywords */ +/* iscint, /* compiler/assembler intermediate */ + islimbo, /* limbo source */ + isc, /* c & alef compiler key words */ + isas, /* assembler key words */ + ismung, /* entropy compressed/encrypted */ + isp9font, /* plan 9 font */ + isp9bit, /* plan 9 image (as from /dev/window) */ + isenglish, /* char frequency English */ + isrtf, /* rich text format */ + ismsdos, /* msdos exe (virus file attachement) */ + iself, /* ELF (foreign) executable */ + 0 +}; + +int mime; + +#define OCTET "application/octet-stream\n" +#define PLAIN "text/plain\n" + +void +main(int argc, char *argv[]) +{ + int i, j, maxlen; + char *cp; + Rune r; + + ARGBEGIN{ + case 'm': + mime = 1; + break; + default: + fprint(2, "usage: file [-m] [file...]\n"); + exits("usage"); + }ARGEND; + + maxlen = 0; + if(mime == 0 || argc > 1){ + for(i = 0; i < argc; i++) { + for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) + ; + if(j > maxlen) + maxlen = j; + } + } + if (argc <= 0) { + if(!mime) + print ("stdin: "); + filetype(0); + } + else { + for(i = 0; i < argc; i++) + type(argv[i], maxlen); + } + exits(0); +} + +void +type(char *file, int nlen) +{ + Rune r; + int i; + char *p; + + if(nlen > 0){ + slash = 0; + for (i = 0, p = file; *p; i++) { + if (*p == '/') /* find rightmost slash */ + slash = p; + p += chartorune(&r, p); /* count runes */ + } + print("%s:%*s",file, nlen-i+1, ""); + } + fname = file; + if ((fd = open(file, OREAD)) < 0) { + print("cannot open\n"); + return; + } + filetype(fd); + close(fd); +} + +void +filetype(int fd) +{ + Rune r; + int i, f, n; + char *p, *eob; + + free(mbuf); + mbuf = dirfstat(fd); + if(mbuf == nil){ + print("cannot stat: %r\n"); + return; + } + if(mbuf->mode & DMDIR) { + print(mime ? "text/directory\n" : "directory\n"); + return; + } + if(mbuf->type != 'M' && mbuf->type != '|') { + print(mime ? OCTET : "special file #%c/%s\n", + mbuf->type, mbuf->name); + return; + } + nbuf = read(fd, buf, sizeof(buf)-1); + + if(nbuf < 0) { + print("cannot read\n"); + return; + } + if(nbuf == 0) { + print(mime ? PLAIN : "empty file\n"); + return; + } + buf[nbuf] = 0; + + /* + * build histogram table + */ + memset(cfreq, 0, sizeof(cfreq)); + for (i = 0; language[i].name; i++) + language[i].count = 0; + eob = (char *)buf+nbuf; + for(n = 0, p = (char *)buf; p < eob; n++) { + if (!fullrune(p, eob-p) && eob-p < UTFmax) + break; + p += chartorune(&r, p); + if (r == 0) + f = Cnull; + else if (r <= 0x7f) { + if (!isprint(r) && !isspace(r)) + f = Ceascii; /* ASCII control char */ + else f = r; + } else if (r == 0x080) { + bump_utf_count(r); + f = Cutf; + } else if (r < 0xA0) + f = Cbinary; /* Invalid Runes */ + else if (r <= 0xff) + f = Clatin; /* Latin 1 */ + else { + bump_utf_count(r); + f = Cutf; /* UTF extension */ + } + cfreq[f]++; /* ASCII chars peg directly */ + } + /* + * gross classify + */ + if (cfreq[Cbinary]) + guess = Fbinary; + else if (cfreq[Cutf]) + guess = Futf; + else if (cfreq[Clatin]) + guess = Flatin; + else if (cfreq[Ceascii]) + guess = Feascii; + else if (cfreq[Cnull] == n) { + print(mime ? OCTET : "first block all null bytes\n"); + return; + } + else guess = Fascii; + /* + * lookup dictionary words + */ + memset(wfreq, 0, sizeof(wfreq)); + if(guess == Fascii || guess == Flatin || guess == Futf) + wordfreq(); + /* + * call individual classify routines + */ + for(i=0; call[i]; i++) + if((*call[i])()) + return; + + /* + * if all else fails, + * print out gross classification + */ + if (nbuf < 100 && !mime) + print(mime ? PLAIN : "short "); + if (guess == Fascii) + print(mime ? PLAIN : "Ascii\n"); + else if (guess == Feascii) + print(mime ? PLAIN : "extended ascii\n"); + else if (guess == Flatin) + print(mime ? PLAIN : "latin ascii\n"); + else if (guess == Futf && utf_count() < 4) + print_utf(); + else print(mime ? OCTET : "binary\n"); +} + +void +bump_utf_count(Rune r) +{ + int low, high, mid; + + high = sizeof(language)/sizeof(language[0])-1; + for (low = 0; low < high;) { + mid = (low+high)/2; + if (r >=language[mid].low) { + if (r <= language[mid].high) { + language[mid].count++; + break; + } else low = mid+1; + } else high = mid; + } +} + +int +utf_count(void) +{ + int i, count; + + count = 0; + for (i = 0; language[i].name; i++) + if (language[i].count > 0) + switch (language[i].mode) { + case Normal: + case First: + count++; + break; + default: + break; + } + return count; +} + +int +chkascii(void) +{ + int i; + + for (i = 'a'; i < 'z'; i++) + if (cfreq[i]) + return 1; + for (i = 'A'; i < 'Z'; i++) + if (cfreq[i]) + return 1; + return 0; +} + +int +find_first(char *name) +{ + int i; + + for (i = 0; language[i].name != 0; i++) + if (language[i].mode == First + && strcmp(language[i].name, name) == 0) + return i; + return -1; +} + +void +print_utf(void) +{ + int i, printed, j; + + if(mime){ + print(PLAIN); + return; + } + if (chkascii()) { + printed = 1; + print("Ascii"); + } else + printed = 0; + for (i = 0; language[i].name; i++) + if (language[i].count) { + switch(language[i].mode) { + case Multi: + j = find_first(language[i].name); + if (j < 0) + break; + if (language[j].count > 0) + break; + /* Fall through */ + case Normal: + case First: + if (printed) + print(" & "); + else printed = 1; + print("%s", language[i].name); + break; + case Shared: + default: + break; + } + } + if(!printed) + print("UTF"); + print(" text\n"); +} + +void +wordfreq(void) +{ + int low, high, mid, r; + uchar *p, *p2, c; + + p = buf; + for(;;) { + while (p < buf+nbuf && !isalpha(*p)) + p++; + if (p >= buf+nbuf) + return; + p2 = p; + while(p < buf+nbuf && isalpha(*p)) + p++; + c = *p; + *p = 0; + high = sizeof(dict)/sizeof(dict[0]); + for(low = 0;low < high;) { + mid = (low+high)/2; + r = strcmp(dict[mid].word, (char*)p2); + if(r == 0) { + wfreq[dict[mid].class]++; + break; + } + if(r < 0) + low = mid+1; + else + high = mid; + } + *p++ = c; + } +} + +typedef struct Filemagic Filemagic; +struct Filemagic { + ulong x; + ulong mask; + char *desc; + char *mime; +}; + +Filemagic long0tab[] = { + 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, + 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, + 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, + 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, + 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, + 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", + 070707, 0xFFFF, "cpio archive\n", OCTET, + 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", + 0xfffa0000, 0xfffe0000, "mp3 audio", "audio/mpeg", +}; + +int +filemagic(Filemagic *tab, int ntab, ulong x) +{ + int i; + + for(i=0; i<ntab; i++) + if((x&tab[i].mask) == tab[i].x){ + print(mime ? tab[i].mime : tab[i].desc); + return 1; + } + return 0; +} + +int +long0(void) +{ +// Fhdr *f; + long x; + + seek(fd, 0, 0); /* reposition to start of file */ +/* + if(crackhdr(fd, &f)) { + print(mime ? OCTET : "%s\n", f.name); + return 1; + } +*/ + x = LENDIAN(buf); + if(filemagic(long0tab, nelem(long0tab), x)) + return 1; + return 0; +} + +/* from tar.c */ +enum { NAMSIZ = 100, TBLOCK = 512 }; + +union hblock +{ + char dummy[TBLOCK]; + struct header + { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + /* rest are defined by POSIX's ustar format; see p1003.2b */ + char magic[6]; /* "ustar" */ + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; /* if non-null, path = prefix "/" name */ + } dbuf; +}; + +int +checksum(union hblock *hp) +{ + int i; + char *cp; + struct header *hdr = &hp->dbuf; + + for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) + *cp = ' '; + i = 0; + for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) + i += *cp & 0xff; + return i; +} + +int +istar(void) +{ + int chksum; + char tblock[TBLOCK]; + union hblock *hp = (union hblock *)tblock; + struct header *hdr = &hp->dbuf; + + seek(fd, 0, 0); /* reposition to start of file */ + if (readn(fd, tblock, sizeof tblock) != sizeof tblock) + return 0; + chksum = strtol(hdr->chksum, 0, 8); + if (hdr->name[0] != '\0' && checksum(hp) == chksum) { + if (strcmp(hdr->magic, "ustar") == 0) + print(mime? "application/x-ustar\n": + "posix tar archive\n"); + else + print(mime? "application/x-tar\n": "tar archive\n"); + return 1; + } + return 0; +} + +/* + * initial words to classify file + */ +struct FILE_STRING +{ + char *key; + char *filetype; + int length; + char *mime; +} file_string[] = +{ + "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", + "!<arch>\n", "archive", 8, "application/octet-stream", + "070707", "cpio archive - ascii header", 6, "application/octet-stream", + "#!/bin/rc", "rc executable file", 9, "text/plain", + "#!/bin/sh", "sh executable file", 9, "text/plain", + "%!", "postscript", 2, "application/postscript", + "\004%!", "postscript", 3, "application/postscript", + "x T post", "troff output for post", 8, "application/troff", + "x T Latin1", "troff output for Latin1", 10, "application/troff", + "x T utf", "troff output for UTF", 7, "application/troff", + "x T 202", "troff output for 202", 7, "application/troff", + "x T aps", "troff output for aps", 7, "application/troff", + "GIF", "GIF image", 3, "image/gif", + "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", + "%PDF", "PDF", 4, "application/pdf", + "<html>\n", "HTML file", 7, "text/html", + "<HTML>\n", "HTML file", 7, "text/html", + "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", + "\111\111\052\000", "tiff", 4, "image/tiff", + "\115\115\000\052", "tiff", 4, "image/tiff", + "\377\330\377\340", "jpeg", 4, "image/jpeg", + "\377\330\377\341", "jpeg", 4, "image/jpeg", + "\377\330\377\333", "jpeg", 4, "image/jpeg", + "BM", "bmp", 2, "image/bmp", + "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", + "<MakerFile ", "FrameMaker file", 11, "application/framemaker", + "\033%-12345X", "HPJCL file", 9, "application/hpjcl", + "ID3", "mp3 audio with id3", 3, "audio/mpeg", + 0,0,0,0 +}; + +int +istring(void) +{ + int i; + struct FILE_STRING *p; + + for(p = file_string; p->key; p++) { + if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { + if(mime) + print("%s\n", p->mime); + else + print("%s\n", p->filetype); + return 1; + } + } + if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ + for(i = 5; i < nbuf; i++) + if(buf[i] == '\n') + break; + if(mime) + print(OCTET); + else + print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); + return 1; + } + return 0; +} + +int +iff(void) +{ + if (strncmp((char*)buf, "FORM", 4) == 0 && + strncmp((char*)buf+8, "AIFF", 4) == 0) { + print("%s\n", mime? "audio/x-aiff": "aiff audio"); + return 1; + } + return 0; +} + +char* html_string[] = +{ + "title", + "body", + "head", + "strong", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "ul", + "li", + "dl", + "br", + "em", + 0, +}; + +int +ishtml(void) +{ + uchar *p, *q; + int i, count; + + /* compare strings between '<' and '>' to html table */ + count = 0; + p = buf; + for(;;) { + while (p < buf+nbuf && *p != '<') + p++; + p++; + if (p >= buf+nbuf) + break; + if(*p == '/') + p++; + q = p; + while(p < buf+nbuf && *p != '>') + p++; + if (p >= buf+nbuf) + break; + for(i = 0; html_string[i]; i++) { + if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { + if(count++ > 4) { + print(mime ? "text/html\n" : "HTML file\n"); + return 1; + } + break; + } + } + p++; + } + return 0; +} + +char* rfc822_string[] = +{ + "from:", + "date:", + "to:", + "subject:", + "received:", + "reply to:", + "sender:", + 0, +}; + +int +isrfc822(void) +{ + + char *p, *q, *r; + int i, count; + + count = 0; + p = (char*)buf; + for(;;) { + q = strchr(p, '\n'); + if(q == nil) + break; + *q = 0; + if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ + count++; + *q = '\n'; + p = q+1; + continue; + } + *q = '\n'; + if(*p != '\t' && *p != ' '){ + r = strchr(p, ':'); + if(r == 0 || r > q) + break; + for(i = 0; rfc822_string[i]; i++) { + if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ + count++; + break; + } + } + } + p = q+1; + } + if(count >= 3){ + print(mime ? "message/rfc822\n" : "email file\n"); + return 1; + } + return 0; +} + +int +ismbox(void) +{ + char *p, *q; + + p = (char*)buf; + q = strchr(p, '\n'); + if(q == nil) + return 0; + *q = 0; + if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ + print(mime ? "text/plain\n" : "mail box\n"); + return 1; + } + *q = '\n'; + return 0; +} + +int +isc(void) +{ + int n; + + n = wfreq[I1]; + /* + * includes + */ + if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) + goto yes; + if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) + goto yes; + /* + * declarations + */ + if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) + goto yes; + /* + * assignments + */ + if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) + goto yes; + return 0; + +yes: + if(mime){ + print(PLAIN); + return 1; + } + if(wfreq[Alword] > 0) + print("alef program\n"); + else + print("c program\n"); + return 1; +} + +int +islimbo(void) +{ + + /* + * includes + */ + if(wfreq[Lword] < 4) + return 0; + print(mime ? PLAIN : "limbo program\n"); + return 1; +} + +int +isas(void) +{ + + /* + * includes + */ + if(wfreq[Aword] < 2) + return 0; + print(mime ? PLAIN : "as program\n"); + return 1; +} + +/* + * low entropy means encrypted + */ +int +ismung(void) +{ + int i, bucket[8]; + float cs; + + if(nbuf < 64) + return 0; + memset(bucket, 0, sizeof(bucket)); + for(i=0; i<64; i++) + bucket[(buf[i]>>5)&07] += 1; + + cs = 0.; + for(i=0; i<8; i++) + cs += (bucket[i]-8)*(bucket[i]-8); + cs /= 8.; + if(cs <= 24.322) { + if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) + print(mime ? OCTET : "compressed\n"); + else + print(mime ? OCTET : "encrypted\n"); + return 1; + } + return 0; +} + +/* + * english by punctuation and frequencies + */ +int +isenglish(void) +{ + int vow, comm, rare, badpun, punct; + char *p; + + if(guess != Fascii && guess != Feascii) + return 0; + badpun = 0; + punct = 0; + for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) + switch(*p) { + case '.': + case ',': + case ')': + case '%': + case ';': + case ':': + case '?': + punct++; + if(p[1] != ' ' && p[1] != '\n') + badpun++; + } + if(badpun*5 > punct) + return 0; + if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ + return 0; + if(2*cfreq[';'] > cfreq['e']) + return 0; + + vow = 0; + for(p="AEIOU"; *p; p++) { + vow += cfreq[*p]; + vow += cfreq[tolower(*p)]; + } + comm = 0; + for(p="ETAION"; *p; p++) { + comm += cfreq[*p]; + comm += cfreq[tolower(*p)]; + } + rare = 0; + for(p="VJKQXZ"; *p; p++) { + rare += cfreq[*p]; + rare += cfreq[tolower(*p)]; + } + if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { + print(mime ? PLAIN : "English text\n"); + return 1; + } + return 0; +} + +/* + * pick up a number with + * syntax _*[0-9]+_ + */ +#define P9BITLEN 12 +int +p9bitnum(uchar *bp) +{ + int n, c, len; + + len = P9BITLEN; + while(*bp == ' ') { + bp++; + len--; + if(len <= 0) + return -1; + } + n = 0; + while(len > 1) { + c = *bp++; + if(!isdigit(c)) + return -1; + n = n*10 + c-'0'; + len--; + } + if(*bp != ' ') + return -1; + return n; +} + +int +depthof(char *s, int *newp) +{ + char *es; + int d; + + *newp = 0; + es = s+12; + while(s<es && *s==' ') + s++; + if(s == es) + return -1; + if('0'<=*s && *s<='9') + return 1<<atoi(s); + + *newp = 1; + d = 0; + while(s<es && *s!=' '){ + s++; /* skip letter */ + d += strtoul(s, &s, 10); + } + + switch(d){ + case 32: + case 24: + case 16: + case 8: + return d; + } + return -1; +} + +int +isp9bit(void) +{ + int dep, lox, loy, hix, hiy, px, new; + ulong t; + long len; + char *newlabel; + + newlabel = "old "; + + dep = depthof((char*)buf + 0*P9BITLEN, &new); + if(new) + newlabel = ""; + lox = p9bitnum(buf + 1*P9BITLEN); + loy = p9bitnum(buf + 2*P9BITLEN); + hix = p9bitnum(buf + 3*P9BITLEN); + hiy = p9bitnum(buf + 4*P9BITLEN); + if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) + return 0; + + if(dep < 8){ + px = 8/dep; /* pixels per byte */ + /* set l to number of bytes of data per scan line */ + if(lox >= 0) + len = (hix+px-1)/px - lox/px; + else{ /* make positive before divide */ + t = (-lox)+px-1; + t = (t/px)*px; + len = (t+hix+px-1)/px; + } + }else + len = (hix-lox)*dep/8; + len *= (hiy-loy); /* col length */ + len += 5*P9BITLEN; /* size of initial ascii */ + + /* + * for image file, length is non-zero and must match calculation above + * for /dev/window and /dev/screen the length is always zero + * for subfont, the subfont header should follow immediately. + */ + if (len != 0 && mbuf->length == 0) { + print("%splan 9 image\n", newlabel); + return 1; + } + if (mbuf->length == len) { + print("%splan 9 image\n", newlabel); + return 1; + } + /* Ghostscript sometimes produces a little extra on the end */ + if (mbuf->length < len+P9BITLEN) { + print("%splan 9 image\n", newlabel); + return 1; + } + if (p9subfont(buf+len)) { + print("%ssubfont file\n", newlabel); + return 1; + } + return 0; +} + +int +p9subfont(uchar *p) +{ + int n, h, a; + + /* if image too big, assume it's a subfont */ + if (p+3*P9BITLEN > buf+sizeof(buf)) + return 1; + + n = p9bitnum(p + 0*P9BITLEN); /* char count */ + if (n < 0) + return 0; + h = p9bitnum(p + 1*P9BITLEN); /* height */ + if (h < 0) + return 0; + a = p9bitnum(p + 2*P9BITLEN); /* ascent */ + if (a < 0) + return 0; + return 1; +} + +#define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + +int +isp9font(void) +{ + uchar *cp, *p; + int i, n; + char pathname[1024]; + + cp = buf; + if (!getfontnum(cp, &cp)) /* height */ + return 0; + if (!getfontnum(cp, &cp)) /* ascent */ + return 0; + for (i = 0; 1; i++) { + if (!getfontnum(cp, &cp)) /* min */ + break; + if (!getfontnum(cp, &cp)) /* max */ + return 0; + while (WHITESPACE(*cp)) + cp++; + for (p = cp; *cp && !WHITESPACE(*cp); cp++) + ; + /* construct a path name, if needed */ + n = 0; + if (*p != '/' && slash) { + n = slash-fname+1; + if (n < sizeof(pathname)) + memcpy(pathname, fname, n); + else n = 0; + } + if (n+cp-p < sizeof(pathname)) { + memcpy(pathname+n, p, cp-p); + n += cp-p; + pathname[n] = 0; + if (access(pathname, AEXIST) < 0) + return 0; + } + } + if (i) { + print(mime ? "text/plain\n" : "font file\n"); + return 1; + } + return 0; +} + +int +getfontnum(uchar *cp, uchar **rp) +{ + while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ + cp++; + if (*cp < '0' || *cp > '9') + return 0; + strtoul((char *)cp, (char **)rp, 0); + if (!WHITESPACE(**rp)) + return 0; + return 1; +} + +int +isrtf(void) +{ + if(strstr((char *)buf, "\\rtf1")){ + print(mime ? "application/rtf\n" : "rich text format\n"); + return 1; + } + return 0; +} + +int +ismsdos(void) +{ + if (buf[0] == 0x4d && buf[1] == 0x5a){ + print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); + return 1; + } + return 0; +} + +int +iself(void) +{ + char *cpu[] = { /* NB: incomplete and arbitary list */ + [1] "WE32100", + [2] "SPARC", + [3] "i386", + [4] "M68000", + [5] "M88000", + [6] "i486", + [7] "i860", + [8] "R3000", + [9] "S370", + [10] "R4000", + [15] "HP-PA", + [18] "sparc v8+", + [19] "i960", + [20] "PPC-32", + [21] "PPC-64", + [40] "ARM", + [41] "Alpha", + [43] "sparc v9", + [50] "IA-46", + [62] "AMD64", + [75] "VAX", + }; + + + if (memcmp(buf, "\x7fELF", 4) == 0){ + if (!mime){ + int n = (buf[19] << 8) | buf[18]; + char *p = "unknown"; + + if (n > 0 && n < nelem(cpu) && cpu[n]) + p = cpu[n]; + else { + /* try the other byte order */ + n = (buf[18] << 8) | buf[19]; + if (n > 0 && n < nelem(cpu) && cpu[n]) + p = cpu[n]; + } + print("%s ELF executable\n", p); + } + else + print("application/x-elf-executable"); + return 1; + } + + return 0; +} diff --git a/src/cmd/postscript/text2post/mkfile b/src/cmd/postscript/text2post/mkfile new file mode 100644 index 00000000..3fb60ac6 --- /dev/null +++ b/src/cmd/postscript/text2post/mkfile @@ -0,0 +1,23 @@ +<$PLAN9/src/mkhdr + +<../config + +COMMONDIR=../common + +TARG=text2post + +OFILES=text2post.$O\ + +HFILES=$COMMONDIR/comments.h\ + $COMMONDIR/path.h\ + +BIN=$POSTBIN +<$PLAN9/src/mkone + +CFLAGS=-c -D'PROGRAMVERSION="0.1"' -D'DOROUND=1' -I$COMMONDIR + +install:V: $POSTLIB/pjw.char.ps + +$POSTLIB/pjw.char.ps: pjw.char.ps + cp $prereq $target + diff --git a/src/cmd/postscript/text2post/pjw.char.ps b/src/cmd/postscript/text2post/pjw.char.ps new file mode 100644 index 00000000..55308618 --- /dev/null +++ b/src/cmd/postscript/text2post/pjw.char.ps @@ -0,0 +1,142 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%BoundingBox: 1 -1 199 258 +%%Creator: MetaPost +%%CreationDate: 1994.06.28:1046 +/pjw1 { +37 211 moveto +37 206 lineto +41 206 lineto +43 212 lineto +44 212 46 212 46 210 curveto +41 198 35 186 35 174 curveto +50 174 66 175 81 173 curveto +81 171 lineto +61 171 lineto +61 170 lineto +86 170 lineto +88 168 89 166 90 164 curveto +101 164 111 169 122 172 curveto +139 172 lineto +125 194 115 219 93 233 curveto +87 234 80 236 74 235 curveto +64 229 54 224 46 217 curveto +45 217 44 217 44 218 curveto +61 242 lineto +67 248 72 258 82 258 curveto +94 257 107 256 119 254 curveto +134 247 147 239 161 231 curveto +180 231 lineto +180 226 174 228 171 226 curveto +178 220 185 214 192 207 curveto +189 207 187 207 187 205 curveto +190 205 193 204 196 203 curveto +198 193 202 181 193 181 curveto +193 177 198 175 198 171 curveto +196 171 194 171 194 169 curveto +195 165 196 161 198 157 curveto +194 147 193 135 184 130 curveto +180 130 lineto +169 107 lineto +171 103 176 104 180 104 curveto +180 93 176 81 166 76 curveto +162 76 157 77 155 80 curveto +153 80 152 80 151 80 curveto +151 71 lineto +155 71 lineto +155 68 153 65 152 63 curveto +145 62 lineto +141 47 144 31 144 15 curveto +141 0 lineto +124 0 107 -1 90 1 curveto +72 36 lineto +86 48 105 49 122 54 curveto +122 54 lineto +110 61 97 65 84 69 curveto +81 71 78 73 78 76 curveto +86 77 93 77 101 77 curveto +106 79 113 79 113 84 curveto +95 86 77 87 59 89 curveto +59 92 61 95 64 95 curveto +69 88 80 92 89 92 curveto +95 92 104 90 104 96 curveto +93 97 lineto +87 103 82 109 77 115 curveto +89 116 101 117 113 118 curveto +99 154 lineto +90 154 87 144 82 138 curveto +77 132 73 124 66 120 curveto +63 120 59 119 59 122 curveto +62 123 66 123 66 126 curveto +58 126 50 127 42 128 curveto +34 137 lineto +34 140 34 144 31 144 curveto +30 135 31 125 31 116 curveto +25 116 22 122 19 127 curveto +16 131 15 136 12 139 curveto +5 139 15 127 9 127 curveto +3 133 3 142 1 150 curveto +1 158 6 166 9 173 curveto +18 186 25 199 35 211 curveto +closepath +} bind def + +/pjw2 { +27 112 moveto +40 75 lineto +40 74 38 73 37 73 curveto +25 87 lineto +17 112 lineto +closepath +43 154 moveto +43 167 lineto +41 167 38 167 39 169 curveto +57 171 lineto +74 167 lineto +74 166 73 165 72 165 curveto +64 164 56 162 49 158 curveto +48 154 45 149 49 149 curveto +51 151 53 152 55 154 curveto +87 153 lineto +87 144 80 136 74 129 curveto +64 128 53 126 43 129 curveto +37 135 lineto +37 138 36 141 36 145 curveto +40 145 lineto +41 148 42 151 43 154 curveto closepath +108 145 moveto +119 145 130 143 141 140 curveto +146 134 155 129 149 126 curveto +138 124 126 123 116 129 curveto +113 134 108 139 108 145 curveto +closepath +114 96 moveto +116 103 118 110 121 117 curveto +128 117 134 112 139 107 curveto +139 101 137 96 132 93 curveto +closepath +134 162 moveto +115 162 lineto +115 162 115 163 115 164 curveto +134 164 lineto +closepath +117 73 moveto +115 78 121 81 125 85 curveto +129 85 lineto +130 83 131 81 131 79 curveto +128 74 lineto +124 74 121 73 117 73 curveto closepath +141 119 moveto +134 119 126 117 126 123 curveto +131 123 136 122 141 121 curveto +closepath +} bind def + +/pw { +%% pop +gsave +pointsize .0022 mul dup scale +currentpoint translate + pjw1 pjw2 eofill +grestore +6 0 rmoveto +} bind def diff --git a/src/cmd/postscript/text2post/text2post.c b/src/cmd/postscript/text2post/text2post.c new file mode 100644 index 00000000..cf6b109e --- /dev/null +++ b/src/cmd/postscript/text2post/text2post.c @@ -0,0 +1,564 @@ +#include <u.h> +#include <libc.h> +#include <ctype.h> +#include <bio.h> +#include <comments.h> +#include <path.h> + +#define UNKNOWNCHAR unsharp("#9/postscript/prologues/pjw.char.ps") + +char *optnames = "a:c:f:l:m:n:o:p:s:t:x:y:P:"; + +Biobuf *bstdin, *bstdout, *bstderr; +Biobuf *Bstdin, *Bstdout, *Bstderr; +int char_no = 0; /* character to be done on a line */ +int line_no = 0; /* line number on a page */ +int page_no = 0; /* page number in a document */ +int in_string; /* Boolean, to know whether or not we are inside a Postscript string */ +int spaces = 0; +int tabs = 0; +int pages_printed; +double aspectratio = 1.0; +int copies = 1; +double magnification = 1.0; +int landscape = 0; +int formsperpage = 1; +int linesperpage = 66; +int pointsize = 10; +double xoffset = .25; +double yoffset = .25; +char *passthrough = 0; +static int pplistmaxsize=0; + +unsigned char *pplist=0; /* bitmap list for storing pages to print */ + +struct strtab { + int size; + char *str; + int used; +}; + +struct strtab charcode[256] = { + {4, "\\000"}, {4, "\\001"}, {4, "\\002"}, {4, "\\003"}, + {4, "\\004"}, {4, "\\005"}, {4, "\\006"}, {4, "\\007"}, + {4, "\\010"}, {4, "\\011"}, {4, "\\012"}, {4, "\\013"}, + {4, "\\014"}, {4, "\\015"}, {4, "\\016"}, {4, "\\017"}, + {4, "\\020"}, {4, "\\021"}, {4, "\\022"}, {4, "\\023"}, + {4, "\\024"}, {4, "\\025"}, {4, "\\026"}, {4, "\\027"}, + {4, "\\030"}, {4, "\\031"}, {4, "\\032"}, {4, "\\033"}, + {4, "\\034"}, {4, "\\035"}, {4, "\\036"}, {4, "\\037"}, + {1, " "}, {1, "!"}, {1, "\""}, {1, "#"}, + {1, "$"}, {1, "%"}, {1, "&"}, {1, "'"}, + {2, "\\("}, {2, "\\)"}, {1, "*"}, {1, "+"}, + {1, ","}, {1, "-"}, {1, "."}, {1, "/"}, + {1, "0"}, {1, "1"}, {1, "2"}, {1, "3"}, + {1, "4"}, {1, "5"}, {1, "6"}, {1, "7"}, + {1, "8"}, {1, "9"}, {1, ":"}, {1, ";"}, + {1, "<"}, {1, "="}, {1, ">"}, {1, "?"}, + {1, "@"}, {1, "A"}, {1, "B"}, {1, "C"}, + {1, "D"}, {1, "E"}, {1, "F"}, {1, "G"}, + {1, "H"}, {1, "I"}, {1, "J"}, {1, "K"}, + {1, "L"}, {1, "M"}, {1, "N"}, {1, "O"}, + {1, "P"}, {1, "Q"}, {1, "R"}, {1, "S"}, + {1, "T"}, {1, "U"}, {1, "V"}, {1, "W"}, + {1, "X"}, {1, "Y"}, {1, "Z"}, {1, "["}, + {2, "\\\\"}, {1, "]"}, {1, "^"}, {1, "_"}, + {1, "`"}, {1, "a"}, {1, "b"}, {1, "c"}, + {1, "d"}, {1, "e"}, {1, "f"}, {1, "g"}, + {1, "h"}, {1, "i"}, {1, "j"}, {1, "k"}, + {1, "l"}, {1, "m"}, {1, "n"}, {1, "o"}, + {1, "p"}, {1, "q"}, {1, "r"}, {1, "s"}, + {1, "t"}, {1, "u"}, {1, "v"}, {1, "w"}, + {1, "x"}, {1, "y"}, {1, "z"}, {1, "{"}, + {1, "|"}, {1, "}"}, {1, "~"}, {4, "\\177"}, + {4, "\\200"}, {4, "\\201"}, {4, "\\202"}, {4, "\\203"}, + {4, "\\204"}, {4, "\\205"}, {4, "\\206"}, {4, "\\207"}, + {4, "\\210"}, {4, "\\211"}, {4, "\\212"}, {4, "\\213"}, + {4, "\\214"}, {4, "\\215"}, {4, "\\216"}, {4, "\\217"}, + {4, "\\220"}, {4, "\\221"}, {4, "\\222"}, {4, "\\223"}, + {4, "\\224"}, {4, "\\225"}, {4, "\\226"}, {4, "\\227"}, + {4, "\\230"}, {4, "\\231"}, {4, "\\232"}, {4, "\\233"}, + {4, "\\234"}, {4, "\\235"}, {4, "\\236"}, {4, "\\237"}, + {4, "\\240"}, {4, "\\241"}, {4, "\\242"}, {4, "\\243"}, + {4, "\\244"}, {4, "\\245"}, {4, "\\246"}, {4, "\\247"}, + {4, "\\250"}, {4, "\\251"}, {4, "\\252"}, {4, "\\253"}, + {4, "\\254"}, {4, "\\255"}, {4, "\\256"}, {4, "\\257"}, + {4, "\\260"}, {4, "\\261"}, {4, "\\262"}, {4, "\\263"}, + {4, "\\264"}, {4, "\\265"}, {4, "\\266"}, {4, "\\267"}, + {4, "\\270"}, {4, "\\271"}, {4, "\\272"}, {4, "\\273"}, + {4, "\\274"}, {4, "\\275"}, {4, "\\276"}, {4, "\\277"}, + {4, "\\300"}, {4, "\\301"}, {4, "\\302"}, {4, "\\303"}, + {4, "\\304"}, {4, "\\305"}, {4, "\\306"}, {4, "\\307"}, + {4, "\\310"}, {4, "\\311"}, {4, "\\312"}, {4, "\\313"}, + {4, "\\314"}, {4, "\\315"}, {4, "\\316"}, {4, "\\317"}, + {4, "\\320"}, {4, "\\321"}, {4, "\\322"}, {4, "\\323"}, + {4, "\\324"}, {4, "\\325"}, {4, "\\326"}, {4, "\\327"}, + {4, "\\330"}, {4, "\\331"}, {4, "\\332"}, {4, "\\333"}, + {4, "\\334"}, {4, "\\335"}, {4, "\\336"}, {4, "\\337"}, + {4, "\\340"}, {4, "\\341"}, {4, "\\342"}, {4, "\\343"}, + {4, "\\344"}, {4, "\\345"}, {4, "\\346"}, {4, "\\347"}, + {4, "\\350"}, {4, "\\351"}, {4, "\\352"}, {4, "\\353"}, + {4, "\\354"}, {4, "\\355"}, {4, "\\356"}, {4, "\\357"}, + {4, "\\360"}, {4, "\\361"}, {4, "\\362"}, {4, "\\363"}, + {4, "\\364"}, {4, "\\365"}, {4, "\\366"}, {4, "\\367"}, + {4, "\\370"}, {4, "\\371"}, {4, "\\372"}, {4, "\\373"}, + {4, "\\374"}, {4, "\\375"}, {4, "\\376"}, {4, "\\377"} +}; + +#define FONTABSIZE 0x27 + +struct strtab fontname[FONTABSIZE] = { + {19, "LucidaSansUnicode00", 0}, + {19, "LucidaSansUnicode01", 0}, + {19, "LucidaSansUnicode02", 0}, + {19, "LucidaSansUnicode03", 0}, + {19, "LucidaSansUnicode04", 0}, + {19, "LucidaSansUnicode05", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {0, "", 0}, + {19, "LucidaSansUnicode20", 0}, + {19, "LucidaSansUnicode21", 0}, + {19, "LucidaSansUnicode22", 0}, + {0, "", 0}, + {19, "LucidaSansUnicode24", 0}, + {19, "LucidaSansUnicode25", 0}, + {7, "Courier", 0} +}; + +/* This was taken from postprint */ + +int +cat(char *filename) { + Biobuf *bfile, *Bfile; + int n; + static char buf[Bsize]; + + bstdin = Bopen(filename, 0); + if (bstdin == 0) { + return(1); + } + Bstdin = bstdin; + if ((bfile = Bopen(filename, OREAD)) == 0) { + return(1); + } + Bfile = bfile; + while ((n=Bread(Bfile, buf, Bsize)) > 0) { + if (Bwrite(Bstdout, buf, n) != n) { + return(1); + } + } + if (n != 0) { + return(1); + } + return(0); +} + +void +prologues(void) { + char *ts; + int tabstop; + + Bprint(Bstdout, "%s", CONFORMING); + Bprint(Bstdout, "%s %s\n", VERSION, PROGRAMVERSION); + Bprint(Bstdout, "%s %s\n", DOCUMENTFONTS, ATEND); + Bprint(Bstdout, "%s %s\n", PAGES, ATEND); + Bprint(Bstdout, "%s", ENDCOMMENTS); + + if (cat(POSTPRINT)) { + Bprint(Bstderr, "can't read %s", POSTPRINT); + exits("prologue"); + } + + if (DOROUND) + cat(ROUNDPAGE); + + tabstop = 0; + ts = getenv("tabstop"); + if(ts != nil) + tabstop = strtol(ts, nil, 0); + if(tabstop == 0) + tabstop = 8; + Bprint(Bstdout, "/f {findfont pointsize scalefont setfont} bind def\n"); + Bprint(Bstdout, "/tabwidth /Courier f ("); + while(tabstop--) + Bputc(Bstdout, 'n'); + Bprint(Bstdout, ") stringwidth pop def\n"); + Bprint(Bstdout, "/tab {tabwidth 0 ne {currentpoint 3 1 roll exch tabwidth mul add tabwidth\n"); + Bprint(Bstdout, "\tdiv truncate tabwidth mul exch moveto} if} bind def\n"); + Bprint(Bstdout, "/spacewidth /%s f ( ) stringwidth pop def\n", fontname[0].str); + Bprint(Bstdout, "/sp {spacewidth mul 0 rmoveto} bind def\n"); + Bprint(Bstdout, "%s", ENDPROLOG); + Bprint(Bstdout, "%s", BEGINSETUP); + Bprint(Bstdout, "mark\n"); + + if (formsperpage > 1) { + Bprint(Bstdout, "%s %d\n", FORMSPERPAGE, formsperpage); + Bprint(Bstdout, "/formsperpage %d def\n", formsperpage); + } + if (aspectratio != 1) Bprint(Bstdout, "/aspectratio %g def\n", aspectratio); + if (copies != 1) Bprint(Bstdout, "/#copies %d store\n", copies); + if (landscape) Bprint(Bstdout, "/landscape true def\n"); + if (magnification != 1) Bprint(Bstdout, "/magnification %s def\n", magnification); + if (pointsize != 10) Bprint(Bstdout, "/pointsize %d def\n", pointsize); + if (xoffset != .25) Bprint(Bstdout, "/xoffset %g def\n", xoffset); + if (yoffset != .25) Bprint(Bstdout, "/yoffset %g def\n", yoffset); + cat(unsharp("#9/postscript/prologues/Latin1.enc")); + if (passthrough != 0) Bprint(Bstdout, "%s\n", passthrough); + Bprint(Bstdout, "setup\n"); + if (formsperpage > 1) { + cat(FORMFILE); + Bprint(Bstdout, "%d setupforms \n", formsperpage); + } + if (cat(UNKNOWNCHAR)) + Bprint(Bstderr, "cannot open %s\n", UNKNOWNCHAR); + Bprint(Bstdout, "%s", ENDSETUP); +} + +int +pageon(void) { + if (pplist == 0 && page_no != 0) return(1); /* no page list, print all pages */ + if (page_no/8 < pplistmaxsize && (pplist[page_no/8] & 1<<(page_no%8))) + return(1); + else + return(0); +} + +void +startpage(void) { + ++char_no; + ++line_no; + ++page_no; + if (pageon()) { + ++pages_printed; + Bprint(Bstdout, "%s %d %d\n", PAGE, page_no, pages_printed); + Bprint(Bstdout, "/saveobj save def\n"); + Bprint(Bstdout, "mark\n"); + Bprint(Bstdout, "%d pagesetup\n", pages_printed); + } +} + +void +endpage(void) { + line_no = 0; + char_no = 0; + if (pageon()) { + Bprint(Bstdout, "cleartomark\n"); + Bprint(Bstdout, "showpage\n"); + Bprint(Bstdout, "saveobj restore\n"); + Bprint(Bstdout, "%s %d %d\n", ENDPAGE, page_no, pages_printed); + } +} + +void +startstring(void) { + if (!in_string) { + if (pageon()) Bprint(Bstdout, "("); + in_string = 1; + } +} + +void +endstring(void) { + if (in_string) { + if (pageon()) Bprint(Bstdout, ") show "); + in_string = 0; + } +} + +void +prspace(void) { + if (spaces) { + endstring(); + if (pageon()) Bprint(Bstdout, "%d sp ", spaces); + spaces = 0; + } +} + +void +prtab(void) { + if (tabs) { + endstring(); + if (pageon()) Bprint(Bstdout, "%d tab ", tabs); + tabs = 0; + } +} + +void +txt2post(void) { + int lastfont = -1; + int lastchar = -1; + int thisfont, thischar; + long r; + + in_string = 0; + char_no = 0; + line_no = 0; + page_no = 0; + spaces = 0; + fontname[0].used++; + while ((r=Bgetrune(Bstdin)) >= 0) { + thischar = r & 0xff; + thisfont = (r>>8) & 0xff; + + if (line_no == 0 && char_no == 0) + startpage(); + + if (line_no == 1 && char_no == 1) { + if (pageon()) Bprint(Bstdout, " /%s f\n", fontname[thisfont].str); + lastfont = thisfont; + } + + switch (r) { + case ' ': + prtab(); + if (lastfont > 0) { + spaces++; + continue; + } + break; + case '\n': + case '\f': + startstring(); + if (pageon()) Bprint(Bstdout, ")l\n"); + char_no = 1; + in_string = 0; + spaces = 0; + tabs = 0; + if (++line_no > linesperpage || r == '\f') { + endpage(); + } + lastchar = -1; + continue; + case '\t': + prspace(); + tabs++; + char_no++; + lastchar = -1; + continue; + case '\b': + /* just toss out backspaces for now */ + if (lastchar != -1) { + endstring(); + if (pageon()) Bprint(Bstdout, "(%s) stringwidth pop neg 0 rmoveto ", charcode[lastchar].str); + } + char_no++; + lastchar = -1; + continue; + } + + /* do something if font is out of table range */ + if (thisfont>=FONTABSIZE || fontname[thisfont].size == 0) { + prspace(); + prtab(); + endstring(); + Bprint(Bstdout, "pw "); + char_no++; + lastchar = -1; + continue; + } + + if (thisfont != lastfont) { + endstring(); + if (pageon()) { + Bprint(Bstdout, "/%s f\n", fontname[thisfont].str); + } + fontname[thisfont].used++; + } + prspace(); + prtab(); + startstring(); + if (pageon()) Bprint(Bstdout, "%s", charcode[thischar].str); +/* if (pageon()) Bprint(Bstdout, "%2.2x", thischar); /* try hex strings*/ + char_no++; + lastchar = thischar; + lastfont = thisfont; + } + if (line_no != 0 || char_no != 0) { + if (char_no != 1) { + Bprint(Bstderr, "premature EOF: newline appended\n"); + startstring(); + if (pageon()) Bprint(Bstdout, ")l\n"); + } + endpage(); + } +} + +void +pagelist(char *list) { + char c; + int n, m; + int state, start, end; + + if (list == 0) return; + state = 1; + while ((c=*list) != '\0') { + n = 0; + while (isdigit(c)) { + n = n * 10 + c - '0'; + c = *++list; + } + switch (state) { + case 1: + start = n; + case 2: + if (n/8+1 > pplistmaxsize) { + pplistmaxsize = n/8+1; + if ((pplist = realloc(pplist, n/8+1)) == 0) { + Bprint(Bstderr, "cannot allocate memory for page list\n"); + exits("malloc"); + } + } + for (m=start; m<=n; m++) + pplist[m/8] |= 1<<(m%8); + break; + } + switch (c) { + case '-': + state = 2; + list++; + break; + case ',': + state = 1; + list++; + break; + case '\0': + break; + } + } +} + +void +finish(void) { + int i; + + Bprint(Bstdout, "%s", TRAILER); + Bprint(Bstdout, "done\n"); + Bprint(Bstdout, "%s", DOCUMENTFONTS); + + for (i=0; i<FONTABSIZE; i++) + if (fontname[i].used) + Bprint(Bstdout, " %s", fontname[i].str); + Bprint(Bstdout, "\n"); + + Bprint(Bstdout, "%s %d\n", PAGES, pages_printed); + +} + +main(int argc, char *argv[]) { + int i; + char *t; + Biobuf *input; + + if ((bstderr = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0) + exits("malloc"); + if (Binit(bstderr, 2, OWRITE) == Beof) + exits("Binit"); + Bstderr = bstderr; + + if ((bstdout = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0) + exits("malloc"); + if (Binit(bstdout, 1, OWRITE) == Beof) + exits("Binit"); + Bstdout = bstdout; + + ARGBEGIN{ + case 'a': /* aspect ratio */ + aspectratio = atof(ARGF()); + break; + case 'c': /* copies */ + copies = atoi(ARGF()); + break; + case 'f': /* primary font, for now */ + t = ARGF(); + fontname[0].str = malloc(strlen(t)+1); + strcpy(fontname[0].str, t); + break; + case 'l': /* lines per page */ + linesperpage = atoi(ARGF()); + break; + case 'm': /* magnification */ + magnification = atof(ARGF()); + break; + case 'n': /* forms per page */ + formsperpage = atoi(ARGF()); + break; + case 'o': /* output page list */ + pagelist(ARGF()); + break; + case 'p': /* landscape or portrait mode */ + if ( ARGF()[0] == 'l' ) + landscape = 1; + else + landscape = 0; + break; + case 's': /* point size */ + pointsize = atoi(ARGF()); + break; + case 'x': /* shift things horizontally */ + xoffset = atof(ARGF()); + break; + + case 'y': /* and vertically on the page */ + yoffset = atof(ARGF()); + break; + case 'P': /* PostScript pass through */ + t = ARGF(); + i = strlen(t) + 1; + passthrough = malloc(i); + if (passthrough == 0) { + Bprint(Bstderr, "cannot allocate memory for argument string\n"); + exits("malloc"); + } + strncpy(passthrough, t, i); + break; + default: /* don't know what to do for ch */ + Bprint(Bstderr, "unknown option %C\n", ARGC()); + break; + }ARGEND; + prologues(); + if (argc <= 0) { + if ((bstdin = (Biobuf *)malloc(sizeof(Biobuf))) < (Biobuf *)0) + exits("malloc"); + if (Binit(bstdin, 0, OREAD) == Beof) { + fprint(2, "cannot Binit stdin\n"); + exits("Binit"); + } + Bstdin = bstdin; + txt2post(); + } + for (i=0; i<argc; i++) { + bstdin = Bopen(argv[i], 0); + if (bstdin == 0) { + fprint(2, "cannot open file %s\n", argv[i]); + continue; + } + Bstdin = bstdin; + txt2post(); + } + finish(); + exits(""); +} |