diff options
Diffstat (limited to 'src/cmd/file.c')
-rw-r--r-- | src/cmd/file.c | 1252 |
1 files changed, 1252 insertions, 0 deletions
diff --git a/src/cmd/file.c b/src/cmd/file.c new file mode 100644 index 00000000..f09977b1 --- /dev/null +++ b/src/cmd/file.c @@ -0,0 +1,1252 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> +#include <mach.h> + +/* + * file - determine type of file + */ +#define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24)) + +uchar buf[6001]; +short cfreq[140]; +short wfreq[50]; +int nbuf; +Dir* mbuf; +int fd; +char *fname; +char *slash; + +enum +{ + Cword, + Fword, + Aword, + Alword, + Lword, + I1, + I2, + I3, + Clatin = 128, + Cbinary, + Cnull, + Ceascii, + Cutf, +}; +struct +{ + char* word; + int class; +} dict[] = +{ + "PATH", Lword, + "TEXT", Aword, + "adt", Alword, + "aggr", Alword, + "alef", Alword, + "array", Lword, + "block", Fword, + "chan", Alword, + "char", Cword, + "common", Fword, + "con", Lword, + "data", Fword, + "dimension", Fword, + "double", Cword, + "extern", Cword, + "bio", I2, + "float", Cword, + "fn", Lword, + "function", Fword, + "h", I3, + "implement", Lword, + "import", Lword, + "include", I1, + "int", Cword, + "integer", Fword, + "iota", Lword, + "libc", I2, + "long", Cword, + "module", Lword, + "real", Fword, + "ref", Lword, + "register", Cword, + "self", Lword, + "short", Cword, + "static", Cword, + "stdio", I2, + "struct", Cword, + "subroutine", Fword, + "u", I2, + "void", Cword, +}; + +/* codes for 'mode' field in language structure */ +enum { + Normal = 0, + First, /* first entry for language spanning several ranges */ + Multi, /* later entries " " " ... */ + Shared, /* codes used in several languages */ + }; + +struct +{ + int mode; /* see enum above */ + int count; + int low; + int high; + char *name; + +} language[] = +{ + Normal, 0, 0x0080, 0x0080, "Extended Latin", + Normal, 0, 0x0100, 0x01FF, "Extended Latin", + Normal, 0, 0x0370, 0x03FF, "Greek", + Normal, 0, 0x0400, 0x04FF, "Cyrillic", + Normal, 0, 0x0530, 0x058F, "Armenian", + Normal, 0, 0x0590, 0x05FF, "Hebrew", + Normal, 0, 0x0600, 0x06FF, "Arabic", + Normal, 0, 0x0900, 0x097F, "Devanagari", + Normal, 0, 0x0980, 0x09FF, "Bengali", + Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi", + Normal, 0, 0x0A80, 0x0AFF, "Gujarati", + Normal, 0, 0x0B00, 0x0B7F, "Oriya", + Normal, 0, 0x0B80, 0x0BFF, "Tamil", + Normal, 0, 0x0C00, 0x0C7F, "Telugu", + Normal, 0, 0x0C80, 0x0CFF, "Kannada", + Normal, 0, 0x0D00, 0x0D7F, "Malayalam", + Normal, 0, 0x0E00, 0x0E7F, "Thai", + Normal, 0, 0x0E80, 0x0EFF, "Lao", + Normal, 0, 0x1000, 0x105F, "Tibetan", + Normal, 0, 0x10A0, 0x10FF, "Georgian", + Normal, 0, 0x3040, 0x30FF, "Japanese", + Normal, 0, 0x3100, 0x312F, "Chinese", + First, 0, 0x3130, 0x318F, "Korean", + Multi, 0, 0x3400, 0x3D2F, "Korean", + Shared, 0, 0x4e00, 0x9fff, "CJK", + Normal, 0, 0, 0, 0, /* terminal entry */ +}; + + +enum +{ + Fascii, /* printable ascii */ + Flatin, /* latin 1*/ + Futf, /* UTf character set */ + Fbinary, /* binary */ + Feascii, /* ASCII with control chars */ + Fnull, /* NULL in file */ +} guess; + +void bump_utf_count(Rune); +int cistrncmp(char*, char*, int); +void filetype(int); +int getfontnum(uchar*, uchar**); +int isas(void); +int isc(void); +int isenglish(void); +int ishp(void); +int ishtml(void); +int isrfc822(void); +int ismbox(void); +int islimbo(void); +int ismung(void); +int isp9bit(void); +int isp9font(void); +int isrtf(void); +int ismsdos(void); +int iself(void); +int istring(void); +int iff(void); +int long0(void); +int istar(void); +int p9bitnum(uchar*); +int p9subfont(uchar*); +void print_utf(void); +void type(char*, int); +int utf_count(void); +void wordfreq(void); + +int (*call[])(void) = +{ + long0, /* recognizable by first 4 bytes */ + istring, /* recognizable by first string */ + iff, /* interchange file format (strings) */ + isrfc822, /* email file */ + ismbox, /* mail box */ + istar, /* recognizable by tar checksum */ + ishtml, /* html keywords */ +/* iscint, /* compiler/assembler intermediate */ + islimbo, /* limbo source */ + isc, /* c & alef compiler key words */ + isas, /* assembler key words */ + ismung, /* entropy compressed/encrypted */ + isp9font, /* plan 9 font */ + isp9bit, /* plan 9 image (as from /dev/window) */ + isenglish, /* char frequency English */ + isrtf, /* rich text format */ + ismsdos, /* msdos exe (virus file attachement) */ + iself, /* ELF (foreign) executable */ + 0 +}; + +int mime; + +#define OCTET "application/octet-stream\n" +#define PLAIN "text/plain\n" + +void +main(int argc, char *argv[]) +{ + int i, j, maxlen; + char *cp; + Rune r; + + ARGBEGIN{ + case 'm': + mime = 1; + break; + default: + fprint(2, "usage: file [-m] [file...]\n"); + exits("usage"); + }ARGEND; + + maxlen = 0; + if(mime == 0 || argc > 1){ + for(i = 0; i < argc; i++) { + for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp)) + ; + if(j > maxlen) + maxlen = j; + } + } + if (argc <= 0) { + if(!mime) + print ("stdin: "); + filetype(0); + } + else { + for(i = 0; i < argc; i++) + type(argv[i], maxlen); + } + exits(0); +} + +void +type(char *file, int nlen) +{ + Rune r; + int i; + char *p; + + if(nlen > 0){ + slash = 0; + for (i = 0, p = file; *p; i++) { + if (*p == '/') /* find rightmost slash */ + slash = p; + p += chartorune(&r, p); /* count runes */ + } + print("%s:%*s",file, nlen-i+1, ""); + } + fname = file; + if ((fd = open(file, OREAD)) < 0) { + print("cannot open\n"); + return; + } + filetype(fd); + close(fd); +} + +void +filetype(int fd) +{ + Rune r; + int i, f, n; + char *p, *eob; + + free(mbuf); + mbuf = dirfstat(fd); + if(mbuf == nil){ + print("cannot stat: %r\n"); + return; + } + if(mbuf->mode & DMDIR) { + print(mime ? "text/directory\n" : "directory\n"); + return; + } + if(mbuf->type != 'M' && mbuf->type != '|') { + print(mime ? OCTET : "special file #%c/%s\n", + mbuf->type, mbuf->name); + return; + } + nbuf = read(fd, buf, sizeof(buf)-1); + + if(nbuf < 0) { + print("cannot read\n"); + return; + } + if(nbuf == 0) { + print(mime ? PLAIN : "empty file\n"); + return; + } + buf[nbuf] = 0; + + /* + * build histogram table + */ + memset(cfreq, 0, sizeof(cfreq)); + for (i = 0; language[i].name; i++) + language[i].count = 0; + eob = (char *)buf+nbuf; + for(n = 0, p = (char *)buf; p < eob; n++) { + if (!fullrune(p, eob-p) && eob-p < UTFmax) + break; + p += chartorune(&r, p); + if (r == 0) + f = Cnull; + else if (r <= 0x7f) { + if (!isprint(r) && !isspace(r)) + f = Ceascii; /* ASCII control char */ + else f = r; + } else if (r == 0x080) { + bump_utf_count(r); + f = Cutf; + } else if (r < 0xA0) + f = Cbinary; /* Invalid Runes */ + else if (r <= 0xff) + f = Clatin; /* Latin 1 */ + else { + bump_utf_count(r); + f = Cutf; /* UTF extension */ + } + cfreq[f]++; /* ASCII chars peg directly */ + } + /* + * gross classify + */ + if (cfreq[Cbinary]) + guess = Fbinary; + else if (cfreq[Cutf]) + guess = Futf; + else if (cfreq[Clatin]) + guess = Flatin; + else if (cfreq[Ceascii]) + guess = Feascii; + else if (cfreq[Cnull] == n) { + print(mime ? OCTET : "first block all null bytes\n"); + return; + } + else guess = Fascii; + /* + * lookup dictionary words + */ + memset(wfreq, 0, sizeof(wfreq)); + if(guess == Fascii || guess == Flatin || guess == Futf) + wordfreq(); + /* + * call individual classify routines + */ + for(i=0; call[i]; i++) + if((*call[i])()) + return; + + /* + * if all else fails, + * print out gross classification + */ + if (nbuf < 100 && !mime) + print(mime ? PLAIN : "short "); + if (guess == Fascii) + print(mime ? PLAIN : "Ascii\n"); + else if (guess == Feascii) + print(mime ? PLAIN : "extended ascii\n"); + else if (guess == Flatin) + print(mime ? PLAIN : "latin ascii\n"); + else if (guess == Futf && utf_count() < 4) + print_utf(); + else print(mime ? OCTET : "binary\n"); +} + +void +bump_utf_count(Rune r) +{ + int low, high, mid; + + high = sizeof(language)/sizeof(language[0])-1; + for (low = 0; low < high;) { + mid = (low+high)/2; + if (r >=language[mid].low) { + if (r <= language[mid].high) { + language[mid].count++; + break; + } else low = mid+1; + } else high = mid; + } +} + +int +utf_count(void) +{ + int i, count; + + count = 0; + for (i = 0; language[i].name; i++) + if (language[i].count > 0) + switch (language[i].mode) { + case Normal: + case First: + count++; + break; + default: + break; + } + return count; +} + +int +chkascii(void) +{ + int i; + + for (i = 'a'; i < 'z'; i++) + if (cfreq[i]) + return 1; + for (i = 'A'; i < 'Z'; i++) + if (cfreq[i]) + return 1; + return 0; +} + +int +find_first(char *name) +{ + int i; + + for (i = 0; language[i].name != 0; i++) + if (language[i].mode == First + && strcmp(language[i].name, name) == 0) + return i; + return -1; +} + +void +print_utf(void) +{ + int i, printed, j; + + if(mime){ + print(PLAIN); + return; + } + if (chkascii()) { + printed = 1; + print("Ascii"); + } else + printed = 0; + for (i = 0; language[i].name; i++) + if (language[i].count) { + switch(language[i].mode) { + case Multi: + j = find_first(language[i].name); + if (j < 0) + break; + if (language[j].count > 0) + break; + /* Fall through */ + case Normal: + case First: + if (printed) + print(" & "); + else printed = 1; + print("%s", language[i].name); + break; + case Shared: + default: + break; + } + } + if(!printed) + print("UTF"); + print(" text\n"); +} + +void +wordfreq(void) +{ + int low, high, mid, r; + uchar *p, *p2, c; + + p = buf; + for(;;) { + while (p < buf+nbuf && !isalpha(*p)) + p++; + if (p >= buf+nbuf) + return; + p2 = p; + while(p < buf+nbuf && isalpha(*p)) + p++; + c = *p; + *p = 0; + high = sizeof(dict)/sizeof(dict[0]); + for(low = 0;low < high;) { + mid = (low+high)/2; + r = strcmp(dict[mid].word, (char*)p2); + if(r == 0) { + wfreq[dict[mid].class]++; + break; + } + if(r < 0) + low = mid+1; + else + high = mid; + } + *p++ = c; + } +} + +typedef struct Filemagic Filemagic; +struct Filemagic { + ulong x; + ulong mask; + char *desc; + char *mime; +}; + +Filemagic long0tab[] = { + 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", OCTET, + 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", OCTET, + 0x32636170, 0xFFFF00FF, "pac4 audio file\n", OCTET, + 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", OCTET, + 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n", OCTET, + 0x04034B50, 0xFFFFFFFF, "zip archive\n", "application/zip", + 070707, 0xFFFF, "cpio archive\n", OCTET, + 0x2F7, 0xFFFF, "tex dvi\n", "application/dvi", + 0xfffa0000, 0xfffe0000, "mp3 audio", "audio/mpeg", +}; + +int +filemagic(Filemagic *tab, int ntab, ulong x) +{ + int i; + + for(i=0; i<ntab; i++) + if((x&tab[i].mask) == tab[i].x){ + print(mime ? tab[i].mime : tab[i].desc); + return 1; + } + return 0; +} + +int +long0(void) +{ +// Fhdr *f; + long x; + + seek(fd, 0, 0); /* reposition to start of file */ +/* + if(crackhdr(fd, &f)) { + print(mime ? OCTET : "%s\n", f.name); + return 1; + } +*/ + x = LENDIAN(buf); + if(filemagic(long0tab, nelem(long0tab), x)) + return 1; + return 0; +} + +/* from tar.c */ +enum { NAMSIZ = 100, TBLOCK = 512 }; + +union hblock +{ + char dummy[TBLOCK]; + struct header + { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + /* rest are defined by POSIX's ustar format; see p1003.2b */ + char magic[6]; /* "ustar" */ + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; /* if non-null, path = prefix "/" name */ + } dbuf; +}; + +int +checksum(union hblock *hp) +{ + int i; + char *cp; + struct header *hdr = &hp->dbuf; + + for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++) + *cp = ' '; + i = 0; + for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) + i += *cp & 0xff; + return i; +} + +int +istar(void) +{ + int chksum; + char tblock[TBLOCK]; + union hblock *hp = (union hblock *)tblock; + struct header *hdr = &hp->dbuf; + + seek(fd, 0, 0); /* reposition to start of file */ + if (readn(fd, tblock, sizeof tblock) != sizeof tblock) + return 0; + chksum = strtol(hdr->chksum, 0, 8); + if (hdr->name[0] != '\0' && checksum(hp) == chksum) { + if (strcmp(hdr->magic, "ustar") == 0) + print(mime? "application/x-ustar\n": + "posix tar archive\n"); + else + print(mime? "application/x-tar\n": "tar archive\n"); + return 1; + } + return 0; +} + +/* + * initial words to classify file + */ +struct FILE_STRING +{ + char *key; + char *filetype; + int length; + char *mime; +} file_string[] = +{ + "!<arch>\n__.SYMDEF", "archive random library", 16, "application/octet-stream", + "!<arch>\n", "archive", 8, "application/octet-stream", + "070707", "cpio archive - ascii header", 6, "application/octet-stream", + "#!/bin/rc", "rc executable file", 9, "text/plain", + "#!/bin/sh", "sh executable file", 9, "text/plain", + "%!", "postscript", 2, "application/postscript", + "\004%!", "postscript", 3, "application/postscript", + "x T post", "troff output for post", 8, "application/troff", + "x T Latin1", "troff output for Latin1", 10, "application/troff", + "x T utf", "troff output for UTF", 7, "application/troff", + "x T 202", "troff output for 202", 7, "application/troff", + "x T aps", "troff output for aps", 7, "application/troff", + "GIF", "GIF image", 3, "image/gif", + "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript", + "%PDF", "PDF", 4, "application/pdf", + "<html>\n", "HTML file", 7, "text/html", + "<HTML>\n", "HTML file", 7, "text/html", + "compressed\n", "Compressed image or subfont", 11, "application/octet-stream", + "\111\111\052\000", "tiff", 4, "image/tiff", + "\115\115\000\052", "tiff", 4, "image/tiff", + "\377\330\377\340", "jpeg", 4, "image/jpeg", + "\377\330\377\341", "jpeg", 4, "image/jpeg", + "\377\330\377\333", "jpeg", 4, "image/jpeg", + "BM", "bmp", 2, "image/bmp", + "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/octet-stream", + "<MakerFile ", "FrameMaker file", 11, "application/framemaker", + "\033%-12345X", "HPJCL file", 9, "application/hpjcl", + "ID3", "mp3 audio with id3", 3, "audio/mpeg", + 0,0,0,0 +}; + +int +istring(void) +{ + int i; + struct FILE_STRING *p; + + for(p = file_string; p->key; p++) { + if(nbuf >= p->length && !memcmp(buf, p->key, p->length)) { + if(mime) + print("%s\n", p->mime); + else + print("%s\n", p->filetype); + return 1; + } + } + if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ + for(i = 5; i < nbuf; i++) + if(buf[i] == '\n') + break; + if(mime) + print(OCTET); + else + print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5); + return 1; + } + return 0; +} + +int +iff(void) +{ + if (strncmp((char*)buf, "FORM", 4) == 0 && + strncmp((char*)buf+8, "AIFF", 4) == 0) { + print("%s\n", mime? "audio/x-aiff": "aiff audio"); + return 1; + } + return 0; +} + +char* html_string[] = +{ + "title", + "body", + "head", + "strong", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "ul", + "li", + "dl", + "br", + "em", + 0, +}; + +int +ishtml(void) +{ + uchar *p, *q; + int i, count; + + /* compare strings between '<' and '>' to html table */ + count = 0; + p = buf; + for(;;) { + while (p < buf+nbuf && *p != '<') + p++; + p++; + if (p >= buf+nbuf) + break; + if(*p == '/') + p++; + q = p; + while(p < buf+nbuf && *p != '>') + p++; + if (p >= buf+nbuf) + break; + for(i = 0; html_string[i]; i++) { + if(cistrncmp(html_string[i], (char*)q, p-q) == 0) { + if(count++ > 4) { + print(mime ? "text/html\n" : "HTML file\n"); + return 1; + } + break; + } + } + p++; + } + return 0; +} + +char* rfc822_string[] = +{ + "from:", + "date:", + "to:", + "subject:", + "received:", + "reply to:", + "sender:", + 0, +}; + +int +isrfc822(void) +{ + + char *p, *q, *r; + int i, count; + + count = 0; + p = (char*)buf; + for(;;) { + q = strchr(p, '\n'); + if(q == nil) + break; + *q = 0; + if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){ + count++; + *q = '\n'; + p = q+1; + continue; + } + *q = '\n'; + if(*p != '\t' && *p != ' '){ + r = strchr(p, ':'); + if(r == 0 || r > q) + break; + for(i = 0; rfc822_string[i]; i++) { + if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){ + count++; + break; + } + } + } + p = q+1; + } + if(count >= 3){ + print(mime ? "message/rfc822\n" : "email file\n"); + return 1; + } + return 0; +} + +int +ismbox(void) +{ + char *p, *q; + + p = (char*)buf; + q = strchr(p, '\n'); + if(q == nil) + return 0; + *q = 0; + if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){ + print(mime ? "text/plain\n" : "mail box\n"); + return 1; + } + *q = '\n'; + return 0; +} + +int +isc(void) +{ + int n; + + n = wfreq[I1]; + /* + * includes + */ + if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) + goto yes; + if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n) + goto yes; + /* + * declarations + */ + if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) + goto yes; + /* + * assignments + */ + if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) + goto yes; + return 0; + +yes: + if(mime){ + print(PLAIN); + return 1; + } + if(wfreq[Alword] > 0) + print("alef program\n"); + else + print("c program\n"); + return 1; +} + +int +islimbo(void) +{ + + /* + * includes + */ + if(wfreq[Lword] < 4) + return 0; + print(mime ? PLAIN : "limbo program\n"); + return 1; +} + +int +isas(void) +{ + + /* + * includes + */ + if(wfreq[Aword] < 2) + return 0; + print(mime ? PLAIN : "as program\n"); + return 1; +} + +/* + * low entropy means encrypted + */ +int +ismung(void) +{ + int i, bucket[8]; + float cs; + + if(nbuf < 64) + return 0; + memset(bucket, 0, sizeof(bucket)); + for(i=0; i<64; i++) + bucket[(buf[i]>>5)&07] += 1; + + cs = 0.; + for(i=0; i<8; i++) + cs += (bucket[i]-8)*(bucket[i]-8); + cs /= 8.; + if(cs <= 24.322) { + if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) + print(mime ? OCTET : "compressed\n"); + else + print(mime ? OCTET : "encrypted\n"); + return 1; + } + return 0; +} + +/* + * english by punctuation and frequencies + */ +int +isenglish(void) +{ + int vow, comm, rare, badpun, punct; + char *p; + + if(guess != Fascii && guess != Feascii) + return 0; + badpun = 0; + punct = 0; + for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) + switch(*p) { + case '.': + case ',': + case ')': + case '%': + case ';': + case ':': + case '?': + punct++; + if(p[1] != ' ' && p[1] != '\n') + badpun++; + } + if(badpun*5 > punct) + return 0; + if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */ + return 0; + if(2*cfreq[';'] > cfreq['e']) + return 0; + + vow = 0; + for(p="AEIOU"; *p; p++) { + vow += cfreq[*p]; + vow += cfreq[tolower(*p)]; + } + comm = 0; + for(p="ETAION"; *p; p++) { + comm += cfreq[*p]; + comm += cfreq[tolower(*p)]; + } + rare = 0; + for(p="VJKQXZ"; *p; p++) { + rare += cfreq[*p]; + rare += cfreq[tolower(*p)]; + } + if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { + print(mime ? PLAIN : "English text\n"); + return 1; + } + return 0; +} + +/* + * pick up a number with + * syntax _*[0-9]+_ + */ +#define P9BITLEN 12 +int +p9bitnum(uchar *bp) +{ + int n, c, len; + + len = P9BITLEN; + while(*bp == ' ') { + bp++; + len--; + if(len <= 0) + return -1; + } + n = 0; + while(len > 1) { + c = *bp++; + if(!isdigit(c)) + return -1; + n = n*10 + c-'0'; + len--; + } + if(*bp != ' ') + return -1; + return n; +} + +int +depthof(char *s, int *newp) +{ + char *es; + int d; + + *newp = 0; + es = s+12; + while(s<es && *s==' ') + s++; + if(s == es) + return -1; + if('0'<=*s && *s<='9') + return 1<<atoi(s); + + *newp = 1; + d = 0; + while(s<es && *s!=' '){ + s++; /* skip letter */ + d += strtoul(s, &s, 10); + } + + switch(d){ + case 32: + case 24: + case 16: + case 8: + return d; + } + return -1; +} + +int +isp9bit(void) +{ + int dep, lox, loy, hix, hiy, px, new; + ulong t; + long len; + char *newlabel; + + newlabel = "old "; + + dep = depthof((char*)buf + 0*P9BITLEN, &new); + if(new) + newlabel = ""; + lox = p9bitnum(buf + 1*P9BITLEN); + loy = p9bitnum(buf + 2*P9BITLEN); + hix = p9bitnum(buf + 3*P9BITLEN); + hiy = p9bitnum(buf + 4*P9BITLEN); + if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) + return 0; + + if(dep < 8){ + px = 8/dep; /* pixels per byte */ + /* set l to number of bytes of data per scan line */ + if(lox >= 0) + len = (hix+px-1)/px - lox/px; + else{ /* make positive before divide */ + t = (-lox)+px-1; + t = (t/px)*px; + len = (t+hix+px-1)/px; + } + }else + len = (hix-lox)*dep/8; + len *= (hiy-loy); /* col length */ + len += 5*P9BITLEN; /* size of initial ascii */ + + /* + * for image file, length is non-zero and must match calculation above + * for /dev/window and /dev/screen the length is always zero + * for subfont, the subfont header should follow immediately. + */ + if (len != 0 && mbuf->length == 0) { + print("%splan 9 image\n", newlabel); + return 1; + } + if (mbuf->length == len) { + print("%splan 9 image\n", newlabel); + return 1; + } + /* Ghostscript sometimes produces a little extra on the end */ + if (mbuf->length < len+P9BITLEN) { + print("%splan 9 image\n", newlabel); + return 1; + } + if (p9subfont(buf+len)) { + print("%ssubfont file\n", newlabel); + return 1; + } + return 0; +} + +int +p9subfont(uchar *p) +{ + int n, h, a; + + /* if image too big, assume it's a subfont */ + if (p+3*P9BITLEN > buf+sizeof(buf)) + return 1; + + n = p9bitnum(p + 0*P9BITLEN); /* char count */ + if (n < 0) + return 0; + h = p9bitnum(p + 1*P9BITLEN); /* height */ + if (h < 0) + return 0; + a = p9bitnum(p + 2*P9BITLEN); /* ascent */ + if (a < 0) + return 0; + return 1; +} + +#define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + +int +isp9font(void) +{ + uchar *cp, *p; + int i, n; + char pathname[1024]; + + cp = buf; + if (!getfontnum(cp, &cp)) /* height */ + return 0; + if (!getfontnum(cp, &cp)) /* ascent */ + return 0; + for (i = 0; 1; i++) { + if (!getfontnum(cp, &cp)) /* min */ + break; + if (!getfontnum(cp, &cp)) /* max */ + return 0; + while (WHITESPACE(*cp)) + cp++; + for (p = cp; *cp && !WHITESPACE(*cp); cp++) + ; + /* construct a path name, if needed */ + n = 0; + if (*p != '/' && slash) { + n = slash-fname+1; + if (n < sizeof(pathname)) + memcpy(pathname, fname, n); + else n = 0; + } + if (n+cp-p < sizeof(pathname)) { + memcpy(pathname+n, p, cp-p); + n += cp-p; + pathname[n] = 0; + if (access(pathname, AEXIST) < 0) + return 0; + } + } + if (i) { + print(mime ? "text/plain\n" : "font file\n"); + return 1; + } + return 0; +} + +int +getfontnum(uchar *cp, uchar **rp) +{ + while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */ + cp++; + if (*cp < '0' || *cp > '9') + return 0; + strtoul((char *)cp, (char **)rp, 0); + if (!WHITESPACE(**rp)) + return 0; + return 1; +} + +int +isrtf(void) +{ + if(strstr((char *)buf, "\\rtf1")){ + print(mime ? "application/rtf\n" : "rich text format\n"); + return 1; + } + return 0; +} + +int +ismsdos(void) +{ + if (buf[0] == 0x4d && buf[1] == 0x5a){ + print(mime ? "application/x-msdownload\n" : "MSDOS executable\n"); + return 1; + } + return 0; +} + +int +iself(void) +{ + char *cpu[] = { /* NB: incomplete and arbitary list */ + [1] "WE32100", + [2] "SPARC", + [3] "i386", + [4] "M68000", + [5] "M88000", + [6] "i486", + [7] "i860", + [8] "R3000", + [9] "S370", + [10] "R4000", + [15] "HP-PA", + [18] "sparc v8+", + [19] "i960", + [20] "PPC-32", + [21] "PPC-64", + [40] "ARM", + [41] "Alpha", + [43] "sparc v9", + [50] "IA-46", + [62] "AMD64", + [75] "VAX", + }; + + + if (memcmp(buf, "\x7fELF", 4) == 0){ + if (!mime){ + int n = (buf[19] << 8) | buf[18]; + char *p = "unknown"; + + if (n > 0 && n < nelem(cpu) && cpu[n]) + p = cpu[n]; + else { + /* try the other byte order */ + n = (buf[18] << 8) | buf[19]; + if (n > 0 && n < nelem(cpu) && cpu[n]) + p = cpu[n]; + } + print("%s ELF executable\n", p); + } + else + print("application/x-elf-executable"); + return 1; + } + + return 0; +} |