diff options
-rw-r--r-- | src/cmd/acidtypes/dat.h | 72 | ||||
-rw-r--r-- | src/cmd/acidtypes/dwarf.c | 193 | ||||
-rw-r--r-- | src/cmd/acidtypes/main.c | 66 | ||||
-rw-r--r-- | src/cmd/acidtypes/mkfile | 23 | ||||
-rw-r--r-- | src/cmd/acidtypes/stabs.c | 775 | ||||
-rw-r--r-- | src/cmd/acidtypes/type.c | 571 | ||||
-rw-r--r-- | src/cmd/acidtypes/util.c | 63 |
7 files changed, 1763 insertions, 0 deletions
diff --git a/src/cmd/acidtypes/dat.h b/src/cmd/acidtypes/dat.h new file mode 100644 index 00000000..30f22016 --- /dev/null +++ b/src/cmd/acidtypes/dat.h @@ -0,0 +1,72 @@ +typedef struct Type Type; +typedef struct Typeref Typeref; +typedef struct TypeList TypeList; + +enum +{ + None, + Base, + Enum, + Aggr, + Function, + Pointer, + Array, + Range, + Defer, + Typedef, +}; + +struct Type +{ /* Font Tab 4 */ + uint ty; /* kind of type: one of the enum values above (None, Base, ..., Typedef) */ + vlong lo; /* for range */ + char sue; /* 's', 'u' or 'e' for struct, union, enum; 0 if not set */ + vlong hi; /* for range */ + uint gen; /* sequence number assigned by newtype; used to name anonymous types */ + uint n1; /* type number (impl dependent) */ + uint n2; /* another type number */ + char *name; /* name of type */ + char *suename; /* name of struct, union, enumeration */ + uint isunion; /* is this Aggr a union? */ + uint printfmt; /* describes base type */ + uint xsizeof; /* size of type */ + Type *sub; /* subtype */ + uint n; /* count for t, tname, val */ + Type **t; /* members of sue, params of function */ + char **tname; /* associated names */ + long *val; /* associated offsets or values */ + uint didtypedef; /* internal flag */ + uint didrange; /* internal flag */ + uint printed; /* internal flag */ + Type *equiv; /* internal */ +}; + +struct TypeList +{ + Type *hd; + TypeList *tl; +}; + +void *erealloc(void*, uint); +void *emalloc(uint); +char *estrdup(char*); +void warn(char*, ...); + +Type *typebynum(uint n1, uint n2); +Type *typebysue(char, char*); +void printtypes(Biobuf*); +void renumber(TypeList*, uint); +void denumber(void); +TypeList *mktl(Type*, TypeList*); + +struct Dwarf; +struct Stab; +int dwarf2acid(struct Dwarf*, Biobuf*); +int stabs2acid(struct Stab*, Biobuf*); + +Type *newtype(void); +char *nameof(Type*, int); +void freetypes(void); + +extern char *prefix; +char *fixname(char*); diff --git a/src/cmd/acidtypes/dwarf.c b/src/cmd/acidtypes/dwarf.c new file mode 100644 index 00000000..f8c38685 --- /dev/null +++ b/src/cmd/acidtypes/dwarf.c @@ -0,0 +1,193 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> +#include <elf.h> 
+#include <dwarf.h> +#include "dat.h" + +static ulong +valof(uint ty, DwarfVal *v) /* returns v->c for TConstant; any other class is dumped to stderr and yields 0 */ +{ + switch(ty){ + default: +fmtinstall('H', encodefmt); +fprint(2, "valof %d %.*H\n", ty, v->b.len, v->b.data); + return 0; + case TConstant: + return v->c; + } +} + +static Type* +xnewtype(uint ty, DwarfSym *s) /* look up (or create) the Type numbered by the symbol's unit+offset and set its kind */ +{ + Type *t; + + t = typebynum(s->unit+s->uoff, 0); + t->ty = ty; + return t; +} + +int +dwarf2acid(Dwarf *d, Biobuf *b) +{ + char *fn; + DwarfSym s; + Type *t; + + /* pass over dwarf section pulling out type info */ + + if(dwarfenum(d, &s) < 0) + return -1; + + while(dwarfnextsym(d, &s, s.depth!=1) == 1){ + top: + switch(s.attrs.tag){ + case TagSubprogram: + case TagLexDwarfBlock: + dwarfnextsym(d, &s, 1); + goto top; + + case TagTypedef: + t = xnewtype(Typedef, &s); + t->name = s.attrs.name; + t->sub = typebynum(s.attrs.type, 0); + break; + case TagBaseType: + t = xnewtype(Base, &s); + t->xsizeof = s.attrs.bytesize; + switch(s.attrs.encoding){ + default: + case TypeAddress: + t->printfmt = 'x'; + break; + case TypeBoolean: + case TypeUnsigned: + case TypeSigned: + case TypeSignedChar: + case TypeUnsignedChar: + t->printfmt = 'd'; + break; + case TypeFloat: + t->printfmt = 'f'; + break; + case TypeComplexFloat: + t->printfmt = 'F'; + break; + case TypeImaginaryFloat: + t->printfmt = 'i'; + break; + } + break; + case TagPointerType: + t = xnewtype(Pointer, &s); + t->sub = typebynum(s.attrs.type, 0); + break; + case TagStructType: + case TagUnionType: + t = xnewtype(Aggr, &s); + t->sue = s.attrs.tag==TagStructType ? 
's' : 'u'; + t->xsizeof = s.attrs.bytesize; + t->suename = s.attrs.name; + t->isunion = s.attrs.tag==TagUnionType; + dwarfnextsym(d, &s, 1); + if(s.depth != 2) + goto top; + do{ + if(!s.attrs.have.name || !s.attrs.have.type || s.attrs.tag != TagMember) + continue; + if(t->n%32 == 0){ + t->tname = erealloc(t->tname, (t->n+32)*sizeof(t->tname[0])); + t->val = erealloc(t->val, (t->n+32)*sizeof(t->val[0])); + t->t = erealloc(t->t, (t->n+32)*sizeof(t->t[0])); + } + t->tname[t->n] = s.attrs.name; + if(t->isunion) + t->val[t->n] = 0; + else + t->val[t->n] = valof(s.attrs.have.datamemberloc, &s.attrs.datamemberloc); + t->t[t->n] = typebynum(s.attrs.type, 0); + t->n++; + }while(dwarfnextsym(d, &s, 1) == 1 && s.depth==2); + goto top; + break; + case TagSubroutineType: + t = xnewtype(Function, &s); + break; + case TagConstType: + case TagVolatileType: + t = xnewtype(Defer, &s); + t->sub = typebynum(s.attrs.type, 0); + break; + case TagArrayType: + t = xnewtype(Array, &s); + t->sub = typebynum(s.attrs.type, 0); + break; + case TagEnumerationType: + t = xnewtype(Enum, &s); + t->sue = 'e'; + t->suename = s.attrs.name; + t->xsizeof = s.attrs.bytesize; + dwarfnextsym(d, &s, 1); + if(s.depth != 2) + goto top; + do{ + if(!s.attrs.have.name || !s.attrs.have.constvalue || s.attrs.tag != TagEnumerator) + continue; + if(t->n%32 == 0){ + t->tname = erealloc(t->tname, (t->n+32)*sizeof(t->tname[0])); + t->val = erealloc(t->val, (t->n+32)*sizeof(t->val[0])); + } + t->tname[t->n] = s.attrs.name; + t->val[t->n] = valof(s.attrs.have.constvalue, &s.attrs.constvalue); + t->n++; + }while(dwarfnextsym(d, &s, 1) == 1 && s.depth==2); + goto top; + break; + } + } + + printtypes(b); + + /* pass over dwarf section pulling out type definitions */ + + if(dwarfenum(d, &s) < 0) + goto out; + + fn = nil; + while(dwarfnextsym(d, &s, 1) == 1){ + if(s.depth == 1) + fn = nil; + switch(s.attrs.tag){ + case TagSubprogram: + fn = s.attrs.name; + break; + case TagFormalParameter: + if(s.depth != 2) + break; + /* 
fall through */ + case TagVariable: + if(s.attrs.name == nil || s.attrs.type == 0) + continue; + t = typebynum(s.attrs.type, 0); + if(t->ty == Pointer){ + t = t->sub; + if(t && t->equiv) + t = t->equiv; + } + if(t == nil) + break; + if(t->ty != Aggr) + break; + Bprint(b, "complex %s %s%s%s;\n", nameof(t, 1), + fn ? fixname(fn) : "", fn ? ":" : "", fixname(s.attrs.name)); + break; + } + } + +out: + freetypes(); + return 0; +} + diff --git a/src/cmd/acidtypes/main.c b/src/cmd/acidtypes/main.c new file mode 100644 index 00000000..0c747524 --- /dev/null +++ b/src/cmd/acidtypes/main.c @@ -0,0 +1,66 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> +#include "dat.h" + +void +usage(void) /* print the usage line to fd 2 and exit */ +{ + fprint(2, "usage: acidtypes [-p prefix] executable...\n"); + exits("usage"); +} + +void +main(int argc, char **argv) /* for each executable argument, write its acid type definitions to fd 1 */ +{ + int i; + Fhdr *fp; + Biobuf b; + char err[ERRMAX]; + + quotefmtinstall(); + + ARGBEGIN{ + case 'p': + prefix = EARGF(usage()); + break; + default: + usage(); + }ARGEND + + if(argc < 1) + usage(); + + Binit(&b, 1, OWRITE); + for(i=0; i<argc; i++){ + Bprint(&b, "\n//\n// symbols for %s\n//\n\n", argv[i]); + if((fp = crackhdr(argv[i], OREAD)) == nil){ + rerrstr(err, sizeof err); + Bprint(&b, "// open %s: %s\n\n", argv[i], err); + fprint(2, "open %s: %s\n", argv[i], err); + continue; + } + if(fp->dwarf){ + if(dwarf2acid(fp->dwarf, &b) < 0){ + rerrstr(err, sizeof err); + Bprint(&b, "// dwarf2acid %s: %s\n\n", argv[i], err); + fprint(2, "dwarf2acid %s: %s\n", argv[i], err); + } + }else if(fp->stabs.stabbase){ /* no DWARF: fall back to stabs */ + if(stabs2acid(&fp->stabs, &b) < 0){ + rerrstr(err, sizeof err); + /* name the converter that actually failed; this used to say dwarf2acid */ + Bprint(&b, "// stabs2acid %s: %s\n\n", argv[i], err); + fprint(2, "stabs2acid %s: %s\n", argv[i], err); + } + }else{ + Bprint(&b, "// no debugging symbols in %s\n\n", argv[i]); + // fprint(2, "no debugging symbols in %s\n", argv[i]); + } + uncrackhdr(fp); + } + Bflush(&b); + Bterm(&b); + exits(0); +} + diff --git a/src/cmd/acidtypes/mkfile b/src/cmd/acidtypes/mkfile new 
file mode 100644 index 00000000..e9e830e3 --- /dev/null +++ b/src/cmd/acidtypes/mkfile @@ -0,0 +1,23 @@ +<$PLAN9/src/mkhdr + +TARG=acidtypes + +OFILES=\ + dwarf.$O\ + main.$O\ + stabs.$O\ + type.$O\ + util.$O\ + +HFILES=\ + dat.h\ + ../../libmach/mach.h\ + ../../libmach/elf.h\ + ../../libmach/dwarf.h\ + ../../libmach/stabs.h\ + +CFLAGS=-I../../libmach $CFLAGS +SHORTLIB=mach bio 9 + +<$PLAN9/src/mkone + diff --git a/src/cmd/acidtypes/stabs.c b/src/cmd/acidtypes/stabs.c new file mode 100644 index 00000000..a5f3738c --- /dev/null +++ b/src/cmd/acidtypes/stabs.c @@ -0,0 +1,775 @@ +#include <u.h> +#include <errno.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> +#include <stabs.h> +#include <ctype.h> +#include "dat.h" + +static jmp_buf kaboom; + +static Type *parsename(char*, char**); +static Type *parseinfo(char*, char**); +static int parsenum(char*, int*, int*, char**); +static int parseattr(char*, char**, char**); +static Type *parsedefn(char *p, Type *t, char **pp); +static int parsebound(char**); +static vlong parsebigint(char**); + +typedef struct Sym Sym; +struct Sym +{ + char *fn; + char *name; + Type *type; + Sym *next; +}; + +typedef struct Ftypes Ftypes; +struct Ftypes +{ + Ftypes *down; + Ftypes *next; + char *file; + TypeList *list; +}; + +Ftypes *fstack; +Ftypes *allftypes; + +static Sym* +mksym(char *fn, char *name, Type *type) +{ + Sym *s; + + s = emalloc(sizeof *s); /* emalloc zeroes, so s->next starts out nil */ + s->fn = fn; + s->name = name; + s->type = type; + return s; +} + +static char* +estrndup(char *s, int n) +{ + char *t; + + t = emalloc(n+1); + memmove(t, s, n); /* emalloc zeroes the buffer, so t[n] is already NUL */ + return t; +} + +static char* +mkpath(char *dir, char *name) /* returns a freshly allocated dir+name (no separator is inserted), or a copy of name alone */ +{ + char *s; + if(dir == nil || name[0] == '/') /* no directory recorded yet, or name is already absolute */ + return estrdup(name); + else{ + s = emalloc(strlen(dir)+strlen(name)+1); + strcpy(s, dir); + strcat(s, name); + return s; + } +} + +static Ftypes* +mkftypes(char *dir, char *name) +{ + Ftypes *f; + + f = emalloc(sizeof(*f)); + f->file = mkpath(dir, name); + f->next = allftypes; + allftypes = f; + return f; +} + +static 
Ftypes* +findftypes(char *dir, char *name) /* returns the last entry on allftypes whose file matches dir+name, or nil */ +{ + char *s; + Ftypes *f, *found; + + found = nil; + s = mkpath(dir, name); + for(f=allftypes; f; f=f->next) + if(strcmp(f->file, s) == 0) + found = f; + free(s); /* mkpath allocated s; it was only needed for the comparison */ + return found; +} + +static void +oops(void) +{ + longjmp(kaboom, 1); +} + +/* find a : but skip over :: */ +static char* +findcolon(char *p) +{ + while((p = strchr(p, ':')) != nil && *(p+1) == ':') + p += 2; + if(p == nil) + oops(); + return p; +} + +static void +semi(char **p) +{ + if(**p != ';') + oops(); + (*p)++; +} + +static void +comma(char **p) +{ + if(**p != ',') + oops(); + (*p)++; +} + +static int +parseint(char **pp) +{ + if(!isdigit(**pp)) + oops(); + return strtol(*pp, pp, 10); +} + +/* + name ::= symbol_opt info + */ +static Type* +parsename(char *desc, char **pp) +{ + if(*desc == 'c') + return nil; + + if(isdigit(*desc) || *desc=='-' || *desc=='(') + return parseinfo(desc, pp); + if(*desc == 0) + oops(); + return parseinfo(desc+1, pp); +} + +/* + info ::= num | num '=' attr* defn + */ +static Type* +parseinfo(char *desc, char **pp) +{ + int n1, n2; + Type *t; + char *attr; + + parsenum(desc, &n1, &n2, &desc); + t = typebynum(n1, n2); + if(*desc != '='){ + *pp = desc; + return t; + } + desc++; + if(fstack) + fstack->list = mktl(t, fstack->list); + while(parseattr(desc, &attr, &desc) >= 0){ + if(*attr == 's') + t->xsizeof = atoi(attr+1)/8; + } + return parsedefn(desc, t, pp); +} + +/* + num ::= integer | '(' integer ',' integer ')' +*/ +static int +parsenum(char *p, int *n1, int *n2, char **pp) +{ + if(isdigit(*p)){ + *n1 = strtol(p, &p, 10); + *n2 = 0; + *pp = p; + return 0; + } + if(*p == '('){ + *n1 = strtol(p+1, &p, 10); + if(*p != ',') + oops(); + *n2 = strtol(p+1, &p, 10); + if(*p != ')') + oops(); + *pp = p+1; + return 0; + } + oops(); + return -1; +} + +/* + attr ::= '@' text ';' + + text is + 'a' integer (alignment) + 'p' integer (pointer class) + 'P' (packed type) + 's' integer (size of type in bits) + 'S' (string instead of array of chars) +*/ 
+static int +parseattr(char *p, char **text, char **pp) /* if p starts with '@', set *text just past it and *pp just past the next ';' and return 0; otherwise return -1 */ +{ + if(*p != '@') + return -1; + *text = p+1; + if((p = strchr(p, ';')) == nil) + oops(); + *pp = p+1; + return 0; +} + + +typedef struct Basic Basic; +struct Basic +{ + int size; + int fmt; +}; + +static Basic baseints[] = +{ + 0, 0, +/*1*/ 4, 'd', /* int32 */ +/*2*/ 1, 'd', /* char8 */ +/*3*/ 2, 'd', /* int16 */ +/*4*/ 4, 'd', /* long int32 */ +/*5*/ 1, 'x', /* uchar8 */ +/*6*/ 1, 'd', /* schar8 */ +/*7*/ 2, 'x', /* uint16 */ +/*8*/ 4, 'x', /* uint32 */ +/*9*/ 4, 'x', /* uint32 */ +/*10*/ 4, 'x', /* ulong32 */ +/*11*/ 0, 0, /* void */ +/*12*/ 4, 'f', /* float */ +/*13*/ 8, 'f', /* double */ +/*14*/ 10, 'f', /* long double */ +/*15*/ 4, 'd', /* int32 */ +/*16*/ 4, 'd', /* bool32 */ +/*17*/ 2, 'f', /* short real */ +/*18*/ 4, 'f', /* real */ +/*19*/ 4, 'x', /* stringptr */ +/*20*/ 1, 'd', /* character8 */ +/*21*/ 1, 'x', /* logical*1 */ +/*22*/ 2, 'x', /* logical*2 */ +/*23*/ 4, 'X', /* logical*4 */ +/*24*/ 4, 'X', /* logical32 */ +/*25*/ 8, 'F', /* complex (two single) */ +/*26*/ 16, 'F', /* complex (two double) */ +/*27*/ 1, 'd', /* integer*1 */ +/*28*/ 2, 'd', /* integer*2 */ +/*29*/ 4, 'd', /* integer*4 */ +/*30*/ 2, 'x', /* wide char */ +/*31*/ 8, 'd', /* int64 */ +/*32*/ 8, 'x', /* uint64 */ +/*33*/ 8, 'x', /* logical*8 */ +/*34*/ 8, 'd', /* integer*8 */ +}; + +static Basic basefloats[] = +{ + 0,0, +/*1*/ 4, 'f', /* 32-bit */ +/*2*/ 8, 'f', /* 64-bit */ +/*3*/ 8, 'F', /* complex */ +/*4*/ 4, 'F', /* complex16 */ +/*5*/ 8, 'F', /* complex32 */ +/*6*/ 10, 'f', /* long double */ +}; + +/* + defn ::= info + | 'b' ('u' | 's') 'c'? 
+ width; offset; nbits; (builtin, signed/unsigned, char/not, width in bytes, offset & nbits of type) + | 'w' (aix wide char type, not used) + | 'R' fptype; bytes; (fptype 1=32-bit, 2=64-bit, 3=complex, 4=complex16, 5=complex32, 6=long double) + | 'g' typeinfo ';' nbits (aix floating, not used) + | 'c' typeinfo ';' nbits (aix complex, not used) + | 'b' typeinfo ';' bytes (ibm, no idea) + | 'B' typeinfo (volatile typeref) + | 'd' typeinfo (file of typeref) + | 'k' typeinfo (const typeref) + | 'M' typeinfo ';' length (multiple instance type, fortran) + | 'S' typeinfo (set, typeref must have small number of values) + | '*' typeinfo (pointer to typeref) + | 'x' ('s'|'u'|'e') name ':' (struct, union, enum reference. name can have '::' in it) + | 'r' typeinfo ';' low ';' high ';' (subrange. typeref can be type being defined for base types!) + low and high are bounds + if bound is octal power of two, it's a big negative number + | ('a'|'P') indextypedef arraytypeinfo (array, index should be range type) + indextype is type definition not typeinfo (need not say typenum=) + P means packed array + | 'A' arraytypeinfo (open array (no index bounds)) + | 'D' dims ';' typeinfo (dims-dimensional dynamic array) + | 'E' dims ';' typeinfo (subarray of N-dimensional array) + | 'n' typeinfo ';' bytes (max length string) + | 'z' typeinfo ';' bytes (no idea what difference is from 'n') + | 'N' (pascal stringptr) + | 'e' (name ':' bigint ',')* ';' (enum listing) + | ('s'|'u') bytes (name ':' type ',' bitoffset ',' bitsize ';')* ';' (struct/union defn) + tag is given as name in stabs entry, with 'T' symbol + | 'f' typeinfo ';' (function returning type) + | 'f' rettypeinfo ',' paramcount ';' (typeinfo ',' (0|1) ';')* ';' + | 'p' paramcount ';' (typeinfo ',' (0|1) ';')* ';' + | 'F' rettypeinfo ',' paramcount ';' (name ':' typeinfo ',' (0|1) ';')* ';' + | 'R' paramcount ';' (name ':' typeinfo ',' (0|1) ';')* ';' + (the 0 or 1 is pass-by-reference vs pass-by-value) + (the 0 or 1 is 
pass-by-reference vs pass-by-value) +*/ + +static Type* +parsedefn(char *p, Type *t, char **pp) /* parse one defn at p into t, leave *pp past it, return t; malformed or unsupported input goes through oops() */ +{ + char c, *name; + int ischar, namelen, n, wid, offset, bits, sign; + long val; + Type *tt; + + if(*p == '(' || isdigit(*p)){ + t->ty = Defer; + t->sub = parseinfo(p, pp); + return t; + } + + switch(c = *p){ + case '-': /* builtin */ + n = strtol(p+1, &p, 10); + if(n >= nelem(baseints) || n < 0) + n = 0; + t->ty = Base; + t->xsizeof = baseints[n].size; + t->printfmt = baseints[n].fmt; + break; + case 'b': /* builtin */ + p++; + if(*p != 'u' && *p != 's') + oops(); + sign = (*p == 's'); + p++; + if(*p == 'c'){ + ischar = 1; + p++; + } + wid = parseint(&p); + semi(&p); + offset = parseint(&p); + semi(&p); + bits = parseint(&p); + semi(&p); + t->ty = Base; + t->xsizeof = wid; + if(sign == 1) + t->printfmt = 'd'; + else + t->printfmt = 'x'; + break; + case 'R': /* fp type */ + p++; /* step over the 'R'; parseint calls oops() unless *p is a digit */ + n = parseint(&p); + semi(&p); + wid = parseint(&p); + semi(&p); + t->ty = Base; + t->xsizeof = wid; + if(n < 0 || n >= nelem(basefloats)) + n = 0; + t->xsizeof = basefloats[n].size; + t->printfmt = basefloats[n].fmt; + break; + case 'r': /* subrange */ + t->ty = Range; + t->sub = parseinfo(p+1, &p); + if(*(p-1) == ';' && *p != ';') + p--; + semi(&p); + t->lo = parsebound(&p); + semi(&p); + t->hi = parsebound(&p); + semi(&p); + break; + case 'B': /* volatile */ + case 'k': /* const */ + t->ty = Defer; + t->sub = parseinfo(p+1, &p); + break; + case '*': /* pointer */ + case 'A': /* open array */ + t->ty = Pointer; + t->sub = parseinfo(p+1, &p); + break; + case 'a': /* array */ + case 'P': /* packed array */ + t->ty = Pointer; + tt = newtype(); + parsedefn(p+1, tt, &p); /* index type */ + if(*p == ';') + p++; + tt = newtype(); + t->sub = tt; + parsedefn(p, tt, &p); /* element type */ + break; + case 'e': /* enum listing */ + p++; + t->sue = 'e'; + t->ty = Enum; + while(*p != ';'){ + name = p; + p = findcolon(p)+1; + namelen = (p-name)-1; + val = parsebigint(&p); + comma(&p); + if(t->n%32 == 0){ + 
t->tname = erealloc(t->tname, (t->n+32)*sizeof(t->tname[0])); + t->val = erealloc(t->val, (t->n+32)*sizeof(t->val[0])); + } + t->tname[t->n] = estrndup(name, namelen); + t->val[t->n] = val; + t->n++; + } + semi(&p); + break; + + case 's': /* struct */ + case 'u': /* union */ + p++; + t->sue = c; + t->ty = Aggr; + n = parseint(&p); + while(*p != ';'){ + name = p; + p = findcolon(p)+1; + namelen = (p-name)-1; + tt = parseinfo(p, &p); + comma(&p); + offset = parseint(&p); + comma(&p); + bits = parseint(&p); + semi(&p); + if(t->n%32 == 0){ + t->tname = erealloc(t->tname, (t->n+32)*sizeof(t->tname[0])); + t->val = erealloc(t->val, (t->n+32)*sizeof(t->val[0])); + t->t = erealloc(t->t, (t->n+32)*sizeof(t->t[0])); + } + t->tname[t->n] = estrndup(name, namelen); + t->val[t->n] = offset; + t->t[t->n] = tt; + t->n++; + } + semi(&p); + break; + + case 'x': /* struct, union, enum reference */ + p++; + t->ty = Defer; + if(*p != 's' && *p != 'u' && *p != 'e') + oops(); + c = *p; + name = p+1; + p = findcolon(p+1); + name = estrndup(name, p-name); + t->sub = typebysue(c, name); + p++; + break; + +#if 0 /* AIX */ + case 'f': /* function */ + case 'p': /* procedure */ + case 'F': /* Pascal function */ + /* case 'R': /* Pascal procedure */ + /* + * Even though we don't use the info, we have + * to parse it in case it is embedded in other descriptions. + */ + t->ty = Function; + p++; + if(c == 'f' || c == 'F'){ + t->sub = parseinfo(p, &p); + if(*p != ','){ + if(*p == ';') + p++; + break; + } + comma(&p); + } + n = parseint(&p); /* number of params */ + semi(&p); + while(*p != ';'){ + if(c == 'F' || c == 'R'){ + name = p; /* parameter name */ + p = findcolon(p)+1; + } + parseinfo(p, &p); /* param type */ + comma(&p); + parseint(&p); /* bool: passed by value? */ + semi(&p); + } + semi(&p); + break; +#endif + + case 'f': /* static function */ + case 'F': /* global function */ + t->ty = Function; + p++; + t->sub = parseinfo(p, &p); + break; + + /* + * We'll never see any of this stuff. 
+ * When we do, we can worry about it. + */ + case 'D': /* n-dimensional array */ + case 'E': /* subarray of n-dimensional array */ + case 'M': /* fortran multiple instance type */ + case 'N': /* pascal string ptr */ + case 'S': /* set */ + case 'c': /* aix complex */ + case 'd': /* file of */ + case 'g': /* aix float */ + case 'n': /* max length string */ + case 'w': /* aix wide char */ + case 'z': /* another max length string */ + default: + fprint(2, "unsupported type char %c (%d)\n", *p, *p); + oops(); + } + *pp = p; + return t; +} + +/* + bound ::= + 'A' offset (bound is on stack by ref at offset offset from arg list) + | 'T' offset (bound is on stack by val at offset offset from arg list) + | 'a' regnum (bound passed by reference in register) + | 't' regnum (bound passed by value in register) + | 'J' (no bound) + | bigint +*/ +static int +parsebound(char **pp) +{ + char *p; + int n; + + n = 0; + p = *pp; + switch(*p){ + case 'A': /* bound is on stack by reference at offset n from arg list */ + case 'T': /* bound is on stack by value at offset n from arg list */ + case 'a': /* bound is passed by reference in register n */ + case 't': /* bound is passed by value in register n */ + p++; + parseint(&p); + break; + case 'J': /* no bound */ + p++; + break; + default: + n = parsebigint(&p); + break; + } + *pp = p; + return n; +} + +/* + bigint ::= '-'? 
decimal + | 0 octal + | -1 + */ +static vlong +parsebigint(char **pp) +{ + char *p; + int n, neg; + + p = *pp; + if(*p == '0'){ + errno = 0; + n = strtoll(p, &p, 8); + if(errno) + n = 0; + goto out; + } + neg = 0; + if(*p == '-'){ + neg = 1; + p++; + } + if(!isdigit(*p)) + oops(); + n = strtol(p, &p, 10); + if(neg) + n = -n; + +out: + *pp = p; + return n; +} + +int +stabs2acid(Stab *stabs, Biobuf *b) +{ + int fno, i; + char c, *dir, *fn, *file, *name, *desc, *p; + Ftypes *f; + Type *t, *tt; + StabSym sym; + Sym *symbols, *s; + Sym **lsym; + + dir = nil; + file = nil; + fno = 0; + fn = nil; + symbols = nil; + lsym = &symbols; + for(i=0; stabsym(stabs, i, &sym)>=0; i++){ + switch(sym.type){ + case N_SO: + if(sym.name){ + if(sym.name[0] && sym.name[strlen(sym.name)-1] == '/') + dir = sym.name; + else + file = sym.name; + } + denumber(); + fstack = nil; + fno = 0; + break; + case N_BINCL: + fno++; + f = mkftypes(dir, sym.name); + f->down = fstack; + fstack = f; + break; + case N_EINCL: + if(fstack) + fstack = fstack->down; + break; + case N_EXCL: + fno++; + if((f = findftypes(dir, sym.name)) == nil){ + fprint(2, "cannot find remembered %s\n", sym.name); + continue; + } + renumber(f->list, fno); + break; + case N_GSYM: + case N_FUN: + case N_PSYM: + case N_LSYM: + case N_LCSYM: + case N_STSYM: + case N_RSYM: + name = sym.name; + if(name == nil){ + if(sym.type==N_FUN) + fn = nil; + continue; + } + if((p = strchr(name, ':')) == nil) + continue; + name = estrndup(name, p-name); + desc = ++p; + c = *desc; + if(c == 'c'){ + fprint(2, "skip constant %s\n", name); + continue; + } + if(setjmp(kaboom)){ + fprint(2, "cannot parse %s\n", name); + continue; + } + t = parsename(desc, &p); + if(t == nil) + continue; + if(*p != 0) + fprint(2, "extra desc '%s' in '%s'\n", p, desc); + /* void is defined as itself */ + if(t->ty==Defer && t->sub==t && strcmp(name, "void")==0){ + t->ty = Base; + t->xsizeof = 0; + t->printfmt = '0'; + } + if(*name==' ' && *(name+1) == 0) + *name = 0; + /* 
attach names to structs, unions, enums */ + if(c=='T' && *name && t->sue){ + t->suename = name; + if(t->name == nil) + t->name = name; + tt = typebysue(t->sue, name); + tt->ty = Defer; + tt->sub = t; + } + if(c=='t'){ + tt = newtype(); + tt->ty = Typedef; + tt->name = name; + tt->sub = t; + } + /* define base c types */ + if(t->ty==None || t->ty==Range){ + if(strcmp(name, "char") == 0){ + t->ty = Base; + t->xsizeof = 1; + t->printfmt = 'x'; + } + if(strcmp(name, "int") == 0){ + t->ty = Base; + t->xsizeof = 4; + t->printfmt = 'd'; + } + } + /* record declaration in list for later. */ + if(c != 't' && c != 'T') + switch(sym.type){ + case N_GSYM: + *lsym = mksym(nil, name, t); + lsym = &(*lsym)->next; + break; + case N_FUN: + fn = name; + break; + case N_PSYM: + case N_LSYM: + case N_LCSYM: + case N_STSYM: + case N_RSYM: + *lsym = mksym(fn, name, t); + lsym = &(*lsym)->next; + break; + } + break; + } + } + + printtypes(b); + + for(s=symbols; s; s=s->next){ + t = s->type; + if(t->ty == Pointer){ + t = t->sub; + if(t && t->equiv) + t = t->equiv; + } + if(t == nil || t->ty != Aggr) + continue; + Bprint(b, "complex %s %s%s%s;\n", nameof(t, 1), + s->fn ? fixname(s->fn) : "", s->fn ? 
":" : "", fixname(s->name)); + } + + return 0; +} diff --git a/src/cmd/acidtypes/type.c b/src/cmd/acidtypes/type.c new file mode 100644 index 00000000..872c38b3 --- /dev/null +++ b/src/cmd/acidtypes/type.c @@ -0,0 +1,571 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> +#include <ctype.h> +#include "dat.h" + +char *prefix = ""; + +static TypeList *thash[1021]; +static TypeList *namehash[1021]; +static TypeList *alltypes; + +static uint +hash(uint num, uint num1) +{ + return (num*1009 + num1*1013) % nelem(thash); +} + +static uint +shash(char *s) +{ + uint h; + + h = 0; + for(; *s; s++) + h = 37*h + *s; + return h%nelem(namehash); +} + +void +addnamehash(Type *t) +{ + uint h; + + if(t->name){ + h = shash(t->name); + namehash[h] = mktl(t, namehash[h]); + } +} + +static void +addhash(Type *t) +{ + uint h; + + if(t->n1 || t->n2){ + h = hash(t->n1, t->n2); + thash[h] = mktl(t, thash[h]); + } + if(t->name) + addnamehash(t); +} + +Type* +typebysue(char sue, char *name) +{ + Type *t; + TypeList *tl; + + for(tl=namehash[shash(name)]; tl; tl=tl->tl){ + t = tl->hd; + if(t->sue==sue && t->suename && strcmp(name, t->suename)==0) + return t; + } + t = newtype(); + if(sue=='e') + t->ty = Enum; + else + t->ty = Aggr; + if(sue=='u') + t->isunion = 1; + t->sue = sue; + t->suename = name; + addnamehash(t); + return t; +} + +Type* +typebynum(uint n1, uint n2) +{ + Type *t; + TypeList *tl; + + if(n1 || n2){ + for(tl=thash[hash(n1, n2)]; tl; tl=tl->tl){ + t = tl->hd; + if(t->n1==n1 && t->n2==n2) + return t; + } + } + + t = newtype(); + t->n1 = n1; + t->n2 = n2; + addhash(t); + return t; +} + +Type* +newtype(void) +{ + Type *t; + static int gen; + + t = emalloc(sizeof *t); + t->gen = ++gen; + alltypes = mktl(t, alltypes); + return t; +} + +struct { + char *old; + char *new; +} fixes[] = { /* Font Tab 4 */ + "append", "$append", + "builtin", "$builtin", + "complex", "$complex", + "delete", "$delete", + "do", "$do", + "else", "$else", + "eval", "$eval", + "fmt", 
"$fmt", + "fn", "$fn", + "head", "$head", + "if", "$if", + "local", "$local", + "loop", "$loop", + "ret", "$ret", + "tail", "$tail", + "then", "$then", + "whatis", "$whatis", + "while", "$while", +}; + +char* +fixname(char *name) +{ + int i; + + if(name == nil) + return nil; + for(i=0; i<nelem(fixes); i++) + if(name[0]==fixes[i].old[0] && strcmp(name, fixes[i].old)==0) + return fixes[i].new; + return name; +} + +void +denumber(void) +{ + memset(thash, 0, sizeof thash); + memset(namehash, 0, sizeof namehash); +} + +void +renumber(TypeList *tl, uint n1) +{ + Type *t; + + for(; tl; tl=tl->tl){ + t = tl->hd; + t->n1 = n1; + addhash(t); + } +} + +static Type* +defer(Type *t) +{ + Type *u, *oldt; + int n; + + u = t; + n = 0; + oldt = t; + while(t && (t->ty == Defer || t->ty == Typedef)){ + if(n++%2) + u = u->sub; + t = t->sub; + if(t == u) /* cycle */ + goto cycle; + } + return t; + +cycle: + fprint(2, "cycle\n"); + t = oldt; + n = 0; + while(t && (t->ty==Defer || t->ty==Typedef)){ + fprint(2, "t %p/%d %s\n", t, t->ty, t->name); + if(t == u && n++ == 2) + break; + t = t->sub; + } + return u; +} + +static void +dotypedef(Type *t) +{ + if(t->ty != Typedef && t->ty != Defer) + return; + + if(t->didtypedef) + return; + + t->didtypedef = 1; + if(t->sub == nil) + return; + + /* push names downward to remove anonymity */ + if(t->name && t->sub->name == nil) + t->sub->name = t->name; + + dotypedef(t->sub); +} + +static int +countbytes(uvlong x) +{ + int n; + + for(n=0; x; n++) + x>>=8; + return n; +} + +static void +dorange(Type *t) +{ + Type *tt; + + if(t->ty != Range) + return; + if(t->didrange) + return; + t->didrange = 1; + tt = defer(t->sub); + if(tt == nil) + return; + dorange(tt); + if(t != tt && tt->ty != Base) + return; + t->ty = Base; + t->xsizeof = tt->xsizeof; + if(t->lo == 0) + t->printfmt = 'x'; + else + t->printfmt = 'd'; + if(t->xsizeof == 0) + t->xsizeof = countbytes(t->hi); +} + + +char* +nameof(Type *t, int doanon) +{ + static char buf[1024]; + char *p; + + 
if(t->name) + strcpy(buf, fixname(t->name)); + else if(t->suename) + snprint(buf, sizeof buf, "%s_%s", t->isunion ? "union" : "struct", t->suename); + else if(doanon) + snprint(buf, sizeof buf, "%s_%lud_", prefix, t->gen); + else + return ""; + for(p=buf; *p; p++) + if(isspace(*p)) + *p = '_'; + return buf; +} + +static char +basecharof(Type *t) //XXX +{ + switch(t->xsizeof){ + default: + return 'X'; + case 1: + return 'b'; + case 2: + if(t->printfmt=='d') + return 'd'; + else + return 'x'; + case 4: + if(t->printfmt=='d') + return 'D'; + else if(t->printfmt=='f') + return 'f'; + else + return 'X'; + case 8: + if(t->printfmt=='d') + return 'V'; + else if(t->printfmt=='f') + return 'F'; + else + return 'Y'; + } +} + +static int +nilstrcmp(char *a, char *b) +{ + if(a == b) + return 0; + if(a == nil) + return -1; + if(b == nil) + return 1; + return strcmp(a, b); +} + +static int +typecmp(Type *t, Type *u) +{ + int i; + + if(t == u) + return 0; + if(t == nil) + return -1; + if(u == nil) + return 1; + + if(t->ty < u->ty) + return -1; + if(t->ty > u->ty) + return 1; + + if(t->isunion != u->isunion) + return t->isunion - u->isunion; + + i = nilstrcmp(t->name, u->name); + if(i) + return i; + + i = nilstrcmp(t->suename, u->suename); + if(i) + return i; + + if(t->name || t->suename) + return 0; + + if(t->ty==Enum){ + if(t->n < u->n) + return -1; + if(t->n > u->n) + return 1; + if(t->n == 0) + return 0; + i = strcmp(t->tname[0], u->tname[0]); + return i; + } + if(t < u) + return -1; + if(t > u) + return 1; + return 0; +} + +static int +qtypecmp(const void *va, const void *vb) +{ + Type *t, *u; + + t = *(Type**)va; + u = *(Type**)vb; + return typecmp(t, u); +} + +void +printtype(Biobuf *b, Type *t) +{ + char *name; + int j, nprint; + Type *tt, *ttt; + + if(t->printed) + return; + t->printed = 1; + switch(t->ty){ + case Aggr: + name = nameof(t, 1); + Bprint(b, "sizeof%s = %lud;\n", name, t->xsizeof); + Bprint(b, "aggr %s {\n", name); + nprint = 0; + for(j=0; j<t->n; j++){ + tt 
= defer(t->t[j]); + if(tt && tt->equiv) + tt = tt->equiv; + if(tt == nil){ + Bprint(b, "// oops: nil type\n"); + continue; + } + switch(tt->ty){ + default: + Bprint(b, "// oops: unknown type %d for %p/%s (%d,%d; %c,%s; %p)\n", + tt->ty, tt, fixname(t->tname[j]), + tt->n1, tt->n2, tt->sue ? tt->sue : '.', tt->suename, tt->sub); +Bprint(b, "// t->t[j] = %p\n", ttt=t->t[j]); +while(ttt){ +Bprint(b, "// %s %d (%d,%d) sub %p\n", ttt->name, ttt->ty, ttt->n1, ttt->n2, ttt->sub); +ttt=ttt->sub; +} + case Base: + case Pointer: + case Enum: + case Array: + case Function: + nprint++; + Bprint(b, "\t'%c' %lud %s;\n", basecharof(tt), t->val[j], fixname(t->tname[j])); + break; + case Aggr: + nprint++; + Bprint(b, "\t%s %lud %s;\n", nameof(tt, 1), t->val[j], fixname(t->tname[j])); + break; + } + } + if(nprint == 0) + Bprint(b, "\t'X' 0 __dummy;\n"); + Bprint(b, "};\n\n"); + + name = nameof(t, 1); /* might have smashed it */ + Bprint(b, "defn %s(addr) { indent_%s(addr, \"\"); }\n", name, name); + Bprint(b, "defn\nindent_%s(addr, indent) {\n", name); + Bprint(b, "\tcomplex %s addr;\n", name); + for(j=0; j<t->n; j++){ + name = fixname(t->tname[j]); + tt = defer(t->t[j]); + if(tt == nil){ + Bprint(b, "// oops nil %s\n", name); + continue; + } + switch(tt->ty){ + case Base: + base: + Bprint(b, "\tprint(indent, \"%s\t\", addr.%s, \"\\n\");\n", + name, name); + break; + case Pointer: + ttt = defer(tt->sub); + if(ttt && ttt->ty == Aggr) + Bprint(b, "\tprint(indent, \"%s\t%s(\", addr.%s, \")\\n\");\n", + name, nameof(ttt, 1), name); + else + goto base; + break; + case Array: + Bprint(b, "\tprint(indent, \"%s\t\", addr.%s\\X, \"\\n\");\n", + name, name); + break; + case Enum: + Bprint(b, "\tprint(indent, \"%s\t\", addr.%s, \" \", %s(addr.%s), \"\\n\");\n", + name, name, nameof(tt, 1), name); + break; + case Aggr: + Bprint(b, "\tprint(indent, \"%s\t%s{\\n\");\n", + name, nameof(tt, 0)); + Bprint(b, "\tindent_%s(addr+%lud, indent+\" \");\n", + nameof(tt, 1), t->val[j]); + Bprint(b, 
"\tprint(indent, \"\t}\\n\");\n"); + break; + } + } + Bprint(b, "};\n\n"); + break; + + case Enum: + name = nameof(t, 1); + Bprint(b, "// enum %s\n", name); + for(j=0; j<t->n; j++) + Bprint(b, "%s = %ld;\n", fixname(t->tname[j]), t->val[j]); + + Bprint(b, "vals_%s = {\n", name); + for(j=0; j<t->n; j++) + Bprint(b, "\t%lud,\n", t->val[j]); + Bprint(b, "};\n"); + Bprint(b, "names_%s = {\n", name); + for(j=0; j<t->n; j++) + Bprint(b, "\t\"%s\",\n", fixname(t->tname[j])); + Bprint(b, "};\n"); + Bprint(b, "defn\n%s(val) {\n", name); + Bprint(b, "\tlocal i;\n"); + Bprint(b, "\ti = match(val, vals_%s);\n", name); + Bprint(b, "\tif i >= 0 then return names_%s[i];\n", name); + Bprint(b, "\treturn \"???\";\n"); + Bprint(b, "};\n"); + break; + } +} + +void +printtypes(Biobuf *b) +{ + int i, n, nn; + Type *t, *tt, **all; + TypeList *tl; + + /* check that pointer resolved */ + for(tl=alltypes; tl; tl=tl->tl){ + t = tl->hd; + if(t->ty==None){ + if(t->n1 || t->n2) + warn("type %d,%d referenced but not defined", t->n1, t->n2); + else if(t->sue && t->suename) + warn("%s %s referenced but not defined", + t->sue=='s' ? "struct" : + t->sue=='u' ? "union" : + t->sue=='e' ? "enum" : "???", t->suename); + } + } + + /* push typedefs down, base types up */ + n = 0; + for(tl=alltypes; tl; tl=tl->tl){ + n++; + t = tl->hd; + if(t->ty == Typedef || t->ty == Defer) + dotypedef(t); + } + + /* push ranges around */ + for(tl=alltypes; tl; tl=tl->tl) + dorange(tl->hd); + + /* + * only take one type of a given name; acid is going to do this anyway, + * and this will reduce the amount of code we output considerably. + * we could run a DFA equivalence relaxation sort of algorithm + * to find the actual equivalence classes, and then rename types + * appropriately, but this will do for now. 
+ */ + all = emalloc(n*sizeof(all[0])); + n = 0; + for(tl=alltypes; tl; tl=tl->tl) + all[n++] = tl->hd; + + qsort(all, n, sizeof(all[0]), qtypecmp); + + nn = 0; + for(i=0; i<n; i++){ + if(i==0 || typecmp(all[i-1], all[i]) != 0) + all[nn++] = all[i]; + else + all[i]->equiv = all[nn-1]; + } + + for(tl=alltypes; tl; tl=tl->tl){ + t = tl->hd; + tt = defer(t); + if(tt && tt->equiv) + t->equiv = tt->equiv; + } + + for(i=0; i<nn; i++) + printtype(b, all[i]); + + free(all); +} + +void +freetypes(void) +{ + memset(thash, 0, sizeof(thash)); + memset(namehash, 0, sizeof namehash); +} diff --git a/src/cmd/acidtypes/util.c b/src/cmd/acidtypes/util.c new file mode 100644 index 00000000..0dbc63e9 --- /dev/null +++ b/src/cmd/acidtypes/util.c @@ -0,0 +1,63 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <mach.h> +#include "dat.h" + +static int nwarn; + +void +warn(char *fmt, ...) +{ + va_list arg; + + if(++nwarn < 5){ + va_start(arg, fmt); + fprint(2, "warning: "); + vfprint(2, fmt, arg); + fprint(2, "\n"); + va_end(arg); + }else if(nwarn == 5) + fprint(2, "[additional warnings elided...]\n"); +} + +void* +erealloc(void *v, uint n) +{ + v = realloc(v, n); + if(v == nil) + sysfatal("realloc: %r"); + return v; +} + +void* +emalloc(uint n) +{ + void *v; + + v = mallocz(n, 1); + if(v == nil) + sysfatal("malloc: %r"); + return v; +} + +char* +estrdup(char *s) +{ + s = strdup(s); + if(s == nil) + sysfatal("strdup: %r"); + return s; +} + +TypeList* +mktl(Type *hd, TypeList *tail) +{ + TypeList *tl; + + tl = emalloc(sizeof(*tl)); + tl->hd = hd; + tl->tl = tail; + return tl; +} + |