diff options
author | rsc <devnull@localhost> | 2005-11-29 04:05:36 +0000 |
---|---|---|
committer | rsc <devnull@localhost> | 2005-11-29 04:05:36 +0000 |
commit | 2eb305240f559fa69a08969184ce9585d16f8b02 (patch) | |
tree | a1adfad569e76e075bab3daf3b040a180f4ea2b1 /src/libmach | |
parent | 2d658c0d185051b0bea78c8273a2933a586e617b (diff) | |
download | plan9port-2eb305240f559fa69a08969184ce9585d16f8b02.tar.gz plan9port-2eb305240f559fa69a08969184ce9585d16f8b02.tar.bz2 plan9port-2eb305240f559fa69a08969184ce9585d16f8b02.zip |
More demangling.
Diffstat (limited to 'src/libmach')
-rw-r--r-- | src/libmach/manglegcc2.c | 587 | ||||
-rw-r--r-- | src/libmach/mkfile | 3 |
2 files changed, 423 insertions, 167 deletions
diff --git a/src/libmach/manglegcc2.c b/src/libmach/manglegcc2.c index 035ffd35..59f4d46e 100644 --- a/src/libmach/manglegcc2.c +++ b/src/libmach/manglegcc2.c @@ -1,6 +1,20 @@ /* * gcc2 name demangler. + * + * gcc2 follows the C++ Annotated Reference Manual section 7.2.1 + * name mangling description with a few changes. + * See gpcompare.texi, gxxint_15.html in this directory for the changes. + * + * Not implemented: + * unicode mangling */ +/* +RULES TO ADD: + +_10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ + + +*/ #include <u.h> #include <libc.h> #include <bio.h> @@ -24,114 +38,310 @@ chartabsearch(Chartab *ct, int c) return nil; } -typedef struct Gccstate Gccstate; -struct Gccstate +static Chartab typetab[] = +{ + 'b', "bool", + 'c', "char", + 'd', "double", + 'e', "...", + 'f', "float", + 'i', "int", + 'J', "complex", + 'l', "long", + 'r', "long double", + 's', "short", + 'v', "void", + 'w', "wchar_t", + 'x', "long long", + 0, 0 +}; + +static Chartab modifiertab[] = { - char *name[128]; - int nname; + 'C', "const", + 'S', "signed", /* means static for member functions */ + 'U', "unsigned", + 'V', "volatile", + + 'G', "garbage", /* no idea what this is */ + 0, 0 }; -static int gccname(char**, char**, Gccstate*); + +static char constructor[] = "constructor"; +static char destructor[] = "destructor"; +static char gconstructor[] = "$gconstructor"; /* global destructor */ +static char gdestructor[] = "$gdestructor"; /* global destructor */ + +static char manglestarts[] = "123456789CFHQSUVt"; + +static int gccname(char**, char**); +static char *demanglegcc2a(char*, char*); +static char *demanglegcc2b(char*, char*); +static char *demanglegcc2c(char*, char*); +static int gccnumber(char**, int*, int); + char* demanglegcc2(char *s, char *buf) { - char *p, *os, *name, *t; - int namelen; - Gccstate state; + char *name, *os, *p, *t; + int isfn, namelen; - state.nname = 0; - os = s; - p = buf; - if(memcmp(os, "_._", 3) == 0){ - name = "destructor"; + /* + * Pick off some cases that seem not to fit the pattern. + */ + if((t = demanglegcc2a(s, buf)) != nil) + return t; + if((t = demanglegcc2b(s, buf)) != nil) + return t; + if((t = demanglegcc2c(s, buf)) != nil) + return t; + + /* + * First, figure out whether this is a mangled name. + * The name begins with a short version of the name, then __. + * Of course, some C names begin with __ too, so the ultimate + * test is whether what follows __ looks reasonable. + * We use a test on the first letter instead. + * + * Constructors have no name - they begin __ (double underscore). + * Destructors break the rule - they begin _._ (underscore, dot, underscore). + */ + os = s; + isfn = 0; + if(memcmp(s, "_._", 3) == 0){ + isfn = 1; + name = destructor; + namelen = strlen(name); + s += 3; + }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){ + isfn = 1; + name = gdestructor; + namelen = strlen(name); + s += 13; + }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){ + isfn = 0; + name = gdestructor; namelen = strlen(name); - s = os+3; + s += 12; + }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){ + isfn = 1; + name = gconstructor; + namelen = strlen(name); + s += 13; + }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){ + isfn = 0; + name = gconstructor; + namelen = strlen(name); + s += 12; }else{ - /* the mangled part begins with the final __ */ - if((s = strstr(os, "__")) == nil) + t = strstr(os, "__"); + if(t == nil) return os; do{ - t = s; - if(strchr("123456789FHQt", s[2])) + s = t; + if(strchr(manglestarts, *(s+2))) break; - }while((s = strstr(t+1, "__")) != nil); - - s = t; + }while((t = strstr(s+1, "__")) != nil); + name = os; - namelen = t - os; + namelen = s - os; if(namelen == 0){ - name = "constructor"; + isfn = 1; + name = constructor; namelen = strlen(name); } s += 2; } - - switch(*s){ - default: + + /* + * Now s points at the mangled crap (maybe). + * and name is the final element of the name. + */ + if(strchr(manglestarts, *s) == nil) return os; - - case 'F': /* plain function */ - s++; - break; - case 'Q': - case 'H': - case 't': - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if(!gccname(&s, &p, &state)){ - if(debug) fprint(2, "bad name: %s\n", os); + p = buf; + if(*s == 'F'){ + /* global function, no extra name pieces, just types */ + isfn = 1; + }else{ + /* parse extra name pieces */ + if(!gccname(&s, &p)){ + if(debug) + fprint(2, "parsename %s: %r\n", s); return os; } + + /* if we have a constructor or destructor, try to use the C++ name */ + t = nil; + if(name == constructor || name == destructor){ + *p = 0; + t = strrchr(buf, ':'); + if(t == nil) + t = buf; + } strcpy(p, "::"); p += 2; - break; + if(t){ + namelen = strlen(t)-2; + if(name == destructor) + *p++ = '~'; + name = t; + } } - memmove(p, name, namelen); p += namelen; + + if(*s == 'F'){ + /* might be from above, or might follow name pieces */ + s++; + isfn = 1; + } - if(*s && *s != '_'){ - /* the rest of the name is the argument types */ + /* the rest of the name is argument types - could skip this */ + if(*s || isfn){ *p++ = '('; - while(*s != 0 && *s != '_' && gccname(&s, &p, &state)) + while(*s != 0 && *s != '_'){ + if(!gccname(&s, &p)) + break; *p++ = ','; + } if(*(p-1) == ',') p--; *p++ = ')'; } - - if(*s == '_'){ - /* the remainder is the type of the return value */ - } + if(*s == '_'){ + /* return type (left over from H) */ + } + *p = 0; return buf; } -static Chartab typetab[] = +/* + * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ + * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos + * (maybe the funny syntax means they are private) + */ +static char* +demanglegcc2a(char *s, char *buf) { - 'b', "bool", - 'c', "char", - 'd', "double", - 'i', "int", - 'l', "long", - 'v', "void", - 0, 0 -}; + char *p; + + if(*s != '_' || strchr(manglestarts, *(s+1)) == nil) + return nil; + p = buf; + s++; + if(!gccname(&s, &p)) + return nil; + if(*s != '.') + return nil; + s++; + strcpy(p, "::"); + p += 2; + strcpy(p, s); + return buf; +} + +/* + * _tfb => type info for bool + * __vt_7ostream => vtbl for ostream + */ +static char* +demanglegcc2b(char *s, char *buf) +{ + char *p; + char *t; + + if(memcmp(s, "__ti", 4) == 0){ + t = "$typeinfo"; + s += 4; + }else if(memcmp(s, "__tf", 4) == 0){ + t = "$typeinfofn"; + s += 4; + }else if(memcmp(s, "__vt_", 5) == 0){ + t = "$vtbl"; + s += 5; + }else + return nil; + p = buf; + for(;;){ + if(*s == 0 || !gccname(&s, &p)) + return nil; + if(*s == 0) + break; + if(*s != '.' && *s != '$') + return nil; + strcpy(p, "::"); + p += 2; + s++; + } + strcpy(p, "::"); + p += 2; + strcpy(p, t); + return buf; +} + +/* + * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream + */ +static char* +demanglegcc2c(char *s, char *buf) +{ + int n; + char *p; + + if(memcmp(s, "__thunk_", 8) != 0) + return nil; + s += 8; + if(!gccnumber(&s, &n, 1)) + return nil; + if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */ + return nil; + s += 4; + p = buf; + if(!gccname(&s, &p)) + return nil; + strcpy(p, "::$thunk"); + return buf; +} + +/* + * Parse a number, a non-empty run of digits. + * If many==0, then only one digit is used, even + * if it is followed by more. When we need a big + * number in a one-digit slot, it gets bracketed by underscores. + */ static int -gccnumber(char **ps, int *pn) +gccnumber(char **ps, int *pn, int many) { char *s; - int n; + int n, eatunderscore; s = *ps; - if(!isdigit((uchar)*s)) + eatunderscore = 0; + if(!many && *s == '_'){ + many = 1; + s++; + eatunderscore = 1; + } + if(!isdigit((uchar)*s)){ + bad: + werrstr("bad number %.20s", *ps); return 0; - n = strtol(s, &s, 10); - if(*s == '_') + } + if(many) + n = strtol(s, &s, 10); + else + n = *s++ - '0'; + if(eatunderscore){ + if(*s != '_') + goto bad; s++; + } *ps = s; *pn = n; return 1; @@ -143,10 +353,10 @@ gccnumber(char **ps, int *pn) * Let's see how far we can go before that becomes a problem. */ static int -gccname(char **ps, char **pp, Gccstate *state) +gccname(char **ps, char **pp) { int i, n, m, val; - char c, *os, *s, *t, *p; + char *os, *s, *t, *p, *p0, *p1; s = *ps; os = s; @@ -154,96 +364,159 @@ gccname(char **ps, char **pp, Gccstate *state) /* print("\tgccname: %s\n", s); */ -#if 0 - /* overloaded operators */ - for(i=0; operators[i].shrt; i++){ - if(memcmp(operators[i].shrt, s, 2) == 0){ - strcpy(p, "operator$"); - strcat(p, operators[i].lng); - p += strlen(p); - s += 2; - goto suffix; - } - } -#endif /* basic types */ if((t = chartabsearch(typetab, *s)) != nil){ s++; strcpy(p, t); p += strlen(t); - goto suffix; + goto out; } - + + /* modifiers */ + if((t = chartabsearch(modifiertab, *s)) != nil){ + s++; + if(!gccname(&s, &p)) + return 0; + /* + * These don't end up in the right place + * and i don't care anyway + * (AssertHeld__C17ReaderWriterMutex) + */ + /* + *p++ = ' '; + strcpy(p, t); + p += strlen(p); + */ + goto out; + } + switch(*s){ default: bad: - if(debug) fprint(2, "gccname: %s (%s)\n", os, s); + if(debug) + fprint(2, "gccname: %s (%s)\n", os, s); + werrstr("bad name %.20s", s); return 0; - case '1': case '2': case '3': case '4': /* name length */ + case '1': case '2': case '3': case '4': /* length-prefixed string */ case '5': case '6': case '7': case '8': case '9': - n = strtol(s, &s, 10); + if(!gccnumber(&s, &n, 1)) + return 0; memmove(p, s, n); p += n; s += n; break; - case 'C': /* const */ + case 'A': /* array */ + t = s; s++; - strcpy(p, "const "); - p += strlen(p); - if(!gccname(&s, &p, state)) + if(!gccnumber(&s, &n, 1)) + return 0; + if(*s != '_'){ + werrstr("bad array %.20s", t); return 0; + } + s++; + sprint(p, "array[%d] ", n); + p += strlen(p); break; - case 'U': /* unsigned */ + case 'F': /* function */ + t = s; s++; - strcpy(p, "unsigned "); - p += strlen(p); - if(!gccname(&s, &p, state)) + strcpy(p, "fn("); + p += 3; + /* arguments */ + while(*s && *s != '_') + if(!gccname(&s, &p)) + return 0; + if(*s != '_'){ + werrstr("unexpected end in function: %s", t); return 0; + } + s++; + strcpy(p, " => "); + p += 4; + /* return type */ + if(!gccname(&s, &p)) + return 0; + *p++ = ')'; break; -#if 0 - case 'L': /* default value */ + case 'H': /* template specialization */ t = s; s++; - if(!gccname(&s, &p, state)) - return 0; - if(!isdigit((uchar)*s)){ - fprint(2, "bad value: %s\n", t); + if(!gccnumber(&s, &n, 0)) return 0; + p0 = p; + /* template arguments */ + *p++ = '<'; + for(i=0; i<n; i++){ + val = 1; + if(*s == 'Z'){ /* argument is a type, not value */ + val = 0; + s++; + } + if(!gccname(&s, &p)) + return 0; + if(val){ + if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ + return 0; + sprint(p, "=%d", m); + p += strlen(p); + } + if(i+1<n) + *p++ = ','; } - n = strtol(s, &s, 10); - if(*s != 'E'){ - fprint(2, "bad value2: %s\n", t); + *p++ = '>'; + if(*s != '_'){ + werrstr("bad template %s", t); return 0; } - sprint(p, "=%d", n); - p += strlen(p); s++; + p1 = p; + /* name */ + if(!gccname(&s, &p)) + return 0; + /* XXX +__adjust_heap__H3ZPt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZiZt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc_X01X11X11X21_v + */ + /* XXX swap p0, p1, p - maybe defer to main */ break; -#endif - case 'N': /* repeated name/type */ - case 'X': - c = *s++; - if(!isdigit((uchar)*s) || !isdigit((uchar)*(s+1))) + case 'M': /* M1S: pointer to member */ + if(*(s+1) != '1' || *(s+2) != 'S') goto bad; - n = *s++ - '0'; - m = *s++ - '0'; - sprint(p, "%c%d/%d", c, n, m); + s += 3; + strcpy(p, "mptr "); + p += 5; + if(!gccname(&s, &p)) + return 0; + break; + + case 'N': /* multiply-repeated type */ + s++; + if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0)) + return 0; + sprint(p, "T%dx%d", m, n); p += strlen(p); break; - case 'Q': /* hierarchical name */ + case 'P': /* pointer */ s++; - if(!isdigit((uchar)*s)) - goto bad; - n = *s++ - '0'; + strcpy(p, "ptr "); + p += 4; + if(!gccname(&s, &p)) + return 0; + break; + + case 'Q': /* qualified name */ + s++; + if(!gccnumber(&s, &n, 0)) + return 0; for(i=0; i<n; i++){ - if(!gccname(&s, &p, state)){ - if(debug) fprint(2, "bad name in hierarchy: %s in %s\n", s, os); + if(!gccname(&s, &p)){ + werrstr("in hierarchy: %r"); return 0; } if(i+1 < n){ @@ -252,84 +525,64 @@ gccname(char **ps, char **pp, Gccstate *state) } } break; - - case 'P': /* pointer to */ - s++; - if(!gccname(&s, &p, state)) - return 0; - *p++ = '*'; - break; - case 'R': /* reference to */ + case 'R': /* reference */ s++; - if(!gccname(&s, &p, state)) + strcpy(p, "ref "); + p += 4; + if(!gccname(&s, &p)) return 0; - *p++ = '&'; break; - case 'S': /* standard or previously-seen name */ + case 't': /* class template instantiation */ + /* should share code with case 'H' */ + t = s; s++; - if('0' <= *s && *s <= '9'){ - /* previously seen */ - t = s-1; - n = strtol(s, &s, 10); - if(*s != '_'){ - fprint(2, "bad S: %s\n", t); - return 0; - } - s++; - sprint(p, "S%d_", n); - p += strlen(p); - break; - } - goto bad; - - case 't': /* named template */ - c = *s++; - if(!gccname(&s, &p, state)) + if(!gccname(&s, &p)) return 0; - goto template; - case 'H': /* nameless template */ - c = *s++; - template: - if(!gccnumber(&s, &n)) - goto bad; + if(!gccnumber(&s, &n, 0)) + return 0; + p0 = p; + /* template arguments */ *p++ = '<'; for(i=0; i<n; i++){ val = 1; - if(*s == 'Z'){ + if(*s == 'Z'){ /* argument is a type, not value */ val = 0; s++; } - if(!gccname(&s, &p, state)) - goto bad; + if(!gccname(&s, &p)) + return 0; if(val){ - if(!gccnumber(&s, &m)) - goto bad; + if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ + return 0; sprint(p, "=%d", m); p += strlen(p); } - if(i+1 < n) + if(i+1<n) *p++ = ','; } *p++ = '>'; - if(c == 'H'){ - if(*s != '_') - goto bad; - s++; - } break; - - case 'T': /* previously-seen type??? e.g., T2 */ - t = s; - for(s++; isdigit((uchar)*s); s++) - ; - memmove(p, t, s-t); - p += s-t; - break; + + case 'T': /* once-repeated type */ + s++; + if(!gccnumber(&s, &n, 0)) + return 0; + sprint(p, "T%d", n); + p += strlen(p); + break; + + case 'X': /* type parameter in 'H' */ + if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2))) + goto bad; + memmove(p, s, 3); + p += 3; + s += 3; + break; } -suffix: +out: *ps = s; *pp = p; return 1; diff --git a/src/libmach/mkfile b/src/libmach/mkfile index d66f3f35..3f976be3 100644 --- a/src/libmach/mkfile +++ b/src/libmach/mkfile @@ -69,6 +69,9 @@ t: t.o $LIBDIR/$LIB elfnm: elfnm.o $LIBDIR/$LIB $LD -o $target $prereq -l9 +demangler: demangler.o $LIBDIR/$LIB + $LD -o $target $prereq -l9 + SunOS.$O: nosys.c Darwin.$O: nosys.c |