From 2eb305240f559fa69a08969184ce9585d16f8b02 Mon Sep 17 00:00:00 2001 From: rsc Date: Tue, 29 Nov 2005 04:05:36 +0000 Subject: More demangling. --- src/libmach/manglegcc2.c | 587 +++++++++++++++++++++++++++++++++-------------- src/libmach/mkfile | 3 + 2 files changed, 423 insertions(+), 167 deletions(-) (limited to 'src') diff --git a/src/libmach/manglegcc2.c b/src/libmach/manglegcc2.c index 035ffd35..59f4d46e 100644 --- a/src/libmach/manglegcc2.c +++ b/src/libmach/manglegcc2.c @@ -1,6 +1,20 @@ /* * gcc2 name demangler. + * + * gcc2 follows the C++ Annotated Reference Manual section 7.2.1 + * name mangling description with a few changes. + * See gpcompare.texi, gxxint_15.html in this directory for the changes. + * + * Not implemented: + * unicode mangling */ +/* +RULES TO ADD: + +_10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ + + +*/ #include #include #include @@ -24,114 +38,310 @@ chartabsearch(Chartab *ct, int c) return nil; } -typedef struct Gccstate Gccstate; -struct Gccstate +static Chartab typetab[] = +{ + 'b', "bool", + 'c', "char", + 'd', "double", + 'e', "...", + 'f', "float", + 'i', "int", + 'J', "complex", + 'l', "long", + 'r', "long double", + 's', "short", + 'v', "void", + 'w', "wchar_t", + 'x', "long long", + 0, 0 +}; + +static Chartab modifiertab[] = { - char *name[128]; - int nname; + 'C', "const", + 'S', "signed", /* means static for member functions */ + 'U', "unsigned", + 'V', "volatile", + + 'G', "garbage", /* no idea what this is */ + 0, 0 }; -static int gccname(char**, char**, Gccstate*); + +static char constructor[] = "constructor"; +static char destructor[] = "destructor"; +static char gconstructor[] = "$gconstructor"; /* global destructor */ +static char gdestructor[] = "$gdestructor"; /* global destructor */ + +static char manglestarts[] = "123456789CFHQSUVt"; + +static int gccname(char**, char**); +static char *demanglegcc2a(char*, char*); +static char *demanglegcc2b(char*, char*); +static char *demanglegcc2c(char*, char*); +static int gccnumber(char**, int*, int); + char* demanglegcc2(char *s, char *buf) { - char *p, *os, *name, *t; - int namelen; - Gccstate state; + char *name, *os, *p, *t; + int isfn, namelen; - state.nname = 0; - os = s; - p = buf; - if(memcmp(os, "_._", 3) == 0){ - name = "destructor"; + /* + * Pick off some cases that seem not to fit the pattern. + */ + if((t = demanglegcc2a(s, buf)) != nil) + return t; + if((t = demanglegcc2b(s, buf)) != nil) + return t; + if((t = demanglegcc2c(s, buf)) != nil) + return t; + + /* + * First, figure out whether this is a mangled name. + * The name begins with a short version of the name, then __. + * Of course, some C names begin with __ too, so the ultimate + * test is whether what follows __ looks reasonable. + * We use a test on the first letter instead. + * + * Constructors have no name - they begin __ (double underscore). + * Destructors break the rule - they begin _._ (underscore, dot, underscore). + */ + os = s; + isfn = 0; + if(memcmp(s, "_._", 3) == 0){ + isfn = 1; + name = destructor; + namelen = strlen(name); + s += 3; + }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){ + isfn = 1; + name = gdestructor; + namelen = strlen(name); + s += 13; + }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){ + isfn = 0; + name = gdestructor; namelen = strlen(name); - s = os+3; + s += 12; + }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){ + isfn = 1; + name = gconstructor; + namelen = strlen(name); + s += 13; + }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){ + isfn = 0; + name = gconstructor; + namelen = strlen(name); + s += 12; }else{ - /* the mangled part begins with the final __ */ - if((s = strstr(os, "__")) == nil) + t = strstr(os, "__"); + if(t == nil) return os; do{ - t = s; - if(strchr("123456789FHQt", s[2])) + s = t; + if(strchr(manglestarts, *(s+2))) break; - }while((s = strstr(t+1, "__")) != nil); - - s = t; + }while((t = strstr(s+1, "__")) != nil); + name = os; - namelen = t - os; + namelen = s - os; if(namelen == 0){ - name = "constructor"; + isfn = 1; + name = constructor; namelen = strlen(name); } s += 2; } - - switch(*s){ - default: + + /* + * Now s points at the mangled crap (maybe). + * and name is the final element of the name. + */ + if(strchr(manglestarts, *s) == nil) return os; - - case 'F': /* plain function */ - s++; - break; - case 'Q': - case 'H': - case 't': - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if(!gccname(&s, &p, &state)){ - if(debug) fprint(2, "bad name: %s\n", os); + p = buf; + if(*s == 'F'){ + /* global function, no extra name pieces, just types */ + isfn = 1; + }else{ + /* parse extra name pieces */ + if(!gccname(&s, &p)){ + if(debug) + fprint(2, "parsename %s: %r\n", s); return os; } + + /* if we have a constructor or destructor, try to use the C++ name */ + t = nil; + if(name == constructor || name == destructor){ + *p = 0; + t = strrchr(buf, ':'); + if(t == nil) + t = buf; + } strcpy(p, "::"); p += 2; - break; + if(t){ + namelen = strlen(t)-2; + if(name == destructor) + *p++ = '~'; + name = t; + } } - memmove(p, name, namelen); p += namelen; + + if(*s == 'F'){ + /* might be from above, or might follow name pieces */ + s++; + isfn = 1; + } - if(*s && *s != '_'){ - /* the rest of the name is the argument types */ + /* the rest of the name is argument types - could skip this */ + if(*s || isfn){ *p++ = '('; - while(*s != 0 && *s != '_' && gccname(&s, &p, &state)) + while(*s != 0 && *s != '_'){ + if(!gccname(&s, &p)) + break; *p++ = ','; + } if(*(p-1) == ',') p--; *p++ = ')'; } - - if(*s == '_'){ - /* the remainder is the type of the return value */ - } + if(*s == '_'){ + /* return type (left over from H) */ + } + *p = 0; return buf; } -static Chartab typetab[] = +/* + * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ + * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos + * (maybe the funny syntax means they are private) + */ +static char* +demanglegcc2a(char *s, char *buf) { - 'b', "bool", - 'c', "char", - 'd', "double", - 'i', "int", - 'l', "long", - 'v', "void", - 0, 0 -}; + char *p; + + if(*s != '_' || strchr(manglestarts, *(s+1)) == nil) + return nil; + p = buf; + s++; + if(!gccname(&s, &p)) + return nil; + if(*s != '.') + return nil; + s++; + strcpy(p, "::"); + p += 2; + strcpy(p, s); + return buf; +} + +/* + * _tfb => type info for bool + * __vt_7ostream => vtbl for ostream + */ +static char* +demanglegcc2b(char *s, char *buf) +{ + char *p; + char *t; + + if(memcmp(s, "__ti", 4) == 0){ + t = "$typeinfo"; + s += 4; + }else if(memcmp(s, "__tf", 4) == 0){ + t = "$typeinfofn"; + s += 4; + }else if(memcmp(s, "__vt_", 5) == 0){ + t = "$vtbl"; + s += 5; + }else + return nil; + p = buf; + for(;;){ + if(*s == 0 || !gccname(&s, &p)) + return nil; + if(*s == 0) + break; + if(*s != '.' && *s != '$') + return nil; + strcpy(p, "::"); + p += 2; + s++; + } + strcpy(p, "::"); + p += 2; + strcpy(p, t); + return buf; +} + +/* + * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream + */ +static char* +demanglegcc2c(char *s, char *buf) +{ + int n; + char *p; + + if(memcmp(s, "__thunk_", 8) != 0) + return nil; + s += 8; + if(!gccnumber(&s, &n, 1)) + return nil; + if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */ + return nil; + s += 4; + p = buf; + if(!gccname(&s, &p)) + return nil; + strcpy(p, "::$thunk"); + return buf; +} + +/* + * Parse a number, a non-empty run of digits. + * If many==0, then only one digit is used, even + * if it is followed by more. When we need a big + * number in a one-digit slot, it gets bracketed by underscores. + */ static int -gccnumber(char **ps, int *pn) +gccnumber(char **ps, int *pn, int many) { char *s; - int n; + int n, eatunderscore; s = *ps; - if(!isdigit((uchar)*s)) + eatunderscore = 0; + if(!many && *s == '_'){ + many = 1; + s++; + eatunderscore = 1; + } + if(!isdigit((uchar)*s)){ + bad: + werrstr("bad number %.20s", *ps); return 0; - n = strtol(s, &s, 10); - if(*s == '_') + } + if(many) + n = strtol(s, &s, 10); + else + n = *s++ - '0'; + if(eatunderscore){ + if(*s != '_') + goto bad; s++; + } *ps = s; *pn = n; return 1; @@ -143,10 +353,10 @@ gccnumber(char **ps, int *pn) * Let's see how far we can go before that becomes a problem. */ static int -gccname(char **ps, char **pp, Gccstate *state) +gccname(char **ps, char **pp) { int i, n, m, val; - char c, *os, *s, *t, *p; + char *os, *s, *t, *p, *p0, *p1; s = *ps; os = s; @@ -154,96 +364,159 @@ gccname(char **ps, char **pp, Gccstate *state) /* print("\tgccname: %s\n", s); */ -#if 0 - /* overloaded operators */ - for(i=0; operators[i].shrt; i++){ - if(memcmp(operators[i].shrt, s, 2) == 0){ - strcpy(p, "operator$"); - strcat(p, operators[i].lng); - p += strlen(p); - s += 2; - goto suffix; - } - } -#endif /* basic types */ if((t = chartabsearch(typetab, *s)) != nil){ s++; strcpy(p, t); p += strlen(t); - goto suffix; + goto out; } - + + /* modifiers */ + if((t = chartabsearch(modifiertab, *s)) != nil){ + s++; + if(!gccname(&s, &p)) + return 0; + /* + * These don't end up in the right place + * and i don't care anyway + * (AssertHeld__C17ReaderWriterMutex) + */ + /* + *p++ = ' '; + strcpy(p, t); + p += strlen(p); + */ + goto out; + } + switch(*s){ default: bad: - if(debug) fprint(2, "gccname: %s (%s)\n", os, s); + if(debug) + fprint(2, "gccname: %s (%s)\n", os, s); + werrstr("bad name %.20s", s); return 0; - case '1': case '2': case '3': case '4': /* name length */ + case '1': case '2': case '3': case '4': /* length-prefixed string */ case '5': case '6': case '7': case '8': case '9': - n = strtol(s, &s, 10); + if(!gccnumber(&s, &n, 1)) + return 0; memmove(p, s, n); p += n; s += n; break; - case 'C': /* const */ + case 'A': /* array */ + t = s; s++; - strcpy(p, "const "); - p += strlen(p); - if(!gccname(&s, &p, state)) + if(!gccnumber(&s, &n, 1)) + return 0; + if(*s != '_'){ + werrstr("bad array %.20s", t); return 0; + } + s++; + sprint(p, "array[%d] ", n); + p += strlen(p); break; - case 'U': /* unsigned */ + case 'F': /* function */ + t = s; s++; - strcpy(p, "unsigned "); - p += strlen(p); - if(!gccname(&s, &p, state)) + strcpy(p, "fn("); + p += 3; + /* arguments */ + while(*s && *s != '_') + if(!gccname(&s, &p)) + return 0; + if(*s != '_'){ + werrstr("unexpected end in function: %s", t); return 0; + } + s++; + strcpy(p, " => "); + p += 4; + /* return type */ + if(!gccname(&s, &p)) + return 0; + *p++ = ')'; break; -#if 0 - case 'L': /* default value */ + case 'H': /* template specialization */ t = s; s++; - if(!gccname(&s, &p, state)) - return 0; - if(!isdigit((uchar)*s)){ - fprint(2, "bad value: %s\n", t); + if(!gccnumber(&s, &n, 0)) return 0; + p0 = p; + /* template arguments */ + *p++ = '<'; + for(i=0; i