/* * gcc2 name demangler. * * gcc2 follows the C++ Annotated Reference Manual section 7.2.1 * name mangling description with a few changes. * See gpcompare.texi, gxxint_15.html in this directory for the changes. * * Not implemented: * unicode mangling * renaming of operator functions */ /* RULES TO ADD: _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ */ #include <u.h> #include <libc.h> #include <bio.h> #include <mach.h> #define debug 0 typedef struct Chartab Chartab; struct Chartab { char c; char *s; }; static char* chartabsearch(Chartab *ct, int c) { for(; ct->c; ct++) if(ct->c == c) return ct->s; return nil; } static Chartab typetab[] = { 'b', "bool", 'c', "char", 'd', "double", 'e', "...", 'f', "float", 'i', "int", 'J', "complex", 'l', "long", 'r', "long double", 's', "short", 'v', "void", 'w', "wchar_t", 'x', "long long", 0, 0 }; static Chartab modifiertab[] = { 'C', "const", 'S', "signed", /* means static for member functions */ 'U', "unsigned", 'V', "volatile", 'G', "garbage", /* no idea what this is */ 0, 0 }; static char constructor[] = "constructor"; static char destructor[] = "destructor"; static char gconstructor[] = "$gconstructor"; /* global destructor */ static char gdestructor[] = "$gdestructor"; /* global destructor */ static char manglestarts[] = "123456789CFHQSUVt"; static int gccname(char**, char**); static char *demanglegcc2a(char*, char*); static char *demanglegcc2b(char*, char*); static char *demanglegcc2c(char*, char*); static int gccnumber(char**, int*, int); char* demanglegcc2(char *s, char *buf) { char *name, *os, *p, *t; int isfn, namelen; /* * Pick off some cases that seem not to fit the pattern. */ if((t = demanglegcc2a(s, buf)) != nil) return t; if((t = demanglegcc2b(s, buf)) != nil) return t; if((t = demanglegcc2c(s, buf)) != nil) return t; /* * First, figure out whether this is a mangled name. * The name begins with a short version of the name, then __. * Of course, some C names begin with __ too, so the ultimate * test is whether what follows __ looks reasonable. * We use a test on the first letter instead. * * Constructors have no name - they begin __ (double underscore). * Destructors break the rule - they begin _._ (underscore, dot, underscore). */ os = s; isfn = 0; if(memcmp(s, "_._", 3) == 0){ isfn = 1; name = destructor; namelen = strlen(name); s += 3; }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){ isfn = 1; name = gdestructor; namelen = strlen(name); s += 13; }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){ isfn = 0; name = gdestructor; namelen = strlen(name); s += 12; }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){ isfn = 1; name = gconstructor; namelen = strlen(name); s += 13; }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){ isfn = 0; name = gconstructor; namelen = strlen(name); s += 12; }else{ t = strstr(os, "__"); if(t == nil) return os; do{ s = t; if(strchr(manglestarts, *(s+2))) break; }while((t = strstr(s+1, "__")) != nil); name = os; namelen = s - os; if(namelen == 0){ isfn = 1; name = constructor; namelen = strlen(name); } s += 2; } /* * Now s points at the mangled crap (maybe). * and name is the final element of the name. */ if(strchr(manglestarts, *s) == nil) return os; p = buf; if(*s == 'F'){ /* global function, no extra name pieces, just types */ isfn = 1; }else{ /* parse extra name pieces */ if(!gccname(&s, &p)){ if(debug) fprint(2, "parsename %s: %r\n", s); return os; } /* if we have a constructor or destructor, try to use the C++ name */ t = nil; if(name == constructor || name == destructor){ *p = 0; t = strrchr(buf, ':'); if(t) t++; else t = buf; } strcpy(p, "::"); p += 2; if(t){ namelen = strlen(t)-2; if(name == destructor) *p++ = '~'; name = t; } } if(p >= buf+2 && memcmp(p-2, "::", 2) == 0 && *(p-3) == ')') p -= 2; memmove(p, name, namelen); p += namelen; if(*s == 'F'){ /* might be from above, or might follow name pieces */ s++; isfn = 1; } /* the rest of the name is argument types - could skip this */ if(*s || isfn){ *p++ = '('; while(*s != 0 && *s != '_'){ if(!gccname(&s, &p)) break; *p++ = ','; } if(*(p-1) == ',') p--; *p++ = ')'; } if(*s == '_'){ /* return type (left over from H) */ } *p = 0; return buf; } /* * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_ * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos * (maybe the funny syntax means they are private) */ static char* demanglegcc2a(char *s, char *buf) { char *p; if(*s != '_' || strchr(manglestarts, *(s+1)) == nil) return nil; p = buf; s++; if(!gccname(&s, &p)) return nil; if(*s != '.') return nil; s++; strcpy(p, "::"); p += 2; strcpy(p, s); return buf; } /* * _tfb => type info for bool * __vt_7ostream => vtbl for ostream */ static char* demanglegcc2b(char *s, char *buf) { char *p; char *t; if(memcmp(s, "__ti", 4) == 0){ t = "$typeinfo"; s += 4; }else if(memcmp(s, "__tf", 4) == 0){ t = "$typeinfofn"; s += 4; }else if(memcmp(s, "__vt_", 5) == 0){ t = "$vtbl"; s += 5; }else return nil; p = buf; for(;;){ if(*s == 0 || !gccname(&s, &p)) return nil; if(*s == 0) break; if(*s != '.' && *s != '$') return nil; strcpy(p, "::"); p += 2; s++; } strcpy(p, "::"); p += 2; strcpy(p, t); return buf; } /* * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream */ static char* demanglegcc2c(char *s, char *buf) { int n; char *p; if(memcmp(s, "__thunk_", 8) != 0) return nil; s += 8; if(!gccnumber(&s, &n, 1)) return nil; if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */ return nil; s += 4; p = buf; if(!gccname(&s, &p)) return nil; strcpy(p, "::$thunk"); return buf; } /* * Parse a number, a non-empty run of digits. * If many==0, then only one digit is used, even * if it is followed by more. When we need a big * number in a one-digit slot, it gets bracketed by underscores. */ static int gccnumber(char **ps, int *pn, int many) { char *s; int n, eatunderscore; s = *ps; eatunderscore = 0; if(!many && *s == '_'){ many = 1; s++; eatunderscore = 1; } if(!isdigit((uchar)*s)){ bad: werrstr("bad number %.20s", *ps); return 0; } if(many) n = strtol(s, &s, 10); else n = *s++ - '0'; if(eatunderscore){ if(*s != '_') goto bad; s++; } *ps = s; *pn = n; return 1; } /* * Pick apart the next mangled name section. * Names and types are treated as the same. * Let's see how far we can go before that becomes a problem. */ static int gccname(char **ps, char **pp) { int i, n, m, val; char *os, *s, *t, *p, *p0, *p1; s = *ps; os = s; p = *pp; /* print("\tgccname: %s\n", s); */ /* basic types */ if((t = chartabsearch(typetab, *s)) != nil){ s++; strcpy(p, t); p += strlen(t); goto out; } /* modifiers */ if((t = chartabsearch(modifiertab, *s)) != nil){ s++; if(!gccname(&s, &p)) return 0; /* * These don't end up in the right place * and i don't care anyway * (AssertHeld__C17ReaderWriterMutex) */ /* *p++ = ' '; strcpy(p, t); p += strlen(p); */ goto out; } switch(*s){ default: bad: if(debug) fprint(2, "gccname: %s (%s)\n", os, s); werrstr("bad name %.20s", s); return 0; case '1': case '2': case '3': case '4': /* length-prefixed string */ case '5': case '6': case '7': case '8': case '9': if(!gccnumber(&s, &n, 1)) return 0; memmove(p, s, n); p += n; s += n; break; case 'A': /* array */ t = s; s++; if(!gccnumber(&s, &n, 1)) return 0; if(*s != '_'){ werrstr("bad array %.20s", t); return 0; } s++; sprint(p, "array[%d] ", n); p += strlen(p); break; case 'F': /* function */ t = s; s++; strcpy(p, "fn("); p += 3; /* arguments */ while(*s && *s != '_') if(!gccname(&s, &p)) return 0; if(*s != '_'){ werrstr("unexpected end in function: %s", t); return 0; } s++; strcpy(p, " => "); p += 4; /* return type */ if(!gccname(&s, &p)) return 0; *p++ = ')'; break; case 'H': /* template specialization */ if(memcmp(s-2, "__", 2) != 0) fprint(2, "wow: %s\n", s-2); t = s; s++; if(!gccnumber(&s, &n, 0)) return 0; p0 = p; /* template arguments */ *p++ = '<'; for(i=0; i<n; i++){ val = 1; if(*s == 'Z'){ /* argument is a type, not value */ val = 0; s++; } if(!gccname(&s, &p)) return 0; if(val){ if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ return 0; sprint(p, "=%d", m); p += strlen(p); } if(i+1<n) *p++ = ','; } *p++ = '>'; if(*s != '_'){ werrstr("bad template %s", t); return 0; } s++; /* * Can't seem to tell difference between a qualifying name * and arguments. Not sure which is which. It appears that if * you get a name, use it, otherwise look for types. * The G type qualifier appears to have no effect other than * turning an ambiguous name into a definite type. * * SetFlag__H1Zb_P15FlagSettingMode_v * => void SetFlag<bool>(FlagSettingMode *) * SetFlag__H1Zb_15FlagSettingMode_v * => void FlagSettingMode::SetFlag<bool>() * SetFlag__H1Zb_G15FlagSettingMode_v * => void SetFlag<bool>(FlagSettingMode) */ if(strchr("ACFGPRSUVX", *s)){ /* args */ t = s; p1 = p; *p++ = '('; while(*s != '_'){ if(*s == 0 || !gccname(&s, &p)){ werrstr("bad H args: %s", t); return 0; } } *p++ = ')'; s++; }else{ p1 = p; /* name */ if(!gccname(&s, &p)) return 0; } /* * Need to do some rearrangement of <> () and names here. * Doesn't matter since we strip out the <> and () anyway. */ break; case 'M': /* M1S: pointer to member */ if(*(s+1) != '1' || *(s+2) != 'S') goto bad; s += 3; strcpy(p, "mptr "); p += 5; if(!gccname(&s, &p)) return 0; break; case 'N': /* multiply-repeated type */ s++; if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0)) return 0; sprint(p, "T%dx%d", m, n); p += strlen(p); break; case 'P': /* pointer */ s++; strcpy(p, "ptr "); p += 4; if(!gccname(&s, &p)) return 0; break; case 'Q': /* qualified name */ s++; if(!gccnumber(&s, &n, 0)) return 0; for(i=0; i<n; i++){ if(!gccname(&s, &p)){ werrstr("in hierarchy: %r"); return 0; } if(i+1 < n){ strcpy(p, "::"); p += 2; } } break; case 'R': /* reference */ s++; strcpy(p, "ref "); p += 4; if(!gccname(&s, &p)) return 0; break; case 't': /* class template instantiation */ /* should share code with case 'H' */ t = s; s++; if(!gccname(&s, &p)) return 0; if(!gccnumber(&s, &n, 0)) return 0; p0 = p; /* template arguments */ *p++ = '<'; for(i=0; i<n; i++){ val = 1; if(*s == 'Z'){ /* argument is a type, not value */ val = 0; s++; } if(!gccname(&s, &p)) return 0; if(val){ if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */ return 0; sprint(p, "=%d", m); p += strlen(p); } if(i+1<n) *p++ = ','; } *p++ = '>'; break; case 'T': /* once-repeated type */ s++; if(!gccnumber(&s, &n, 0)) return 0; sprint(p, "T%d", n); p += strlen(p); break; case 'X': /* type parameter in 'H' */ if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2))) goto bad; memmove(p, s, 3); p += 3; s += 3; break; } out: *ps = s; *pp = p; return 1; }