aboutsummaryrefslogtreecommitdiff
path: root/src/libmach
diff options
context:
space:
mode:
authorrsc <devnull@localhost>2005-11-29 04:05:36 +0000
committerrsc <devnull@localhost>2005-11-29 04:05:36 +0000
commit2eb305240f559fa69a08969184ce9585d16f8b02 (patch)
treea1adfad569e76e075bab3daf3b040a180f4ea2b1 /src/libmach
parent2d658c0d185051b0bea78c8273a2933a586e617b (diff)
downloadplan9port-2eb305240f559fa69a08969184ce9585d16f8b02.tar.gz
plan9port-2eb305240f559fa69a08969184ce9585d16f8b02.tar.bz2
plan9port-2eb305240f559fa69a08969184ce9585d16f8b02.zip
More demangling.
Diffstat (limited to 'src/libmach')
-rw-r--r--src/libmach/manglegcc2.c587
-rw-r--r--src/libmach/mkfile3
2 files changed, 423 insertions, 167 deletions
diff --git a/src/libmach/manglegcc2.c b/src/libmach/manglegcc2.c
index 035ffd35..59f4d46e 100644
--- a/src/libmach/manglegcc2.c
+++ b/src/libmach/manglegcc2.c
@@ -1,6 +1,20 @@
/*
* gcc2 name demangler.
+ *
+ * gcc2 follows the C++ Annotated Reference Manual section 7.2.1
+ * name mangling description with a few changes.
+ * See gpcompare.texi, gxxint_15.html in this directory for the changes.
+ *
+ * Not implemented:
+ * unicode mangling
*/
+/*
+RULES TO ADD:
+
+_10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
+
+
+*/
#include <u.h>
#include <libc.h>
#include <bio.h>
@@ -24,114 +38,310 @@ chartabsearch(Chartab *ct, int c)
return nil;
}
-typedef struct Gccstate Gccstate;
-struct Gccstate
+static Chartab typetab[] =
+{
+ 'b', "bool",
+ 'c', "char",
+ 'd', "double",
+ 'e', "...",
+ 'f', "float",
+ 'i', "int",
+ 'J', "complex",
+ 'l', "long",
+ 'r', "long double",
+ 's', "short",
+ 'v', "void",
+ 'w', "wchar_t",
+ 'x', "long long",
+ 0, 0
+};
+
+static Chartab modifiertab[] =
{
- char *name[128];
- int nname;
+ 'C', "const",
+ 'S', "signed", /* means static for member functions */
+ 'U', "unsigned",
+ 'V', "volatile",
+
+ 'G', "garbage", /* no idea what this is */
+ 0, 0
};
-static int gccname(char**, char**, Gccstate*);
+
+static char constructor[] = "constructor";
+static char destructor[] = "destructor";
+static char gconstructor[] = "$gconstructor"; /* global destructor */
+static char gdestructor[] = "$gdestructor"; /* global destructor */
+
+static char manglestarts[] = "123456789CFHQSUVt";
+
+static int gccname(char**, char**);
+static char *demanglegcc2a(char*, char*);
+static char *demanglegcc2b(char*, char*);
+static char *demanglegcc2c(char*, char*);
+static int gccnumber(char**, int*, int);
+
char*
demanglegcc2(char *s, char *buf)
{
- char *p, *os, *name, *t;
- int namelen;
- Gccstate state;
+ char *name, *os, *p, *t;
+ int isfn, namelen;
- state.nname = 0;
- os = s;
- p = buf;
- if(memcmp(os, "_._", 3) == 0){
- name = "destructor";
+ /*
+ * Pick off some cases that seem not to fit the pattern.
+ */
+ if((t = demanglegcc2a(s, buf)) != nil)
+ return t;
+ if((t = demanglegcc2b(s, buf)) != nil)
+ return t;
+ if((t = demanglegcc2c(s, buf)) != nil)
+ return t;
+
+ /*
+ * First, figure out whether this is a mangled name.
+ * The name begins with a short version of the name, then __.
+ * Of course, some C names begin with __ too, so the ultimate
+ * test is whether what follows __ looks reasonable.
+ * We use a test on the first letter instead.
+ *
+ * Constructors have no name - they begin __ (double underscore).
+ * Destructors break the rule - they begin _._ (underscore, dot, underscore).
+ */
+ os = s;
+ isfn = 0;
+ if(memcmp(s, "_._", 3) == 0){
+ isfn = 1;
+ name = destructor;
+ namelen = strlen(name);
+ s += 3;
+ }else if(memcmp(s, "_GLOBAL_.D.__", 13) == 0){
+ isfn = 1;
+ name = gdestructor;
+ namelen = strlen(name);
+ s += 13;
+ }else if(memcmp(s, "_GLOBAL_.D._", 12) == 0){
+ isfn = 0;
+ name = gdestructor;
namelen = strlen(name);
- s = os+3;
+ s += 12;
+ }else if(memcmp(s, "_GLOBAL_.I.__", 13) == 0){
+ isfn = 1;
+ name = gconstructor;
+ namelen = strlen(name);
+ s += 13;
+ }else if(memcmp(s, "_GLOBAL_.I._", 12) == 0){
+ isfn = 0;
+ name = gconstructor;
+ namelen = strlen(name);
+ s += 12;
}else{
- /* the mangled part begins with the final __ */
- if((s = strstr(os, "__")) == nil)
+ t = strstr(os, "__");
+ if(t == nil)
return os;
do{
- t = s;
- if(strchr("123456789FHQt", s[2]))
+ s = t;
+ if(strchr(manglestarts, *(s+2)))
break;
- }while((s = strstr(t+1, "__")) != nil);
-
- s = t;
+ }while((t = strstr(s+1, "__")) != nil);
+
name = os;
- namelen = t - os;
+ namelen = s - os;
if(namelen == 0){
- name = "constructor";
+ isfn = 1;
+ name = constructor;
namelen = strlen(name);
}
s += 2;
}
-
- switch(*s){
- default:
+
+ /*
+ * Now s points at the mangled crap (maybe).
+ * and name is the final element of the name.
+ */
+ if(strchr(manglestarts, *s) == nil)
return os;
-
- case 'F': /* plain function */
- s++;
- break;
- case 'Q':
- case 'H':
- case 't':
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if(!gccname(&s, &p, &state)){
- if(debug) fprint(2, "bad name: %s\n", os);
+ p = buf;
+ if(*s == 'F'){
+ /* global function, no extra name pieces, just types */
+ isfn = 1;
+ }else{
+ /* parse extra name pieces */
+ if(!gccname(&s, &p)){
+ if(debug)
+ fprint(2, "parsename %s: %r\n", s);
return os;
}
+
+ /* if we have a constructor or destructor, try to use the C++ name */
+ t = nil;
+ if(name == constructor || name == destructor){
+ *p = 0;
+ t = strrchr(buf, ':');
+ if(t == nil)
+ t = buf;
+ }
strcpy(p, "::");
p += 2;
- break;
+ if(t){
+ namelen = strlen(t)-2;
+ if(name == destructor)
+ *p++ = '~';
+ name = t;
+ }
}
-
memmove(p, name, namelen);
p += namelen;
+
+ if(*s == 'F'){
+ /* might be from above, or might follow name pieces */
+ s++;
+ isfn = 1;
+ }
- if(*s && *s != '_'){
- /* the rest of the name is the argument types */
+ /* the rest of the name is argument types - could skip this */
+ if(*s || isfn){
*p++ = '(';
- while(*s != 0 && *s != '_' && gccname(&s, &p, &state))
+ while(*s != 0 && *s != '_'){
+ if(!gccname(&s, &p))
+ break;
*p++ = ',';
+ }
if(*(p-1) == ',')
p--;
*p++ = ')';
}
-
- if(*s == '_'){
- /* the remainder is the type of the return value */
- }
+ if(*s == '_'){
+ /* return type (left over from H) */
+ }
+
*p = 0;
return buf;
}
-static Chartab typetab[] =
+/*
+ * _10CycleTimer.cycles_per_ms_ => CycleTimer::cycles_per_ms_
+ * _t12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc.npos
+ * (maybe the funny syntax means they are private)
+ */
+static char*
+demanglegcc2a(char *s, char *buf)
{
- 'b', "bool",
- 'c', "char",
- 'd', "double",
- 'i', "int",
- 'l', "long",
- 'v', "void",
- 0, 0
-};
+ char *p;
+
+ if(*s != '_' || strchr(manglestarts, *(s+1)) == nil)
+ return nil;
+ p = buf;
+ s++;
+ if(!gccname(&s, &p))
+ return nil;
+ if(*s != '.')
+ return nil;
+ s++;
+ strcpy(p, "::");
+ p += 2;
+ strcpy(p, s);
+ return buf;
+}
+
+/*
+ * _tfb => type info for bool
+ * __vt_7ostream => vtbl for ostream
+ */
+static char*
+demanglegcc2b(char *s, char *buf)
+{
+ char *p;
+ char *t;
+
+ if(memcmp(s, "__ti", 4) == 0){
+ t = "$typeinfo";
+ s += 4;
+ }else if(memcmp(s, "__tf", 4) == 0){
+ t = "$typeinfofn";
+ s += 4;
+ }else if(memcmp(s, "__vt_", 5) == 0){
+ t = "$vtbl";
+ s += 5;
+ }else
+ return nil;
+ p = buf;
+ for(;;){
+ if(*s == 0 || !gccname(&s, &p))
+ return nil;
+ if(*s == 0)
+ break;
+ if(*s != '.' && *s != '$')
+ return nil;
+ strcpy(p, "::");
+ p += 2;
+ s++;
+ }
+ strcpy(p, "::");
+ p += 2;
+ strcpy(p, t);
+ return buf;
+}
+
+/*
+ * __thunk_176__._Q210LogMessage9LogStream => thunk (offset -176) for LogMessage::LogStream
+ */
+static char*
+demanglegcc2c(char *s, char *buf)
+{
+ int n;
+ char *p;
+
+ if(memcmp(s, "__thunk_", 8) != 0)
+ return nil;
+ s += 8;
+ if(!gccnumber(&s, &n, 1))
+ return nil;
+ if(memcmp(s, "__._", 4) != 0) /* might as well be morse code */
+ return nil;
+ s += 4;
+ p = buf;
+ if(!gccname(&s, &p))
+ return nil;
+ strcpy(p, "::$thunk");
+ return buf;
+}
+
+/*
+ * Parse a number, a non-empty run of digits.
+ * If many==0, then only one digit is used, even
+ * if it is followed by more. When we need a big
+ * number in a one-digit slot, it gets bracketed by underscores.
+ */
static int
-gccnumber(char **ps, int *pn)
+gccnumber(char **ps, int *pn, int many)
{
char *s;
- int n;
+ int n, eatunderscore;
s = *ps;
- if(!isdigit((uchar)*s))
+ eatunderscore = 0;
+ if(!many && *s == '_'){
+ many = 1;
+ s++;
+ eatunderscore = 1;
+ }
+ if(!isdigit((uchar)*s)){
+ bad:
+ werrstr("bad number %.20s", *ps);
return 0;
- n = strtol(s, &s, 10);
- if(*s == '_')
+ }
+ if(many)
+ n = strtol(s, &s, 10);
+ else
+ n = *s++ - '0';
+ if(eatunderscore){
+ if(*s != '_')
+ goto bad;
s++;
+ }
*ps = s;
*pn = n;
return 1;
@@ -143,10 +353,10 @@ gccnumber(char **ps, int *pn)
* Let's see how far we can go before that becomes a problem.
*/
static int
-gccname(char **ps, char **pp, Gccstate *state)
+gccname(char **ps, char **pp)
{
int i, n, m, val;
- char c, *os, *s, *t, *p;
+ char *os, *s, *t, *p, *p0, *p1;
s = *ps;
os = s;
@@ -154,96 +364,159 @@ gccname(char **ps, char **pp, Gccstate *state)
/* print("\tgccname: %s\n", s); */
-#if 0
- /* overloaded operators */
- for(i=0; operators[i].shrt; i++){
- if(memcmp(operators[i].shrt, s, 2) == 0){
- strcpy(p, "operator$");
- strcat(p, operators[i].lng);
- p += strlen(p);
- s += 2;
- goto suffix;
- }
- }
-#endif
/* basic types */
if((t = chartabsearch(typetab, *s)) != nil){
s++;
strcpy(p, t);
p += strlen(t);
- goto suffix;
+ goto out;
}
-
+
+ /* modifiers */
+ if((t = chartabsearch(modifiertab, *s)) != nil){
+ s++;
+ if(!gccname(&s, &p))
+ return 0;
+ /*
+ * These don't end up in the right place
+ * and i don't care anyway
+ * (AssertHeld__C17ReaderWriterMutex)
+ */
+ /*
+ *p++ = ' ';
+ strcpy(p, t);
+ p += strlen(p);
+ */
+ goto out;
+ }
+
switch(*s){
default:
bad:
- if(debug) fprint(2, "gccname: %s (%s)\n", os, s);
+ if(debug)
+ fprint(2, "gccname: %s (%s)\n", os, s);
+ werrstr("bad name %.20s", s);
return 0;
- case '1': case '2': case '3': case '4': /* name length */
+ case '1': case '2': case '3': case '4': /* length-prefixed string */
case '5': case '6': case '7': case '8': case '9':
- n = strtol(s, &s, 10);
+ if(!gccnumber(&s, &n, 1))
+ return 0;
memmove(p, s, n);
p += n;
s += n;
break;
- case 'C': /* const */
+ case 'A': /* array */
+ t = s;
s++;
- strcpy(p, "const ");
- p += strlen(p);
- if(!gccname(&s, &p, state))
+ if(!gccnumber(&s, &n, 1))
+ return 0;
+ if(*s != '_'){
+ werrstr("bad array %.20s", t);
return 0;
+ }
+ s++;
+ sprint(p, "array[%d] ", n);
+ p += strlen(p);
break;
- case 'U': /* unsigned */
+ case 'F': /* function */
+ t = s;
s++;
- strcpy(p, "unsigned ");
- p += strlen(p);
- if(!gccname(&s, &p, state))
+ strcpy(p, "fn(");
+ p += 3;
+ /* arguments */
+ while(*s && *s != '_')
+ if(!gccname(&s, &p))
+ return 0;
+ if(*s != '_'){
+ werrstr("unexpected end in function: %s", t);
return 0;
+ }
+ s++;
+ strcpy(p, " => ");
+ p += 4;
+ /* return type */
+ if(!gccname(&s, &p))
+ return 0;
+ *p++ = ')';
break;
-#if 0
- case 'L': /* default value */
+ case 'H': /* template specialization */
t = s;
s++;
- if(!gccname(&s, &p, state))
- return 0;
- if(!isdigit((uchar)*s)){
- fprint(2, "bad value: %s\n", t);
+ if(!gccnumber(&s, &n, 0))
return 0;
+ p0 = p;
+ /* template arguments */
+ *p++ = '<';
+ for(i=0; i<n; i++){
+ val = 1;
+ if(*s == 'Z'){ /* argument is a type, not value */
+ val = 0;
+ s++;
+ }
+ if(!gccname(&s, &p))
+ return 0;
+ if(val){
+ if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */
+ return 0;
+ sprint(p, "=%d", m);
+ p += strlen(p);
+ }
+ if(i+1<n)
+ *p++ = ',';
}
- n = strtol(s, &s, 10);
- if(*s != 'E'){
- fprint(2, "bad value2: %s\n", t);
+ *p++ = '>';
+ if(*s != '_'){
+ werrstr("bad template %s", t);
return 0;
}
- sprint(p, "=%d", n);
- p += strlen(p);
s++;
+ p1 = p;
+ /* name */
+ if(!gccname(&s, &p))
+ return 0;
+ /* XXX
+__adjust_heap__H3ZPt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZiZt4pair2Zt12basic_string3ZcZt11char_traits1ZcZt9allocator1ZcZt12basic_string3ZcZt11char_traits1ZcZt9allocator1Zc_X01X11X11X21_v
+ */
+ /* XXX swap p0, p1, p - maybe defer to main */
break;
-#endif
- case 'N': /* repeated name/type */
- case 'X':
- c = *s++;
- if(!isdigit((uchar)*s) || !isdigit((uchar)*(s+1)))
+ case 'M': /* M1S: pointer to member */
+ if(*(s+1) != '1' || *(s+2) != 'S')
goto bad;
- n = *s++ - '0';
- m = *s++ - '0';
- sprint(p, "%c%d/%d", c, n, m);
+ s += 3;
+ strcpy(p, "mptr ");
+ p += 5;
+ if(!gccname(&s, &p))
+ return 0;
+ break;
+
+ case 'N': /* multiply-repeated type */
+ s++;
+ if(!gccnumber(&s, &n, 0) || !gccnumber(&s, &m, 0))
+ return 0;
+ sprint(p, "T%dx%d", m, n);
p += strlen(p);
break;
- case 'Q': /* hierarchical name */
+ case 'P': /* pointer */
s++;
- if(!isdigit((uchar)*s))
- goto bad;
- n = *s++ - '0';
+ strcpy(p, "ptr ");
+ p += 4;
+ if(!gccname(&s, &p))
+ return 0;
+ break;
+
+ case 'Q': /* qualified name */
+ s++;
+ if(!gccnumber(&s, &n, 0))
+ return 0;
for(i=0; i<n; i++){
- if(!gccname(&s, &p, state)){
- if(debug) fprint(2, "bad name in hierarchy: %s in %s\n", s, os);
+ if(!gccname(&s, &p)){
+ werrstr("in hierarchy: %r");
return 0;
}
if(i+1 < n){
@@ -252,84 +525,64 @@ gccname(char **ps, char **pp, Gccstate *state)
}
}
break;
-
- case 'P': /* pointer to */
- s++;
- if(!gccname(&s, &p, state))
- return 0;
- *p++ = '*';
- break;
- case 'R': /* reference to */
+ case 'R': /* reference */
s++;
- if(!gccname(&s, &p, state))
+ strcpy(p, "ref ");
+ p += 4;
+ if(!gccname(&s, &p))
return 0;
- *p++ = '&';
break;
- case 'S': /* standard or previously-seen name */
+ case 't': /* class template instantiation */
+ /* should share code with case 'H' */
+ t = s;
s++;
- if('0' <= *s && *s <= '9'){
- /* previously seen */
- t = s-1;
- n = strtol(s, &s, 10);
- if(*s != '_'){
- fprint(2, "bad S: %s\n", t);
- return 0;
- }
- s++;
- sprint(p, "S%d_", n);
- p += strlen(p);
- break;
- }
- goto bad;
-
- case 't': /* named template */
- c = *s++;
- if(!gccname(&s, &p, state))
+ if(!gccname(&s, &p))
return 0;
- goto template;
- case 'H': /* nameless template */
- c = *s++;
- template:
- if(!gccnumber(&s, &n))
- goto bad;
+ if(!gccnumber(&s, &n, 0))
+ return 0;
+ p0 = p;
+ /* template arguments */
*p++ = '<';
for(i=0; i<n; i++){
val = 1;
- if(*s == 'Z'){
+ if(*s == 'Z'){ /* argument is a type, not value */
val = 0;
s++;
}
- if(!gccname(&s, &p, state))
- goto bad;
+ if(!gccname(&s, &p))
+ return 0;
if(val){
- if(!gccnumber(&s, &m))
- goto bad;
+ if(!gccnumber(&s, &m, 1)) /* gccnumber: 1 or 0? */
+ return 0;
sprint(p, "=%d", m);
p += strlen(p);
}
- if(i+1 < n)
+ if(i+1<n)
*p++ = ',';
}
*p++ = '>';
- if(c == 'H'){
- if(*s != '_')
- goto bad;
- s++;
- }
break;
-
- case 'T': /* previously-seen type??? e.g., T2 */
- t = s;
- for(s++; isdigit((uchar)*s); s++)
- ;
- memmove(p, t, s-t);
- p += s-t;
- break;
+
+ case 'T': /* once-repeated type */
+ s++;
+ if(!gccnumber(&s, &n, 0))
+ return 0;
+ sprint(p, "T%d", n);
+ p += strlen(p);
+ break;
+
+ case 'X': /* type parameter in 'H' */
+ if(!isdigit((uchar)*(s+1)) || !isdigit((uchar)*(s+2)))
+ goto bad;
+ memmove(p, s, 3);
+ p += 3;
+ s += 3;
+ break;
}
-suffix:
+out:
*ps = s;
*pp = p;
return 1;
diff --git a/src/libmach/mkfile b/src/libmach/mkfile
index d66f3f35..3f976be3 100644
--- a/src/libmach/mkfile
+++ b/src/libmach/mkfile
@@ -69,6 +69,9 @@ t: t.o $LIBDIR/$LIB
elfnm: elfnm.o $LIBDIR/$LIB
$LD -o $target $prereq -l9
+demangler: demangler.o $LIBDIR/$LIB
+ $LD -o $target $prereq -l9
+
SunOS.$O: nosys.c
Darwin.$O: nosys.c