diff options
author | Russ Cox <rsc@swtch.com> | 2011-02-04 15:41:39 -0500 |
---|---|---|
committer | Russ Cox <rsc@swtch.com> | 2011-02-04 15:41:39 -0500 |
commit | 31bffaba60d1bdce33fcee001f8bfe718871e714 (patch) | |
tree | 427f9282081354aa63c5e5e056a2b09e6ed12756 /src/cmd/eqn | |
parent | ff262e102983021abc2223e83131eea1f7eb5d58 (diff) | |
download | plan9port-31bffaba60d1bdce33fcee001f8bfe718871e714.tar.gz plan9port-31bffaba60d1bdce33fcee001f8bfe718871e714.tar.bz2 plan9port-31bffaba60d1bdce33fcee001f8bfe718871e714.zip |
eqn: add working mbtowc and wctomb
Fixes UTF-8 input files.
R=rsc
http://codereview.appspot.com/4132042
Diffstat (limited to 'src/cmd/eqn')
-rw-r--r-- | src/cmd/eqn/mbwc.c | 165 | ||||
-rw-r--r-- | src/cmd/eqn/mkfile | 1 |
2 files changed, 166 insertions, 0 deletions
/*
 * src/cmd/eqn/mbwc.c
 *
 * UTF-8 replacements for the C library multibyte/wide-character
 * conversion routines, limited to code points 0x0000-0xFFFF
 * (sequences of at most three bytes).
 *
 * Shown in the commit view as:
 *	diff --git a/src/cmd/eqn/mbwc.c b/src/cmd/eqn/mbwc.c
 *	new file mode 100644
 *	index 00000000..c97b036c
 *	--- /dev/null
 *	+++ b/src/cmd/eqn/mbwc.c
 *	@@ -0,0 +1,165 @@
 */
#include <stdlib.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 * We define 4 byte types:
 *	T0	0xxxxxxx	7 free bits
 *	Tx	10xxxxxx	6 free bits
 *	T1	110xxxxx	5 free bits
 *	T2	1110xxxx	4 free bits
 *
 * Encoding is as follows.
 * From hex	Thru hex	Sequence	Bits
 * 00000000	0000007F	T0		7
 * 00000080	000007FF	T1 Tx		11
 * 00000800	0000FFFF	T2 Tx Tx	16
 */

/*
 * Length in bytes of the multibyte character at s, looking at no
 * more than n bytes.  Same return values as mbtowc: 0 for a null
 * pointer or a NUL byte, -1 for a malformed or truncated sequence.
 */
int
mblen(const char *s, size_t n)
{

	return mbtowc(NULL, s, n);
}

/*
 * Decode one UTF-8 sequence of at most n bytes from s.
 * If pwc is non-null the decoded value is stored through it.
 *
 * Returns the number of bytes consumed (1, 2 or 3), 0 if s is null
 * or points at a NUL byte, and -1 if the sequence is truncated,
 * has a bad lead or continuation byte, or is an overlong encoding.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
	int c, c1, c2;
	long l;

	/* the encoding is stateless, so a null s reports "no shift state" */
	if(!s)
		return 0;

	if(n < 1)
		goto bad;
	c = s[0] & 0xff;
	if((c & 0x80) == 0x00) {
		/* T0: plain 7-bit character */
		if(pwc)
			*pwc = c;
		if(c == 0)
			return 0;
		return 1;
	}

	if(n < 2)
		goto bad;
	/* flipping the top bit maps a valid Tx byte (10xxxxxx) to 00xxxxxx */
	c1 = (s[1] ^ 0x80) & 0xff;
	if((c1 & 0xC0) != 0x00)
		goto bad;
	if((c & 0xE0) == 0xC0) {
		/* T1 Tx */
		l = ((c << 6) | c1) & 0x7FF;
		if(l < 0x080)
			goto bad;	/* overlong: fits in one byte */
		if(pwc)
			*pwc = l;
		return 2;
	}

	if(n < 3)
		goto bad;
	c2 = (s[2] ^ 0x80) & 0xff;
	if((c2 & 0xC0) != 0x00)
		goto bad;
	if((c & 0xF0) == 0xE0) {
		/* T2 Tx Tx */
		l = ((((c << 6) | c1) << 6) | c2) & 0xFFFF;
		if(l < 0x0800)
			goto bad;	/* overlong: fits in two bytes */
		if(pwc)
			*pwc = l;
		return 3;
	}

	/*
	 * bad decoding
	 */
bad:
	return -1;

}

/*
 * Encode wchar as UTF-8 into s, which must have room for 3 bytes.
 * Only the low 16 bits of wchar are used; higher bits are silently
 * discarded rather than reported as an error.
 *
 * Returns the number of bytes stored (1, 2 or 3), or 0 if s is null.
 */
int
wctomb(char *s, wchar_t wchar)
{
	long c;

	if(!s)
		return 0;

	c = wchar & 0xFFFF;
	if(c < 0x80) {
		s[0] = c;
		return 1;
	}

	if(c < 0x800) {
		s[0] = 0xC0 | (c >> 6);
		s[1] = 0x80 | (c & 0x3F);
		return 2;
	}

	s[0] = 0xE0 | (c >> 12);
	s[1] = 0x80 | ((c >> 6) & 0x3F);
	s[2] = 0x80 | (c & 0x3F);
	return 3;
}

/*
 * Convert the NUL-terminated UTF-8 string s into wide characters,
 * storing at most n elements in pwcs.  The terminating zero is
 * stored if there is room for it.
 *
 * Returns the number of elements stored, not counting the
 * terminator, or (size_t)-1 if s contains a malformed sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
	size_t i;
	int d;

	for(i = 0; i < n; i++) {
		/*
		 * Offering 3 bytes cannot read past the terminator:
		 * mbtowc rejects a NUL in a continuation position
		 * before it looks at the byte after it.
		 */
		d = mbtowc(pwcs, s, 3);
		if(d < 0)
			return (size_t)-1;
		if(d == 0)
			break;		/* terminator stored, not counted */
		s += d;
		pwcs++;
	}
	return i;
}

/*
 * Convert the zero-terminated wide string pwcs to UTF-8, storing
 * at most n bytes in s.  Conversion stops at the terminator (which
 * is stored if it fits) or at the first character whose encoding
 * would not fit in the remaining space; a character is never
 * written in part and never skipped.
 *
 * Returns the number of bytes stored, not counting the terminator.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
	char buf[3];
	size_t used;
	int d, k;
	wchar_t wc;

	/*
	 * Track an offset instead of an end pointer so that no pointer
	 * outside s[0..n] is ever formed, whatever the value of n.
	 */
	used = 0;
	while(used < n) {
		wc = *pwcs++;
		d = wctomb(buf, wc);	/* always 1..3 for a non-null buffer */
		if((size_t)d > n - used)
			break;
		for(k = 0; k < d; k++)
			s[used + k] = buf[k];
		if(wc == 0)
			break;		/* terminator stored, not counted */
		used += d;
	}
	return used;
}

/*
 * Companion build change shown in the same commit view:
 *	diff --git a/src/cmd/eqn/mkfile b/src/cmd/eqn/mkfile
 *	index e66212ee..0ef1b9ea 100644
 *	--- a/src/cmd/eqn/mkfile
 *	+++ b/src/cmd/eqn/mkfile
 *	@@ -15,6 +15,7 @@ (inside the OFILES=main.$O\ ... list)
 *	+	mbwc.$O\	(added between matrix.$O\ and move.$O\)
 */