From a0583cf2560feff2dcfcccb52279569bcf27fa5e Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Sun, 13 Sep 2009 17:43:37 -0400 Subject: tcs: update for 32-bit Rune http://codereview.appspot.com/117063 --- src/cmd/tcs/plan9.h | 24 +-------- src/cmd/tcs/utf.c | 152 ++-------------------------------------------------- 2 files changed, 4 insertions(+), 172 deletions(-) (limited to 'src/cmd/tcs') diff --git a/src/cmd/tcs/plan9.h b/src/cmd/tcs/plan9.h index 4d002cb0..f9366ec4 100644 --- a/src/cmd/tcs/plan9.h +++ b/src/cmd/tcs/plan9.h @@ -1,23 +1 @@ -typedef unsigned short Rune; /* 16 bits */ -typedef unsigned char uchar; -#define Runeerror 0x80 /* decoding error in UTF */ -#define Runeself 0x80 /* rune and UTF sequences are the same (<) */ -#define UTFmax 6 /* maximum bytes per rune */ - -/* - plan 9 argument parsing -*/ -#define ARGBEGIN for((argv0? 0: (argv0= *argv)),argv++,argc--;\ - argv[0] && argv[0][0]=='-' && argv[0][1];\ - argc--, argv++) {\ - char *_args, *_argt, _argc;\ - _args = &argv[0][1];\ - if(_args[0]=='-' && _args[1]==0){\ - argc--; argv++; break;\ - }\ - _argc=0;while(*_args) switch(_argc= *_args++) -#define ARGEND } -#define ARGF() (_argt=_args, _args="",\ - (*_argt? _argt: argv[1]? (argc--, *++argv): 0)) -#define ARGC() _argc -extern char *argv0; +Compatibility file removed; must compile using Plan 9 definitions. diff --git a/src/cmd/tcs/utf.c b/src/cmd/tcs/utf.c index 4045332d..3970e596 100644 --- a/src/cmd/tcs/utf.c +++ b/src/cmd/tcs/utf.c @@ -139,168 +139,22 @@ isoutf_out(Rune *base, int n, long *notused) } -enum -{ - Char1 = Runeself, Rune1 = Runeself, - Char21 = 0xA1, Rune21 = 0x0100, - Char22 = 0xF6, Rune22 = 0x4016, - Char3 = 0xFC, Rune3 = 0x10000, /* really 0x38E2E */ - Esc = 0xBE, Bad = Runeerror -}; - -static uchar U[256]; -static uchar T[256]; - -static -void -mktable(void) -{ - int i, u; - - for(i=0; i<256; i++) { - u = i + (0x5E - 0xA0); - if(i < 0xA0) - u = i + (0xDF - 0x7F); - if(i < 0x7F) - u = i + (0x00 - 0x21); - if(i < 0x21) - u = i + (0xBE - 0x00); - U[i] = u; - T[u] = i; - } -} - int isochartorune(Rune *rune, char *str) { - int c, c1, c2; - long l; - - if(U[0] == 0) - mktable(); - - /* - * one character sequence - * 00000-0009F => 00-9F - */ - c = *(uchar*)str; - if(c < Char1) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 000A0-000FF => A0; A0-FF - */ - c1 = *(uchar*)(str+1); - if(c < Char21) { - if(c1 >= Rune1 && c1 < Rune21) { - *rune = c1; - return 2; - } - goto bad; - } - - /* - * two character sequence - * 00100-04015 => A1-F5; 21-7E/A0-FF - */ - c1 = U[c1]; - if(c1 >= Esc) - goto bad; - if(c < Char22) { - *rune = (c-Char21)*Esc + c1 + Rune21; - return 2; - } - - /* - * three character sequence - * 04016-38E2D => A6-FB; 21-7E/A0-FF - */ - c2 = U[*(uchar*)(str+2)]; - if(c2 >= Esc) - goto bad; - if(c < Char3) { - l = (c-Char22)*Esc*Esc + c1*Esc + c2 + Rune22; - if(l >= Rune3) - goto bad; - *rune = l; - return 3; - } - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; + return chartorune(rune, str); } int runetoisoutf(char *str, Rune *rune) { - long c; - - if(T[0] == 0) - mktable(); - - /* - * one character sequence - * 00000-0009F => 00-9F - */ - c = *rune; - if(c < Rune1) { - str[0] = c; - return 1; - } - - /* - * two character sequence - * 000A0-000FF => A0; A0-FF - */ - if(c < Rune21) { - str[0] = (char)Char1; - str[1] = c; - return 2; - } - - /* - * two character sequence - * 00100-04015 => A1-F5; 21-7E/A0-FF - */ - if(c < Rune22) { - c -= Rune21; - str[0] = c/Esc + Char21; - str[1] = T[c%Esc]; - return 2; - } - - /* - * three character sequence - * 04016-38E2D => A6-FB; 21-7E/A0-FF - */ - c -= Rune22; - str[0] = c/(Esc*Esc) + Char22; - str[1] = T[c/Esc%Esc]; - str[2] = T[c%Esc]; - return 3; + return runetochar(str, rune); } int fullisorune(char *str, int n) { - int c; - - if(n > 0) { - c = *(uchar*)str; - if(c < Char1) - return 1; - if(n > 1) - if(c < Char22 || n > 2) - return 1; - } - return 0; + return fullrune(str, n); } enum -- cgit v1.2.3