import from plan9

author: rsc <devnull@localhost> 2006-05-21 18:57:51 +0000
committer: rsc <devnull@localhost> 2006-05-21 18:57:51 +0000
commit: 536f9b83c0bed9986800d806c74ae4d225628fe3 (patch)
tree: 22c8ef2b2ee5e6520a7a8cb2ff493845ee16525f /src/cmd/tcs
parent: 44fc56d8c3cc534bf903133c63a9c9ecb42e5b63 (diff)
download: plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.tar.gz
plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.tar.bz2
plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.zip
9 files changed, 320 insertions, 62 deletions
diff --git a/src/cmd/tcs/conv.h b/src/cmd/tcs/conv.h
index fc35a105..5e18a065 100644
--- a/src/cmd/tcs/conv.h
+++ b/src/cmd/tcs/conv.h
@@ -13,6 +13,8 @@ void uksc_in(int fd, long *notused, struct convert *out);
 void uksc_out(Rune *base, int n, long *notused);
 void html_in(int fd, long *notused, struct convert *out);
 void html_out(Rune *base, int n, long *notused);
+void tune_in(int fd, long *notused, struct convert *out);
+void tune_out(Rune *base, int n, long *notused);
 
 #define		emit(x)		*(*r)++ = (x)
 #define		NRUNE		65536
diff --git a/src/cmd/tcs/conv_big5.c b/src/cmd/tcs/conv_big5.c
index 111bf5c4..496cae66 100644
--- a/src/cmd/tcs/conv_big5.c
+++ b/src/cmd/tcs/conv_big5.c
@@ -110,6 +110,7 @@ big5_in(int fd, long *notused, struct convert *out)
 	big5proc(-1, &r, nin);
 	if(r > ob)
 		OUT(out, ob, r-ob);
+	OUT(out, ob, 0);
 }
 
 void
diff --git a/src/cmd/tcs/conv_gb.c b/src/cmd/tcs/conv_gb.c
index 70835257..6838b774 100644
--- a/src/cmd/tcs/conv_gb.c
+++ b/src/cmd/tcs/conv_gb.c
@@ -88,6 +88,7 @@ gb_in(int fd, long *notused, struct convert *out)
 	gbproc(-1, &r, nin);
 	if(r > ob)
 		OUT(out, ob, r-ob);
+	OUT(out, ob, 0);
 }
 
 void
diff --git a/src/cmd/tcs/conv_jis.c b/src/cmd/tcs/conv_jis.c
index 18579d70..86275141 100644
--- a/src/cmd/tcs/conv_jis.c
+++ b/src/cmd/tcs/conv_jis.c
@@ -363,6 +363,7 @@ do_in(int fd, void (*procfn)(int, Rune **, long), struct convert *out)
 	(*procfn)(-1, &r, nin);
 	if(r > ob)
 		OUT(out, ob, r-ob);
+	OUT(out, ob, 0);
 }
 
 void
diff --git a/src/cmd/tcs/conv_ksc.c b/src/cmd/tcs/conv_ksc.c
index cbc17f5b..293ffad1 100644
--- a/src/cmd/tcs/conv_ksc.c
+++ b/src/cmd/tcs/conv_ksc.c
@@ -109,6 +109,7 @@ uksc_in(int fd, long *notused, struct convert *out)
 	ukscproc(-1, &r, nin);
 	if(r > ob)
 		OUT(out, ob, r-ob);
+	OUT(out, ob, 0);
 }
 
 void
diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c
index 8a27f1c2..89436060 100644
--- a/src/cmd/tcs/html.c
+++ b/src/cmd/tcs/html.c
@@ -19,132 +19,251 @@ static Hchar byname[] =
 	{"Aacute", 193},
 	{"Acirc", 194},
 	{"Agrave", 192},
+	{"Alpha", 913},
 	{"Aring", 197},
 	{"Atilde", 195},
 	{"Auml", 196},
+	{"Beta", 914},
 	{"Ccedil", 199},
+	{"Chi", 935},
+	{"Dagger", 8225},
+	{"Delta", 916},
 	{"ETH", 208},
 	{"Eacute", 201},
 	{"Ecirc", 202},
 	{"Egrave", 200},
+	{"Epsilon", 917},
+	{"Eta", 919},
 	{"Euml", 203},
+	{"Gamma", 915},
 	{"Iacute", 205},
 	{"Icirc", 206},
 	{"Igrave", 204},
+	{"Iota", 921},
 	{"Iuml", 207},
+	{"Kappa", 922},
+	{"Lambda", 923},
+	{"Mu", 924},
 	{"Ntilde", 209},
+	{"Nu", 925},
+	{"OElig", 338},
 	{"Oacute", 211},
 	{"Ocirc", 212},
 	{"Ograve", 210},
+	{"Omega", 937},
+	{"Omicron", 927},
 	{"Oslash", 216},
 	{"Otilde", 213},
 	{"Ouml", 214},
+	{"Phi", 934},
+	{"Pi", 928},
+	{"Prime", 8243},
+	{"Psi", 936},
+	{"Rho", 929},
+	{"Scaron", 352},
+	{"Sigma", 931},
 	{"THORN", 222},
+	{"Tau", 932},
+	{"Theta", 920},
 	{"Uacute", 218},
 	{"Ucirc", 219},
 	{"Ugrave", 217},
+	{"Upsilon", 933},
 	{"Uuml", 220},
+	{"Xi", 926},
 	{"Yacute", 221},
+	{"Yuml", 376},
+	{"Zeta", 918},
 	{"aacute", 225},
 	{"acirc", 226},
 	{"acute", 180},
 	{"aelig", 230},
 	{"agrave", 224},
+	{"alefsym", 8501},
 	{"alpha", 945},
+	{"amp", 38},
+	{"and", 8743},
+	{"ang", 8736},
 	{"aring", 229},
+	{"asymp", 8776},
 	{"atilde", 227},
 	{"auml", 228},
+	{"bdquo", 8222},
 	{"beta", 946},
 	{"brvbar", 166},
+	{"bull", 8226},
+	{"cap", 8745},
 	{"ccedil", 231},
 	{"cdots", 8943},
 	{"cedil", 184},
 	{"cent", 162},
 	{"chi", 967},
+	{"circ", 710},
+	{"clubs", 9827},
+	{"cong", 8773},
 	{"copy", 169},
+	{"crarr", 8629},
+	{"cup", 8746},
 	{"curren", 164},
+	{"dArr", 8659},
+	{"dagger", 8224},
+	{"darr", 8595},
 	{"ddots", 8945},
 	{"deg", 176},
 	{"delta", 948},
+	{"diams", 9830},
 	{"divide", 247},
 	{"eacute", 233},
 	{"ecirc", 234},
 	{"egrave", 232},
 	{"emdash", 8212},	/* non-standard but commonly used */
+	{"empty", 8709},
 	{"emsp", 8195},
 	{"endash", 8211},	/* non-standard but commonly used */
 	{"ensp", 8194},
 	{"epsilon", 949},
+	{"equiv", 8801},
 	{"eta", 951},
 	{"eth", 240},
 	{"euml", 235},
+	{"euro", 8364},
+	{"exist", 8707},
+	{"fnof", 402},
+	{"forall", 8704},
 	{"frac12", 189},
 	{"frac14", 188},
 	{"frac34", 190},
+	{"frasl", 8260},
 	{"gamma", 947},
+	{"ge", 8805},
+	{"gt", 62},
+	{"hArr", 8660},
+	{"harr", 8596},
+	{"hearts", 9829},
+	{"hellip", 8230},
 	{"iacute", 237},
 	{"icirc", 238},
 	{"iexcl", 161},
 	{"igrave", 236},
+	{"image", 8465},
+	{"infin", 8734},
+	{"int", 8747},
 	{"iota", 953},
 	{"iquest", 191},
+	{"isin", 8712},
 	{"iuml", 239},
 	{"kappa", 954},
+	{"lArr", 8656},
 	{"lambda", 955},
+	{"lang", 9001},
 	{"laquo", 171},
-	{"ldquo", 8220},
+	{"larr", 8592},
+	{"lceil", 8968},
 	{"ldots", 8230},
+	{"ldquo", 8220},
+	{"le", 8804},
+	{"lfloor", 8970},
+	{"lowast", 8727},
+	{"loz", 9674},
+	{"lrm", 8206},
+	{"lsaquo", 8249},
 	{"lsquo", 8216},
+	{"lt", 60},
 	{"macr", 175},
 	{"mdash", 8212},
 	{"micro", 181},
 	{"middot", 183},
+	{"minus", 8722},
 	{"mu", 956},
+	{"nabla", 8711},
 	{"nbsp", 160},
 	{"ndash", 8211},
+	{"ne", 8800},
+	{"ni", 8715},
 	{"not", 172},
+	{"notin", 8713},
+	{"nsub", 8836},
 	{"ntilde", 241},
 	{"nu", 957},
 	{"oacute", 243},
 	{"ocirc", 244},
+	{"oelig", 339},
 	{"ograve", 242},
+	{"oline", 8254},
 	{"omega", 969},
 	{"omicron", 959},
+	{"oplus", 8853},
+	{"or", 8744},
 	{"ordf", 170},
 	{"ordm", 186},
 	{"oslash", 248},
 	{"otilde", 245},
+	{"otimes", 8855},
 	{"ouml", 246},
 	{"para", 182},
+	{"part", 8706},
+	{"permil", 8240},
+	{"perp", 8869},
 	{"phi", 966},
 	{"pi", 960},
+	{"piv", 982},
 	{"plusmn", 177},
 	{"pound", 163},
+	{"prime", 8242},
+	{"prod", 8719},
+	{"prop", 8733},
 	{"psi", 968},
 	{"quad", 8193},
+	{"quot", 34},
+	{"rArr", 8658},
+	{"radic", 8730},
+	{"rang", 9002},
 	{"raquo", 187},
+	{"rarr", 8594},
+	{"rceil", 8969},
 	{"rdquo", 8221},
+	{"real", 8476},
 	{"reg", 174},
+	{"rfloor", 8971},
 	{"rho", 961},
+	{"rlm", 8207},
+	{"rsaquo", 8250},
 	{"rsquo", 8217},
+	{"sbquo", 8218},
+	{"scaron", 353},
+	{"sdot", 8901},
 	{"sect", 167},
 	{"shy", 173},
 	{"sigma", 963},
+	{"sigmaf", 962},
+	{"sim", 8764},
 	{"sp", 8194},
+	{"spades", 9824},
+	{"sub", 8834},
+	{"sube", 8838},
+	{"sum", 8721},
+	{"sup", 8835},
 	{"sup1", 185},
 	{"sup2", 178},
 	{"sup3", 179},
+	{"supe", 8839},
 	{"szlig", 223},
 	{"tau", 964},
+	{"there4", 8756},
 	{"theta", 952},
+	{"thetasym", 977},
 	{"thinsp", 8201},
 	{"thorn", 254},
+	{"tilde", 732},
 	{"times", 215},
 	{"trade", 8482},
+	{"uArr", 8657},
 	{"uacute", 250},
+	{"uarr", 8593},
 	{"ucirc", 251},
 	{"ugrave", 249},
 	{"uml", 168},
+	{"upsih", 978},
 	{"upsilon", 965},
 	{"uuml", 252},
 	{"varepsilon", 8712},
@@ -154,11 +273,14 @@ static Hchar byname[] =
 	{"vdots", 8942},
 	{"vsigma", 962},
 	{"vtheta", 977},
+	{"weierp", 8472},
 	{"xi", 958},
 	{"yacute", 253},
 	{"yen", 165},
 	{"yuml", 255},
-	{"zeta", 950}
+	{"zeta", 950},
+	{"zwj", 8205},
+	{"zwnj", 8204}
 };
 
 static Hchar byrune[nelem(byname)];
@@ -302,6 +424,7 @@ html_in(int fd, long *x, struct convert *out)
 	}
 	if(r > rbuf)
 		OUT(out, rbuf, r-rbuf);
+	OUT(out, rbuf, 0);
 }
 
 /*
@@ -314,6 +437,7 @@ html_out(Rune *r, int n, long *x)
 	Biobuf b;
 	Rune *er;
 	
+	USED(x);
 	html_init();
 	Binit(&b, 1, OWRITE);
 	er = r+n;
diff --git a/src/cmd/tcs/mkfile b/src/cmd/tcs/mkfile
index fc1ce5ab..1b86ab94 100644
--- a/src/cmd/tcs/mkfile
+++ b/src/cmd/tcs/mkfile
@@ -11,7 +11,8 @@ OFILES=tcs.$O\
 	kuten208.$O\
 	gb.$O\
 	ksc.$O\
-	big5.$O
+	big5.$O\
+	tune.$O\
 
 <$PLAN9/src/mkone
 CFLAGS= -DPLAN9 $CFLAGS
@@ -23,6 +24,9 @@ tcs.$O big5.$O:		big5.h
 tcs.$O gb.$O:		gb.h
 tcs.$O:			cyrillic.h
 tcs.$O:			conv.h
+tcs.$O:	8859.h
+tcs.$O:	ms.h
+tcs.$O:	misc.h
 conv%.$O:		conv.h
 conv_ksc.$O:		ksc.h
 
diff --git a/src/cmd/tcs/tcs.c b/src/cmd/tcs/tcs.c
index bb2f61f7..d7d18e41 100644
--- a/src/cmd/tcs/tcs.c
+++ b/src/cmd/tcs/tcs.c
@@ -54,7 +54,7 @@ main(int argc, char **argv)
 		clean = 1;
 		break;
 	case 'f':
-		from = ARGF();
+		from = EARGF(usage());
 		break;	
 	case 'l':
 		listem = 1;
@@ -63,7 +63,7 @@ main(int argc, char **argv)
 		squawk = 0;
 		break;
 	case 't':
-		to = ARGF();
+		to = EARGF(usage());
 		break;
 	case 'v':
 		verbose = 1;
@@ -160,7 +160,7 @@ conv(char *name, int from)
 	struct convert *c;
 
 	for(c = convert; c->name; c++){
-		if(strcmp(c->name, name) != 0)
+		if(cistrcmp(c->name, name) != 0)
 			continue;
 		if(c->flags&Table)
 			return(c);
@@ -208,23 +208,79 @@ unicode_in(int fd, long *notused, struct convert *out)
 	}
 	while((n = read(fd, (char *)buf, 2*N)) > 0){
 		ninput += n;
+		if(swabme)
+			swab2((char *)buf, n);
 		if(n&1){
 			if(squawk)
 				EPR "%s: odd byte count in %s\n", argv0, file);
 			nerrors++;
 			if(clean)
 				n--;
-			else {
-				n++;
-				buf[n/2] = Runeerror;
-				if(swabme)	/* swab so later swab undoes it */
-					swab2((char *)&buf[n/2], 2);
-			}
+			else
+				buf[n++/2] = Runeerror;
+		}
+		OUT(out, buf, n/2);
+	}
+}
+
+void
+unicode_in_be(int fd, long *notused, struct convert *out)
+{	/* read 16-bit big-endian code units from fd and hand them to out as Runes */
+	int i, n;
+	Rune buf[N], r;	/* buf first receives raw bytes, then the decoded runes */
+	uchar *p;
+
+	USED(notused);	/* parameter is not used by this converter */
+	while((n = read(fd, (char *)buf, 2*N)) > 0){	/* at most 2*N bytes per pass */
+		ninput += n;	/* running count of input bytes */
+		p = (uchar*)buf;	/* byte-wise view of the data just read */
+		for(i=0; i<n/2; i++){	/* one rune per complete byte pair */
+			r = *p++<<8;	/* first byte is the high-order byte */
+			r |= *p++;	/* second byte is the low-order byte */
+			buf[i] = r;	/* NOTE(review): in-place store; looks safe only if sizeof(Rune) == 2 - confirm */
+		}
+		if(n&1){	/* a lone trailing byte was read */
+			if(squawk)
+				EPR "%s: odd byte count in %s\n", argv0, file);
+			nerrors++;
+			if(clean)
+				n--;	/* drop the stray byte */
+			else
+				buf[n++/2] = Runeerror;	/* replace the stray byte with Runeerror; n becomes even */
 		}
-		if(swabme)
-			swab2((char *)buf, n);
 		OUT(out, buf, n/2);
 	}
+	OUT(out, buf, 0);	/* zero-length call after the last read; presumably marks end of input - confirm against OUT */
+}
+
+void
+unicode_in_le(int fd, long *notused, struct convert *out)
+{	/* read 16-bit little-endian code units from fd and hand them to out as Runes */
+	int i, n;
+	Rune buf[N], r;	/* buf first receives raw bytes, then the decoded runes */
+	uchar *p;
+
+	USED(notused);	/* parameter is not used by this converter */
+	while((n = read(fd, (char *)buf, 2*N)) > 0){	/* at most 2*N bytes per pass */
+		ninput += n;	/* running count of input bytes */
+		p = (uchar*)buf;	/* byte-wise view of the data just read */
+		for(i=0; i<n/2; i++){	/* one rune per complete byte pair */
+			r = *p++;	/* first byte is the low-order byte */
+			r |= *p++<<8;	/* second byte is the high-order byte */
+			buf[i] = r;	/* NOTE(review): in-place store; looks safe only if sizeof(Rune) == 2 - confirm */
+		}
+		if(n&1){	/* a lone trailing byte was read */
+			if(squawk)
+				EPR "%s: odd byte count in %s\n", argv0, file);
+			nerrors++;
+			if(clean)
+				n--;	/* drop the stray byte */
+			else
+				buf[n++/2] = Runeerror;	/* replace the stray byte with Runeerror; n becomes even */
+		}
+		OUT(out, buf, n/2);	/* pass the decoded runes on */
+	}
+	OUT(out, buf, 0);	/* zero-length call after the last read; presumably marks end of input - confirm against OUT */
 }
 
 void
@@ -245,6 +301,44 @@ unicode_out(Rune *base, int n, long *notused)
 }
 
 void
+unicode_out_be(Rune *base, int n, long *notused)
+{	/* write n runes from base to fd 1 as 2-byte big-endian units */
+	int i;
+	uchar *p;
+	Rune r;
+
+	USED(notused);	/* parameter is not used by this converter */
+	p = (uchar*)base;	/* bytes are packed in place over the caller's rune buffer */
+	for(i=0; i<n; i++){
+		r = base[i];	/* fetch the rune before its storage is overwritten */
+		*p++ = r>>8;	/* high-order byte first */
+		*p++ = r;	/* then the low-order byte */
+	}
+	nrunes += n;	/* running count of runes emitted */
+	noutput += 2*n;	/* running count of output bytes */
+	write(1, (char *)base, 2*n);	/* NOTE(review): write's return value is not checked */
+}
+
+void
+unicode_out_le(Rune *base, int n, long *notused)
+{	/* write n runes from base to fd 1 as 2-byte little-endian units */
+	int i;
+	uchar *p;
+	Rune r;
+
+	USED(notused);	/* parameter is not used by this converter */
+	p = (uchar*)base;	/* bytes are packed in place over the caller's rune buffer */
+	for(i=0; i<n; i++){
+		r = base[i];	/* fetch the rune before its storage is overwritten */
+		*p++ = r;	/* low-order byte first */
+		*p++ = r>>8;	/* then the high-order byte */
+	}
+	nrunes += n;	/* running count of runes emitted */
+	noutput += 2*n;	/* running count of output bytes */
+	write(1, (char *)base, 2*n);	/* NOTE(review): write's return value is not checked */
+}
+
+void
 intable(int fd, long *table, struct convert *out)
 {
 	uchar buf[N];
@@ -270,6 +364,7 @@ intable(int fd, long *table, struct convert *out)
 		}
 		OUT(out, runes, r-runes);
 	}
+	OUT(out, runes, 0);
 	if(n < 0){
 #ifdef	PLAN9
 		EPR "%s: input read: %r\n", argv0);
@@ -403,64 +498,91 @@ struct convert convert[] =
 	{ "av", "Alternativnyj Variant", Table, (void *)tabav },
 	{ "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
 	{ "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
-	{ "cp437", "Code Page 437 (US)", Table, (void*)tabcp437 },
-	{ "cp720", "Code Page 720 (Arabic)", Table, (void*)tabcp720 },
-	{ "cp737", "Code Page 737 (Greek)", Table, (void*)tabcp737 },
-	{ "cp775", "Code Page 775 (Baltic)", Table, (void*)tabcp775 },
-	{ "cp850", "Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
-	{ "cp852", "Code Page 852 (Latin II)", Table, (void*)tabcp852 },
-	{ "cp855", "Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
-	{ "cp857", "Code Page 857 (Turkish)", Table, (void*)tabcp857 },
-	{ "cp858", "Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
-	{ "cp862", "Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
-	{ "cp866", "Code Page 866 (Russian)", Table, (void*)tabcp866 },
-	{ "cp874", "Code Page 874 (Thai)", Table, (void*)tabcp874 },
-	{ "cp1250", "Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
-	{ "cp1251", "Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
-	{ "cp1252", "Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
-	{ "cp1253", "Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
-	{ "cp1254", "Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
-	{ "cp1255", "Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
-	{ "cp1256", "Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
-	{ "cp1257", "Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
-	{ "cp1258", "Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
 	{ "ebcdic", "EBCDIC", Table, (void *)tabebcdic },	/* 6f is recommended bad map */
 	{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
 	{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
-	{ "gb", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
-	{ "gb", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
+	{ "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
+	{ "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
 	{ "html", "HTML", From|Func, 0, (Fnptr)html_in },
 	{ "html", "HTML", Func, 0, (Fnptr)html_out },
+	{ "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 },
+	{ "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 },
+	{ "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 },
+	{ "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 },
+	{ "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
+	{ "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 },
+	{ "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
+	{ "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 },
+	{ "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
+	{ "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
+	{ "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 },
+	{ "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 },
+	{ "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)jisjis_in },
+	{ "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_out },
+	{ "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 },
+	{ "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 },
+	{ "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 },
+	{ "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 },
+	{ "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 },
+	{ "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 },
+	{ "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 },
+	{ "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 },
+	{ "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 },
+	{ "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10 },
+	{ "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15 },
 	{ "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
 	{ "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_in },
 	{ "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out },
 	{ "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
-	{ "latin1", "ISO 8859-1", Table, (void *)tab8859_1 },
+	{ "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 },
+	{ "latin1", "alias for 8859-1", Table, (void *)tab8859_1 },
 	{ "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
-	{ "microsoft", "Windows (CP 1252)", Table, (void *)tabcp1252 },
-	{ "msdos", "IBM PC (CP 437)", Table, (void *)tabcp437 },
-	{ "msdos2", "IBM PC (CP 437 with graphics in C0)", Table, (void *)tabmsdos2 },
+	{ "microsoft", "alias for windows1252", Table, (void *)tabcp1252 },
 	{ "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
 	{ "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
+	{ "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 },
+	{ "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabmsdos2 },
 	{ "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
 	{ "ov", "Osnovnoj Variant", Table, (void *)tabov },
-	{ "ps2", "IBM PS/2: (CP 850)", Table, (void *)tabcp850 },
+	{ "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 },
 	{ "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
 	{ "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
-	{ "tis", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
+	{ "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
+	{ "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in },
+	{ "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out },
 	{ "ucode", "Russian U-code", Table, (void *)tabucode },
 	{ "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
 	{ "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
 	{ "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
 	{ "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
-	{ "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
-	{ "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
+	{ "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode_in_be },
+	{ "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_be },
+	{ "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unicode_in_le },
+	{ "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_out_le },
+	{ "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii },
 	{ "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
 	{ "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
-	{ "utf-l2", "from", From|Func, 0, (Fnptr)utf_in },
-	{ "utf-l2", "to", Func, 0, (Fnptr)utf_out },
+	{ "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
+	{ "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
+	{ "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in },
+	{ "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out },
+	{ "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_in },
+	{ "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out },
+	{ "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unicode_in_be },
+	{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
+	{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
+	{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
 	{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
 	{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
-	{ "viscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
+	{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
+	{ "windows-1250", "Windows Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
+	{ "windows-1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
+	{ "windows-1252", "Windows Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
+	{ "windows-1253", "Windows Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
+	{ "windows-1254", "Windows Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
+	{ "windows-1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
+	{ "windows-1256", "Windows Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
+	{ "windows-1257", "Windows Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
+	{ "windows-1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
 	{ 0 }
 };
diff --git a/src/cmd/tcs/utf.c b/src/cmd/tcs/utf.c
index 9aad892b..f87a310b 100644
--- a/src/cmd/tcs/utf.c
+++ b/src/cmd/tcs/utf.c
@@ -45,15 +45,15 @@ utf_in(int fd, long *notused, struct convert *out)
 	tot = 0;
 	while((n = read(fd, buf+tot, N-tot)) >= 0){
 		tot += n;
-		for(i=j=0; i<tot; ){
+		for(i=j=0; i<tot-UTFmax || (n==0 && i<tot); ){
 			c = our_mbtowc(&l, buf+i, tot-i);
-			if(c == -2)
-				break;
 			if(c == -1){
 				if(squawk)
 					EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
-				if(clean)
+				if(clean){
+					i++;
 					continue;
+				}
 				nerrors++;
 				l = Runeerror;
 				c = 1;
@@ -69,6 +69,7 @@ utf_in(int fd, long *notused, struct convert *out)
 		if(n == 0)
 			break;
 	}
+	OUT(out, runes, 0);
 }
 
 void
@@ -100,11 +101,13 @@ isoutf_in(int fd, long *notused, struct convert *out)
 			if(!fullisorune(buf+i, tot-i))
 				break;
 			c = isochartorune(&runes[j], buf+i);
-			if(runes[j] == Runeerror){
+			if(runes[j] == Runeerror && c == 1){
 				if(squawk)
 					EPR "%s: bad UTF sequence near byte %ld in input\n", argv0, ninput+i);
-				if(clean)
+				if(clean){
+					i++;
 					continue;
+				}
 				nerrors++;
 			}
 			j++;
@@ -118,6 +121,7 @@ isoutf_in(int fd, long *notused, struct convert *out)
 		if(n == 0)
 			break;
 	}
+	OUT(out, runes, 0);
 }
 
 void
@@ -393,19 +397,19 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
 		return 0;		/* no shift states */
 
 	if(n < 1)
-		goto badlen;
+		goto bad;
 	us = (uchar*)s;
 	c0 = us[0];
 	if(c0 >= T3) {
 		if(n < 3)
-			goto badlen;
+			goto bad;
 		c1 = us[1] ^ Tx;
 		c2 = us[2] ^ Tx;
 		if((c1|c2) & T2)
 			goto bad;
 		if(c0 >= T5) {
 			if(n < 5)
-				goto badlen;
+				goto bad;
 			c3 = us[3] ^ Tx;
 			c4 = us[4] ^ Tx;
 			if((c3|c4) & T2)
@@ -413,7 +417,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
 			if(c0 >= T6) {
 				/* 6 bytes */
 				if(n < 6)
-					goto badlen;
+					goto bad;
 				c5 = us[5] ^ Tx;
 				if(c5 & T2)
 					goto bad;
@@ -437,7 +441,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
 		if(c0 >= T4) {
 			/* 4 bytes */
 			if(n < 4)
-				goto badlen;
+				goto bad;
 			c3 = us[3] ^ Tx;
 			if(c3 & T2)
 				goto bad;
@@ -460,7 +464,7 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
 	if(c0 >= T2) {
 		/* 2 bytes */
 		if(n < 2)
-			goto badlen;
+			goto bad;
 		c1 = us[1] ^ Tx;
 		if(c1 & T2)
 			goto bad;
@@ -480,6 +484,4 @@ our_mbtowc(unsigned long *p, char *s, unsigned n)
 bad:
 	errno = EILSEQ;
 	return -1;
-badlen:
-	return -2;
 }
author	rsc <devnull@localhost>	2006-05-21 18:57:51 +0000
committer	rsc <devnull@localhost>	2006-05-21 18:57:51 +0000
commit	536f9b83c0bed9986800d806c74ae4d225628fe3 (patch)
tree	22c8ef2b2ee5e6520a7a8cb2ff493845ee16525f /src/cmd/tcs
parent	44fc56d8c3cc534bf903133c63a9c9ecb42e5b63 (diff)
download	plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.tar.gz plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.tar.bz2 plan9port-536f9b83c0bed9986800d806c74ae4d225628fe3.zip