/*
 * unicode.c (src/cmd/unicode.c)
 */
#include <u.h>
#include <libc.h>
#include <bio.h>

/* Usage line printed by main. */
char	usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
/* Characters accepted as the leading character of a hexadecimal argument. */
char	hex[] = "0123456789abcdefABCDEF";
/* Set by the -n flag; makes main dispatch to nums(). */
int	numout = 0;
/* Set by the -t flag; chars() then omits the newline after each character. */
int	text = 0;
/* Not referenced by any code visible in this file. */
char	*err;
/* Buffered output stream; main initialises it on file descriptor 1. */
Biobuf	bout;

/*
 * Converters selected by main.  Each walks the argument vector and
 * returns 0 on success or a short error string, which main passes to exits().
 */
char	*range(char*[]);
char	*nums(char*[]);
char	*chars(char*[]);

void
main(int argc, char *argv[])
{
	ARGBEGIN{
	case 'n':
		numout = 1;
		break;
	case 't':
		text = 1;
		break;
	}ARGEND
	Binit(&bout, 1, OWRITE);
	if(argc == 0){
		fprint(2, "usage: %s\n", usage);
		exits("usage");
	}
	if(!numout && utfrune(argv[0], '-'))
		exits(range(argv));
	if(numout || strchr(hex, argv[0][0])==0)
		exits(nums(argv));
	exits(chars(argv));
}

/*
 * Print every code point of each "hexmin-hexmax" argument as
 * "<hex> <char>", tab separated, eight entries per output line; the
 * last entry of a range always ends its line.
 *
 * Both bounds must be non-empty hexadecimal numbers no larger than
 * Runemax, with hexmin <= hexmax and nothing following hexmax.
 *
 * Returns 0 when every argument was printed.  On the first malformed
 * argument a diagnostic goes to standard error, "bad range" is
 * returned, and the remaining arguments are not processed.
 */
char*
range(char *argv[])
{
	char *q;
	unsigned long lo, hi;
	int c, max;
	int i;

	for(; *argv; argv++){
		q = *argv;
		/*
		 * strchr also matches the terminating NUL of hex, so an
		 * empty bound has to be rejected explicitly.
		 */
		if(*q == 0 || strchr(hex, *q) == 0)
			goto bad;
		/*
		 * Keep the full width of strtoul's result until it has been
		 * range checked; narrowing to int first would let oversized
		 * values wrap into the valid range.
		 */
		lo = strtoul(q, &q, 16);
		if(lo > Runemax || *q != '-')
			goto bad;
		q++;
		if(*q == 0 || strchr(hex, *q) == 0)
			goto bad;
		hi = strtoul(q, &q, 16);
		if(hi > Runemax || hi < lo || *q != 0)
			goto bad;

		max = hi;
		i = 0;
		for(c = lo; c <= max; c++){
			Bprint(&bout, "%.4x %C", c, c);
			i++;
			if(c == max || (i&7) == 0)
				Bprint(&bout, "\n");
			else
				Bprint(&bout, "\t");
		}
	}
	return 0;

bad:
	/* *argv still points at the offending argument */
	fprint(2, "unicode: bad range %s\n", *argv);
	return "bad range";
}

/*
 * Print the code point of every character in every argument, one
 * hexadecimal value (at least four digits) per line.
 *
 * Returns 0 on success.  If an argument contains a malformed UTF
 * sequence, a diagnostic goes to standard error, "bad utf" is
 * returned, and processing stops; values already decoded from that
 * argument have been written to the output buffer.
 */
char*
nums(char *argv[])
{
	char *q;
	Rune r;
	int w;

	for(; *argv; argv++){
		for(q = *argv; *q; q += w){
			w = chartorune(&r, q);
			/*
			 * chartorune reports a malformed sequence by consuming
			 * exactly one byte.  Every valid one-byte character is
			 * ASCII, so a one-byte result whose byte has the high
			 * bit set can only be a decoding error.  This does not
			 * depend on the numeric value of the error rune, and a
			 * correctly encoded U+0080 (two bytes) is accepted.
			 */
			if(w == 1 && (*q & 0x80) != 0){
				fprint(2, "unicode: invalid utf string %s\n", *argv);
				return "bad utf";
			}
			Bprint(&bout, "%.4x\n", r);
		}
	}
	return 0;
}

/*
 * Print the character for each hexadecimal code point argument.  Each
 * character is followed by a newline unless the -t flag (text) is set.
 *
 * An argument must be a non-empty hexadecimal number no larger than
 * Runemax with nothing following it.
 *
 * Returns 0 when every argument was printed.  On the first malformed
 * argument a diagnostic goes to standard error, "bad char" is
 * returned, and the remaining arguments are not processed.
 */
char*
chars(char *argv[])
{
	char *q;
	unsigned long v;

	for(; *argv; argv++){
		q = *argv;
		/*
		 * strchr also matches the terminating NUL of hex, so an
		 * empty argument has to be rejected explicitly.
		 */
		if(*q == 0 || strchr(hex, *q) == 0)
			goto bad;
		/*
		 * Range check the full-width strtoul result; narrowing to
		 * int first would let oversized values wrap into range.
		 */
		v = strtoul(q, &q, 16);
		if(v > Runemax || *q != 0)
			goto bad;
		Bprint(&bout, "%C", (int)v);
		if(!text)
			Bprint(&bout, "\n");
	}
	return 0;

bad:
	/* *argv still points at the offending argument */
	fprint(2, "unicode: bad unicode value %s\n", *argv);
	return "bad char";
}