diff options
Diffstat (limited to 'src/libutf')
48 files changed, 2834 insertions, 0 deletions
diff --git a/src/libutf/LICENSE b/src/libutf/LICENSE new file mode 100644 index 00000000..ad76cd52 --- /dev/null +++ b/src/libutf/LICENSE @@ -0,0 +1,13 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 1998-2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ diff --git a/src/libutf/Make.Darwin-PowerMacintosh b/src/libutf/Make.Darwin-PowerMacintosh new file mode 100644 index 00000000..14b8d4e7 --- /dev/null +++ b/src/libutf/Make.Darwin-PowerMacintosh @@ -0,0 +1,6 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I${PREFIX}/include +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libutf/Make.FreeBSD-386 b/src/libutf/Make.FreeBSD-386 new file mode 100644 index 00000000..087ed3ab --- /dev/null +++ b/src/libutf/Make.FreeBSD-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overriden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libutf/Make.HP-UX-9000 b/src/libutf/Make.HP-UX-9000 new file mode 100644 index 00000000..edbdc111 --- /dev/null +++ b/src/libutf/Make.HP-UX-9000 @@ -0,0 +1,6 @@ +CC=cc +CFLAGS=-O -c -Ae -I. +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libutf/Make.Linux-386 b/src/libutf/Make.Linux-386 new file mode 100644 index 00000000..74b0252c --- /dev/null +++ b/src/libutf/Make.Linux-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overriden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libutf/Make.NetBSD-386 b/src/libutf/Make.NetBSD-386 new file mode 100644 index 00000000..087ed3ab --- /dev/null +++ b/src/libutf/Make.NetBSD-386 @@ -0,0 +1,7 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c -I. -I$(PREFIX)/include +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O # default, can be overriden by Make.$(SYSNAME) +NAN=nan64.$O diff --git a/src/libutf/Make.OSF1-alpha b/src/libutf/Make.OSF1-alpha new file mode 100644 index 00000000..3d45279b --- /dev/null +++ b/src/libutf/Make.OSF1-alpha @@ -0,0 +1,6 @@ +CC=cc +CFLAGS+=-g -c -I. +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libutf/Make.SunOS-sun4u b/src/libutf/Make.SunOS-sun4u new file mode 100644 index 00000000..c5fe67b8 --- /dev/null +++ b/src/libutf/Make.SunOS-sun4u @@ -0,0 +1,2 @@ +include Make.SunOS-sun4u-$(CC) +NAN=nan64.$O diff --git a/src/libutf/Make.SunOS-sun4u-cc b/src/libutf/Make.SunOS-sun4u-cc new file mode 100644 index 00000000..829301de --- /dev/null +++ b/src/libutf/Make.SunOS-sun4u-cc @@ -0,0 +1,6 @@ +CC=cc +CFLAGS+=-g -c -I. -O +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libutf/Make.SunOS-sun4u-gcc b/src/libutf/Make.SunOS-sun4u-gcc new file mode 100644 index 00000000..5c415948 --- /dev/null +++ b/src/libutf/Make.SunOS-sun4u-gcc @@ -0,0 +1,6 @@ +CC=gcc +CFLAGS+=-Wall -Wno-missing-braces -Wno-parentheses -Wno-switch -O2 -g -c +O=o +AR=ar +ARFLAGS=rvc +NAN=nan64.$O diff --git a/src/libutf/Makefile b/src/libutf/Makefile new file mode 100644 index 00000000..419a8a65 --- /dev/null +++ b/src/libutf/Makefile @@ -0,0 +1,112 @@ + +# this works in gnu make +SYSNAME:=${shell uname} +OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'} + +# this works in bsd make +SYSNAME!=uname +OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g' + +# the gnu rules will mess up bsd but not vice versa, +# hence the gnu rules come first. + +include Make.$(SYSNAME)-$(OBJTYPE) + +PREFIX=/usr/local + +NUKEFILES= + +TGZFILES= + +LIB=libutf.a +VERSION=2.0 +PORTPLACE=devel/libutf +NAME=libutf + +OFILES=\ + rune.$O\ + runestrcat.$O\ + runestrchr.$O\ + runestrcmp.$O\ + runestrcpy.$O\ + runestrdup.$O\ + runestrlen.$O\ + runestrecpy.$O\ + runestrncat.$O\ + runestrncmp.$O\ + runestrncpy.$O\ + runestrrchr.$O\ + runestrstr.$O\ + runetype.$O\ + utfecpy.$O\ + utflen.$O\ + utfnlen.$O\ + utfrrune.$O\ + utfrune.$O\ + utfutf.$O\ + +HFILES=\ + utf.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3 + install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7 + install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3 + install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3 + install -c -m 0644 utf.h $(PREFIX)/include/utf.h + install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) $(LIB) $(OFILES) + +NUKEFILES+=$(LIB) +.c.$O: + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + +%.$O: %.c + $(CC) $(CFLAGS) -I$(PREFIX)/include $*.c + + +$(OFILES): $(HFILES) + +tgz: + rm -rf $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION) + cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION) + tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + +clean: + rm -f $(OFILES) $(LIB) + +nuke: + rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES) + +rpm: + make tgz + cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES + rpm -ba rpm.spec + cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm . + cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm . + scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software + +PORTDIR=/usr/ports/$(PORTPLACE) + +ports: + make tgz + rm -rf $(PORTDIR) + mkdir $(PORTDIR) + cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles + cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}') + (cd $(PORTDIR); make makesum) + (cd $(PORTDIR); make) + (cd $(PORTDIR); /usr/local/bin/portlint) + rm -rf $(PORTDIR)/work + shar `find $(PORTDIR)` > ports.shar + (cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz + scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software + +.phony: all clean nuke install tgz rpm ports diff --git a/src/libutf/Makefile.BOT b/src/libutf/Makefile.BOT new file mode 100644 index 00000000..ad852a1f --- /dev/null +++ b/src/libutf/Makefile.BOT @@ -0,0 +1,47 @@ +.c.$O: + $(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $*.c + +%.$O: %.c + $(CC) $(CFLAGS) -I/usr/X11R6/include -I../sam -I$(PREFIX)/include $*.c + + +$(OFILES): $(HFILES) + +tgz: + rm -rf $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION) + cp Makefile Make.* README LICENSE NOTICE *.[ch137] rpm.spec bundle.ports $(TGZFILES) $(NAME)-$(VERSION) + tar cf - $(NAME)-$(VERSION) | gzip >$(NAME)-$(VERSION).tgz + rm -rf $(NAME)-$(VERSION) + +clean: + rm -f $(OFILES) $(LIB) + +nuke: + rm -f $(OFILES) *.tgz *.rpm $(NUKEFILES) + +rpm: + make tgz + cp $(NAME)-$(VERSION).tgz /usr/src/RPM/SOURCES + rpm -ba rpm.spec + cp /usr/src/RPM/SRPMS/$(NAME)-$(VERSION)-1.src.rpm . + cp /usr/src/RPM/RPMS/i586/$(NAME)-$(VERSION)-1.i586.rpm . + scp *.rpm rsc@amsterdam.lcs.mit.edu:public_html/software + +PORTDIR=/usr/ports/$(PORTPLACE) + +ports: + make tgz + rm -rf $(PORTDIR) + mkdir $(PORTDIR) + cp $(NAME)-$(VERSION).tgz /usr/ports/distfiles + cat bundle.ports | (cd $(PORTDIR) && awk '$$1=="---" && $$3=="---" { ofile=$$2; next} {if(ofile) print >ofile}') + (cd $(PORTDIR); make makesum) + (cd $(PORTDIR); make) + (cd $(PORTDIR); /usr/local/bin/portlint) + rm -rf $(PORTDIR)/work + shar `find $(PORTDIR)` > ports.shar + (cd $(PORTDIR); tar cf - *) | gzip >$(NAME)-$(VERSION)-ports.tgz + scp *.tgz rsc@amsterdam.lcs.mit.edu:public_html/software + +.phony: all clean nuke install tgz rpm ports diff --git a/src/libutf/Makefile.CMD b/src/libutf/Makefile.CMD new file mode 100644 index 00000000..a817fd3c --- /dev/null +++ b/src/libutf/Makefile.CMD @@ -0,0 +1,5 @@ + +$(TARG): $(OFILES) + $(CC) -o $(TARG) $(OFILES) -L$(PREFIX)/lib -lframe -ldraw -lthread -l9 -lregexp9 -lbio -lfmt -lutf -L/usr/X11R6/lib -lX11 -lm + + diff --git a/src/libutf/Makefile.LIB b/src/libutf/Makefile.LIB new file mode 100644 index 00000000..9ebf5606 --- /dev/null +++ b/src/libutf/Makefile.LIB @@ -0,0 +1,4 @@ +$(LIB): $(OFILES) + $(AR) $(ARFLAGS) $(LIB) $(OFILES) + +NUKEFILES+=$(LIB) diff --git a/src/libutf/Makefile.MID b/src/libutf/Makefile.MID new file mode 100644 index 00000000..9c142d44 --- /dev/null +++ b/src/libutf/Makefile.MID @@ -0,0 +1,41 @@ +LIB=libutf.a +VERSION=2.0 +PORTPLACE=devel/libutf +NAME=libutf + +OFILES=\ + rune.$O\ + runestrcat.$O\ + runestrchr.$O\ + runestrcmp.$O\ + runestrcpy.$O\ + runestrdup.$O\ + runestrlen.$O\ + runestrecpy.$O\ + runestrncat.$O\ + runestrncmp.$O\ + runestrncpy.$O\ + runestrrchr.$O\ + runestrstr.$O\ + runetype.$O\ + utfecpy.$O\ + utflen.$O\ + utfnlen.$O\ + utfrrune.$O\ + utfrune.$O\ + utfutf.$O\ + +HFILES=\ + utf.h\ + +all: $(LIB) + +install: $(LIB) + test -d $(PREFIX)/man/man3 || mkdir $(PREFIX)/man/man3 + install -c -m 0644 isalpharune.3 $(PREFIX)/man/man3/isalpharune.3 + install -c -m 0644 utf.7 $(PREFIX)/man/man7/utf.7 + install -c -m 0644 rune.3 $(PREFIX)/man/man3/rune.3 + install -c -m 0644 runestrcat.3 $(PREFIX)/man/man3/runestrcat.3 + install -c -m 0644 utf.h $(PREFIX)/include/utf.h + install -c -m 0644 $(LIB) $(PREFIX)/lib/$(LIB) + diff --git a/src/libutf/Makefile.TOP b/src/libutf/Makefile.TOP new file mode 100644 index 00000000..f6abc8c1 --- /dev/null +++ b/src/libutf/Makefile.TOP @@ -0,0 +1,20 @@ + +# this works in gnu make +SYSNAME:=${shell uname} +OBJTYPE:=${shell uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g'} + +# this works in bsd make +SYSNAME!=uname +OBJTYPE!=uname -m | sed 's;i.86;386;; s;/.*;;; s; ;;g' + +# the gnu rules will mess up bsd but not vice versa, +# hence the gnu rules come first. + +include Make.$(SYSNAME)-$(OBJTYPE) + +PREFIX=/usr/local + +NUKEFILES= + +TGZFILES= + diff --git a/src/libutf/NOTICE b/src/libutf/NOTICE new file mode 100644 index 00000000..ad76cd52 --- /dev/null +++ b/src/libutf/NOTICE @@ -0,0 +1,13 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 1998-2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ diff --git a/src/libutf/README b/src/libutf/README new file mode 100644 index 00000000..ad76cd52 --- /dev/null +++ b/src/libutf/README @@ -0,0 +1,13 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 1998-2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ diff --git a/src/libutf/bundle.ports b/src/libutf/bundle.ports new file mode 100644 index 00000000..d5ae00b6 --- /dev/null +++ b/src/libutf/bundle.ports @@ -0,0 +1,43 @@ +--- Makefile --- +# New ports collection makefile for: libutf +# Date Created: 11 Feb 2003 +# Whom: rsc +# +# THIS LINE NEEDS REPLACING. IT'S HERE TO GET BY PORTLINT +# $FreeBSD: ports/devel/libfmt/Makefile,v 1.1 2003/02/12 00:51:22 rsc Exp $ + +PORTNAME= libutf +PORTVERSION= 2.0 +CATEGORIES= devel +MASTER_SITES= http://pdos.lcs.mit.edu/~rsc/software/ +EXTRACT_SUFX= .tgz + +MAINTAINER= rsc@post.harvard.edu + +MAN3= rune.3 runestrcat.3 isalpharune.3 +MAN7= utf.7 + +USE_REINPLACE=yes + +.include <bsd.port.pre.mk> + +post-patch: + ${REINPLACE_CMD} -e 's,$$(PREFIX),${PREFIX},g' ${WRKSRC}/Makefile + +.include <bsd.port.post.mk> +--- pkg-comment --- +UTF8 support library from Plan 9 +--- pkg-descr --- +UTF8 support library from Plan 9. + +WWW: http://pdos.lcs.mit.edu/~rsc/software/#libutf +http://plan9.bell-labs.com/magic/man2html/3/rune + +Russ Cox +rsc@post.harvard.edu +--- pkg-plist --- +lib/libutf.a +include/utf.h +--- /dev/null --- +This is just a way to make sure blank lines don't +creep into pkg-plist. diff --git a/src/libutf/isalpharune.3 b/src/libutf/isalpharune.3 new file mode 100644 index 00000000..f9f58cde --- /dev/null +++ b/src/libutf/isalpharune.3 @@ -0,0 +1,47 @@ +.TH ISALPHARUNE 3 +.SH NAME +isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B +int isalpharune(Rune c) +.PP +.B +int islowerrune(Rune c) +.PP +.B +int isspacerune(Rune c) +.PP +.B +int istitlerune(Rune c) +.PP +.B +int isupperrune(Rune c) +.PP +.B +Rune tolowerrune(Rune c) +.PP +.B +Rune totitlerune(Rune c) +.PP +.B +Rune toupperrune(Rune c) +.SH DESCRIPTION +These routines examine and operate on Unicode characters, +in particular a subset of their properties as defined in the Unicode standard. +Unicode defines some characters as alphabetic and specifies three cases: +upper, lower, and title. +Analogously to +.IR ctype (3) +for +.SM ASCII\c +, +these routines +test types and modify cases for Unicode characters. +The names are self-explanatory. +.PP +The case-conversion routines return the character unchanged if it has no case. +.SH "SEE ALSO +.IR ctype (3) , +.IR "The Unicode Standard" . diff --git a/src/libutf/lib9.h b/src/libutf/lib9.h new file mode 100644 index 00000000..e6128ae4 --- /dev/null +++ b/src/libutf/lib9.h @@ -0,0 +1,17 @@ +#include <string.h> +#include "utf.h" + +#define nil ((void*)0) + +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + diff --git a/src/libutf/mkfile b/src/libutf/mkfile new file mode 100644 index 00000000..9893a06d --- /dev/null +++ b/src/libutf/mkfile @@ -0,0 +1,9 @@ +all:V: Makefile Make.FreeBSD-386 Make.Linux-386 Make.NetBSD-386 Make.HP-UX-9000 Make.OSF1-alpha \ + Make.SunOS-sun4u Make.SunOS-sun4u-cc Make.SunOS-sun4u-gcc \ + Make.Darwin-PowerMacintosh + +Makefile:D: ../libutf/Makefile.TOP Makefile.MID ../libutf/Makefile.LIB ../libutf/Makefile.BOT + cat $prereq >$target + +Make.%: ../libutf/Make.% + cp $prereq $target diff --git a/src/libutf/rpm.spec b/src/libutf/rpm.spec new file mode 100644 index 00000000..74127833 --- /dev/null +++ b/src/libutf/rpm.spec @@ -0,0 +1,28 @@ +Summary: Port of Plan 9's UTF8 support functions +Name: libutf +Version: 2.0 +Release: 1 +Group: Development/C +Copyright: Public Domain +Packager: Russ Cox <rsc@post.harvard.edu> +Source: http://pdos.lcs.mit.edu/~rsc/software/libutf-2.0.tgz +URL: http://pdos.lcs.mit.edu/~rsc/software/#libutf + +%description +Libutf is a port of Plan 9's UTF8 support functions. +%prep +%setup + +%build +make + +%install +make install + +%files +/usr/local/include/utf.h +/usr/local/lib/libutf.a +/usr/local/man/man3/runestrcat.3 +/usr/local/man/man3/isalpharune.3 +/usr/local/man/man3/rune.3 +/usr/local/man/man7/utf.7 diff --git a/src/libutf/rune.3 b/src/libutf/rune.3 new file mode 100644 index 00000000..7ce978ae --- /dev/null +++ b/src/libutf/rune.3 @@ -0,0 +1,187 @@ +.TH RUNE 3 +.SH NAME +runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion +.SH SYNOPSIS +.ta \w'\fLchar*xx'u +.B #include <utf.h> +.PP +.B +int runetochar(char *s, Rune *r) +.PP +.B +int chartorune(Rune *r, char *s) +.PP +.B +int runelen(long r) +.PP +.B +int runenlen(Rune *r, int n) +.PP +.B +int fullrune(char *s, int n) +.PP +.B +char* utfecpy(char *s1, char *es1, char *s2) +.PP +.B +int utflen(char *s) +.PP +.B +int utfnlen(char *s, long n) +.PP +.B +char* utfrune(char *s, long c) +.PP +.B +char* utfrrune(char *s, long c) +.PP +.B +char* utfutf(char *s1, char *s2) +.SH DESCRIPTION +These routines convert to and from a +.SM UTF +byte stream and runes. +.PP +.I Runetochar +copies one rune at +.I r +to at most +.B UTFmax +bytes starting at +.I s +and returns the number of bytes copied. +.BR UTFmax , +defined as +.B 3 +in +.BR <libc.h> , +is the maximum number of bytes required to represent a rune. +.PP +.I Chartorune +copies at most +.B UTFmax +bytes starting at +.I s +to one rune at +.I r +and returns the number of bytes copied. +If the input is not exactly in +.SM UTF +format, +.I chartorune +will convert to 0x80 and return 1. +.PP +.I Runelen +returns the number of bytes +required to convert +.I r +into +.SM UTF. +.PP +.I Runenlen +returns the number of bytes +required to convert the +.I n +runes pointed to by +.I r +into +.SM UTF. +.PP +.I Fullrune +returns 1 if the string +.I s +of length +.I n +is long enough to be decoded by +.I chartorune +and 0 otherwise. +This does not guarantee that the string +contains a legal +.SM UTF +encoding. +This routine is used by programs that +obtain input a byte at +a time and need to know when a full rune +has arrived. +.PP +The following routines are analogous to the +corresponding string routines with +.B utf +substituted for +.B str +and +.B rune +substituted for +.BR chr . +.PP +.I Utfecpy +copies UTF sequences until a null sequence has been copied, but writes no +sequences beyond +.IR es1 . +If any sequences are copied, +.I s1 +is terminated by a null sequence, and a pointer to that sequence is returned. +Otherwise, the original +.I s1 +is returned. +.PP +.I Utflen +returns the number of runes that +are represented by the +.SM UTF +string +.IR s . +.PP +.I Utfnlen +returns the number of complete runes that +are represented by the first +.I n +bytes of +.SM UTF +string +.IR s . +If the last few bytes of the string contain an incompletely coded rune, +.I utfnlen +will not count them; in this way, it differs from +.IR utflen , +which includes every byte of the string. +.PP +.I Utfrune +.RI ( utfrrune ) +returns a pointer to the first (last) +occurrence of rune +.I c +in the +.SM UTF +string +.IR s , +or 0 if +.I c +does not occur in the string. +The NUL byte terminating a string is considered to +be part of the string +.IR s . +.PP +.I Utfutf +returns a pointer to the first occurrence of +the +.SM UTF +string +.I s2 +as a +.SM UTF +substring of +.IR s1 , +or 0 if there is none. +If +.I s2 +is the null string, +.I utfutf +returns +.IR s1 . +.SH HISTORY +These routines were written by Rob Pike and Ken Thompson +and first appeared in Plan 9. +.SH SEE ALSO +.IR utf (7), +.IR tcs (1) diff --git a/src/libutf/rune.c b/src/libutf/rune.c new file mode 100644 index 00000000..e1aaa9be --- /dev/null +++ b/src/libutf/rune.c @@ -0,0 +1,177 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + Bad = Runeerror, +}; + +int +chartorune(Rune *rune, char *str) +{ + int c, c1, c2; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +runetochar(char *str, Rune *rune) +{ + long c; + + /* + * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { + str[0] = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + if(c <= Rune2) { + str[0] = T2 | (c >> 1*Bitx); + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; +} + +int +runelen(long c) +{ + Rune rune; + char str[10]; + + rune = c; + return runetochar(str, &rune); +} + +int +runenlen(Rune *r, int nrune) +{ + int nb, c; + + nb = 0; + while(nrune--) { + c = *r++; + if(c <= Rune1) + nb++; + else + if(c <= Rune2) + nb += 2; + else + nb += 3; + } + return nb; +} + +int +fullrune(char *str, int n) +{ + int c; + + if(n > 0) { + c = *(uchar*)str; + if(c < Tx) + return 1; + if(n > 1) + if(c < T3 || n > 2) + return 1; + } + return 0; +} diff --git a/src/libutf/runestrcat.3 b/src/libutf/runestrcat.3 new file mode 100644 index 00000000..c012e59e --- /dev/null +++ b/src/libutf/runestrcat.3 @@ -0,0 +1,65 @@ +.TH RUNESTRCAT 3 +.SH NAME +runestrcat, +runestrncat, +runestrcmp, +runestrncmp, +runestrcpy, +runestrncpy, +runestrecpy, +runestrlen, +runestrchr, +runestrrchr, +runestrdup, +runestrstr \- rune string operations +.SH SYNOPSIS +.B #include <utf.h> +.PP +.ta \w'\fLRune* \fP'u +.B +Rune* runestrcat(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncat(Rune *s1, Rune *s2, long n) +.PP +.B +int runestrcmp(Rune *s1, Rune *s2) +.PP +.B +int runestrncmp(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrcpy(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncpy(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2) +.PP +.B +long runestrlen(Rune *s) +.PP +.B +Rune* runestrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrdup(Rune *s) +.PP +.B +Rune* runestrstr(Rune *s1, Rune *s2) +.SH DESCRIPTION +These functions are rune string analogues of +the corresponding functions in +.IR strcat (3). +.SH HISTORY +These routines first appeared in Plan 9. +.SH SEE ALSO +.IR memmove (3), +.IR rune (3), +.IR strcat (2) +.SH BUGS +The outcome of overlapping moves varies among implementations. diff --git a/src/libutf/runestrcat.c b/src/libutf/runestrcat.c new file mode 100644 index 00000000..e1e8c9d5 --- /dev/null +++ b/src/libutf/runestrcat.c @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrcat(Rune *s1, Rune *s2) +{ + + runestrcpy(runestrchr(s1, 0), s2); + return s1; +} diff --git a/src/libutf/runestrchr.c b/src/libutf/runestrchr.c new file mode 100644 index 00000000..45ee673a --- /dev/null +++ b/src/libutf/runestrchr.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrchr(Rune *s, Rune c) +{ + Rune c0 = c; + Rune c1; + + if(c == 0) { + while(*s++) + ; + return s-1; + } + + while(c1 = *s++) + if(c1 == c0) + return s-1; + return 0; +} diff --git a/src/libutf/runestrcmp.c b/src/libutf/runestrcmp.c new file mode 100644 index 00000000..d14145f3 --- /dev/null +++ b/src/libutf/runestrcmp.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +int +runestrcmp(Rune *s1, Rune *s2) +{ + Rune c1, c2; + + for(;;) { + c1 = *s1++; + c2 = *s2++; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + return 0; + } +} diff --git a/src/libutf/runestrcpy.c b/src/libutf/runestrcpy.c new file mode 100644 index 00000000..9d7aa026 --- /dev/null +++ b/src/libutf/runestrcpy.c @@ -0,0 +1,28 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrcpy(Rune *s1, Rune *s2) +{ + Rune *os1; + + os1 = s1; + while(*s1++ = *s2++) + ; + return os1; +} diff --git a/src/libutf/runestrdup.c b/src/libutf/runestrdup.c new file mode 100644 index 00000000..97a889c8 --- /dev/null +++ b/src/libutf/runestrdup.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrdup(Rune *s) +{ + Rune *ns; + + ns = malloc(sizeof(Rune)*(runestrlen(s) + 1)); + if(ns == 0) + return 0; + + return runestrcpy(ns, s); +} diff --git a/src/libutf/runestrecpy.c b/src/libutf/runestrecpy.c new file mode 100644 index 00000000..f3e4a096 --- /dev/null +++ b/src/libutf/runestrecpy.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrecpy(Rune *s1, Rune *es1, Rune *s2) +{ + if(s1 >= es1) + return s1; + + while(*s1++ = *s2++){ + if(s1 == es1){ + *--s1 = '\0'; + break; + } + } + return s1; +} diff --git a/src/libutf/runestrlen.c b/src/libutf/runestrlen.c new file mode 100644 index 00000000..bca3050c --- /dev/null +++ b/src/libutf/runestrlen.c @@ -0,0 +1,24 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +long +runestrlen(Rune *s) +{ + + return runestrchr(s, 0) - s; +} diff --git a/src/libutf/runestrncat.c b/src/libutf/runestrncat.c new file mode 100644 index 00000000..74400c30 --- /dev/null +++ b/src/libutf/runestrncat.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrncat(Rune *s1, Rune *s2, long n) +{ + Rune *os1; + + os1 = s1; + s1 = runestrchr(s1, 0); + while(*s1++ = *s2++) + if(--n < 0) { + s1[-1] = 0; + break; + } + return os1; +} diff --git a/src/libutf/runestrncmp.c b/src/libutf/runestrncmp.c new file mode 100644 index 00000000..dbd9da45 --- /dev/null +++ b/src/libutf/runestrncmp.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +int +runestrncmp(Rune *s1, Rune *s2, long n) +{ + Rune c1, c2; + + while(n > 0) { + c1 = *s1++; + c2 = *s2++; + n--; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + break; + } + return 0; +} diff --git a/src/libutf/runestrncpy.c b/src/libutf/runestrncpy.c new file mode 100644 index 00000000..acba7bc2 --- /dev/null +++ b/src/libutf/runestrncpy.c @@ -0,0 +1,33 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrncpy(Rune *s1, Rune *s2, long n) +{ + int i; + Rune *os1; + + os1 = s1; + for(i = 0; i < n; i++) + if((*s1++ = *s2++) == 0) { + while(++i < n) + *s1++ = 0; + return os1; + } + return os1; +} diff --git a/src/libutf/runestrrchr.c b/src/libutf/runestrrchr.c new file mode 100644 index 00000000..1f40c5a6 --- /dev/null +++ b/src/libutf/runestrrchr.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +Rune* +runestrrchr(Rune *s, Rune c) +{ + Rune *r; + + if(c == 0) + return runestrchr(s, 0); + r = 0; + while(s = runestrchr(s, c)) + r = s++; + return r; +} diff --git a/src/libutf/runestrstr.c b/src/libutf/runestrstr.c new file mode 100644 index 00000000..48583a3d --- /dev/null +++ b/src/libutf/runestrstr.c @@ -0,0 +1,44 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none + */ +Rune* +runestrstr(Rune *s1, Rune *s2) +{ + Rune *p, *pa, *pb; + int c0, c; + + c0 = *s2; + if(c0 == 0) + return s1; + s2++; + for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) { + pa = p; + for(pb=s2;; pb++) { + c = *pb; + if(c == 0) + return p; + if(c != *++pa) + break; + } + } + return 0; +} diff --git a/src/libutf/runetype.c b/src/libutf/runetype.c new file mode 100644 index 00000000..ffae30e6 --- /dev/null +++ b/src/libutf/runetype.c @@ -0,0 +1,1152 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +/* + * alpha ranges - + * only covers ranges not in lower||upper + */ +static +Rune __alpha2[] = +{ + 0x00d8, 0x00f6, /* Ø - ö */ + 0x00f8, 0x01f5, /* ø - ǵ */ + 0x0250, 0x02a8, /* ɐ - ʨ */ + 0x038e, 0x03a1, /* Ύ - Ρ */ + 0x03a3, 0x03ce, /* Σ - ώ */ + 0x03d0, 0x03d6, /* ϐ - ϖ */ + 0x03e2, 0x03f3, /* Ϣ - ϳ */ + 0x0490, 0x04c4, /* Ґ - ӄ */ + 0x0561, 0x0587, /* ա - և */ + 0x05d0, 0x05ea, /* א - ת */ + 0x05f0, 0x05f2, /* װ - ײ */ + 0x0621, 0x063a, /* ء - غ */ + 0x0640, 0x064a, /* ـ - ي */ + 0x0671, 0x06b7, /* ٱ - ڷ */ + 0x06ba, 0x06be, /* ں - ھ */ + 0x06c0, 0x06ce, /* ۀ - ێ */ + 0x06d0, 0x06d3, /* ې - ۓ */ + 0x0905, 0x0939, /* अ - ह */ + 0x0958, 0x0961, /* क़ - ॡ */ + 0x0985, 0x098c, /* অ - ঌ */ + 0x098f, 0x0990, /* এ - ঐ */ + 0x0993, 0x09a8, /* ও - ন */ + 0x09aa, 0x09b0, /* প - র */ + 0x09b6, 0x09b9, /* শ - হ */ + 0x09dc, 0x09dd, /* ড় - ঢ় */ + 0x09df, 0x09e1, /* য় - ৡ */ + 0x09f0, 0x09f1, /* ৰ - ৱ */ + 0x0a05, 0x0a0a, /* ਅ - ਊ */ + 0x0a0f, 0x0a10, /* ਏ - ਐ */ + 0x0a13, 0x0a28, /* ਓ - ਨ */ + 0x0a2a, 0x0a30, /* ਪ - ਰ */ + 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ + 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ + 0x0a38, 0x0a39, /* ਸ - ਹ */ + 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ + 0x0a85, 0x0a8b, /* અ - ઋ */ + 0x0a8f, 0x0a91, /* એ - ઑ */ + 0x0a93, 0x0aa8, /* ઓ - ન */ + 0x0aaa, 0x0ab0, /* પ - ર */ + 0x0ab2, 0x0ab3, /* લ - ળ */ + 0x0ab5, 0x0ab9, /* વ - હ */ + 0x0b05, 0x0b0c, /* ଅ - ଌ */ + 0x0b0f, 0x0b10, /* ଏ - ଐ */ + 0x0b13, 0x0b28, /* ଓ - ନ */ + 0x0b2a, 0x0b30, /* ପ - ର */ + 0x0b32, 0x0b33, /* ଲ - ଳ */ + 0x0b36, 0x0b39, /* ଶ - ହ */ + 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ + 0x0b5f, 0x0b61, /* ୟ - ୡ */ + 0x0b85, 0x0b8a, /* அ - ஊ */ + 0x0b8e, 0x0b90, /* எ - ஐ */ + 0x0b92, 0x0b95, /* ஒ - க */ + 0x0b99, 0x0b9a, /* ங - ச */ + 0x0b9e, 0x0b9f, /* ஞ - ட */ + 0x0ba3, 0x0ba4, /* ண - த */ + 0x0ba8, 0x0baa, /* ந - ப */ + 0x0bae, 0x0bb5, /* ம - வ */ + 0x0bb7, 0x0bb9, /* ஷ - ஹ */ + 0x0c05, 0x0c0c, /* అ - ఌ */ + 0x0c0e, 0x0c10, /* ఎ - ఐ */ + 0x0c12, 0x0c28, /* ఒ - న */ + 0x0c2a, 0x0c33, /* ప - ళ */ + 0x0c35, 0x0c39, /* వ - హ */ + 0x0c60, 0x0c61, /* ౠ - ౡ */ + 0x0c85, 0x0c8c, /* ಅ - ಌ */ + 0x0c8e, 0x0c90, /* ಎ - ಐ */ + 0x0c92, 0x0ca8, /* ಒ - ನ */ + 0x0caa, 0x0cb3, /* ಪ - ಳ */ + 0x0cb5, 0x0cb9, /* ವ - ಹ */ + 0x0ce0, 0x0ce1, /* ೠ - ೡ */ + 0x0d05, 0x0d0c, /* അ - ഌ */ + 0x0d0e, 0x0d10, /* എ - ഐ */ + 0x0d12, 0x0d28, /* ഒ - ന */ + 0x0d2a, 0x0d39, /* പ - ഹ */ + 0x0d60, 0x0d61, /* ൠ - ൡ */ + 0x0e01, 0x0e30, /* ก - ะ */ + 0x0e32, 0x0e33, /* า - ำ */ + 0x0e40, 0x0e46, /* เ - ๆ */ + 0x0e5a, 0x0e5b, /* ๚ - ๛ */ + 0x0e81, 0x0e82, /* ກ - ຂ */ + 0x0e87, 0x0e88, /* ງ - ຈ */ + 0x0e94, 0x0e97, /* ດ - ທ */ + 0x0e99, 0x0e9f, /* ນ - ຟ */ + 0x0ea1, 0x0ea3, /* ມ - ຣ */ + 0x0eaa, 0x0eab, /* ສ - ຫ */ + 0x0ead, 0x0eae, /* ອ - ຮ */ + 0x0eb2, 0x0eb3, /* າ - ຳ */ + 0x0ec0, 0x0ec4, /* ເ - ໄ */ + 0x0edc, 0x0edd, /* ໜ - ໝ */ + 0x0f18, 0x0f19, /* ༘ - ༙ */ + 0x0f40, 0x0f47, /* ཀ - ཇ */ + 0x0f49, 0x0f69, /* ཉ - ཀྵ */ + 0x10d0, 0x10f6, /* ა - ჶ */ + 0x1100, 0x1159, /* ᄀ - ᅙ */ + 0x115f, 0x11a2, /* ᅟ - ᆢ */ + 0x11a8, 0x11f9, /* ᆨ - ᇹ */ + 0x1e00, 0x1e9b, /* Ḁ - ẛ */ + 0x1f50, 0x1f57, /* ὐ - ὗ */ + 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ + 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ + 0x1fc2, 0x1fc4, /* ῂ - ῄ */ + 0x1fc6, 0x1fcc, /* ῆ - ῌ */ + 0x1fd0, 0x1fd3, /* ῐ - ΐ */ + 0x1fd6, 0x1fdb, /* ῖ - Ί */ + 0x1fe0, 0x1fec, /* ῠ - Ῥ */ + 0x1ff2, 0x1ff4, /* ῲ - ῴ */ + 0x1ff6, 0x1ffc, /* ῶ - ῼ */ + 0x210a, 0x2113, /* ℊ - ℓ */ + 0x2115, 0x211d, /* ℕ - ℝ */ + 0x2120, 0x2122, /* ℠ - ™ */ + 0x212a, 0x2131, /* K - ℱ */ + 0x2133, 0x2138, /* ℳ - ℸ */ + 0x3041, 0x3094, /* ぁ - ゔ */ + 0x30a1, 0x30fa, /* ァ - ヺ */ + 0x3105, 0x312c, /* ㄅ - ㄬ */ + 0x3131, 0x318e, /* ㄱ - ㆎ */ + 0x3192, 0x319f, /* ㆒ - ㆟ */ + 0x3260, 0x327b, /* ㉠ - ㉻ */ + 0x328a, 0x32b0, /* ㊊ - ㊰ */ + 0x32d0, 0x32fe, /* ㋐ - ㋾ */ + 0x3300, 0x3357, /* ㌀ - ㍗ */ + 0x3371, 0x3376, /* ㍱ - ㍶ */ + 0x337b, 0x3394, /* ㍻ - ㎔ */ + 0x3399, 0x339e, /* ㎙ - ㎞ */ + 0x33a9, 0x33ad, /* ㎩ - ㎭ */ + 0x33b0, 0x33c1, /* ㎰ - ㏁ */ + 0x33c3, 0x33c5, /* ㏃ - ㏅ */ + 0x33c7, 0x33d7, /* ㏇ - ㏗ */ + 0x33d9, 0x33dd, /* ㏙ - ㏝ */ + 0x4e00, 0x9fff, /* 一 - 鿿 */ + 0xac00, 0xd7a3, /* 가 - 힣 */ + 0xf900, 0xfb06, /* 豈 - st */ + 0xfb13, 0xfb17, /* ﬓ - ﬗ */ + 0xfb1f, 0xfb28, /* ײַ - ﬨ */ + 0xfb2a, 0xfb36, /* שׁ - זּ */ + 0xfb38, 0xfb3c, /* טּ - לּ */ + 0xfb40, 0xfb41, /* נּ - סּ */ + 0xfb43, 0xfb44, /* ףּ - פּ */ + 0xfb46, 0xfbb1, /* צּ - ﮱ */ + 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ + 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ + 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ + 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ + 0xfe70, 0xfe72, /* ﹰ - ﹲ */ + 0xfe76, 0xfefc, /* ﹶ - ﻼ */ + 0xff66, 0xff6f, /* ヲ - ッ */ + 0xff71, 0xff9d, /* ア - ン */ + 0xffa0, 0xffbe, /* ᅠ - ᄒ */ + 0xffc2, 0xffc7, /* ᅡ - ᅦ */ + 0xffca, 0xffcf, /* ᅧ - ᅬ */ + 0xffd2, 0xffd7, /* ᅭ - ᅲ */ + 0xffda, 0xffdc, /* ᅳ - ᅵ */ +}; + +/* + * alpha singlets - + * only covers ranges not in lower||upper + */ +static +Rune __alpha1[] = +{ + 0x00aa, /* ª */ + 0x00b5, /* µ */ + 0x00ba, /* º */ + 0x03da, /* Ϛ */ + 0x03dc, /* Ϝ */ + 0x03de, /* Ϟ */ + 0x03e0, /* Ϡ */ + 0x06d5, /* ە */ + 0x09b2, /* ল */ + 0x0a5e, /* ਫ਼ */ + 0x0a8d, /* ઍ */ + 0x0ae0, /* ૠ */ + 0x0b9c, /* ஜ */ + 0x0cde, /* ೞ */ + 0x0e4f, /* ๏ */ + 0x0e84, /* ຄ */ + 0x0e8a, /* ຊ */ + 0x0e8d, /* ຍ */ + 0x0ea5, /* ລ */ + 0x0ea7, /* ວ */ + 0x0eb0, /* ະ */ + 0x0ebd, /* ຽ */ + 0x1fbe, /* ι */ + 0x207f, /* ⁿ */ + 0x20a8, /* ₨ */ + 0x2102, /* ℂ */ + 0x2107, /* ℇ */ + 0x2124, /* ℤ */ + 0x2126, /* Ω */ + 0x2128, /* ℨ */ + 0xfb3e, /* מּ */ + 0xfe74, /* ﹴ */ +}; + +/* + * space ranges + */ +static +Rune __space2[] = +{ + 0x0009, 0x000a, /* tab and newline */ + 0x0020, 0x0020, /* space */ + 0x00a0, 0x00a0, /* */ + 0x2000, 0x200b, /* - */ + 0x2028, 0x2029, /*
-
*/ + 0x3000, 0x3000, /* */ + 0xfeff, 0xfeff, /* */ +}; + +/* + * lower case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __toupper2[] = +{ + 0x0061, 0x007a, 468, /* a-z A-Z */ + 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ + 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ + 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ + 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ + 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ + 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ + 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ + 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ + 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ + 0x0430, 0x044f, 468, /* а-я А-Я */ + 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ + 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ + 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ + 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ + 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ + 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ + 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ + 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ + 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ + 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ + 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ + 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ + 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ + 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ + 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ + 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ + 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ + 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ + 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ + 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ + 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ + 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ + 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ + 0xff41, 0xff5a, 468, /* a-z A-Z */ +}; + +/* + * lower case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __toupper1[] = +{ + 0x00ff, 621, /* ÿ Ÿ */ + 0x0101, 499, /* ā Ā */ + 0x0103, 499, /* ă Ă */ + 0x0105, 499, /* ą Ą */ + 0x0107, 499, /* ć Ć */ + 0x0109, 499, /* ĉ Ĉ */ + 0x010b, 499, /* ċ Ċ */ + 0x010d, 499, /* č Č */ + 0x010f, 499, /* ď Ď */ + 0x0111, 499, /* đ Đ */ + 0x0113, 499, /* ē Ē */ + 0x0115, 499, /* ĕ Ĕ */ + 0x0117, 499, /* ė Ė */ + 0x0119, 499, /* ę Ę */ + 0x011b, 499, /* ě Ě */ + 0x011d, 499, /* ĝ Ĝ */ + 0x011f, 499, /* ğ Ğ */ + 0x0121, 499, /* ġ Ġ */ + 0x0123, 499, /* ģ Ģ */ + 0x0125, 499, /* ĥ Ĥ */ + 0x0127, 499, /* ħ Ħ */ + 0x0129, 499, /* ĩ Ĩ */ + 0x012b, 499, /* ī Ī */ + 0x012d, 499, /* ĭ Ĭ */ + 0x012f, 499, /* į Į */ + 0x0131, 268, /* ı I */ + 0x0133, 499, /* ij IJ */ + 0x0135, 499, /* ĵ Ĵ */ + 0x0137, 499, /* ķ Ķ */ + 0x013a, 499, /* ĺ Ĺ */ + 0x013c, 499, /* ļ Ļ */ + 0x013e, 499, /* ľ Ľ */ + 0x0140, 499, /* ŀ Ŀ */ + 0x0142, 499, /* ł Ł */ + 0x0144, 499, /* ń Ń */ + 0x0146, 499, /* ņ Ņ */ + 0x0148, 499, /* ň Ň */ + 0x014b, 499, /* ŋ Ŋ */ + 0x014d, 499, /* ō Ō */ + 0x014f, 499, /* ŏ Ŏ */ + 0x0151, 499, /* ő Ő */ + 0x0153, 499, /* œ Œ */ + 0x0155, 499, /* ŕ Ŕ */ + 0x0157, 499, /* ŗ Ŗ */ + 0x0159, 499, /* ř Ř */ + 0x015b, 499, /* ś Ś */ + 0x015d, 499, /* ŝ Ŝ */ + 0x015f, 499, /* ş Ş */ + 0x0161, 499, /* š Š */ + 0x0163, 499, /* ţ Ţ */ + 0x0165, 499, /* ť Ť */ + 0x0167, 499, /* ŧ Ŧ */ + 0x0169, 499, /* ũ Ũ */ + 0x016b, 499, /* ū Ū */ + 0x016d, 499, /* ŭ Ŭ */ + 0x016f, 499, /* ů Ů */ + 0x0171, 499, /* ű Ű */ + 0x0173, 499, /* ų Ų */ + 0x0175, 499, /* ŵ Ŵ */ + 0x0177, 499, /* ŷ Ŷ */ + 0x017a, 499, /* ź Ź */ + 0x017c, 499, /* ż Ż */ + 0x017e, 499, /* ž Ž */ + 0x017f, 200, /* ſ S */ + 0x0183, 499, /* ƃ Ƃ */ + 0x0185, 499, /* ƅ Ƅ */ + 0x0188, 499, /* ƈ Ƈ */ + 0x018c, 499, /* ƌ Ƌ */ + 0x0192, 499, /* ƒ Ƒ */ + 0x0199, 499, /* ƙ Ƙ */ + 0x01a1, 499, /* ơ Ơ */ + 0x01a3, 499, /* ƣ Ƣ */ + 0x01a5, 499, /* ƥ Ƥ */ + 0x01a8, 499, /* ƨ Ƨ */ + 0x01ad, 499, /* ƭ Ƭ */ + 0x01b0, 499, /* ư Ư */ + 0x01b4, 499, /* ƴ Ƴ */ + 0x01b6, 499, /* ƶ Ƶ */ + 0x01b9, 499, /* ƹ Ƹ */ + 0x01bd, 499, /* ƽ Ƽ */ + 0x01c5, 499, /* Dž DŽ */ + 0x01c6, 498, /* dž DŽ */ + 0x01c8, 499, /* Lj LJ */ + 0x01c9, 498, /* lj LJ */ + 0x01cb, 499, /* Nj NJ */ + 0x01cc, 498, /* nj NJ */ + 0x01ce, 499, /* ǎ Ǎ */ + 0x01d0, 499, /* ǐ Ǐ */ + 0x01d2, 499, /* ǒ Ǒ */ + 0x01d4, 499, /* ǔ Ǔ */ + 0x01d6, 499, /* ǖ Ǖ */ + 0x01d8, 499, /* ǘ Ǘ */ + 0x01da, 499, /* ǚ Ǚ */ + 0x01dc, 499, /* ǜ Ǜ */ + 0x01df, 499, /* ǟ Ǟ */ + 0x01e1, 499, /* ǡ Ǡ */ + 0x01e3, 499, /* ǣ Ǣ */ + 0x01e5, 499, /* ǥ Ǥ */ + 0x01e7, 499, /* ǧ Ǧ */ + 0x01e9, 499, /* ǩ Ǩ */ + 0x01eb, 499, /* ǫ Ǫ */ + 0x01ed, 499, /* ǭ Ǭ */ + 0x01ef, 499, /* ǯ Ǯ */ + 0x01f2, 499, /* Dz DZ */ + 0x01f3, 498, /* dz DZ */ + 0x01f5, 499, /* ǵ Ǵ */ + 0x01fb, 499, /* ǻ Ǻ */ + 0x01fd, 499, /* ǽ Ǽ */ + 0x01ff, 499, /* ǿ Ǿ */ + 0x0201, 499, /* ȁ Ȁ */ + 0x0203, 499, /* ȃ Ȃ */ + 0x0205, 499, /* ȅ Ȅ */ + 0x0207, 499, /* ȇ Ȇ */ + 0x0209, 499, /* ȉ Ȉ */ + 0x020b, 499, /* ȋ Ȋ */ + 0x020d, 499, /* ȍ Ȍ */ + 0x020f, 499, /* ȏ Ȏ */ + 0x0211, 499, /* ȑ Ȑ */ + 0x0213, 499, /* ȓ Ȓ */ + 0x0215, 499, /* ȕ Ȕ */ + 0x0217, 499, /* ȗ Ȗ */ + 0x0253, 290, /* ɓ Ɓ */ + 0x0254, 294, /* ɔ Ɔ */ + 0x025b, 297, /* ɛ Ɛ */ + 0x0260, 295, /* ɠ Ɠ */ + 0x0263, 293, /* ɣ Ɣ */ + 0x0268, 291, /* ɨ Ɨ */ + 0x0269, 289, /* ɩ Ɩ */ + 0x026f, 289, /* ɯ Ɯ */ + 0x0272, 287, /* ɲ Ɲ */ + 0x0283, 282, /* ʃ Ʃ */ + 0x0288, 282, /* ʈ Ʈ */ + 0x0292, 281, /* ʒ Ʒ */ + 0x03ac, 462, /* ά Ά */ + 0x03cc, 436, /* ό Ό */ + 0x03d0, 438, /* ϐ Β */ + 0x03d1, 443, /* ϑ Θ */ + 0x03d5, 453, /* ϕ Φ */ + 0x03d6, 446, /* ϖ Π */ + 0x03e3, 499, /* ϣ Ϣ */ + 0x03e5, 499, /* ϥ Ϥ */ + 0x03e7, 499, /* ϧ Ϧ */ + 0x03e9, 499, /* ϩ Ϩ */ + 0x03eb, 499, /* ϫ Ϫ */ + 0x03ed, 499, /* ϭ Ϭ */ + 0x03ef, 499, /* ϯ Ϯ */ + 0x03f0, 414, /* ϰ Κ */ + 0x03f1, 420, /* ϱ Ρ */ + 0x0461, 499, /* ѡ Ѡ */ + 0x0463, 499, /* ѣ Ѣ */ + 0x0465, 499, /* ѥ Ѥ */ + 0x0467, 499, /* ѧ Ѧ */ + 0x0469, 499, /* ѩ Ѩ */ + 0x046b, 499, /* ѫ Ѫ */ + 0x046d, 499, /* ѭ Ѭ */ + 0x046f, 499, /* ѯ Ѯ */ + 0x0471, 499, /* ѱ Ѱ */ + 0x0473, 499, /* ѳ Ѳ */ + 0x0475, 499, /* ѵ Ѵ */ + 0x0477, 499, /* ѷ Ѷ */ + 0x0479, 499, /* ѹ Ѹ */ + 0x047b, 499, /* ѻ Ѻ */ + 0x047d, 499, /* ѽ Ѽ */ + 0x047f, 499, /* ѿ Ѿ */ + 0x0481, 499, /* ҁ Ҁ */ + 0x0491, 499, /* ґ Ґ */ + 0x0493, 499, /* ғ Ғ */ + 0x0495, 499, /* ҕ Ҕ */ + 0x0497, 499, /* җ Җ */ + 0x0499, 499, /* ҙ Ҙ */ + 0x049b, 499, /* қ Қ */ + 0x049d, 499, /* ҝ Ҝ */ + 0x049f, 499, /* ҟ Ҟ */ + 0x04a1, 499, /* ҡ Ҡ */ + 0x04a3, 499, /* ң Ң */ + 0x04a5, 499, /* ҥ Ҥ */ + 0x04a7, 499, /* ҧ Ҧ */ + 0x04a9, 499, /* ҩ Ҩ */ + 0x04ab, 499, /* ҫ Ҫ */ + 0x04ad, 499, /* ҭ Ҭ */ + 0x04af, 499, /* ү Ү */ + 0x04b1, 499, /* ұ Ұ */ + 0x04b3, 499, /* ҳ Ҳ */ + 0x04b5, 499, /* ҵ Ҵ */ + 0x04b7, 499, /* ҷ Ҷ */ + 0x04b9, 499, /* ҹ Ҹ */ + 0x04bb, 499, /* һ Һ */ + 0x04bd, 499, /* ҽ Ҽ */ + 0x04bf, 499, /* ҿ Ҿ */ + 0x04c2, 499, /* ӂ Ӂ */ + 0x04c4, 499, /* ӄ Ӄ */ + 0x04c8, 499, /* ӈ Ӈ */ + 0x04cc, 499, /* ӌ Ӌ */ + 0x04d1, 499, /* ӑ Ӑ */ + 0x04d3, 499, /* ӓ Ӓ */ + 0x04d5, 499, /* ӕ Ӕ */ + 0x04d7, 499, /* ӗ Ӗ */ + 0x04d9, 499, /* ә Ә */ + 0x04db, 499, /* ӛ Ӛ */ + 0x04dd, 499, /* ӝ Ӝ */ + 0x04df, 499, /* ӟ Ӟ */ + 0x04e1, 499, /* ӡ Ӡ */ + 0x04e3, 499, /* ӣ Ӣ */ + 0x04e5, 499, /* ӥ Ӥ */ + 0x04e7, 499, /* ӧ Ӧ */ + 0x04e9, 499, /* ө Ө */ + 0x04eb, 499, /* ӫ Ӫ */ + 0x04ef, 499, /* ӯ Ӯ */ + 0x04f1, 499, /* ӱ Ӱ */ + 0x04f3, 499, /* ӳ Ӳ */ + 0x04f5, 499, /* ӵ Ӵ */ + 0x04f9, 499, /* ӹ Ӹ */ + 0x1e01, 499, /* ḁ Ḁ */ + 0x1e03, 499, /* ḃ Ḃ */ + 0x1e05, 499, /* ḅ Ḅ */ + 0x1e07, 499, /* ḇ Ḇ */ + 0x1e09, 499, /* ḉ Ḉ */ + 0x1e0b, 499, /* ḋ Ḋ */ + 0x1e0d, 499, /* ḍ Ḍ */ + 0x1e0f, 499, /* ḏ Ḏ */ + 0x1e11, 499, /* ḑ Ḑ */ + 0x1e13, 499, /* ḓ Ḓ */ + 0x1e15, 499, /* ḕ Ḕ */ + 0x1e17, 499, /* ḗ Ḗ */ + 0x1e19, 499, /* ḙ Ḙ */ + 0x1e1b, 499, /* ḛ Ḛ */ + 0x1e1d, 499, /* ḝ Ḝ */ + 0x1e1f, 499, /* ḟ Ḟ */ + 0x1e21, 499, /* ḡ Ḡ */ + 0x1e23, 499, /* ḣ Ḣ */ + 0x1e25, 499, /* ḥ Ḥ */ + 0x1e27, 499, /* ḧ Ḧ */ + 0x1e29, 499, /* ḩ Ḩ */ + 0x1e2b, 499, /* ḫ Ḫ */ + 0x1e2d, 499, /* ḭ Ḭ */ + 0x1e2f, 499, /* ḯ Ḯ */ + 0x1e31, 499, /* ḱ Ḱ */ + 0x1e33, 499, /* ḳ Ḳ */ + 0x1e35, 499, /* ḵ Ḵ */ + 0x1e37, 499, /* ḷ Ḷ */ + 0x1e39, 499, /* ḹ Ḹ */ + 0x1e3b, 499, /* ḻ Ḻ */ + 0x1e3d, 499, /* ḽ Ḽ */ + 0x1e3f, 499, /* ḿ Ḿ */ + 0x1e41, 499, /* ṁ Ṁ */ + 0x1e43, 499, /* ṃ Ṃ */ + 0x1e45, 499, /* ṅ Ṅ */ + 0x1e47, 499, /* ṇ Ṇ */ + 0x1e49, 499, /* ṉ Ṉ */ + 0x1e4b, 499, /* ṋ Ṋ */ + 0x1e4d, 499, /* ṍ Ṍ */ + 0x1e4f, 499, /* ṏ Ṏ */ + 0x1e51, 499, /* ṑ Ṑ */ + 0x1e53, 499, /* ṓ Ṓ */ + 0x1e55, 499, /* ṕ Ṕ */ + 0x1e57, 499, /* ṗ Ṗ */ + 0x1e59, 499, /* ṙ Ṙ */ + 0x1e5b, 499, /* ṛ Ṛ */ + 0x1e5d, 499, /* ṝ Ṝ */ + 0x1e5f, 499, /* ṟ Ṟ */ + 0x1e61, 499, /* ṡ Ṡ */ + 0x1e63, 499, /* ṣ Ṣ */ + 0x1e65, 499, /* ṥ Ṥ */ + 0x1e67, 499, /* ṧ Ṧ */ + 0x1e69, 499, /* ṩ Ṩ */ + 0x1e6b, 499, /* ṫ Ṫ */ + 0x1e6d, 499, /* ṭ Ṭ */ + 0x1e6f, 499, /* ṯ Ṯ */ + 0x1e71, 499, /* ṱ Ṱ */ + 0x1e73, 499, /* ṳ Ṳ */ + 0x1e75, 499, /* ṵ Ṵ */ + 0x1e77, 499, /* ṷ Ṷ */ + 0x1e79, 499, /* ṹ Ṹ */ + 0x1e7b, 499, /* ṻ Ṻ */ + 0x1e7d, 499, /* ṽ Ṽ */ + 0x1e7f, 499, /* ṿ Ṿ */ + 0x1e81, 499, /* ẁ Ẁ */ + 0x1e83, 499, /* ẃ Ẃ */ + 0x1e85, 499, /* ẅ Ẅ */ + 0x1e87, 499, /* ẇ Ẇ */ + 0x1e89, 499, /* ẉ Ẉ */ + 0x1e8b, 499, /* ẋ Ẋ */ + 0x1e8d, 499, /* ẍ Ẍ */ + 0x1e8f, 499, /* ẏ Ẏ */ + 0x1e91, 499, /* ẑ Ẑ */ + 0x1e93, 499, /* ẓ Ẓ */ + 0x1e95, 499, /* ẕ Ẕ */ + 0x1ea1, 499, /* ạ Ạ */ + 0x1ea3, 499, /* ả Ả */ + 0x1ea5, 499, /* ấ Ấ */ + 0x1ea7, 499, /* ầ Ầ */ + 0x1ea9, 499, /* ẩ Ẩ */ + 0x1eab, 499, /* ẫ Ẫ */ + 0x1ead, 499, /* ậ Ậ */ + 0x1eaf, 499, /* ắ Ắ */ + 0x1eb1, 499, /* ằ Ằ */ + 0x1eb3, 499, /* ẳ Ẳ */ + 0x1eb5, 499, /* ẵ Ẵ */ + 0x1eb7, 499, /* ặ Ặ */ + 0x1eb9, 499, /* ẹ Ẹ */ + 0x1ebb, 499, /* ẻ Ẻ */ + 0x1ebd, 499, /* ẽ Ẽ */ + 0x1ebf, 499, /* ế Ế */ + 0x1ec1, 499, /* ề Ề */ + 0x1ec3, 499, /* ể Ể */ + 0x1ec5, 499, /* ễ Ễ */ + 0x1ec7, 499, /* ệ Ệ */ + 0x1ec9, 499, /* ỉ Ỉ */ + 0x1ecb, 499, /* ị Ị */ + 0x1ecd, 499, /* ọ Ọ */ + 0x1ecf, 499, /* ỏ Ỏ */ + 0x1ed1, 499, /* ố Ố */ + 0x1ed3, 499, /* ồ Ồ */ + 0x1ed5, 499, /* ổ Ổ */ + 0x1ed7, 499, /* ỗ Ỗ */ + 0x1ed9, 499, /* ộ Ộ */ + 0x1edb, 499, /* ớ Ớ */ + 0x1edd, 499, /* ờ Ờ */ + 0x1edf, 499, /* ở Ở */ + 0x1ee1, 499, /* ỡ Ỡ */ + 0x1ee3, 499, /* ợ Ợ */ + 0x1ee5, 499, /* ụ Ụ */ + 0x1ee7, 499, /* ủ Ủ */ + 0x1ee9, 499, /* ứ Ứ */ + 0x1eeb, 499, /* ừ Ừ */ + 0x1eed, 499, /* ử Ử */ + 0x1eef, 499, /* ữ Ữ */ + 0x1ef1, 499, /* ự Ự */ + 0x1ef3, 499, /* ỳ Ỳ */ + 0x1ef5, 499, /* ỵ Ỵ */ + 0x1ef7, 499, /* ỷ Ỷ */ + 0x1ef9, 499, /* ỹ Ỹ */ + 0x1f51, 508, /* ὑ Ὑ */ + 0x1f53, 508, /* ὓ Ὓ */ + 0x1f55, 508, /* ὕ Ὕ */ + 0x1f57, 508, /* ὗ Ὗ */ + 0x1fb3, 509, /* ᾳ ᾼ */ + 0x1fc3, 509, /* ῃ ῌ */ + 0x1fe5, 507, /* ῥ Ῥ */ + 0x1ff3, 509, /* ῳ ῼ */ +}; + +/* + * upper case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __tolower2[] = +{ + 0x0041, 0x005a, 532, /* A-Z a-z */ + 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ + 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ + 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ + 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ + 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ + 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ + 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ + 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ + 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ + 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ + 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ + 0x0410, 0x042f, 532, /* А-Я а-я */ + 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ + 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ + 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ + 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ + 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ + 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ + 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ + 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ + 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ + 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ + 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ + 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ + 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ + 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ + 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ + 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ + 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ + 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ + 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ + 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ + 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ + 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ + 0xff21, 0xff3a, 532, /* A-Z a-z */ +}; + +/* + * upper case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __tolower1[] = +{ + 0x0100, 501, /* Ā ā */ + 0x0102, 501, /* Ă ă */ + 0x0104, 501, /* Ą ą */ + 0x0106, 501, /* Ć ć */ + 0x0108, 501, /* Ĉ ĉ */ + 0x010a, 501, /* Ċ ċ */ + 0x010c, 501, /* Č č */ + 0x010e, 501, /* Ď ď */ + 0x0110, 501, /* Đ đ */ + 0x0112, 501, /* Ē ē */ + 0x0114, 501, /* Ĕ ĕ */ + 0x0116, 501, /* Ė ė */ + 0x0118, 501, /* Ę ę */ + 0x011a, 501, /* Ě ě */ + 0x011c, 501, /* Ĝ ĝ */ + 0x011e, 501, /* Ğ ğ */ + 0x0120, 501, /* Ġ ġ */ + 0x0122, 501, /* Ģ ģ */ + 0x0124, 501, /* Ĥ ĥ */ + 0x0126, 501, /* Ħ ħ */ + 0x0128, 501, /* Ĩ ĩ */ + 0x012a, 501, /* Ī ī */ + 0x012c, 501, /* Ĭ ĭ */ + 0x012e, 501, /* Į į */ + 0x0130, 301, /* İ i */ + 0x0132, 501, /* IJ ij */ + 0x0134, 501, /* Ĵ ĵ */ + 0x0136, 501, /* Ķ ķ */ + 0x0139, 501, /* Ĺ ĺ */ + 0x013b, 501, /* Ļ ļ */ + 0x013d, 501, /* Ľ ľ */ + 0x013f, 501, /* Ŀ ŀ */ + 0x0141, 501, /* Ł ł */ + 0x0143, 501, /* Ń ń */ + 0x0145, 501, /* Ņ ņ */ + 0x0147, 501, /* Ň ň */ + 0x014a, 501, /* Ŋ ŋ */ + 0x014c, 501, /* Ō ō */ + 0x014e, 501, /* Ŏ ŏ */ + 0x0150, 501, /* Ő ő */ + 0x0152, 501, /* Œ œ */ + 0x0154, 501, /* Ŕ ŕ */ + 0x0156, 501, /* Ŗ ŗ */ + 0x0158, 501, /* Ř ř */ + 0x015a, 501, /* Ś ś */ + 0x015c, 501, /* Ŝ ŝ */ + 0x015e, 501, /* Ş ş */ + 0x0160, 501, /* Š š */ + 0x0162, 501, /* Ţ ţ */ + 0x0164, 501, /* Ť ť */ + 0x0166, 501, /* Ŧ ŧ */ + 0x0168, 501, /* Ũ ũ */ + 0x016a, 501, /* Ū ū */ + 0x016c, 501, /* Ŭ ŭ */ + 0x016e, 501, /* Ů ů */ + 0x0170, 501, /* Ű ű */ + 0x0172, 501, /* Ų ų */ + 0x0174, 501, /* Ŵ ŵ */ + 0x0176, 501, /* Ŷ ŷ */ + 0x0178, 379, /* Ÿ ÿ */ + 0x0179, 501, /* Ź ź */ + 0x017b, 501, /* Ż ż */ + 0x017d, 501, /* Ž ž */ + 0x0181, 710, /* Ɓ ɓ */ + 0x0182, 501, /* Ƃ ƃ */ + 0x0184, 501, /* Ƅ ƅ */ + 0x0186, 706, /* Ɔ ɔ */ + 0x0187, 501, /* Ƈ ƈ */ + 0x018b, 501, /* Ƌ ƌ */ + 0x0190, 703, /* Ɛ ɛ */ + 0x0191, 501, /* Ƒ ƒ */ + 0x0193, 705, /* Ɠ ɠ */ + 0x0194, 707, /* Ɣ ɣ */ + 0x0196, 711, /* Ɩ ɩ */ + 0x0197, 709, /* Ɨ ɨ */ + 0x0198, 501, /* Ƙ ƙ */ + 0x019c, 711, /* Ɯ ɯ */ + 0x019d, 713, /* Ɲ ɲ */ + 0x01a0, 501, /* Ơ ơ */ + 0x01a2, 501, /* Ƣ ƣ */ + 0x01a4, 501, /* Ƥ ƥ */ + 0x01a7, 501, /* Ƨ ƨ */ + 0x01a9, 718, /* Ʃ ʃ */ + 0x01ac, 501, /* Ƭ ƭ */ + 0x01ae, 718, /* Ʈ ʈ */ + 0x01af, 501, /* Ư ư */ + 0x01b3, 501, /* Ƴ ƴ */ + 0x01b5, 501, /* Ƶ ƶ */ + 0x01b7, 719, /* Ʒ ʒ */ + 0x01b8, 501, /* Ƹ ƹ */ + 0x01bc, 501, /* Ƽ ƽ */ + 0x01c4, 502, /* DŽ dž */ + 0x01c5, 501, /* Dž dž */ + 0x01c7, 502, /* LJ lj */ + 0x01c8, 501, /* Lj lj */ + 0x01ca, 502, /* NJ nj */ + 0x01cb, 501, /* Nj nj */ + 0x01cd, 501, /* Ǎ ǎ */ + 0x01cf, 501, /* Ǐ ǐ */ + 0x01d1, 501, /* Ǒ ǒ */ + 0x01d3, 501, /* Ǔ ǔ */ + 0x01d5, 501, /* Ǖ ǖ */ + 0x01d7, 501, /* Ǘ ǘ */ + 0x01d9, 501, /* Ǚ ǚ */ + 0x01db, 501, /* Ǜ ǜ */ + 0x01de, 501, /* Ǟ ǟ */ + 0x01e0, 501, /* Ǡ ǡ */ + 0x01e2, 501, /* Ǣ ǣ */ + 0x01e4, 501, /* Ǥ ǥ */ + 0x01e6, 501, /* Ǧ ǧ */ + 0x01e8, 501, /* Ǩ ǩ */ + 0x01ea, 501, /* Ǫ ǫ */ + 0x01ec, 501, /* Ǭ ǭ */ + 0x01ee, 501, /* Ǯ ǯ */ + 0x01f1, 502, /* DZ dz */ + 0x01f2, 501, /* Dz dz */ + 0x01f4, 501, /* Ǵ ǵ */ + 0x01fa, 501, /* Ǻ ǻ */ + 0x01fc, 501, /* Ǽ ǽ */ + 0x01fe, 501, /* Ǿ ǿ */ + 0x0200, 501, /* Ȁ ȁ */ + 0x0202, 501, /* Ȃ ȃ */ + 0x0204, 501, /* Ȅ ȅ */ + 0x0206, 501, /* Ȇ ȇ */ + 0x0208, 501, /* Ȉ ȉ */ + 0x020a, 501, /* Ȋ ȋ */ + 0x020c, 501, /* Ȍ ȍ */ + 0x020e, 501, /* Ȏ ȏ */ + 0x0210, 501, /* Ȑ ȑ */ + 0x0212, 501, /* Ȓ ȓ */ + 0x0214, 501, /* Ȕ ȕ */ + 0x0216, 501, /* Ȗ ȗ */ + 0x0386, 538, /* Ά ά */ + 0x038c, 564, /* Ό ό */ + 0x03e2, 501, /* Ϣ ϣ */ + 0x03e4, 501, /* Ϥ ϥ */ + 0x03e6, 501, /* Ϧ ϧ */ + 0x03e8, 501, /* Ϩ ϩ */ + 0x03ea, 501, /* Ϫ ϫ */ + 0x03ec, 501, /* Ϭ ϭ */ + 0x03ee, 501, /* Ϯ ϯ */ + 0x0460, 501, /* Ѡ ѡ */ + 0x0462, 501, /* Ѣ ѣ */ + 0x0464, 501, /* Ѥ ѥ */ + 0x0466, 501, /* Ѧ ѧ */ + 0x0468, 501, /* Ѩ ѩ */ + 0x046a, 501, /* Ѫ ѫ */ + 0x046c, 501, /* Ѭ ѭ */ + 0x046e, 501, /* Ѯ ѯ */ + 0x0470, 501, /* Ѱ ѱ */ + 0x0472, 501, /* Ѳ ѳ */ + 0x0474, 501, /* Ѵ ѵ */ + 0x0476, 501, /* Ѷ ѷ */ + 0x0478, 501, /* Ѹ ѹ */ + 0x047a, 501, /* Ѻ ѻ */ + 0x047c, 501, /* Ѽ ѽ */ + 0x047e, 501, /* Ѿ ѿ */ + 0x0480, 501, /* Ҁ ҁ */ + 0x0490, 501, /* Ґ ґ */ + 0x0492, 501, /* Ғ ғ */ + 0x0494, 501, /* Ҕ ҕ */ + 0x0496, 501, /* Җ җ */ + 0x0498, 501, /* Ҙ ҙ */ + 0x049a, 501, /* Қ қ */ + 0x049c, 501, /* Ҝ ҝ */ + 0x049e, 501, /* Ҟ ҟ */ + 0x04a0, 501, /* Ҡ ҡ */ + 0x04a2, 501, /* Ң ң */ + 0x04a4, 501, /* Ҥ ҥ */ + 0x04a6, 501, /* Ҧ ҧ */ + 0x04a8, 501, /* Ҩ ҩ */ + 0x04aa, 501, /* Ҫ ҫ */ + 0x04ac, 501, /* Ҭ ҭ */ + 0x04ae, 501, /* Ү ү */ + 0x04b0, 501, /* Ұ ұ */ + 0x04b2, 501, /* Ҳ ҳ */ + 0x04b4, 501, /* Ҵ ҵ */ + 0x04b6, 501, /* Ҷ ҷ */ + 0x04b8, 501, /* Ҹ ҹ */ + 0x04ba, 501, /* Һ һ */ + 0x04bc, 501, /* Ҽ ҽ */ + 0x04be, 501, /* Ҿ ҿ */ + 0x04c1, 501, /* Ӂ ӂ */ + 0x04c3, 501, /* Ӄ ӄ */ + 0x04c7, 501, /* Ӈ ӈ */ + 0x04cb, 501, /* Ӌ ӌ */ + 0x04d0, 501, /* Ӑ ӑ */ + 0x04d2, 501, /* Ӓ ӓ */ + 0x04d4, 501, /* Ӕ ӕ */ + 0x04d6, 501, /* Ӗ ӗ */ + 0x04d8, 501, /* Ә ә */ + 0x04da, 501, /* Ӛ ӛ */ + 0x04dc, 501, /* Ӝ ӝ */ + 0x04de, 501, /* Ӟ ӟ */ + 0x04e0, 501, /* Ӡ ӡ */ + 0x04e2, 501, /* Ӣ ӣ */ + 0x04e4, 501, /* Ӥ ӥ */ + 0x04e6, 501, /* Ӧ ӧ */ + 0x04e8, 501, /* Ө ө */ + 0x04ea, 501, /* Ӫ ӫ */ + 0x04ee, 501, /* Ӯ ӯ */ + 0x04f0, 501, /* Ӱ ӱ */ + 0x04f2, 501, /* Ӳ ӳ */ + 0x04f4, 501, /* Ӵ ӵ */ + 0x04f8, 501, /* Ӹ ӹ */ + 0x1e00, 501, /* Ḁ ḁ */ + 0x1e02, 501, /* Ḃ ḃ */ + 0x1e04, 501, /* Ḅ ḅ */ + 0x1e06, 501, /* Ḇ ḇ */ + 0x1e08, 501, /* Ḉ ḉ */ + 0x1e0a, 501, /* Ḋ ḋ */ + 0x1e0c, 501, /* Ḍ ḍ */ + 0x1e0e, 501, /* Ḏ ḏ */ + 0x1e10, 501, /* Ḑ ḑ */ + 0x1e12, 501, /* Ḓ ḓ */ + 0x1e14, 501, /* Ḕ ḕ */ + 0x1e16, 501, /* Ḗ ḗ */ + 0x1e18, 501, /* Ḙ ḙ */ + 0x1e1a, 501, /* Ḛ ḛ */ + 0x1e1c, 501, /* Ḝ ḝ */ + 0x1e1e, 501, /* Ḟ ḟ */ + 0x1e20, 501, /* Ḡ ḡ */ + 0x1e22, 501, /* Ḣ ḣ */ + 0x1e24, 501, /* Ḥ ḥ */ + 0x1e26, 501, /* Ḧ ḧ */ + 0x1e28, 501, /* Ḩ ḩ */ + 0x1e2a, 501, /* Ḫ ḫ */ + 0x1e2c, 501, /* Ḭ ḭ */ + 0x1e2e, 501, /* Ḯ ḯ */ + 0x1e30, 501, /* Ḱ ḱ */ + 0x1e32, 501, /* Ḳ ḳ */ + 0x1e34, 501, /* Ḵ ḵ */ + 0x1e36, 501, /* Ḷ ḷ */ + 0x1e38, 501, /* Ḹ ḹ */ + 0x1e3a, 501, /* Ḻ ḻ */ + 0x1e3c, 501, /* Ḽ ḽ */ + 0x1e3e, 501, /* Ḿ ḿ */ + 0x1e40, 501, /* Ṁ ṁ */ + 0x1e42, 501, /* Ṃ ṃ */ + 0x1e44, 501, /* Ṅ ṅ */ + 0x1e46, 501, /* Ṇ ṇ */ + 0x1e48, 501, /* Ṉ ṉ */ + 0x1e4a, 501, /* Ṋ ṋ */ + 0x1e4c, 501, /* Ṍ ṍ */ + 0x1e4e, 501, /* Ṏ ṏ */ + 0x1e50, 501, /* Ṑ ṑ */ + 0x1e52, 501, /* Ṓ ṓ */ + 0x1e54, 501, /* Ṕ ṕ */ + 0x1e56, 501, /* Ṗ ṗ */ + 0x1e58, 501, /* Ṙ ṙ */ + 0x1e5a, 501, /* Ṛ ṛ */ + 0x1e5c, 501, /* Ṝ ṝ */ + 0x1e5e, 501, /* Ṟ ṟ */ + 0x1e60, 501, /* Ṡ ṡ */ + 0x1e62, 501, /* Ṣ ṣ */ + 0x1e64, 501, /* Ṥ ṥ */ + 0x1e66, 501, /* Ṧ ṧ */ + 0x1e68, 501, /* Ṩ ṩ */ + 0x1e6a, 501, /* Ṫ ṫ */ + 0x1e6c, 501, /* Ṭ ṭ */ + 0x1e6e, 501, /* Ṯ ṯ */ + 0x1e70, 501, /* Ṱ ṱ */ + 0x1e72, 501, /* Ṳ ṳ */ + 0x1e74, 501, /* Ṵ ṵ */ + 0x1e76, 501, /* Ṷ ṷ */ + 0x1e78, 501, /* Ṹ ṹ */ + 0x1e7a, 501, /* Ṻ ṻ */ + 0x1e7c, 501, /* Ṽ ṽ */ + 0x1e7e, 501, /* Ṿ ṿ */ + 0x1e80, 501, /* Ẁ ẁ */ + 0x1e82, 501, /* Ẃ ẃ */ + 0x1e84, 501, /* Ẅ ẅ */ + 0x1e86, 501, /* Ẇ ẇ */ + 0x1e88, 501, /* Ẉ ẉ */ + 0x1e8a, 501, /* Ẋ ẋ */ + 0x1e8c, 501, /* Ẍ ẍ */ + 0x1e8e, 501, /* Ẏ ẏ */ + 0x1e90, 501, /* Ẑ ẑ */ + 0x1e92, 501, /* Ẓ ẓ */ + 0x1e94, 501, /* Ẕ ẕ */ + 0x1ea0, 501, /* Ạ ạ */ + 0x1ea2, 501, /* Ả ả */ + 0x1ea4, 501, /* Ấ ấ */ + 0x1ea6, 501, /* Ầ ầ */ + 0x1ea8, 501, /* Ẩ ẩ */ + 0x1eaa, 501, /* Ẫ ẫ */ + 0x1eac, 501, /* Ậ ậ */ + 0x1eae, 501, /* Ắ ắ */ + 0x1eb0, 501, /* Ằ ằ */ + 0x1eb2, 501, /* Ẳ ẳ */ + 0x1eb4, 501, /* Ẵ ẵ */ + 0x1eb6, 501, /* Ặ ặ */ + 0x1eb8, 501, /* Ẹ ẹ */ + 0x1eba, 501, /* Ẻ ẻ */ + 0x1ebc, 501, /* Ẽ ẽ */ + 0x1ebe, 501, /* Ế ế */ + 0x1ec0, 501, /* Ề ề */ + 0x1ec2, 501, /* Ể ể */ + 0x1ec4, 501, /* Ễ ễ */ + 0x1ec6, 501, /* Ệ ệ */ + 0x1ec8, 501, /* Ỉ ỉ */ + 0x1eca, 501, /* Ị ị */ + 0x1ecc, 501, /* Ọ ọ */ + 0x1ece, 501, /* Ỏ ỏ */ + 0x1ed0, 501, /* Ố ố */ + 0x1ed2, 501, /* Ồ ồ */ + 0x1ed4, 501, /* Ổ ổ */ + 0x1ed6, 501, /* Ỗ ỗ */ + 0x1ed8, 501, /* Ộ ộ */ + 0x1eda, 501, /* Ớ ớ */ + 0x1edc, 501, /* Ờ ờ */ + 0x1ede, 501, /* Ở ở */ + 0x1ee0, 501, /* Ỡ ỡ */ + 0x1ee2, 501, /* Ợ ợ */ + 0x1ee4, 501, /* Ụ ụ */ + 0x1ee6, 501, /* Ủ ủ */ + 0x1ee8, 501, /* Ứ ứ */ + 0x1eea, 501, /* Ừ ừ */ + 0x1eec, 501, /* Ử ử */ + 0x1eee, 501, /* Ữ ữ */ + 0x1ef0, 501, /* Ự ự */ + 0x1ef2, 501, /* Ỳ ỳ */ + 0x1ef4, 501, /* Ỵ ỵ */ + 0x1ef6, 501, /* Ỷ ỷ */ + 0x1ef8, 501, /* Ỹ ỹ */ + 0x1f59, 492, /* Ὑ ὑ */ + 0x1f5b, 492, /* Ὓ ὓ */ + 0x1f5d, 492, /* Ὕ ὕ */ + 0x1f5f, 492, /* Ὗ ὗ */ + 0x1fbc, 491, /* ᾼ ᾳ */ + 0x1fcc, 491, /* ῌ ῃ */ + 0x1fec, 493, /* Ῥ ῥ */ + 0x1ffc, 491, /* ῼ ῳ */ +}; + +/* + * title characters are those between + * upper and lower case. ie DZ Dz dz + */ +static +Rune __totitle1[] = +{ + 0x01c4, 501, /* DŽ Dž */ + 0x01c6, 499, /* dž Dž */ + 0x01c7, 501, /* LJ Lj */ + 0x01c9, 499, /* lj Lj */ + 0x01ca, 501, /* NJ Nj */ + 0x01cc, 499, /* nj Nj */ + 0x01f1, 501, /* DZ Dz */ + 0x01f3, 499, /* dz Dz */ +}; + +static +Rune* +bsearch(Rune c, Rune *t, int n, int ne) +{ + Rune *p; + int m; + + while(n > 1) { + m = n/2; + p = t + m*ne; + if(c >= p[0]) { + t = p; + n = n-m; + } else + n = m; + } + if(n && c >= t[0]) + return t; + return 0; +} + +Rune +tolowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +toupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +totitlerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +int +islowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isalpharune(Rune c) +{ + Rune *p; + + if(isupperrune(c) || islowerrune(c)) + return 1; + p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __alpha1, nelem(__alpha1), 1); + if(p && c == p[0]) + return 1; + return 0; +} + +int +istitlerune(Rune c) +{ + return isupperrune(c) && islowerrune(c); +} + +int +isspacerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __space2, nelem(__space2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} diff --git a/src/libutf/utf.7 b/src/libutf/utf.7 new file mode 100644 index 00000000..97b7b1e7 --- /dev/null +++ b/src/libutf/utf.7 @@ -0,0 +1,91 @@ +.TH UTF 7 +.SH NAME +UTF, Unicode, ASCII, rune \- character set and format +.SH DESCRIPTION +The Plan 9 character set and representation are +based on the Unicode Standard and on the ISO multibyte +.SM UTF-8 +encoding (Universal Character +Set Transformation Format, 8 bits wide). +The Unicode Standard represents its characters in 16 +bits; +.SM UTF-8 +represents such +values in an 8-bit byte stream. +Throughout this manual, +.SM UTF-8 +is shortened to +.SM UTF. +.PP +In Plan 9, a +.I rune +is a 16-bit quantity representing a Unicode character. +Internally, programs may store characters as runes. +However, any external manifestation of textual information, +in files or at the interface between programs, uses a +machine-independent, byte-stream encoding called +.SM UTF. +.PP +.SM UTF +is designed so the 7-bit +.SM ASCII +set (values hexadecimal 00 to 7F), +appear only as themselves +in the encoding. +Runes with values above 7F appear as sequences of two or more +bytes with values only from 80 to FF. +.PP +The +.SM UTF +encoding of the Unicode Standard is backward compatible with +.SM ASCII\c +: +programs presented only with +.SM ASCII +work on Plan 9 +even if not written to deal with +.SM UTF, +as do +programs that deal with uninterpreted byte streams. +However, programs that perform semantic processing on +.SM ASCII +graphic +characters must convert from +.SM UTF +to runes +in order to work properly with non-\c +.SM ASCII +input. +See +.IR rune (2). +.PP +Letting numbers be binary, +a rune x is converted to a multibyte +.SM UTF +sequence +as follows: +.PP +01. x in [00000000.0bbbbbbb] → 0bbbbbbb +.br +10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb +.br +11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb +.br +.PP +Conversion 01 provides a one-byte sequence that spans the +.SM ASCII +character set in a compatible way. +Conversions 10 and 11 represent higher-valued characters +as sequences of two or three bytes with the high bit set. +Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open. +When there are multiple ways to encode a value, for example rune 0, +the shortest encoding is used. +.PP +In the inverse mapping, +any sequence except those described above +is incorrect and is converted to rune hexadecimal 0080. +.SH "SEE ALSO" +.IR ascii (1), +.IR tcs (1), +.IR rune (3), +.IR "The Unicode Standard" . diff --git a/src/libutf/utf.h b/src/libutf/utf.h new file mode 100644 index 00000000..623bfda9 --- /dev/null +++ b/src/libutf/utf.h @@ -0,0 +1,51 @@ +#ifndef _UTFH_ +#define _UTFH_ 1 + +typedef unsigned short Rune; /* 16 bits */ + +enum +{ + UTFmax = 3, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0x80, /* decoding error in UTF */ +}; + +/* + * rune routines + */ +extern int runetochar(char*, Rune*); +extern int chartorune(Rune*, char*); +extern int runelen(long); +extern int runenlen(Rune*, int); +extern int fullrune(char*, int); +extern int utflen(char*); +extern int utfnlen(char*, long); +extern char* utfrune(char*, long); +extern char* utfrrune(char*, long); +extern char* utfutf(char*, char*); +extern char* utfecpy(char*, char*, char*); + +extern Rune* runestrcat(Rune*, Rune*); +extern Rune* runestrchr(Rune*, Rune); +extern int runestrcmp(Rune*, Rune*); +extern Rune* runestrcpy(Rune*, Rune*); +extern Rune* runestrncpy(Rune*, Rune*, long); +extern Rune* runestrecpy(Rune*, Rune*, Rune*); +extern Rune* runestrdup(Rune*); +extern Rune* runestrncat(Rune*, Rune*, long); +extern int runestrncmp(Rune*, Rune*, long); +extern Rune* runestrrchr(Rune*, Rune); +extern long runestrlen(Rune*); +extern Rune* runestrstr(Rune*, Rune*); + +extern Rune tolowerrune(Rune); +extern Rune totitlerune(Rune); +extern Rune toupperrune(Rune); +extern int isalpharune(Rune); +extern int islowerrune(Rune); +extern int isspacerune(Rune); +extern int istitlerune(Rune); +extern int isupperrune(Rune); + +#endif diff --git a/src/libutf/utfdef.h b/src/libutf/utfdef.h new file mode 100644 index 00000000..4b58ae87 --- /dev/null +++ b/src/libutf/utfdef.h @@ -0,0 +1,14 @@ +#define uchar _utfuchar +#define ushort _utfushort +#define uint _utfuint +#define ulong _utfulong +#define vlong _utfvlong +#define uvlong _utfuvlong + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define nil ((void*)0) diff --git a/src/libutf/utfecpy.c b/src/libutf/utfecpy.c new file mode 100644 index 00000000..57159350 --- /dev/null +++ b/src/libutf/utfecpy.c @@ -0,0 +1,36 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +char* +utfecpy(char *to, char *e, char *from) +{ + char *end; + + if(to >= e) + return to; + end = memccpy(to, from, '\0', e - to); + if(end == nil){ + end = e-1; + while(end>to && (*--end&0xC0)==0x80) + ; + *end = '\0'; + }else{ + end--; + } + return end; +} diff --git a/src/libutf/utflen.c b/src/libutf/utflen.c new file mode 100644 index 00000000..a10a5318 --- /dev/null +++ b/src/libutf/utflen.c @@ -0,0 +1,38 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +int +utflen(char *s) +{ + int c; + long n; + Rune rune; + + n = 0; + for(;;) { + c = *(uchar*)s; + if(c < Runeself) { + if(c == 0) + return n; + s++; + } else + s += chartorune(&rune, s); + n++; + } + return 0; +} diff --git a/src/libutf/utfnlen.c b/src/libutf/utfnlen.c new file mode 100644 index 00000000..66d674fd --- /dev/null +++ b/src/libutf/utfnlen.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +int +utfnlen(char *s, long m) +{ + int c; + long n; + Rune rune; + char *es; + + es = s + m; + for(n = 0; s < es; n++) { + c = *(uchar*)s; + if(c < Runeself){ + if(c == '\0') + break; + s++; + continue; + } + if(!fullrune(s, es-s)) + break; + s += chartorune(&rune, s); + } + return n; +} diff --git a/src/libutf/utfrrune.c b/src/libutf/utfrrune.c new file mode 100644 index 00000000..37d2a725 --- /dev/null +++ b/src/libutf/utfrrune.c @@ -0,0 +1,46 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +char* +utfrrune(char *s, long c) +{ + long c1; + Rune r; + char *s1; + + if(c < Runesync) /* not part of utf sequence */ + return strrchr(s, c); + + s1 = 0; + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return s1; + if(c1 == c) + s1 = s; + s++; + continue; + } + c1 = chartorune(&r, s); + if(r == c) + s1 = s; + s += c1; + } + return 0; +} diff --git a/src/libutf/utfrune.c b/src/libutf/utfrune.c new file mode 100644 index 00000000..1a2e04c9 --- /dev/null +++ b/src/libutf/utfrune.c @@ -0,0 +1,45 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + +char* +utfrune(char *s, long c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr(s, c); + + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return s; + s += n; + } + return 0; +} diff --git a/src/libutf/utfutf.c b/src/libutf/utfutf.c new file mode 100644 index 00000000..ea360361 --- /dev/null +++ b/src/libutf/utfutf.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "utf.h" +#include "utfdef.h" + + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none + */ +char* +utfutf(char *s1, char *s2) +{ + char *p; + long f, n1, n2; + Rune r; + + n1 = chartorune(&r, s2); + f = r; + if(f <= Runesync) /* represents self */ + return strstr(s1, s2); + + n2 = strlen(s2); + for(p=s1; p=utfrune(p, f); p+=n1) + if(strncmp(p, s2, n2) == 0) + return p; + return 0; +} |