diff options
Diffstat (limited to 'man/man3/rune.3')
-rw-r--r-- | man/man3/rune.3 | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/man/man3/rune.3 b/man/man3/rune.3 new file mode 100644 index 00000000..7ce978ae --- /dev/null +++ b/man/man3/rune.3 @@ -0,0 +1,187 @@ +.TH RUNE 3 +.SH NAME +runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion +.SH SYNOPSIS +.ta \w'\fLchar*xx'u +.B #include <utf.h> +.PP +.B +int runetochar(char *s, Rune *r) +.PP +.B +int chartorune(Rune *r, char *s) +.PP +.B +int runelen(long r) +.PP +.B +int runenlen(Rune *r, int n) +.PP +.B +int fullrune(char *s, int n) +.PP +.B +char* utfecpy(char *s1, char *es1, char *s2) +.PP +.B +int utflen(char *s) +.PP +.B +int utfnlen(char *s, long n) +.PP +.B +char* utfrune(char *s, long c) +.PP +.B +char* utfrrune(char *s, long c) +.PP +.B +char* utfutf(char *s1, char *s2) +.SH DESCRIPTION +These routines convert between a +.SM UTF +byte stream and runes. +.PP +.I Runetochar +copies one rune at +.I r +to at most +.B UTFmax +bytes starting at +.I s +and returns the number of bytes copied. +.BR UTFmax , +defined as +.B 3 +in +.BR <utf.h> , +is the maximum number of bytes required to represent a rune. +.PP +.I Chartorune +copies at most +.B UTFmax +bytes starting at +.I s +to one rune at +.I r +and returns the number of bytes copied. +If the input is not exactly in +.SM UTF +format, +.I chartorune +will convert to 0x80 and return 1. +.PP +.I Runelen +returns the number of bytes +required to convert +.I r +into +.SM UTF. +.PP +.I Runenlen +returns the number of bytes +required to convert the +.I n +runes pointed to by +.I r +into +.SM UTF. +.PP +.I Fullrune +returns 1 if the string +.I s +of length +.I n +is long enough to be decoded by +.I chartorune +and 0 otherwise. +This does not guarantee that the string +contains a legal +.SM UTF +encoding. +This routine is used by programs that +obtain input a byte at +a time and need to know when a full rune +has arrived. 
+.PP +The following routines are analogous to the +corresponding string routines with +.B utf +substituted for +.B str +and +.B rune +substituted for +.BR chr . +.PP +.I Utfecpy +copies UTF sequences until a null sequence has been copied, but writes no +sequences beyond +.IR es1 . +If any sequences are copied, +.I s1 +is terminated by a null sequence, and a pointer to that sequence is returned. +Otherwise, the original +.I s1 +is returned. +.PP +.I Utflen +returns the number of runes that +are represented by the +.SM UTF +string +.IR s . +.PP +.I Utfnlen +returns the number of complete runes that +are represented by the first +.I n +bytes of +.SM UTF +string +.IR s . +If the last few bytes of the string contain an incompletely coded rune, +.I utfnlen +will not count them; in this way, it differs from +.IR utflen , +which includes every byte of the string. +.PP +.I Utfrune +.RI ( utfrrune ) +returns a pointer to the first (last) +occurrence of rune +.I c +in the +.SM UTF +string +.IR s , +or 0 if +.I c +does not occur in the string. +The NUL byte terminating a string is considered to +be part of the string +.IR s . +.PP +.I Utfutf +returns a pointer to the first occurrence of +the +.SM UTF +string +.I s2 +as a +.SM UTF +substring of +.IR s1 , +or 0 if there is none. +If +.I s2 +is the null string, +.I utfutf +returns +.IR s1 . +.SH HISTORY +These routines were written by Rob Pike and Ken Thompson +and first appeared in Plan 9. +.SH SEE ALSO +.IR utf (7), +.IR tcs (1) |