aboutsummaryrefslogtreecommitdiff
path: root/src/libhtml/utils.c
diff options
context:
space:
mode:
authorwkj <devnull@localhost>2004-04-06 19:06:52 +0000
committerwkj <devnull@localhost>2004-04-06 19:06:52 +0000
commit7cf289ca89a7416999ae02330236042b0d37e3db (patch)
tree796d1363a7a53c72c28b199758ee674f1326a510 /src/libhtml/utils.c
parent3e3817f7c86658f60715dd93768eaf8285807985 (diff)
downloadplan9port-7cf289ca89a7416999ae02330236042b0d37e3db.tar.gz
plan9port-7cf289ca89a7416999ae02330236042b0d37e3db.tar.bz2
plan9port-7cf289ca89a7416999ae02330236042b0d37e3db.zip
Import version of libhtml that might actually work with ANSI C.
Diffstat (limited to 'src/libhtml/utils.c')
-rw-r--r--src/libhtml/utils.c591
1 files changed, 591 insertions, 0 deletions
diff --git a/src/libhtml/utils.c b/src/libhtml/utils.c
new file mode 100644
index 00000000..db22bba7
--- /dev/null
+++ b/src/libhtml/utils.c
@@ -0,0 +1,591 @@
+#include <u.h>
+#include <libc.h>
+#include <draw.h>
+#include <html.h>
+#include "impl.h"
+
+Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
+Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
+
+// All lists start out like List structure.
+// List itself can be used as list of int.
+int
+_listlen(List* l)
+{
+ int n = 0;
+
+ while(l != nil) {
+ l = l->next;
+ n++;
+ }
+ return n;
+}
+
+// Cons
+List*
+_newlist(int val, List* rest)
+{
+ List* ans;
+
+ ans = (List*)emalloc(sizeof(List));
+ ans->val = val;
+ ans->next = rest;
+ return ans;
+}
+
+// Reverse a list in place
+List*
+_revlist(List* l)
+{
+ List* newl;
+ List* nextl;
+
+ newl = nil;
+ while(l != nil) {
+ nextl = l->next;
+ l->next = newl;
+ newl = l;
+ l = nextl;
+ }
+ return newl;
+}
+
+// The next few routines take a "character class" as argument.
+// e.g., "a-zA-Z", or "^ \t\n"
+// (ranges indicated by - except in first position;
+// ^ is first position means "not in" the following class)
+
+// Splitl splits s[0:n] just before first character of class cl.
+// Answers go in (p1, n1) and (p2, n2).
+// If no split, the whole thing goes in the first component.
+// Note: answers contain pointers into original string.
+void
+_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+ Rune* p;
+
+ p = _Strnclass(s, cl, n);
+ *p1 = s;
+ if(p == nil) {
+ *n1 = n;
+ *p2 = nil;
+ *n2 = 0;
+ }
+ else {
+ *p2 = p;
+ *n1 = p-s;
+ *n2 = n-*n1;
+ }
+}
+
+// Splitr splits s[0:n] just after last character of class cl.
+// Answers go in (p1, n1) and (p2, n2).
+// If no split, the whole thing goes in the last component.
+// Note: answers contain pointers into original string.
+void
+_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
+{
+ Rune* p;
+
+ p = _Strnrclass(s, cl, n);
+ if(p == nil) {
+ *p1 = nil;
+ *n1 = 0;
+ *p2 = s;
+ *n2 = n;
+ }
+ else {
+ *p1 = s;
+ *p2 = p+1;
+ *n1 = *p2-s;
+ *n2 = n-*n1;
+ }
+}
+
+// Splitall splits s[0:n] into parts that are separated by characters from class cl.
+// Each part will have nonzero length.
+// At most alen parts are found, and pointers to their starts go into
+// the strarr array, while their lengths go into the lenarr array.
+// The return value is the number of parts found.
+int
+_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
+{
+ int i;
+ Rune* p;
+ Rune* q;
+ Rune* slast;
+
+ if(s == nil || n == 0)
+ return 0;
+ i = 0;
+ p = s;
+ slast = s+n;
+ while(p < slast && i < alen) {
+ while(p < slast && _inclass(*p, cl))
+ p++;
+ if(p == slast)
+ break;
+ q = _Strnclass(p, cl, slast-p);
+ if(q == nil)
+ q = slast;
+ assert(q > p && q <= slast);
+ strarr[i] = p;
+ lenarr[i] = q-p;
+ i++;
+ p = q;
+ }
+ return i;
+}
+
+// Find part of s that excludes leading and trailing whitespace,
+// and return that part in *pans (and its length in *panslen).
+void
+_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
+{
+ Rune* p;
+ Rune* q;
+
+ p = nil;
+ if(n > 0) {
+ p = _Strnclass(s, notwhitespace, n);
+ if(p != nil) {
+ q = _Strnrclass(s, notwhitespace, n);
+ assert(q != nil);
+ n = q+1-p;
+ }
+ }
+ *pans = p;
+ *panslen = n;
+}
+
+// _Strclass returns a pointer to the first element of s that is
+// a member of class cl, nil if none.
+Rune*
+_Strclass(Rune* s, Rune* cl)
+{
+ Rune* p;
+
+ for(p = s; *p != 0; p++)
+ if(_inclass(*p, cl))
+ return p;
+ return nil;
+}
+
+// _Strnclass returns a pointer to the first element of s[0:n] that is
+// a member of class cl, nil if none.
+Rune*
+_Strnclass(Rune* s, Rune* cl, int n)
+{
+ Rune* p;
+
+ for(p = s; n-- && *p != 0; p++)
+ if(_inclass(*p, cl))
+ return p;
+ return nil;
+}
+
+// _Strrclass returns a pointer to the last element of s that is
+// a member of class cl, nil if none
+Rune*
+_Strrclass(Rune* s, Rune* cl)
+{
+ Rune* p;
+
+ if(s == nil || *s == 0)
+ return nil;
+ p = s + runestrlen(s) - 1;
+ while(p >= s) {
+ if(_inclass(*p, cl))
+ return p;
+ p--;
+ };
+ return nil;
+}
+
+// _Strnrclass returns a pointer to the last element of s[0:n] that is
+// a member of class cl, nil if none
+Rune*
+_Strnrclass(Rune* s, Rune* cl, int n)
+{
+ Rune* p;
+
+ if(s == nil || *s == 0 || n == 0)
+ return nil;
+ p = s + n - 1;
+ while(p >= s) {
+ if(_inclass(*p, cl))
+ return p;
+ p--;
+ };
+ return nil;
+}
+
+// Is c in the class cl?
+int
+_inclass(Rune c, Rune* cl)
+{
+ int n;
+ int ans;
+ int negate;
+ int i;
+
+ n = _Strlen(cl);
+ if(n == 0)
+ return 0;
+ ans = 0;
+ negate = 0;
+ if(cl[0] == '^') {
+ negate = 1;
+ cl++;
+ n--;
+ }
+ for(i = 0; i < n; i++) {
+ if(cl[i] == '-' && i > 0 && i < n - 1) {
+ if(c >= cl[i - 1] && c <= cl[i + 1]) {
+ ans = 1;
+ break;
+ }
+ i++;
+ }
+ else if(c == cl[i]) {
+ ans = 1;
+ break;
+ }
+ }
+ if(negate)
+ ans = !ans;
+ return ans;
+}
+
+// Is pre a prefix of s?
+int
+_prefix(Rune* pre, Rune* s)
+{
+ int ns;
+ int n;
+ int k;
+
+ ns = _Strlen(s);
+ n = _Strlen(pre);
+ if(ns < n)
+ return 0;
+ for(k = 0; k < n; k++) {
+ if(pre[k] != s[k])
+ return 0;
+ }
+ return 1;
+}
+
+// Number of runes in (null-terminated) s
+int
+_Strlen(Rune* s)
+{
+ if(s == nil)
+ return 0;
+ return runestrlen(s);
+}
+
+// -1, 0, 1 as s1 is lexicographically less, equal greater than s2
+int
+_Strcmp(Rune *s1, Rune *s2)
+{
+ if(s1 == nil)
+ return (s2 == nil || *s2 == 0) ? 0 : -1;
+ if(s2 == nil)
+ return (*s1 == 0) ? 0 : 1;
+ return runestrcmp(s1, s2);
+}
+
+// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
+// Also, do a case-insensitive match, assuming s2
+// has no chars in [A-Z], only their lowercase versions.
+// (This routine is used for in-place keyword lookup, where s2 is in a keyword
+// list and s1 is some substring, possibly mixed-case, in a buffer.)
+int
+_Strncmpci(Rune *s1, int n1, Rune *s2)
+{
+ Rune c1, c2;
+
+ for(;;) {
+ if(n1-- == 0) {
+ if(*s2 == 0)
+ return 0;
+ return -1;
+ }
+ c1 = *s1++;
+ c2 = *s2++;
+ if(c1 >= 'A' && c1 <= 'Z')
+ c1 = c1 - 'A' + 'a';
+ if(c1 != c2) {
+ if(c1 > c2)
+ return 1;
+ return -1;
+ }
+ }
+}
+
+// emalloc and copy
+Rune*
+_Strdup(Rune* s)
+{
+ if(s == nil)
+ return nil;
+ return _Strndup(s, runestrlen(s));
+}
+
+// emalloc and copy n chars of s (assume s is at least that long),
+// and add 0 terminator.
+// Return nil if n==0.
+Rune*
+_Strndup(Rune* s, int n)
+{
+ Rune* ans;
+
+ if(n <= 0)
+ return nil;
+ ans = _newstr(n);
+ memmove(ans, s, n*sizeof(Rune));
+ ans[n] = 0;
+ return ans;
+}
+// emalloc enough room for n Runes, plus 1 null terminator.
+// (Not initialized to anything.)
+Rune*
+_newstr(int n)
+{
+ return (Rune*)emalloc((n+1)*sizeof(Rune));
+}
+
+// emalloc and copy s+t
+Rune*
+_Strdup2(Rune* s, Rune* t)
+{
+ int ns, nt;
+ Rune* ans;
+ Rune* p;
+
+ ns = _Strlen(s);
+ nt = _Strlen(t);
+ if(ns+nt == 0)
+ return nil;
+ ans = _newstr(ns+nt);
+ p = _Stradd(ans, s, ns);
+ p = _Stradd(p, t, nt);
+ *p = 0;
+ return ans;
+}
+
+// Return emalloc'd substring s[start:stop],
+Rune*
+_Strsubstr(Rune* s, int start, int stop)
+{
+ Rune* t;
+
+ if(start == stop)
+ return nil;
+ t = _Strndup(s+start, stop-start);
+ return t;
+}
+
+// Copy n chars to s1 from s2, and return s1+n
+Rune*
+_Stradd(Rune* s1, Rune* s2, int n)
+{
+ if(n == 0)
+ return s1;
+ memmove(s1, s2, n*sizeof(Rune));
+ return s1+n;
+}
+
+// Like strtol, but converting from Rune* string
+
+//#define LONG_MAX 2147483647L
+//#define LONG_MIN -2147483648L
+
+long
+_Strtol(Rune* nptr, Rune** endptr, int base)
+{
+ Rune* p;
+ long n, nn;
+ int c, ovfl, v, neg, ndig;
+
+ p = nptr;
+ neg = 0;
+ n = 0;
+ ndig = 0;
+ ovfl = 0;
+
+ /*
+ * White space
+ */
+ for(;;p++){
+ switch(*p){
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ }
+ break;
+ }
+
+ /*
+ * Sign
+ */
+ if(*p=='-' || *p=='+')
+ if(*p++ == '-')
+ neg = 1;
+
+ /*
+ * Base
+ */
+ if(base==0){
+ if(*p != '0')
+ base = 10;
+ else{
+ base = 8;
+ if(p[1]=='x' || p[1]=='X'){
+ p += 2;
+ base = 16;
+ }
+ }
+ }else if(base==16 && *p=='0'){
+ if(p[1]=='x' || p[1]=='X')
+ p += 2;
+ }else if(base<0 || 36<base)
+ goto Return;
+
+ /*
+ * Non-empty sequence of digits
+ */
+ for(;; p++,ndig++){
+ c = *p;
+ v = base;
+ if('0'<=c && c<='9')
+ v = c - '0';
+ else if('a'<=c && c<='z')
+ v = c - 'a' + 10;
+ else if('A'<=c && c<='Z')
+ v = c - 'A' + 10;
+ if(v >= base)
+ break;
+ nn = n*base + v;
+ if(nn < n)
+ ovfl = 1;
+ n = nn;
+ }
+
+ Return:
+ if(ndig == 0)
+ p = nptr;
+ if(endptr)
+ *endptr = p;
+ if(ovfl){
+ if(neg)
+ return LONG_MIN;
+ return LONG_MAX;
+ }
+ if(neg)
+ return -n;
+ return n;
+}
+
+// Convert buf[0:n], bytes whose character set is chset,
+// into a emalloc'd null-terminated Unicode string.
+Rune*
+toStr(uchar* buf, int n, int chset)
+{
+ int i;
+ int m;
+ Rune ch;
+ Rune* ans;
+
+ switch(chset) {
+ case US_Ascii:
+ case ISO_8859_1:
+ ans = (Rune*)emalloc((n+1)*sizeof(Rune));
+ for(i = 0; i < n; i++)
+ ans[i] = buf[i];
+ ans[n] = 0;
+ break;
+
+ case UTF_8:
+ m = 0;
+ for(i = 0; i < n; ) {
+ i += chartorune(&ch, (char*)(buf+i));
+ m++;
+ }
+ ans = (Rune*)emalloc((m+1)*sizeof(Rune));
+ m = 0;
+ for(i = 0; i < n; ) {
+ i += chartorune(&ch, (char*)(buf+i));
+ ans[m++] = ch;
+ }
+ ans[m] = 0;
+ break;
+
+ default:
+ ans = nil;
+ assert(0);
+ }
+ return ans;
+}
+
+// Convert buf[0:n], Unicode characters,
+// into an emalloc'd null-terminated string in character set chset.
+// Use 0x80 for unconvertable characters.
+uchar*
+fromStr(Rune* buf, int n, int chset)
+{
+ uchar* ans;
+ int i, lim, m;
+ Rune ch;
+ uchar* p;
+ uchar s[UTFmax];
+
+ ans = nil;
+ switch(chset) {
+ case US_Ascii:
+ case ISO_8859_1:
+ ans = (uchar*)emalloc(n+1);
+ lim = (chset==US_Ascii)? 127 : 255;
+ for(i = 0; i < n; i++) {
+ ch = buf[i];
+ if(ch > lim)
+ ch = 0x80;
+ ans[i] = ch;
+ }
+ ans[n] = 0;
+ break;
+
+ case UTF_8:
+ m = 0;
+ for(i = 0; i < n; i++) {
+ m += runetochar((char*)s, &buf[i]);
+ }
+ ans = (uchar*)emalloc(m+1);
+ p = ans;
+ for(i = 0; i < n; i++)
+ p += runetochar((char*)p, &buf[i]);
+ *p = 0;
+ break;
+
+ default:
+ assert(0);
+ }
+ return ans;
+
+}
+
+// Convert n to emalloc'd String.
+Rune*
+_ltoStr(int n)
+{
+ int m;
+ uchar buf[20];
+
+ m = snprint((char*)buf, sizeof(buf), "%d", n);
+ return toStr(buf, m, US_Ascii);
+}