aboutsummaryrefslogtreecommitdiff
path: root/src/lib9/tokenize.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib9/tokenize.c')
-rw-r--r--src/lib9/tokenize.c106
1 files changed, 106 insertions, 0 deletions
diff --git a/src/lib9/tokenize.c b/src/lib9/tokenize.c
new file mode 100644
index 00000000..6fa9fc73
--- /dev/null
+++ b/src/lib9/tokenize.c
@@ -0,0 +1,106 @@
+#include <lib9.h>
+
+static char qsep[] = " \t\r\n";
+
+static char*
+qtoken(char *s, char *sep)
+{
+ int quoting;
+ char *t;
+
+ quoting = 0;
+ t = s; /* s is output string, t is input string */
+ while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
+ if(*t != '\''){
+ *s++ = *t++;
+ continue;
+ }
+ /* *t is a quote */
+ if(!quoting){
+ quoting = 1;
+ t++;
+ continue;
+ }
+ /* quoting and we're on a quote */
+ if(t[1] != '\''){
+ /* end of quoted section; absorb closing quote */
+ t++;
+ quoting = 0;
+ continue;
+ }
+ /* doubled quote; fold one quote into two */
+ t++;
+ *s++ = *t++;
+ }
+ if(*s != '\0'){
+ *s = '\0';
+ if(t == s)
+ t++;
+ }
+ return t;
+}
+
+static char*
+etoken(char *t, char *sep)
+{
+ int quoting;
+
+ /* move to end of next token */
+ quoting = 0;
+ while(*t!='\0' && (quoting || utfrune(sep, *t)==nil)){
+ if(*t != '\''){
+ t++;
+ continue;
+ }
+ /* *t is a quote */
+ if(!quoting){
+ quoting = 1;
+ t++;
+ continue;
+ }
+ /* quoting and we're on a quote */
+ if(t[1] != '\''){
+ /* end of quoted section; absorb closing quote */
+ t++;
+ quoting = 0;
+ continue;
+ }
+ /* doubled quote; fold one quote into two */
+ t += 2;
+ }
+ return t;
+}
+
+int
+gettokens(char *s, char **args, int maxargs, char *sep)
+{
+ int nargs;
+
+ for(nargs=0; nargs<maxargs; nargs++){
+ while(*s!='\0' && utfrune(sep, *s)!=nil)
+ *s++ = '\0';
+ if(*s == '\0')
+ break;
+ args[nargs] = s;
+ s = etoken(s, sep);
+ }
+
+ return nargs;
+}
+
+int
+tokenize(char *s, char **args, int maxargs)
+{
+ int nargs;
+
+ for(nargs=0; nargs<maxargs; nargs++){
+ while(*s!='\0' && utfrune(qsep, *s)!=nil)
+ s++;
+ if(*s == '\0')
+ break;
+ args[nargs] = s;
+ s = qtoken(s, qsep);
+ }
+
+ return nargs;
+}