aboutsummaryrefslogtreecommitdiff
path: root/src/libregexp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libregexp')
-rw-r--r--src/libregexp/lib9.h8
-rw-r--r--src/libregexp/lib9.h.std6
-rw-r--r--src/libregexp/mkfile1
-rw-r--r--src/libregexp/regaux.c46
-rw-r--r--src/libregexp/regcomp.c10
-rw-r--r--src/libregexp/regcomp.h6
-rw-r--r--src/libregexp/regerror.c2
-rw-r--r--src/libregexp/regexec.c42
-rw-r--r--src/libregexp/regsub.c3
-rw-r--r--src/libregexp/rregaux.c26
-rw-r--r--src/libregexp/rregexec.c20
-rw-r--r--src/libregexp/rregsub.c22
12 files changed, 111 insertions, 81 deletions
diff --git a/src/libregexp/lib9.h b/src/libregexp/lib9.h
index d022656d..07a1f976 100644
--- a/src/libregexp/lib9.h
+++ b/src/libregexp/lib9.h
@@ -1,6 +1,2 @@
-#include <fmt.h>
-#include <setjmp.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-
+#include <u.h>
+#include <libc.h>
diff --git a/src/libregexp/lib9.h.std b/src/libregexp/lib9.h.std
new file mode 100644
index 00000000..d022656d
--- /dev/null
+++ b/src/libregexp/lib9.h.std
@@ -0,0 +1,6 @@
+#include <fmt.h>
+#include <setjmp.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
diff --git a/src/libregexp/mkfile b/src/libregexp/mkfile
index 8cb740cc..ad482240 100644
--- a/src/libregexp/mkfile
+++ b/src/libregexp/mkfile
@@ -9,7 +9,6 @@ OFILES=\
regexec.$O\
regsub.$O\
regaux.$O\
- rregaux.$O\
rregexec.$O\
rregsub.$O\
diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c
index 956c1eb0..b854b5ac 100644
--- a/src/libregexp/regaux.c
+++ b/src/libregexp/regaux.c
@@ -30,19 +30,27 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
extern Relist*
_renewthread(Relist *lp, /* _relist to add to */
Reinst *ip, /* instruction to add */
+ int ms,
Resublist *sep) /* pointers to subexpressions */
{
Relist *p;
for(p=lp; p->inst; p++){
if(p->inst == ip){
- if((sep)->m[0].s.sp < p->se.m[0].s.sp)
- p->se = *sep;
+ if(sep->m[0].s.sp < p->se.m[0].s.sp){
+ if(ms > 1)
+ p->se = *sep;
+ else
+ p->se.m[0] = sep->m[0];
+ }
return 0;
}
}
p->inst = ip;
- p->se = *sep;
+ if(ms > 1)
+ p->se = *sep;
+ else
+ p->se.m[0] = sep->m[0];
(++p)->inst = 0;
return p;
}
@@ -54,6 +62,7 @@ _renewthread(Relist *lp, /* _relist to add to */
extern Relist*
_renewemptythread(Relist *lp, /* _relist to add to */
Reinst *ip, /* instruction to add */
+ int ms,
char *sp) /* pointers to subexpressions */
{
Relist *p;
@@ -61,16 +70,43 @@ _renewemptythread(Relist *lp, /* _relist to add to */
for(p=lp; p->inst; p++){
if(p->inst == ip){
if(sp < p->se.m[0].s.sp) {
- memset((void *)&p->se, 0, sizeof(p->se));
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.sp = sp;
}
return 0;
}
}
p->inst = ip;
- memset((void *)&p->se, 0, sizeof(p->se));
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
p->se.m[0].s.sp = sp;
(++p)->inst = 0;
return p;
}
+extern Relist*
+_rrenewemptythread(Relist *lp, /* _relist to add to */
+ Reinst *ip, /* instruction to add */
+ int ms, /* when > 1 the whole p->se is zeroed before the start is set */
+ Rune *rsp) /* start position stored in se.m[0].s.rsp */
+{
+ Relist *p;
+
+ for(p=lp; p->inst; p++){ /* list ends at the first entry whose inst is 0 */
+ if(p->inst == ip){
+ if(rsp < p->se.m[0].s.rsp) { /* existing entry: keep the earlier start */
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ }
+ return 0; /* ip was already on the list; nothing appended */
+ }
+ }
+ p->inst = ip;
+ if(ms > 1)
+ memset(&p->se, 0, sizeof(p->se));
+ p->se.m[0].s.rsp = rsp;
+ (++p)->inst = 0; /* re-terminate the list after the new entry */
+ return p; /* points at the new terminator */
+}
diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
index 6c6939c6..796efc41 100644
--- a/src/libregexp/regcomp.c
+++ b/src/libregexp/regcomp.c
@@ -15,8 +15,6 @@ struct Node
Reinst* last;
}Node;
-Reprog RePrOg;
-
#define NSTACK 20
static Node andstack[NSTACK];
static Node *andp;
@@ -252,7 +250,7 @@ optimize(Reprog *pp)
* and then relocate the code.
*/
size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
- npp = (Reprog *)realloc(pp, size);
+ npp = realloc(pp, size);
if(npp==0 || npp==pp)
return pp;
diff = (char *)npp - (char *)pp;
@@ -303,12 +301,12 @@ dump(Reprog *pp)
print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
if(l->type == RUNE)
- print("\t%C\n", l->r);
+ print("\t%C\n", l->u1.r);
else if(l->type == CCLASS || l->type == NCCLASS){
print("\t[");
if(l->type == NCCLASS)
print("^");
- for(p = l->cp->spans; p < l->cp->end; p += 2)
+ for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
if(p[0] == p[1])
print("%C", p[0]);
else
@@ -477,7 +475,7 @@ regcomp1(char *s, int literal, int dot_type)
Reprog *pp;
/* get memory for the program */
- pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
+ pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
if(pp == 0){
regerror("out of memory");
return 0;
diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
index a6728b0e..6c88cd09 100644
--- a/src/libregexp/regcomp.h
+++ b/src/libregexp/regcomp.h
@@ -68,7 +68,7 @@ struct Reljunk
Rune* reol;
};
-extern Relist* _renewthread(Relist*, Reinst*, Resublist*);
+extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
extern void _renewmatch(Resub*, int, Resublist*);
-extern Relist* _renewemptythread(Relist*, Reinst*, char*);
-extern Relist* _rrenewemptythread(Relist*, Reinst*, Rune*);
+extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
+extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
diff --git a/src/libregexp/regerror.c b/src/libregexp/regerror.c
index 2cd1e3e2..99ff0c74 100644
--- a/src/libregexp/regerror.c
+++ b/src/libregexp/regerror.c
@@ -10,5 +10,5 @@ regerror(char *s)
strcat(buf, s);
strcat(buf, "\n");
write(2, buf, strlen(buf));
- exit(1);
+ exits("regerr");
}
diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c
index c9f1eba2..62ab1a31 100644
--- a/src/libregexp/regexec.c
+++ b/src/libregexp/regexec.c
@@ -48,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
switch(j->starttype) {
case RUNE:
p = utfrune(s, j->startchar);
- if(p == 0)
+ if(p == 0 || s == j->eol)
return match;
s = p;
break;
@@ -56,14 +56,14 @@ regexec1(Reprog *progp, /* program to run */
if(s == bol)
break;
p = utfrune(s, '\n');
- if(p == 0)
+ if(p == 0 || s == j->eol)
return match;
s = p;
break;
}
}
r = *(uchar*)s;
- if(r < (Rune)Runeself)
+ if(r < Runeself)
n = 1;
else
n = chartorune(&r, s);
@@ -77,7 +77,7 @@ regexec1(Reprog *progp, /* program to run */
/* Add first instruction to current list */
if(match == 0)
- _renewemptythread(tl, progp->startinst, s);
+ _renewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
@@ -85,7 +85,7 @@ regexec1(Reprog *progp, /* program to run */
switch(inst->type){
case RUNE: /* regular character */
if(inst->u1.r == r){
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
}
break;
@@ -97,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
continue;
case ANY:
if(r != '\n')
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
@@ -116,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
}
@@ -127,12 +127,12 @@ regexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
- if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
+ if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
@@ -162,15 +162,27 @@ regexec2(Reprog *progp, /* program to run */
Reljunk *j
)
{
- Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE];
+ int rv;
+ Relist *relist0, *relist1;
/* mark space */
+ relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
+ if(relist0 == nil)
+ return -1;
+ relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
+ if(relist1 == nil){
+ free(relist0); /* relist1 is nil on this path; release the list that was allocated */
+ return -1;
+ }
j->relist[0] = relist0;
j->relist[1] = relist1;
- j->reliste[0] = relist0 + nelem(relist0) - 2;
- j->reliste[1] = relist1 + nelem(relist1) - 2;
+ j->reliste[0] = relist0 + BIGLISTSIZE - 2;
+ j->reliste[1] = relist1 + BIGLISTSIZE - 2;
- return regexec1(progp, bol, mp, ms, j);
+ rv = regexec1(progp, bol, mp, ms, j);
+ free(relist0);
+ free(relist1);
+ return rv;
}
extern int
@@ -196,7 +208,7 @@ regexec(Reprog *progp, /* program to run */
}
j.starttype = 0;
j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
+ if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
j.starttype = RUNE;
j.startchar = progp->startinst->u1.r;
}
diff --git a/src/libregexp/regsub.c b/src/libregexp/regsub.c
index 6de2c957..ab5ad99d 100644
--- a/src/libregexp/regsub.c
+++ b/src/libregexp/regsub.c
@@ -53,9 +53,10 @@ regsub(char *sp, /* source string */
ssp < mp[0].e.ep; ssp++)
if(dp < ep)
*dp++ = *ssp;
- }else
+ }else{
if(dp < ep)
*dp++ = *sp;
+ }
sp++;
}
*dp = '\0';
diff --git a/src/libregexp/rregaux.c b/src/libregexp/rregaux.c
deleted file mode 100644
index f4cb0068..00000000
--- a/src/libregexp/rregaux.c
+++ /dev/null
@@ -1,26 +0,0 @@
-#include "lib9.h"
-#include "regexp9.h"
-#include "regcomp.h"
-
-extern Relist*
-_rrenewemptythread(Relist *lp, /* _relist to add to */
- Reinst *ip, /* instruction to add */
- Rune *rsp) /* pointers to subexpressions */
-{
- Relist *p;
-
- for(p=lp; p->inst; p++){
- if(p->inst == ip){
- if(rsp < p->se.m[0].s.rsp) {
- memset((void *)&p->se, 0, sizeof(p->se));
- p->se.m[0].s.rsp = rsp;
- }
- return 0;
- }
- }
- p->inst = ip;
- memset((void *)&p->se, 0, sizeof(p->se));
- p->se.m[0].s.rsp = rsp;
- (++p)->inst = 0;
- return p;
-}
diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
index e96c9721..eece0eb9 100644
--- a/src/libregexp/rregexec.c
+++ b/src/libregexp/rregexec.c
@@ -45,7 +45,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(j->starttype) {
case RUNE:
while(*s != j->startchar) {
- if(*s == 0)
+ if(*s == 0 || s == j->reol)
return match;
s++;
}
@@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
if(s == bol)
break;
while(*s != '\n') {
- if(*s == 0)
+ if(*s == 0 || s == j->reol)
return match;
s++;
}
@@ -72,7 +72,7 @@ rregexec1(Reprog *progp, /* program to run */
nl->inst = 0;
/* Add first instruction to current list */
- _rrenewemptythread(tl, progp->startinst, s);
+ _rrenewemptythread(tl, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){
@@ -80,7 +80,7 @@ rregexec1(Reprog *progp, /* program to run */
switch(inst->type){
case RUNE: /* regular character */
if(inst->u1.r == r)
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case LBRA:
@@ -91,11 +91,11 @@ rregexec1(Reprog *progp, /* program to run */
continue;
case ANY:
if(r != '\n')
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case ANYNL:
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case BOL:
@@ -110,7 +110,7 @@ rregexec1(Reprog *progp, /* program to run */
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
}
@@ -121,12 +121,12 @@ rregexec1(Reprog *progp, /* program to run */
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
- if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
+ if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
return -1;
break;
case OR:
/* evaluate right choice later */
- if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
+ if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
return -1;
/* efficiency: advance and re-evaluate */
continue;
@@ -190,7 +190,7 @@ rregexec(Reprog *progp, /* program to run */
}
j.starttype = 0;
j.startchar = 0;
- if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
+ if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
j.starttype = RUNE;
j.startchar = progp->startinst->u1.r;
}
diff --git a/src/libregexp/rregsub.c b/src/libregexp/rregsub.c
index 15f3c174..5a4a564d 100644
--- a/src/libregexp/rregsub.c
+++ b/src/libregexp/rregsub.c
@@ -5,12 +5,14 @@
extern void
rregsub(Rune *sp, /* source string */
Rune *dp, /* destination string */
+ int dlen,
Resub *mp, /* subexpression elements */
int ms) /* number of elements pointed to by mp */
{
- Rune *ssp;
+ Rune *ssp, *ep;
int i;
+ ep = dp+(dlen/sizeof(Rune))-1;
while(*sp != '\0'){
if(*sp == '\\'){
switch(*++sp){
@@ -29,16 +31,19 @@ rregsub(Rune *sp, /* source string */
for(ssp = mp[i].s.rsp;
ssp < mp[i].e.rep;
ssp++)
- *dp++ = *ssp;
+ if(dp < ep)
+ *dp++ = *ssp;
break;
case '\\':
- *dp++ = '\\';
+ if(dp < ep)
+ *dp++ = '\\';
break;
case '\0':
sp--;
break;
default:
- *dp++ = *sp;
+ if(dp < ep)
+ *dp++ = *sp;
break;
}
}else if(*sp == '&'){
@@ -46,9 +51,12 @@ rregsub(Rune *sp, /* source string */
if(mp[0].s.rsp != 0)
for(ssp = mp[0].s.rsp;
ssp < mp[0].e.rep; ssp++)
- *dp++ = *ssp;
- }else
- *dp++ = *sp;
+ if(dp < ep)
+ *dp++ = *ssp;
+ }else{
+ if(dp < ep)
+ *dp++ = *sp;
+ }
sp++;
}
*dp = '\0';