aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cmd/acme/regx.c110
1 files changed, 51 insertions, 59 deletions
diff --git a/src/cmd/acme/regx.c b/src/cmd/acme/regx.c
index a76f617d..3a3e78c5 100644
--- a/src/cmd/acme/regx.c
+++ b/src/cmd/acme/regx.c
@@ -521,56 +521,26 @@ classmatch(int classno, int c, int negate)
}
/*
- * Add inst to the list [l, l+NLIST), but only if it is not there already.
- * These work lists are stored and processed in increasing
- * order of sep->p[0], so if the inst is there already, the one
- * there already is a more left match and takes priority.
- * (This works for backward searches too, because there
- * is no explicit comparison.)
+ * Note optimization in addinst:
+ * *l must be pending when addinst called; if *l has been looked
+ * at already, the optimization is a bug.
*/
-Ilist*
-addinst1(Ilist *l, Inst *inst, Rangeset *sep)
-{
- Ilist *p;
-
- for(p = l; p->inst; p++)
- if(p->inst==inst)
- return 0;
-
- if(p == l+NLIST)
- return l+NLIST;
-
- p->inst = inst;
- p->se = *sep;
- (p+1)->inst = 0;
- return p;
-}
-
int
addinst(Ilist *l, Inst *inst, Rangeset *sep)
{
- Ilist *ap;
-
- ap = addinst1(l, inst, sep);
- if(ap == 0)
- return 0;
- if(ap == l+NLIST)
- return -1;
-
- /*
- * Added inst to list at ap.
- * Expand any ORs right now, so that entire
- * work list ends up being sorted by increasing sep->p[0].
- */
- for(; ap->inst; ap++){
- if(ap->inst->type == OR){
- if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
- return -1;
- if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
- return -1;
+ Ilist *p;
+
+ for(p = l; p->inst; p++){
+ if(p->inst==inst){
+ if((sep)->r[0].q0 < p->se.r[0].q0)
+ p->se= *sep; /* this would be bug */
+ return 0; /* It's already there */
}
}
- return 0;
+ p->inst = inst;
+ p->se= *sep;
+ (p+1)->inst = nil;
+ return 1;
}
int
@@ -587,6 +557,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
Inst *inst;
Ilist *tlp;
uint p;
+ int nnl, ntl;
int nc, c;
int wrapped;
int startchar;
@@ -595,6 +566,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
p = startp;
startchar = 0;
wrapped = 0;
+ nnl = 0;
if(startinst->type<OPERATOR)
startchar = startinst->type;
list[0][0].inst = list[1][0].inst = nil;
@@ -604,7 +576,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
else
nc = runestrlen(r);
/* Execute machine once for each character */
- nl = list[0];
for(;;p++){
doloop:
if(p>=eof || p>=nc){
@@ -623,7 +594,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
}
c = 0;
}else{
- if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
+ if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
break;
if(t != nil)
c = textreadc(t, p);
@@ -631,15 +602,18 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
c = r[p];
}
/* fast check for first char */
- if(startchar && nl->inst==0 && c!=startchar)
+ if(startchar && nnl==0 && c!=startchar)
continue;
tl = list[flag];
nl = list[flag^=1];
nl->inst = nil;
+ ntl = nnl;
+ nnl = 0;
if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
/* Add first instruction to this list */
sempty.r[0].q0 = p;
- if(addinst(tl, startinst, &sempty) < 0){
+ if(addinst(tl, startinst, &sempty))
+ if(++ntl >= NLIST){
Overflow:
warning(nil, "regexp list overflow\n");
sel.r[0].q0 = -1;
@@ -653,7 +627,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
default: /* regular character */
if(inst->type==c){
Addinst:
- if(addinst(nl, inst->u1.next, &tlp->se) < 0)
+ if(addinst(nl, inst->u1.next, &tlp->se))
+ if(++nnl >= NLIST)
goto Overflow;
}
break;
@@ -691,8 +666,13 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
goto Addinst;
break;
case OR:
- /* already expanded; just a placeholder */
- break;
+ /* evaluate right choice later */
+ if(addinst(tl, inst->u.right, &tlp->se))
+ if(++ntl >= NLIST)
+ goto Overflow;
+ /* efficiency: advance and re-evaluate */
+ inst = inst->u1.left;
+ goto Switchstmt;
case END: /* Match! */
tlp->se.r[0].q1 = p;
newmatch(&tlp->se);
@@ -720,11 +700,13 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
Inst *inst;
Ilist *tlp;
int p;
+ int nnl, ntl;
int c;
int wrapped;
int startchar;
flag = 0;
+ nnl = 0;
wrapped = 0;
p = startp;
startchar = 0;
@@ -733,7 +715,6 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
list[0][0].inst = list[1][0].inst = nil;
sel.r[0].q0= -1;
/* Execute machine once for each character, including terminal NUL */
- nl = list[0];
for(;;--p){
doloop:
if(p <= 0){
@@ -753,20 +734,24 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
}
c = 0;
}else{
- if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
+ if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
break;
c = textreadc(t, p-1);
}
/* fast check for first char */
- if(startchar && nl->inst==0 && c!=startchar)
+ if(startchar && nnl==0 && c!=startchar)
continue;
tl = list[flag];
nl = list[flag^=1];
nl->inst = nil;
+ ntl = nnl;
+ nnl = 0;
if(sel.r[0].q0<0 && (!wrapped || p>startp)){
/* Add first instruction to this list */
- sempty.r[0].q0 = p;
- if(addinst(tl, bstartinst, &sempty) < 0){
+ /* the minus is so the optimizations in addinst work */
+ sempty.r[0].q0 = -p;
+ if(addinst(tl, bstartinst, &sempty))
+ if(++ntl >= NLIST){
Overflow:
warning(nil, "regexp list overflow\n");
sel.r[0].q0 = -1;
@@ -780,7 +765,8 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
default: /* regular character */
if(inst->type == c){
Addinst:
- if(addinst(nl, inst->u1.next, &tlp->se) < 0)
+ if(addinst(nl, inst->u1.next, &tlp->se))
+ if(++nnl >= NLIST)
goto Overflow;
}
break;
@@ -818,9 +804,15 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
goto Addinst;
break;
case OR:
- /* already expanded; just a placeholder */
- break;
+ /* evaluate right choice later */
+ if(addinst(tl, inst->u.right, &tlp->se))
+ if(++ntl >= NLIST)
+ goto Overflow;
+ /* efficiency: advance and re-evaluate */
+ inst = inst->u1.left;
+ goto Switchstmt;
case END: /* Match! */
+ tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
tlp->se.r[0].q1 = p;
bnewmatch(&tlp->se);
break;