diff options
-rw-r--r-- | src/cmd/acme/regx.c | 110 |
1 files changed, 51 insertions, 59 deletions
diff --git a/src/cmd/acme/regx.c b/src/cmd/acme/regx.c index a76f617d..3a3e78c5 100644 --- a/src/cmd/acme/regx.c +++ b/src/cmd/acme/regx.c @@ -521,56 +521,26 @@ classmatch(int classno, int c, int negate) } /* - * Add inst to the list [l, l+NLIST), but only if it is not there already. - * These work lists are stored and processed in increasing - * order of sep->p[0], so if the inst is there already, the one - * there already is a more left match and takes priority. - * (This works for backward searches too, because there - * is no explicit comparison.) + * Note optimization in addinst: + * *l must be pending when addinst called; if *l has been looked + * at already, the optimization is a bug. */ -Ilist* -addinst1(Ilist *l, Inst *inst, Rangeset *sep) -{ - Ilist *p; - - for(p = l; p->inst; p++) - if(p->inst==inst) - return 0; - - if(p == l+NLIST) - return l+NLIST; - - p->inst = inst; - p->se = *sep; - (p+1)->inst = 0; - return p; -} - int addinst(Ilist *l, Inst *inst, Rangeset *sep) { - Ilist *ap; - - ap = addinst1(l, inst, sep); - if(ap == 0) - return 0; - if(ap == l+NLIST) - return -1; - - /* - * Added inst to list at ap. - * Expand any ORs right now, so that entire - * work list ends up being sorted by increasing sep->p[0]. - */ - for(; ap->inst; ap++){ - if(ap->inst->type == OR){ - if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST) - return -1; - if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST) - return -1; + Ilist *p; + + for(p = l; p->inst; p++){ + if(p->inst==inst){ + if((sep)->r[0].q0 < p->se.r[0].q0) + p->se= *sep; /* this would be bug */ + return 0; /* It's already there */ } } - return 0; + p->inst = inst; + p->se= *sep; + (p+1)->inst = nil; + return 1; } int @@ -587,6 +557,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) Inst *inst; Ilist *tlp; uint p; + int nnl, ntl; int nc, c; int wrapped; int startchar; @@ -595,6 +566,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) p = startp; startchar = 0; wrapped = 0; + nnl = 0; if(startinst->type<OPERATOR) startchar = startinst->type; list[0][0].inst = list[1][0].inst = nil; @@ -604,7 +576,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) else nc = runestrlen(r); /* Execute machine once for each character */ - nl = list[0]; for(;;p++){ doloop: if(p>=eof || p>=nc){ @@ -623,7 +594,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) } c = 0; }else{ - if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0) + if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0) break; if(t != nil) c = textreadc(t, p); @@ -631,15 +602,18 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) c = r[p]; } /* fast check for first char */ - if(startchar && nl->inst==0 && c!=startchar) + if(startchar && nnl==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = nil; + ntl = nnl; + nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){ /* Add first instruction to this list */ sempty.r[0].q0 = p; - if(addinst(tl, startinst, &sempty) < 0){ + if(addinst(tl, startinst, &sempty)) + if(++ntl >= NLIST){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; @@ -653,7 +627,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) default: /* regular character */ if(inst->type==c){ Addinst: - if(addinst(nl, inst->u1.next, &tlp->se) < 0) + if(addinst(nl, inst->u1.next, &tlp->se)) + if(++nnl >= NLIST) goto Overflow; } break; @@ -691,8 +666,13 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp) goto Addinst; break; case OR: - /* already expanded; just a placeholder */ - break; + /* evaluate right choice later */ + if(addinst(tl, inst->u.right, &tlp->se)) + if(++ntl >= NLIST) + goto Overflow; + /* efficiency: advance and re-evaluate */ + inst = inst->u1.left; + goto Switchstmt; case END: /* Match! */ tlp->se.r[0].q1 = p; newmatch(&tlp->se); @@ -720,11 +700,13 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) Inst *inst; Ilist *tlp; int p; + int nnl, ntl; int c; int wrapped; int startchar; flag = 0; + nnl = 0; wrapped = 0; p = startp; startchar = 0; @@ -733,7 +715,6 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) list[0][0].inst = list[1][0].inst = nil; sel.r[0].q0= -1; /* Execute machine once for each character, including terminal NUL */ - nl = list[0]; for(;;--p){ doloop: if(p <= 0){ @@ -753,20 +734,24 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) } c = 0; }else{ - if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0) + if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0) break; c = textreadc(t, p-1); } /* fast check for first char */ - if(startchar && nl->inst==0 && c!=startchar) + if(startchar && nnl==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = nil; + ntl = nnl; + nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p>startp)){ /* Add first instruction to this list */ - sempty.r[0].q0 = p; - if(addinst(tl, bstartinst, &sempty) < 0){ + /* the minus is so the optimizations in addinst work */ + sempty.r[0].q0 = -p; + if(addinst(tl, bstartinst, &sempty)) + if(++ntl >= NLIST){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; @@ -780,7 +765,8 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) default: /* regular character */ if(inst->type == c){ Addinst: - if(addinst(nl, inst->u1.next, &tlp->se) < 0) + if(addinst(nl, inst->u1.next, &tlp->se)) + if(++nnl >= NLIST) goto Overflow; } break; @@ -818,9 +804,15 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) goto Addinst; break; case OR: - /* already expanded; just a placeholder */ - break; + /* evaluate right choice later */ + if(addinst(tl, inst->u.right, &tlp->se)) + if(++ntl >= NLIST) + goto Overflow; + /* efficiency: advance and re-evaluate */ + inst = inst->u1.left; + goto Switchstmt; case END: /* Match! */ + tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */ tlp->se.r[0].q1 = p; bnewmatch(&tlp->se); break; |