From 3940506bccddeff3235cd8874c540813a3deaf6d Mon Sep 17 00:00:00 2001 From: rsc Date: Thu, 13 Jan 2005 04:56:07 +0000 Subject: forgotten files --- bin/sig | 29 + lib/bclib | 246 +++++ man/man1/9.1 | 19 + man/man3/9p-cmdbuf.3 | 119 ++ man/man3/9p-fid.3 | 204 ++++ man/man3/9p-file.3 | 223 ++++ man/man3/9p-intmap.3 | 126 +++ proto/allproto | 1 + src/cmd/sed.c | 1447 +++++++++++++++++++++++++ src/cmd/yacc.c | 2942 ++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 5356 insertions(+) create mode 100755 bin/sig create mode 100755 lib/bclib create mode 100644 man/man1/9.1 create mode 100644 man/man3/9p-cmdbuf.3 create mode 100644 man/man3/9p-fid.3 create mode 100644 man/man3/9p-file.3 create mode 100644 man/man3/9p-intmap.3 create mode 100644 proto/allproto create mode 100644 src/cmd/sed.c create mode 100644 src/cmd/yacc.c diff --git a/bin/sig b/bin/sig new file mode 100755 index 00000000..b2f797f3 --- /dev/null +++ b/bin/sig @@ -0,0 +1,29 @@ +#!/usr/local/plan9/bin/rc +# Usage: sig key ... +# prints out function signatures by grepping the manual + + +*=`{echo $*|tr A-Z a-z|tr -dc 'a-z0-9_ \012'} # fold case, delete funny chars +if(~ $#* 0){ + echo Usage: sig function ... >[1=2] + exit 1 +} + +for (i) { + files=`{9 grep -il '[ ]\*?'$i'\(' $PLAN9/man/man3/*.3*} + for(j in $files) { + {echo .nr LL 20i; 9 sed -n '/^.SH SYNOPSIS/,/^.SH.*DESCR/p' $j } | + 9 nroff -man | + 9 sed ' + :a + /,$/ { + N + s/\n// + } + ta + s/[ ]+/ /g' | + 9 grep -i -e '[ ]\*?'$i'\(' | sed 's/^[ +]/ /' + } +} + +exit 0 diff --git a/lib/bclib b/lib/bclib new file mode 100755 index 00000000..97b1c92e --- /dev/null +++ b/lib/bclib @@ -0,0 +1,246 @@ +scale = 50 +define e(x) { + auto a, b, c, d, e, g, w, y, t, r + + r = ibase + ibase = A + + t = scale + scale = t + .434*x + 1 + + w = 0 + if(x<0) { + x = -x + w = 1 + } + y = 0 + while(x>2) { + x /= 2 + y++ + } + + a = 1 + b = 1 + c = b + d = 1 + e = 1 + for(a=1; 1; a++) { + b *= x + c = c*a+b + d *= a + g = c/d + if(g == e) { + g = g/1 + while(y--) { + g *= g + } + scale = t + if(w==1) { + ibase = r + return 1/g + } + ibase = r + return g/1 + } + e = g + } +} + +define l(x) { + auto a, b, c, d, e, f, g, u, s, t, r, z + + r = ibase + ibase = A + if(x <= 0) { + z = 1-10^scale + ibase = r + return z + } + t = scale + + f = 1 + scale += scale(x) - length(x) + 1 + s = scale + while(x > 2) { + s += (length(x)-scale(x))/2 + 1 + if(s>0) { + scale = s + } + x = sqrt(x) + f *= 2 + } + while(x < .5) { + s += (length(x)-scale(x))/2 + 1 + if(s>0) { + scale = s + } + x = sqrt(x) + f *= 2 + } + + scale = t + length(f) - scale(f) + 1 + u = (x-1)/(x+1) + + scale += 1.1*length(t) - 1.1*scale(t) + s = u*u + b = 2*f + c = b + d = 1 + e = 1 + for(a=3; 1; a=a+2){ + b *= s + c = c*a + d*b + d *= a + g = c/d + if(g==e) { + scale = t + ibase = r + return u*c/d + } + e = g + } +} + +define s(x) { + auto a, b, c, s, t, y, p, n, i, r + + r = ibase + ibase = A + t = scale + y = x/.7853 + s = t + length(y) - scale(y) + if(s=0) { + n = (x/(2*p)+1)/2 + } + if(x<0) { + n = (x/(2*p)-1)/2 + } + x -= 4*n*p + if(n%2 != 0) { + x = -x + } + + scale = t + length(1.2*t) - scale(1.2*t) + y = -x*x + a = x + b = 1 + s = x + for(i=3; 1; i+=2) { + a *= y + b *= i*(i-1) + c = a/b + if(c==0){ + scale = t + ibase = r + return s/1 + } + s += c + } +} + +define c(x) { + auto t, r + + r = ibase + ibase = A + t = scale + scale = scale+1 + x = s(x + 2*a(1)) + scale = t + ibase = r + return x/1 +} + +define a(x) { + auto a, b, c, d, e, f, g, s, t, r, z + + r = ibase + ibase = A + if(x==0) { + return 0 + } + if(x==1) { + z = .7853981633974483096156608458198757210492923498437764/1 + ibase = r + if(scale<52) { + return z + } + } + t = scale + f = 1 + while(x > .5) { + scale++ + x = -(1 - sqrt(1.+x*x))/x + f *= 2 + } + while(x < -.5) { + scale++ + x = -(1 - sqrt(1.+x*x))/x + f *= 2 + } + s = -x*x + b = f + c = f + d = 1 + e = 1 + for(a=3; 1; a+=2) { + b *= s + c = c*a + d*b + d *= a + g = c/d + if(g==e) { + scale = t + ibase = r + return x*c/d + } + e = g + } +} + +define j(n,x) { + auto a,b,c,d,e,g,i,s,k,t,r + + r = ibase + ibase = A + + t = scale + k = 1.36*x + 1.16*t - n + k = length(k) - scale(k) + if(k>0) { + scale += k + } + + s = -x*x/4 + if(n<0) { + n = -n + x = -x + } + a = 1 + c = 1 + for(i=1; i<=n; i++) { + a *= x + c *= 2*i + } + b = a + d = 1 + e = 1 + for(i=1; 1; i++) { + a *= s + b = b*i*(n+i) + a + c *= i*(n+i) + g = b/c + if(g==e) { + scale = t + ibase = r + return g/1 + } + e = g + } +} diff --git a/man/man1/9.1 b/man/man1/9.1 new file mode 100644 index 00000000..d590f3b3 --- /dev/null +++ b/man/man1/9.1 @@ -0,0 +1,19 @@ +.TH 9 1 +.SH NAME +9 \- run Plan 9 commands +.SH SYNOPSIS +.B . +.B 9 +.PP +.B 9 +.I cmd +[ +.I args +\&... +] +.SH DESCRIPTION +XXX +.SH SOURCE +.B \*9/bin/9 +.SH SEE ALSO +.IR intro (1) diff --git a/man/man3/9p-cmdbuf.3 b/man/man3/9p-cmdbuf.3 new file mode 100644 index 00000000..6aca825b --- /dev/null +++ b/man/man3/9p-cmdbuf.3 @@ -0,0 +1,119 @@ +.TH 9P-CMDBUF 3 +.SH NAME +Cmdbuf, parsecmd, respondcmderror, lookupcmd \- control message parsing +.SH SYNOPSIS +.ft L +.nf +#include +#include +#include +#include +#include <9p.h> +.fi +.PP +.ft L +.nf +.ta \w'\fL1234'u +\w'\fL12345678'u +typedef struct Cmdbuf +{ + char *buf; + char **f; + int nf; +} Cmdbuf; + +typedef struct Cmdtab +{ + int index; + char *cmd; + int narg; +}; + +Cmdbuf *parsecmd(char *p, int n) +Cmdtab *lookupcmd(Cmdbuf *cb, Cmdtab *tab, int ntab) +void respondcmderror(Req *r, Cmdbuf *cb, char *fmt, ...) +.fi +.SH DESCRIPTION +These data structures and functions provide parsing of textual control messages. +.PP +.I Parsecmd +treats the +.I n +bytes at +.I p +(which need not be NUL-terminated) as a UTF string and splits it +using +.I tokenize +(see +.IR getfields (3)). +It returns a +.B Cmdbuf +structure holding pointers to each field in the message. +.PP +.I Lookupcmd +walks through the array +.IR ctab , +which has +.I ntab +entries, +looking for the first +.B Cmdtab +that matches the parsed command. +(If the parsed command is empty, +.I lookupcmd +returns nil immediately.) +A +.B Cmdtab +matches the command if +.I cmd +is equal to +.IB cb -> f [0] +or if +.I cmd +is +.LR * . +Once a matching +.B Cmdtab +has been found, if +.I narg +is not zero, then the parsed command +must have exactly +.I narg +fields (including the command string itself). +If the command has the wrong number of arguments, +.I lookupcmd +returns nil. +Otherwise, it returns a pointer to the +.B Cmdtab +entry. +If +.I lookupcmd +does not find a matching command at all, +it returns nil. +Whenever +.I lookupcmd +returns nil, it sets the system error string. +.PP +.I Respondcmderror +resoponds to request +.I r +with an error of the form +`\fIfmt\fB:\fI cmd\fR,' +where +.I fmt +is the formatted string and +.I cmd +is a reconstruction of the parsed command. +Fmt +is often simply +.B "%r" . +.SH EXAMPLES +This interface is not used in any distributed 9P servers. +It was lifted from the Plan 9 kernel. +Almost any Plan 9 kernel driver +.RB ( /sys/src/9/*/dev*.c +on Plan 9) +is a good example. +.SH SOURCE +.B \*9/src/lib9p/parse.c +.SH SEE ALSO +.IR 9p (3) diff --git a/man/man3/9p-fid.3 b/man/man3/9p-fid.3 new file mode 100644 index 00000000..ddc3e093 --- /dev/null +++ b/man/man3/9p-fid.3 @@ -0,0 +1,204 @@ +.TH 9P-FID 3 +.SH NAME +Fid, Fidpool, allocfidpool, freefidpool, allocfid, closefid, lookupfid, removefid, +Req, Reqpool, allocreqpool, freereqpool, allocreq, closereq, lookupreq, removereq \- 9P fid, request tracking +.SH SYNOPSIS +.ft L +.nf +#include +#include +#include +#include +#include <9p.h> +.fi +.PP +.ft L +.nf +.ta \w'\fL 'u +\w'\fLulong 'u +typedef struct Fid +{ + ulong fid; + char omode; /* -1 if not open */ + char *uid; + Qid qid; + File *file; + void *aux; + \fI...\fP +} Fid; +.fi +.PP +.ft L +.nf +.ta \w'\fL 'u +\w'\fLulong 'u +typedef struct Req +{ + ulong tag; + Fcall ifcall; + Fcall ofcall; + Req *oldreq; + void *aux; + \fI...\fP +} Req; +.fi +.PP +.ft L +.nf +.ta \w'\fLFidpool* 'u +Fidpool* allocfidpool(void (*destroy)(Fid*)) +void freefidpool(Fidpool *p) +Fid* allocfid(Fidpool *p, ulong fid) +Fid* lookupfid(Fidpool *p, ulong fid) +void closefid(Fid *f) +void removefid(Fid *f) +.fi +.PP +.ft L +.nf +.ta \w'\fLReqpool* 'u +Reqpool* allocreqpool(void (*destroy)(Req*)) +void freereqpool(Reqpool *p) +Req* allocreq(Reqpool *p, ulong tag) +Req* lookupreq(Reqpool *p, ulong tag) +void closereq(Req *f) +void removereq(Req *r) +.fi +.SH DESCRIPTION +These routines provide management of +.B Fid +and +.B Req +structures from +.BR Fidpool s +and +.BR Reqpool s. +They are primarily used by the 9P server loop +described in +.IR 9p (3). +.PP +.B Fid +structures are intended to represent +active fids in a 9P connection, as +.B Chan +structures do in the Plan 9 kernel. +The +.B fid +element is the integer fid used in the 9P +connection. +.B Omode +is the mode under which the fid was opened, or +.B -1 +if this fid has not been opened yet. +Note that in addition to the values +.BR OREAD , +.BR OWRITE , +and +.BR ORDWR , +.B omode +can contain the various flags permissible in +an open call. +To ignore the flags, use +.BR omode&OMASK . +.B Omode +should not be changed by the client. +The fid derives from a successful authentication by +.BR uid . +.B Qid +contains the qid returned in the last successful +.B walk +or +.B create +transaction involving the fid. +In a file tree-based server, the +.BR Fid 's +.B file +element points at a +.B File +structure +(see +.IR 9p-file (3)) +corresponding to the fid. +The +.B aux +member is intended for use by the +client to hold information specific to a particular +.BR Fid . +With the exception of +.BR aux , +these elements should be treated +as read-only by the client. +.PP +.I Allocfidpool +creates a new +.BR Fidpool . +.I Freefidpool +destroys such a pool. +.I Allocfid +returns a new +.B Fid +whose fid number is +.IR fid . +There must not already be an extant +.B Fid +with that number in the pool. +Once a +.B Fid +has been allocated, it can be looked up by +fid number using +.IR lookupfid . +.BR Fid s +are reference counted: both +.I allocfid +and +.I lookupfid +increment the reference count on the +.B Fid +structure before +returning. +When a reference to a +.B Fid +is no longer needed, +.I closefid +should be called to note the destruction of the reference. +When the last reference to a +.B Fid +is removed, if +.I destroy +(supplied when creating the fid pool) +is not zero, it is called with the +.B Fid +as a parameter. +It should perform whatever cleanup is necessary +regarding the +.B aux +element. +.I Removefid +is equivalent to +.I closefid +but also removes the +.B Fid +from the pool. +Note that due to lingering references, +the return of +.I removefid +may not mean that +.I destroy +has been called. +.PP +.IR Allocreqpool , +.IR freereqpool , +.IR allocreq , +.IR lookupreq , +.IR closereq , +and +.I removereq +are analogous but +operate on +.BR Reqpool s +and +.B Req +structures. +.SH SOURCE +.B \*9/src/lib9p +.SH SEE ALSO +.IR 9p (3), +.IR 9p-file (3) diff --git a/man/man3/9p-file.3 b/man/man3/9p-file.3 new file mode 100644 index 00000000..80866177 --- /dev/null +++ b/man/man3/9p-file.3 @@ -0,0 +1,223 @@ +.TH 9P-FILE 3 +.SH NAME +Tree, alloctree, freetree, +File, createfile, closefile, removefile, walkfile, +opendirfile, readdirfile, closedirfile, hasperm \- in-memory file hierarchy +.SH SYNOPSIS +.ft L +.nf +#include +#include +#include +#include +#include <9p.h> +.fi +.PP +.ft L +.nf +.ta \w'\fLFile 'u +typedef struct File +{ + Ref; + Dir; + void *aux; + \fI...\fP +} File; +.fi +.PP +.ft L +.nf +.ta \w'\fLTree 'u +typedef struct Tree +{ + File *root; + \fI...\fP +} Tree; +.fi +.PP +.ft L +.nf +.ta \w'\fLReaddir* 'u +4n +4n +Tree* alloctree(char *uid, char *gid, ulong mode, + void (*destroy)(File*)) +void freetree(Tree *tree) +File* createfile(File *dir, char *name, char *uid, + ulong mode, void *aux) +int removefile(File *file) +void closefile(File *file) +File* walkfile(File *dir, char *path) +Readdir* opendirfile(File *dir) +long readdirfile(Readdir *rdir, char *buf, long n) +void closedirfile(Readdir *rdir) +int hasperm(File *file, char *uid, int p) +.fi +.SH DESCRIPTION +.BR File s +and +.BR Tree s +provide an in-memory file hierarchy +intended for use in 9P file servers. +.PP +.I Alloctree +creates a new tree of files, and +.I freetree +destroys it. +The root of the tree +(also the +.B root +element in the structure) +will have mode +.I mode +and be owned by user +.I uid +and group +.IR gid . +.I Destroy +is used when freeing +.B File +structures and is described later. +.PP +.BR File s +(including directories) +other than the root are created using +.IR createfile , +which attempts to create a file named +.I name +in the directory +.IR dir . +If created, the file will have owner +.I uid +and have a group inherited from +the directory. +.I Mode +and the permissions of +.I dir +are used to calculate the permission bits for +the file as described in +.IR open (9p). +It is permissible for +.I name +to be a slash-separated path rather than a single element. +.PP +.I Removefile +removes a file from the file tree. +The file will not be freed until the last +reference to it has been removed. +Directories may only be removed when empty. +.I Removefile +returns zero on success, \-1 on error. +It is correct to consider +.I removefile +to be +.I closefile +with the side effect of removing the file +when possible. +.PP +.I Walkfile +evaluates +.I path +relative to the directory +.IR dir , +returning the resulting file, +or zero if the named file or any intermediate element +does not exist. +.PP +The +.B File +structure's +.B aux +pointer may be used by the client +for +.RB per- File +storage. +.BR File s +are reference-counted: if not zero, +.I destroy +(specified in the call to +.IR alloctree ) +will be called for each file when its +last reference is removed or when the tree is freed. +.I Destroy +should take care of any necessary cleanup related to +.BR aux . +When creating new file references by copying pointers, +call +.I incref +(see +.IR lock (3)) +to update the reference count. +To note the removal of a reference to a file, call +.IR closefile . +.I Createfile +and +.I walkfile +return new references. +.IR Removefile , +.IR closefile , +and +.I walkfile +(but not +.IR createfile ) +consume the passed reference. +.PP +Directories may be read, yielding a directory entry structure +(see +.IR stat (9p)) +for each file in the directory. +In order to allow concurrent reading of directories, +clients must obtain a +.B Readdir +structure by calling +.I opendirfile +on a directory. +Subsequent calls to +.I readdirfile +will each yield an integral number of machine-independent +stat buffers, until end of directory. +When finished, call +.I closedirfile +to free the +.BR Readdir . +.PP +.I Hasperm +does simplistic permission checking; it assumes only +one-user groups named by uid and returns non-zero if +.I uid +has permission +.I p +(a bitwise-or of +.BR AREAD , +.BR AWRITE +and +.BR AEXEC ) +according to +.IB file ->mode \fR. +9P servers written using +.B File +trees will do standard permission checks automatically; +.I hasperm +may be called explicitly to do additional checks. +A 9P server may link against a different +.I hasperm +implementation to provide more complex groups. +.SH EXAMPLE +The following code correctly handles references +when elementwise walking a path and creating a file. +.IP +.EX +f = tree->root; +incref(f); +for(i=0; i +#include +#include +#include +#include <9p.h> +.fi +.PP +.ft L +.nf +.ta \w'\fLIntmap* 'u +Intmap* allocmap(void (*inc)(void*)) +void freemap(Intmap *map, void (*dec)(void*)) +void* lookupkey(Intmap *map, ulong key) +void* insertkey(Intmap *map, ulong key, void *val) +int caninsertkey(Intmap *map, ulong key, void *val) +void* lookupkey(Intmap *map, ulong key) +void* deletekey(Intmap *map, ulong key) +.fi +.SH DESCRIPTION +An +.B Intmap +is an arbitrary mapping from integers to pointers. +.I Allocmap +creates a new map, and +.I freemap +destroys it. +The +.I inc +function is called each time a new pointer is added +to the map; similarly, +.I dec +is called on each pointer left in the map +when it is being freed. +Typically these functions maintain reference counts. +New entries are added to the map by calling +.IR insertkey , +which will return the previous value +associated with the given +.IR key , +or zero if there was no previous value. +.I Caninsertkey +is like +.I insertkey +but only inserts +.I val +if there is no current mapping. +It returns 1 if +.I val +was inserted, 0 otherwise. +.I Lookupkey +returns the pointer associated with +.IR key , +or zero if there is no such pointer. +.I Deletekey +removes the entry for +.I id +from the map, returning the +associated pointer, if any. +.PP +Concurrent access to +.BR Intmap s +is safe, +moderated via a +.B QLock +stored in the +.B Intmap +structure. +.PP +In anticipation of the storage of reference-counted +structures, an increment function +.I inc +may be specified +at map creation time. +.I Lookupkey +calls +.I inc +(if non-zero) +on pointers before returning them. +If the reference count adjustments were +left to the caller (and thus not protected by the lock), +it would be possible to accidentally reclaim a structure +if, for example, it was deleted from the map and its +reference count decremented between the return +of +.I insertkey +and the external increment. +.IR Insertkey +and +.IR caninsertkey +do +.I not +call +.I inc +when inserting +.I val +into the map, nor do +.I insertkey +or +.I deletekey +call +.I inc +when returning old map entries. +The rationale is that calling +an insertion function transfers responsibility for the reference +to the map, and responsibility is given back via the return value of +.I deletekey +or the next +.IR insertkey . +.PP +.BR Intmap s +are used by the 9P library to implement +.BR Fidpool s +and +.BR Reqpool s. +.SH SOURCE +.B \*9/src/lib9p/intmap.c +.SH SEE ALSO +.IR 9p (3), +.IR 9p-fid (3) diff --git a/proto/allproto b/proto/allproto new file mode 100644 index 00000000..fd388616 --- /dev/null +++ b/proto/allproto @@ -0,0 +1 @@ ++ diff --git a/src/cmd/sed.c b/src/cmd/sed.c new file mode 100644 index 00000000..182163eb --- /dev/null +++ b/src/cmd/sed.c @@ -0,0 +1,1447 @@ +/* + * sed -- stream editor + * + * + */ +#include +#include +#include +#include + +enum { + DEPTH = 20, /* max nesting depth of {} */ + MAXCMDS = 512, /* max sed commands */ + ADDSIZE = 10000, /* size of add & read buffer */ + MAXADDS = 20, /* max pending adds and reads */ + LBSIZE = 8192, /* input line size */ + LABSIZE = 50, /* max label name size */ + MAXSUB = 10, /* max number of sub reg exp */ + MAXFILES = 120, /* max output files */ +}; + /* An address is a line #, a R.E., "$", a reference to the last + * R.E., or nothing. + */ +typedef struct { + enum { + A_NONE, + A_DOL, + A_LINE, + A_RE, + A_LAST, + }type; + union { + long line; /* Line # */ + Reprog *rp; /* Compiled R.E. */ + } u; +} Addr; + +typedef struct SEDCOM { + Addr ad1; /* optional start address */ + Addr ad2; /* optional end address */ + union { + Reprog *re1; /* compiled R.E. */ + Rune *text; /* added text or file name */ + struct SEDCOM *lb1; /* destination command of branch */ + } u; + Rune *rhs; /* Right-hand side of substitution */ + Biobuf* fcode; /* File ID for read and write */ + char command; /* command code -see below */ + char gfl; /* 'Global' flag for substitutions */ + char pfl; /* 'print' flag for substitutions */ + char active; /* 1 => data between start and end */ + char negfl; /* negation flag */ +} SedCom; + + /* Command Codes for field SedCom.command */ +#define ACOM 01 +#define BCOM 020 +#define CCOM 02 +#define CDCOM 025 +#define CNCOM 022 +#define COCOM 017 +#define CPCOM 023 +#define DCOM 03 +#define ECOM 015 +#define EQCOM 013 +#define FCOM 016 +#define GCOM 027 +#define CGCOM 030 +#define HCOM 031 +#define CHCOM 032 +#define ICOM 04 +#define LCOM 05 +#define NCOM 012 +#define PCOM 010 +#define QCOM 011 +#define RCOM 06 +#define SCOM 07 +#define TCOM 021 +#define WCOM 014 +#define CWCOM 024 +#define YCOM 026 +#define XCOM 033 + + +typedef struct label { /* Label symbol table */ + Rune asc[9]; /* Label name */ + SedCom *chain; + SedCom *address; /* Command associated with label */ +} Label; + +typedef struct FILE_CACHE { /* Data file control block */ + struct FILE_CACHE *next; /* Forward Link */ + char *name; /* Name of file */ +} FileCache; + +SedCom pspace[MAXCMDS]; /* Command storage */ +SedCom *pend = pspace+MAXCMDS; /* End of command storage */ +SedCom *rep = pspace; /* Current fill point */ + +Reprog *lastre = 0; /* Last regular expression */ +Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ + +Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ +Rune *addend = addspace+ADDSIZE; + +SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ +SedCom **aptr = abuf; + +struct { /* Sed program input control block */ + enum PTYPE /* Either on command line or in file */ + { P_ARG, + P_FILE + } type; + union PCTL { /* Pointer to data */ + Biobuf *bp; + char *curr; + } pctl; +} prog; + +Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ + +FileCache *fhead = 0; /* Head of File Cache Chain */ +FileCache *ftail = 0; /* Tail of File Cache Chain */ + +Rune *loc1; /* Start of pattern match */ +Rune *loc2; /* End of pattern match */ +Rune seof; /* Pattern delimiter char */ + +Rune linebuf[LBSIZE+1]; /* Input data buffer */ +Rune *lbend = linebuf+LBSIZE; /* End of buffer */ +Rune *spend = linebuf; /* End of input data */ +Rune *cp; /* Current scan point in linebuf */ + +Rune holdsp[LBSIZE+1]; /* Hold buffer */ +Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ +Rune *hspend = holdsp; /* End of hold data */ + +int nflag; /* Command line flags */ +int gflag; + +int dolflag; /* Set when at true EOF */ +int sflag; /* Set when substitution done */ +int jflag; /* Set when jump required */ +int delflag; /* Delete current line when set */ + +long lnum = 0; /* Input line count */ + +char fname[MAXFILES][40]; /* File name cache */ +Biobuf *fcode[MAXFILES]; /* File ID cache */ +int nfiles = 0; /* Cache fill point */ + +Biobuf fout; /* Output stream */ +Biobuf bstdin; /* Default input */ +Biobuf* f = 0; /* Input data */ + +Label ltab[LABSIZE]; /* Label name symbol table */ +Label *labend = ltab+LABSIZE; /* End of label table */ +Label *lab = ltab+1; /* Current Fill point */ + +int depth = 0; /* {} stack pointer */ + +Rune bad; /* Dummy err ptr reference */ +Rune *badp = &bad; + + +char CGMES[] = "Command garbled: %S"; +char TMMES[] = "Too much text: %S"; +char LTL[] = "Label too long: %S"; +char AD0MES[] = "No addresses allowed: %S"; +char AD1MES[] = "Only one address allowed: %S"; + +void address(Addr *); +void arout(void); +int cmp(char *, char *); +int rcmp(Rune *, Rune *); +void command(SedCom *); +Reprog *compile(void); +Rune *compsub(Rune *, Rune *); +void dechain(void); +void dosub(Rune *); +int ecmp(Rune *, Rune *, int); +void enroll(char *); +void errexit(void); +int executable(SedCom *); +void execute(void); +void fcomp(void); +long getrune(void); +Rune *gline(Rune *); +int match(Reprog *, Rune *); +void newfile(enum PTYPE, char *); +int opendata(void); +Biobuf *open_file(char *); +Rune *place(Rune *, Rune *, Rune *); +void quit(char *, char *); +int rline(Rune *, Rune *); +Label *search(Label *); +int substitute(SedCom *); +char *text(char *); +Rune *stext(Rune *, Rune *); +int ycomp(SedCom *); +char * trans(int c); +void putline(Biobuf *bp, Rune *buf, int n); + +void +main(int argc, char **argv) +{ + int compfl; + + lnum = 0; + Binit(&fout, 1, OWRITE); + fcode[nfiles++] = &fout; + compfl = 0; + + if(argc == 1) + exits(0); + ARGBEGIN{ + case 'n': + nflag++; + continue; + case 'f': + if(argc <= 1) + quit("no pattern-file", 0); + newfile(P_FILE, ARGF()); + fcomp(); + compfl = 1; + continue; + case 'e': + if (argc <= 1) + quit("missing pattern", 0); + newfile(P_ARG, ARGF()); + fcomp(); + compfl = 1; + continue; + case 'g': + gflag++; + continue; + default: + fprint(2, "sed: Unknown flag: %c\n", ARGC()); + continue; + } ARGEND + + if(compfl == 0) { + if (--argc < 0) + quit("missing pattern", 0); + newfile(P_ARG, *argv++); + fcomp(); + } + + if(depth) + quit("Too many {'s", 0); + + ltab[0].address = rep; + + dechain(); + + if(argc <= 0) + enroll(0); /* Add stdin to cache */ + else while(--argc >= 0) { + enroll(*argv++); + } + execute(); + exits(0); +} +void +fcomp(void) +{ + Rune *tp; + SedCom *pt, *pt1; + int i; + Label *lpt; + + static Rune *p = addspace; + static SedCom **cmpend[DEPTH]; /* stack of {} operations */ + + while (rline(linebuf, lbend) >= 0) { + cp = linebuf; +comploop: + while(*cp == ' ' || *cp == '\t') + cp++; + if(*cp == '\0' || *cp == '#') + continue; + if(*cp == ';') { + cp++; + goto comploop; + } + + address(&rep->ad1); + if (rep->ad1.type != A_NONE) { + if (rep->ad1.type == A_LAST) { + if (!lastre) + quit("First RE may not be null", 0); + rep->ad1.type = A_RE; + rep->ad1.u.rp = lastre; + } + if(*cp == ',' || *cp == ';') { + cp++; + address(&rep->ad2); + if (rep->ad2.type == A_LAST) { + rep->ad1.type = A_RE; + rep->ad2.u.rp = lastre; + } + } else + rep->ad2.type = A_NONE; + } + while(*cp == ' ' || *cp == '\t') + cp++; + +swit: + switch(*cp++) { + + default: + quit("Unrecognized command: %S", (char *)linebuf); + + case '!': + rep->negfl = 1; + goto swit; + + case '{': + rep->command = BCOM; + rep->negfl = !(rep->negfl); + cmpend[depth++] = &rep->u.lb1; + if(++rep >= pend) + quit("Too many commands: %S", (char *) linebuf); + if(*cp == '\0') continue; + goto comploop; + + case '}': + if(rep->ad1.type != A_NONE) + quit(AD0MES, (char *) linebuf); + if(--depth < 0) + quit("Too many }'s", 0); + *cmpend[depth] = rep; + if(*cp == 0) continue; + goto comploop; + + case '=': + rep->command = EQCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + break; + + case ':': + if(rep->ad1.type != A_NONE) + quit(AD0MES, (char *) linebuf); + + while(*cp == ' ') + cp++; + tp = lab->asc; + while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { + *tp++ = *cp++; + if(tp >= &(lab->asc[8])) + quit(LTL, (char *) linebuf); + } + *tp = '\0'; + + if(lpt = search(lab)) { + if(lpt->address) + quit("Duplicate labels: %S", (char *) linebuf); + } else { + lab->chain = 0; + lpt = lab; + if(++lab >= labend) + quit("Too many labels: %S", (char *) linebuf); + } + lpt->address = rep; + if (*cp == '#') + continue; + rep--; /* reuse this slot */ + break; + + case 'a': + rep->command = ACOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->u.text = p; + p = stext(p, addend); + break; + case 'c': + rep->command = CCOM; + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->u.text = p; + p = stext(p, addend); + break; + case 'i': + rep->command = ICOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + quit(CGMES, (char *) linebuf); + rep->u.text = p; + p = stext(p, addend); + break; + + case 'g': + rep->command = GCOM; + break; + + case 'G': + rep->command = CGCOM; + break; + + case 'h': + rep->command = HCOM; + break; + + case 'H': + rep->command = CHCOM; + break; + + case 't': + rep->command = TCOM; + goto jtcommon; + + case 'b': + rep->command = BCOM; +jtcommon: + while(*cp == ' ')cp++; + if(*cp == '\0') { + if(pt = ltab[0].chain) { + while(pt1 = pt->u.lb1) + pt = pt1; + pt->u.lb1 = rep; + } else + ltab[0].chain = rep; + break; + } + tp = lab->asc; + while((*tp++ = *cp++)) + if(tp >= &(lab->asc[8])) + quit(LTL, (char *) linebuf); + cp--; + tp[-1] = '\0'; + + if(lpt = search(lab)) { + if(lpt->address) { + rep->u.lb1 = lpt->address; + } else { + pt = lpt->chain; + while(pt1 = pt->u.lb1) + pt = pt1; + pt->u.lb1 = rep; + } + } else { + lab->chain = rep; + lab->address = 0; + if(++lab >= labend) + quit("Too many labels: %S", + (char *) linebuf); + } + break; + + case 'n': + rep->command = NCOM; + break; + + case 'N': + rep->command = CNCOM; + break; + + case 'p': + rep->command = PCOM; + break; + + case 'P': + rep->command = CPCOM; + break; + + case 'r': + rep->command = RCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + rep->u.text = p; + p = stext(p, addend); + break; + + case 'd': + rep->command = DCOM; + break; + + case 'D': + rep->command = CDCOM; + rep->u.lb1 = pspace; + break; + + case 'q': + rep->command = QCOM; + if(rep->ad2.type != A_NONE) + quit(AD1MES, (char *) linebuf); + break; + + case 'l': + rep->command = LCOM; + break; + + case 's': + rep->command = SCOM; + seof = *cp++; + if ((rep->u.re1 = compile()) == 0) { + if(!lastre) + quit("First RE may not be null.", 0); + rep->u.re1 = lastre; + } + rep->rhs = p; + if((p = compsub(p, addend)) == 0) + quit(CGMES, (char *) linebuf); + if(*cp == 'g') { + cp++; + rep->gfl++; + } else if(gflag) + rep->gfl++; + + if(*cp == 'p') { + cp++; + rep->pfl = 1; + } + + if(*cp == 'P') { + cp++; + rep->pfl = 2; + } + + if(*cp == 'w') { + cp++; + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + text(fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(cmp(fname[nfiles],fname[i]) == 0) { + rep->fcode = fcode[i]; + goto done; + } + if(nfiles >= MAXFILES) + quit("Too many files in w commands 1", 0); + rep->fcode = open_file(fname[nfiles]); + } + break; + + case 'w': + rep->command = WCOM; + if(*cp++ != ' ') + quit(CGMES, (char *) linebuf); + text(fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(cmp(fname[nfiles], fname[i]) == 0) { + rep->fcode = fcode[i]; + goto done; + } + if(nfiles >= MAXFILES){ + fprint(2, "sed: Too many files in w commands 2 \n"); + fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); + errexit(); + } + rep->fcode = open_file(fname[nfiles]); + break; + + case 'x': + rep->command = XCOM; + break; + + case 'y': + rep->command = YCOM; + seof = *cp++; + if (ycomp(rep) == 0) + quit(CGMES, (char *) linebuf); + break; + + } +done: + if(++rep >= pend) + quit("Too many commands, last: %S", (char *) linebuf); + + if(*cp++ != '\0') { + if(cp[-1] == ';') + goto comploop; + quit(CGMES, (char *) linebuf); + } + + } +} + +Biobuf * +open_file(char *name) +{ + Biobuf *bp; + int fd; + + if ((bp = malloc(sizeof(Biobuf))) == 0) + quit("Out of memory", 0); + if ((fd = open(name, OWRITE)) < 0 && + (fd = create(name, OWRITE, 0666)) < 0) + quit("Cannot create %s", name); + Binit(bp, fd, OWRITE); + Bseek(bp, 0, 2); + fcode[nfiles++] = bp; + return bp; +} + +Rune * +compsub(Rune *rhs, Rune *end) +{ + Rune r; + + while ((r = *cp++) != '\0') { + if(r == '\\') { + if (rhs < end) + *rhs++ = 0xFFFF; + else + return 0; + r = *cp++; + if(r == 'n') + r = '\n'; + } else { + if(r == seof) { + if (rhs < end) + *rhs++ = '\0'; + else + return 0; + return rhs; + } + } + if (rhs < end) + *rhs++ = r; + else + return 0; + + } + return 0; +} + +Reprog * +compile(void) +{ + Rune c; + char *ep; + char expbuf[512]; + + if((c = *cp++) == seof) /* '//' */ + return 0; + ep = expbuf; + do { + if (c == 0 || c == '\n') + quit(TMMES, (char *) linebuf); + if (c == '\\') { + if (ep >= expbuf+sizeof(expbuf)) + quit(TMMES, (char *) linebuf); + ep += runetochar(ep, &c); + if ((c = *cp++) == 'n') + c = '\n'; + } + if (ep >= expbuf+sizeof(expbuf)) + quit(TMMES, (char *) linebuf); + ep += runetochar(ep, &c); + } while ((c = *cp++) != seof); + *ep = 0; + return lastre = regcomp(expbuf); +} + +void +regerror(char *s) +{ + USED(s); + quit(CGMES, (char *) linebuf); +} + +void +newfile(enum PTYPE type, char *name) +{ + if (type == P_ARG) + prog.pctl.curr = name; + else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) + quit("Cannot open pattern-file: %s\n", name); + prog.type = type; +} + +int +rline(Rune *buf, Rune *end) +{ + long c; + Rune r; + + while ((c = getrune()) >= 0) { + r = c; + if (r == '\\') { + if (buf <= end) + *buf++ = r; + if ((c = getrune()) < 0) + break; + r = c; + } else if (r == '\n') { + *buf = '\0'; + return(1); + } + if (buf <= end) + *buf++ = r; + } + *buf = '\0'; + return(-1); +} + +long +getrune(void) +{ + char *p; + long c; + Rune r; + + if (prog.type == P_ARG) { + if ((p = prog.pctl.curr) != 0) { + if (*p) { + prog.pctl.curr += chartorune(&r, p); + c = r; + } else { + c = '\n'; /* fake an end-of-line */ + prog.pctl.curr = 0; + } + } else + c = -1; + } else if ((c = Bgetrune(prog.pctl.bp)) < 0) + Bterm(prog.pctl.bp); + return c; +} + +void +address(Addr *ap) +{ + int c; + long lno; + + if((c = *cp++) == '$') + ap->type = A_DOL; + else if(c == '/') { + seof = c; + if (ap->u.rp = compile()) + ap->type = A_RE; + else + ap->type = A_LAST; + } + else if (c >= '0' && c <= '9') { + lno = c-'0'; + while ((c = *cp) >= '0' && c <= '9') + lno = lno*10 + *cp++-'0'; + if(!lno) + quit("line number 0 is illegal",0); + ap->type = A_LINE; + ap->u.line = lno; + } + else { + cp--; + ap->type = A_NONE; + } +} + +int +cmp(char *a, char *b) /* compare characters */ +{ + while(*a == *b++) + if (*a == '\0') + return(0); + else a++; + return(1); +} + +int +rcmp(Rune *a, Rune *b) /* compare runes */ +{ + while(*a == *b++) + if (*a == '\0') + return(0); + else a++; + return(1); +} + +char * +text(char *p) /* extract character string */ +{ + Rune r; + + while(*cp == '\t' || *cp == ' ') + cp++; + while (*cp) { + if ((r = *cp++) == '\\') + if ((r = *cp++) == 0) + break;; + if (r == '\n') + while (*cp == '\t' || *cp == ' ') + cp++; + p += runetochar(p, &r); + } + *p++ = '\0'; + return p; +} + +Rune * +stext(Rune *p, Rune *end) /* extract rune string */ +{ + while(*cp == '\t' || *cp == ' ') + cp++; + while (*cp) { + if (*cp == '\\') + if (*++cp == 0) + break; + if (p >= end-1) + quit(TMMES, (char *) linebuf); + if ((*p++ = *cp++) == '\n') + while(*cp == '\t' || *cp == ' ') + cp++; + } + *p++ = 0; + return p; +} + + +Label * +search (Label *ptr) +{ + Label *rp; + + for (rp = ltab; rp < ptr; rp++) + if(rcmp(rp->asc, ptr->asc) == 0) + return(rp); + return(0); +} + +void +dechain(void) +{ + Label *lptr; + SedCom *rptr, *trptr; + + for(lptr = ltab; lptr < lab; lptr++) { + + if(lptr->address == 0) + quit("Undefined label: %S", (char *) lptr->asc); + + if(lptr->chain) { + rptr = lptr->chain; + while(trptr = rptr->u.lb1) { + rptr->u.lb1 = lptr->address; + rptr = trptr; + } + rptr->u.lb1 = lptr->address; + } + } +} + +int +ycomp(SedCom *r) +{ + int i; + Rune *rp; + Rune c, *tsp, highc; + Rune *sp; + + highc = 0; + for(tsp = cp; *tsp != seof; tsp++) { + if(*tsp == '\\') + tsp++; + if(*tsp == '\n' || *tsp == '\0') + return(0); + if (*tsp > highc) highc = *tsp; + } + tsp++; + if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) + quit("Out of memory", 0); + *rp++ = highc; /* save upper bound */ + for (i = 0; i <= highc; i++) + rp[i] = i; + sp = cp; + while((c = *sp++) != seof) { + if(c == '\\' && *sp == 'n') { + sp++; + c = '\n'; + } + if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { + rp[c] = '\n'; + tsp++; + } + if(rp[c] == seof || rp[c] == '\0') { + free(r->u.re1); + r->u.re1 = 0; + return(0); + } + } + if(*tsp != seof) { + free(r->u.re1); + r->u.re1 = 0; + return(0); + } + cp = tsp+1; + return(1); +} + +void +execute(void) +{ + SedCom *ipc; + + while (spend = gline(linebuf)){ + for(ipc = pspace; ipc->command; ) { + if (!executable(ipc)) { + ipc++; + continue; + } + command(ipc); + + if(delflag) + break; + if(jflag) { + jflag = 0; + if((ipc = ipc->u.lb1) == 0) + break; + } else + ipc++; + + } + if(!nflag && !delflag) + putline(&fout, linebuf, spend-linebuf); + if(aptr > abuf) { + arout(); + } + delflag = 0; + } +} + /* determine if a statement should be applied to an input line */ +int +executable(SedCom *ipc) +{ + if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ + if (ipc->active == 1) /* Second line */ + ipc->active = 2; + switch(ipc->ad2.type) { + case A_NONE: /* No second addr; use first */ + ipc->active = 0; + break; + case A_DOL: /* Accept everything */ + return !ipc->negfl; + case A_LINE: /* Line at end of range? */ + if (lnum <= ipc->ad2.u.line) { + if (ipc->ad2.u.line == lnum) + ipc->active = 0; + return !ipc->negfl; + } + ipc->active = 0; /* out of range */ + return ipc->negfl; + case A_RE: /* Check for matching R.E. */ + if (match(ipc->ad2.u.rp, linebuf)) + ipc->active = 0; + return !ipc->negfl; + default: /* internal error */ + quit("Internal error", 0); + } + } + switch (ipc->ad1.type) { /* Check first address */ + case A_NONE: /* Everything matches */ + return !ipc->negfl; + case A_DOL: /* Only last line */ + if (dolflag) + return !ipc->negfl; + break; + case A_LINE: /* Check line number */ + if (ipc->ad1.u.line == lnum) { + ipc->active = 1; /* In range */ + return !ipc->negfl; + } + break; + case A_RE: /* Check R.E. */ + if (match(ipc->ad1.u.rp, linebuf)) { + ipc->active = 1; /* In range */ + return !ipc->negfl; + } + break; + default: + quit("Internal error", 0); + } + return ipc->negfl; +} + +int +match(Reprog *pattern, Rune *buf) +{ + if (!pattern) + return 0; + subexp[0].s.rsp = buf; + subexp[0].e.rep = 0; + if (rregexec(pattern, linebuf, subexp, MAXSUB)) { + loc1 = subexp[0].s.rsp; + loc2 = subexp[0].e.rep; + return 1; + } + loc1 = loc2 = 0; + return 0; +} + +int +substitute(SedCom *ipc) +{ + int len; + + if(!match(ipc->u.re1, linebuf)) + return 0; + + /* + * we have at least one match. some patterns, e.g. '$' or '^', can + * produce zero-length matches, so during a global substitute we + * must bump to the character after a zero-length match to keep from looping. + */ + sflag = 1; + if(ipc->gfl == 0) /* single substitution */ + dosub(ipc->rhs); + else + do{ /* global substitution */ + len = loc2-loc1; /* length of match */ + dosub(ipc->rhs); /* dosub moves loc2 */ + if(*loc2 == 0) /* end of string */ + break; + if(len == 0) /* zero-length R.E. match */ + loc2++; /* bump over zero-length match */ + if(*loc2 == 0) /* end of string */ + break; + } while(match(ipc->u.re1, loc2)); + return 1; +} + +void +dosub(Rune *rhsbuf) +{ + Rune *lp, *sp; + Rune *rp; + int c, n; + + lp = linebuf; + sp = genbuf; + rp = rhsbuf; + while (lp < loc1) + *sp++ = *lp++; + while(c = *rp++) { + if (c == '&') { + sp = place(sp, loc1, loc2); + continue; + } + if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { + n = c-'0'; + if (subexp[n].s.rsp && subexp[n].e.rep) { + sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); + continue; + } + else { + fprint(2, "sed: Invalid back reference \\%d\n",n); + errexit(); + } + } + *sp++ = c; + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + } + lp = loc2; + loc2 = sp - genbuf + linebuf; + while (*sp++ = *lp++) + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + lp = linebuf; + sp = genbuf; + while (*lp++ = *sp++) + ; + spend = lp-1; +} + +Rune * +place(Rune *sp, Rune *l1, Rune *l2) +{ + while (l1 < l2) { + *sp++ = *l1++; + if (sp >= &genbuf[LBSIZE]) + fprint(2, "sed: Output line too long.\n"); + } + return(sp); +} + +char * +trans(int c) +{ + static char buf[] = "\\x0000"; + static char hex[] = "0123456789abcdef"; + + switch(c) { + case '\b': + return "\\b"; + case '\n': + return "\\n"; + case '\r': + return "\\r"; + case '\t': + return "\\t"; + case '\\': + return "\\\\"; + } + buf[2] = hex[(c>>12)&0xF]; + buf[3] = hex[(c>>8)&0xF]; + buf[4] = hex[(c>>4)&0xF]; + buf[5] = hex[c&0xF]; + return buf; +} + +void +command(SedCom *ipc) +{ + int i, c; + Rune *p1, *p2; + char *ucp; + Rune *rp; + Rune *execp; + + switch(ipc->command) { + + case ACOM: + *aptr++ = ipc; + if(aptr >= abuf+MAXADDS) { + quit("sed: Too many appends after line %ld\n", + (char *) lnum); + } + *aptr = 0; + break; + case CCOM: + delflag = 1; + if(ipc->active == 1) { + for(rp = ipc->u.text; *rp; rp++) + Bputrune(&fout, *rp); + Bputc(&fout, '\n'); + } + break; + case DCOM: + delflag++; + break; + case CDCOM: + p1 = p2 = linebuf; + while(*p1 != '\n') { + if(*p1++ == 0) { + delflag++; + return; + } + } + p1++; + while(*p2++ = *p1++) + ; + spend = p2-1; + jflag++; + break; + case EQCOM: + Bprint(&fout, "%ld\n", lnum); + break; + case GCOM: + p1 = linebuf; + p2 = holdsp; + while(*p1++ = *p2++) + ; + spend = p1-1; + break; + case CGCOM: + *spend++ = '\n'; + p1 = spend; + p2 = holdsp; + while(*p1++ = *p2++) + if(p1 >= lbend) + break; + spend = p1-1; + break; + case HCOM: + p1 = holdsp; + p2 = linebuf; + while(*p1++ = *p2++); + hspend = p1-1; + break; + case CHCOM: + *hspend++ = '\n'; + p1 = hspend; + p2 = linebuf; + while(*p1++ = *p2++) + if(p1 >= hend) + break; + hspend = p1-1; + break; + case ICOM: + for(rp = ipc->u.text; *rp; rp++) + Bputrune(&fout, *rp); + Bputc(&fout, '\n'); + break; + case BCOM: + jflag = 1; + break; + case LCOM: + c = 0; + for (i = 0, rp = linebuf; *rp; rp++) { + c = *rp; + if(c >= 0x20 && c < 0x7F && c != '\\') { + Bputc(&fout, c); + if(i++ > 71) { + Bprint(&fout, "\\\n"); + i = 0; + } + } else { + for (ucp = trans(*rp); *ucp; ucp++){ + c = *ucp; + Bputc(&fout, c); + if(i++ > 71) { + Bprint(&fout, "\\\n"); + i = 0; + } + } + } + } + if(c == ' ') + Bprint(&fout, "\\n"); + Bputc(&fout, '\n'); + break; + case NCOM: + if(!nflag) + putline(&fout, linebuf, spend-linebuf); + + if(aptr > abuf) + arout(); + if((execp = gline(linebuf)) == 0) { + delflag = 1; + break; + } + spend = execp; + break; + case CNCOM: + if(aptr > abuf) + arout(); + *spend++ = '\n'; + if((execp = gline(spend)) == 0) { + delflag = 1; + break; + } + spend = execp; + break; + case PCOM: + putline(&fout, linebuf, spend-linebuf); + break; + case CPCOM: + cpcom: + for(rp = linebuf; *rp && *rp != '\n'; rp++) + Bputc(&fout, *rp); + Bputc(&fout, '\n'); + break; + case QCOM: + if(!nflag) + putline(&fout, linebuf, spend-linebuf); + if(aptr > abuf) + arout(); + exits(0); + case RCOM: + *aptr++ = ipc; + if(aptr >= &abuf[MAXADDS]) + quit("sed: Too many reads after line %ld\n", + (char *) lnum); + *aptr = 0; + break; + case SCOM: + i = substitute(ipc); + if(i && ipc->pfl) + if(ipc->pfl == 1) + putline(&fout, linebuf, spend-linebuf); + else + goto cpcom; + if(i && ipc->fcode) + goto wcom; + break; + + case TCOM: + if(sflag == 0) break; + sflag = 0; + jflag = 1; + break; + + wcom: + case WCOM: + putline(ipc->fcode,linebuf, spend-linebuf); + break; + case XCOM: + p1 = linebuf; + p2 = genbuf; + while(*p2++ = *p1++); + p1 = holdsp; + p2 = linebuf; + while(*p2++ = *p1++); + spend = p2 - 1; + p1 = genbuf; + p2 = holdsp; + while(*p2++ = *p1++); + hspend = p2 - 1; + break; + case YCOM: + p1 = linebuf; + p2 = ipc->u.text; + for (i = *p2++; *p1; p1++){ + if (*p1 <= i) *p1 = p2[*p1]; + } + break; + } + +} + +void +putline(Biobuf *bp, Rune *buf, int n) +{ + while (n--) + Bputrune(bp, *buf++); + Bputc(bp, '\n'); +} + +int +ecmp(Rune *a, Rune *b, int count) +{ + while(count--) + if(*a++ != *b++) return(0); + return(1); +} + +void +arout(void) +{ + Rune *p1; + Biobuf *fi; + int c; + char *s; + char buf[128]; + + for (aptr = abuf; *aptr; aptr++) { + if((*aptr)->command == ACOM) { + for(p1 = (*aptr)->u.text; *p1; p1++ ) + Bputrune(&fout, *p1); + Bputc(&fout, '\n'); + } else { + for(s = buf, p1= (*aptr)->u.text; *p1; p1++) + s += runetochar(s, p1); + *s = '\0'; + if((fi = Bopen(buf, OREAD)) == 0) + continue; + while((c = Bgetc(fi)) >= 0) + Bputc(&fout, c); + Bterm(fi); + } + } + aptr = abuf; + *aptr = 0; +} + +void +errexit(void) +{ + exits("error"); +} + +void +quit (char *msg, char *arg) +{ + fprint(2, "sed: "); + fprint(2, msg, arg); + fprint(2, "\n"); + errexit(); +} + +Rune * +gline(Rune *addr) +{ + long c; + Rune *p; + + static long peekc = 0; + + if (f == 0 && opendata() < 0) + return 0; + sflag = 0; + lnum++; +/* Bflush(&fout);********* dumped 4/30/92 - bobf****/ + do { + p = addr; + for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { + if (c == '\n') { + if ((peekc = Bgetrune(f)) < 0) { + if (fhead == 0) + dolflag = 1; + } + *p = '\0'; + return p; + } + if (c && p < lbend) + *p++ = c; + } + /* return partial final line, adding implicit newline */ + if(p != addr) { + *p = '\0'; + peekc = -1; + if (fhead == 0) + dolflag = 1; + return p; + } + peekc = 0; + Bterm(f); + } while (opendata() > 0); /* Switch to next stream */ + f = 0; + return 0; +} + + /* Data file input section - the intent is to transparently + * catenate all data input streams. + */ +void +enroll(char *filename) /* Add a file to the input file cache */ +{ + FileCache *fp; + + if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) + quit("Out of memory", 0); + if (ftail == 0) + fhead = fp; + else + ftail->next = fp; + ftail = fp; + fp->next = 0; + fp->name = filename; /* 0 => stdin */ +} + +int +opendata(void) +{ + if (fhead == 0) + return -1; + if (fhead->name) { + if ((f = Bopen(fhead->name, OREAD)) == 0) + quit("Can't open %s", fhead->name); + } else { + Binit(&bstdin, 0, OREAD); + f = &bstdin; + } + fhead = fhead->next; + return 1; +} diff --git a/src/cmd/yacc.c b/src/cmd/yacc.c new file mode 100644 index 00000000..3db6768f --- /dev/null +++ b/src/cmd/yacc.c @@ -0,0 +1,2942 @@ +#include +#include +#include +#include + +#define Bungetrune Bungetc /* ok for now. */ + +/* + * all these are 32 bit + */ +#define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */ +#define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) +#define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) +#define NWORDS(n) (((n)+32)/32) + +#define PARSER "#9/lib/yaccpar" +#define PARSERS "#9/lib/yaccpars" +#define TEMPNAME "y.tmp.XXXXXX" +#define ACTNAME "y.acts.XXXXXX" +#define OFILE "tab.c" +#define FILEU "output" +#define FILED "tab.h" +#define FILEDEBUG "debug" + +enum +{ +/* + * the following are adjustable + * according to memory size + */ + ACTSIZE = 40000, + MEMSIZE = 40000, + NSTATES = 2000, + NTERMS = 511, + NPROD = 1600, + NNONTERM = 600, + TEMPSIZE = 2000, + CNAMSZ = 10000, + LSETSIZE = 2400, + WSETSIZE = 350, + + NAMESIZE = 50, + NTYPES = 63, + ISIZE = 400, + + PRIVATE = 0xE000, /* unicode private use */ + + /* relationships which must hold: + TBITSET ints must hold NTERMS+1 bits... + WSETSIZE >= NNONTERM + LSETSIZE >= NNONTERM + TEMPSIZE >= NTERMS + NNONTERM + 1 + TEMPSIZE >= NSTATES + */ + + NTBASE = 010000, + ERRCODE = 8190, + ACCEPTCODE = 8191, + + NOASC = 0, /* no assoc. */ + LASC = 1, /* left assoc. */ + RASC = 2, /* right assoc. */ + BASC = 3, /* binary assoc. */ + + /* flags for state generation */ + + DONE = 0, + MUSTDO = 1, + MUSTLOOKAHEAD = 2, + + /* flags for a rule having an action, and being reduced */ + + ACTFLAG = 04, + REDFLAG = 010, + + /* output parser flags */ + YYFLAG1 = -1000, + + /* parse tokens */ + IDENTIFIER = PRIVATE, + MARK, + TERM, + LEFT, + RIGHT, + BINARY, + PREC, + LCURLY, + IDENTCOLON, + NUMBER, + START, + TYPEDEF, + TYPENAME, + UNION, + + ENDFILE = 0, + + EMPTY = 1, + WHOKNOWS = 0, + OK = 1, + NOMORE = -1000, +}; + + /* macros for getting associativity and precedence levels */ + +#define ASSOC(i) ((i)&03) +#define PLEVEL(i) (((i)>>4)&077) +#define TYPE(i) (((i)>>10)&077) + + /* macros for setting associativity and precedence levels */ + +#define SETASC(i,j) i |= j +#define SETPLEV(i,j) i |= (j<<4) +#define SETTYPE(i,j) i |= (j<<10) + + /* looping macros */ + +#define TLOOP(i) for(i=1; i<=ntokens; i++) +#define NTLOOP(i) for(i=0; i<=nnonter; i++) +#define PLOOP(s,i) for(i=s; i= 0 && j < 256) { + if(temp1[j]) { + print("yacc bug -- cant have 2 different Ts with same value\n"); + print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); + nerrors++; + } + temp1[j] = i; + if(j > c) + c = j; + } + } + warray("yytok1", temp1, c+1); + + /* table 2 has PRIVATE-PRIVATE+256 */ + aryfil(temp1, 256, 0); + c = 0; + TLOOP(i) { + j = tokset[i].value - PRIVATE; + if(j >= 0 && j < 256) { + if(temp1[j]) { + print("yacc bug -- cant have 2 different Ts with same value\n"); + print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); + nerrors++; + } + temp1[j] = i; + if(j > c) + c = j; + } + } + warray("yytok2", temp1, c+1); + + /* table 3 has everything else */ + Bprint(ftable, "long yytok3[] =\n{\n"); + c = 0; + TLOOP(i) { + j = tokset[i].value; + if(j >= 0 && j < 256) + continue; + if(j >= PRIVATE && j < 256+PRIVATE) + continue; + + Bprint(ftable, "%4d,%4d,", j, i); + c++; + if(c%5 == 0) + Bprint(ftable, "\n"); + } + Bprint(ftable, "%4d\n};\n", 0); + + /* copy parser text */ + while((c=Bgetrune(finput)) != Beof) { + if(c == '$') { + if((c = Bgetrune(finput)) != 'A') + Bputrune(ftable, '$'); + else { /* copy actions */ + faction = Bopen(actname, OREAD); + if(faction == 0) + error("cannot reopen action tempfile"); + while((c=Bgetrune(faction)) != Beof) + Bputrune(ftable, c); + Bterm(faction); + ZAPFILE(actname); + c = Bgetrune(finput); + } + } + Bputrune(ftable, c); + } + Bterm(ftable); +} + +/* + * copies string q into p, returning next free char ptr + */ +char* +chcopy(char* p, char* q) +{ + int c; + + while(c = *q) { + if(c == '"') + *p++ = '\\'; + *p++ = c; + q++; + } + *p = 0; + return p; +} + +/* + * creates output string for item pointed to by pp + */ +char* +writem(int *pp) +{ + int i,*p; + static char sarr[ISIZE]; + char* q; + + for(p=pp; *p>0; p++) + ; + p = prdptr[-*p]; + q = chcopy(sarr, nontrst[*p-NTBASE].name); + q = chcopy(q, ": "); + for(;;) { + *q = ' '; + p++; + if(p == pp) + *q = '.'; + q++; + *q = '\0'; + i = *p; + if(i <= 0) + break; + q = chcopy(q, symnam(i)); + if(q > &sarr[ISIZE-30]) + error("item too big"); + } + + /* an item calling for a reduction */ + i = *pp; + if(i < 0 ) { + q = chcopy(q, " ("); + sprint(q, "%d)", -i); + } + return sarr; +} + +/* + * return a pointer to the name of symbol i + */ +char* +symnam(int i) +{ + char* cp; + + cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; + if(*cp == ' ') + cp++; + return cp; +} + +/* + * output the summary on y.output + */ +void +summary(void) +{ + + if(foutput != 0) { + Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n", + ntokens, NTERMS, nnonter, NNONTERM); + Bprint(foutput, "%d/%d grammar rules, %d/%d states\n", + nprod, NPROD, nstate, NSTATES); + Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", + zzsrconf, zzrrconf); + Bprint(foutput, "%d/%d working sets used\n", + (int)(zzcwp-wsets), WSETSIZE); + Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n", + (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE); + Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE); + Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate); + Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp); + Bprint(foutput, "%d goto entries\n", zzgoent); + Bprint(foutput, "%d entries saved by goto default\n", zzgobest); + } + if(zzsrconf != 0 || zzrrconf != 0) { + print("\nconflicts: "); + if(zzsrconf) + print("%d shift/reduce", zzsrconf); + if(zzsrconf && zzrrconf) + print(", "); + if(zzrrconf) + print("%d reduce/reduce", zzrrconf); + print("\n"); + } + if(ftemp != 0) { + Bterm(ftemp); + ftemp = 0; + } + if(fdefine != 0) { + Bterm(fdefine); + fdefine = 0; + } +} + +/* + * write out error comment -- NEEDS WORK + */ +void +error(char *s, ...) +{ + va_list arg; + + nerrors++; + fprint(2, "\n fatal error:"); + va_start(arg, s); + vfprint(2, s, arg); + va_end(arg); + fprint(2, ", %s:%d\n", infile, lineno); + if(!fatfl) + return; + summary(); + cleantmp(); + exits("error"); +} + +/* + * set elements 0 through n-1 to c + */ +void +aryfil(int *v, int n, int c) +{ + int i; + + for(i=0; ilset; + if(pp == 0) + Bprint(foutput, "\tNULL"); + else { + Bprint(foutput, " { "); + TLOOP(j) + if(BIT(pp,j)) + Bprint(foutput, "%s ", symnam(j)); + Bprint(foutput, "}"); + } +} + +/* + * compute an array with the beginnings of productions yielding given nonterminals + * The array pres points to these lists + * the array pyield has the lists: the total size is only NPROD+1 + */ +void +cpres(void) +{ + int c, j, i, **pmem; + static int *pyield[NPROD]; + + pmem = pyield; + NTLOOP(i) { + c = i+NTBASE; + pres[i] = pmem; + fatfl = 0; /* make undefined symbols nonfatal */ + PLOOP(0, j) + if(*prdptr[j] == c) + *pmem++ = prdptr[j]+1; + if(pres[i] == pmem) + error("nonterminal %s not defined!", nontrst[i].name); + } + pres[i] = pmem; + fatfl = 1; + if(nerrors) { + summary(); + cleantmp(); + exits("error"); + } + if(pmem != &pyield[nprod]) + error("internal Yacc error: pyield %d", pmem-&pyield[nprod]); +} + +/* + * compute an array with the first of nonterminals + */ +void +cpfir(void) +{ + int *p, **s, i, **t, ch, changes; + + zzcwp = &wsets[nnonter]; + NTLOOP(i) { + aryfil(wsets[i].ws.lset, tbitset, 0); + t = pres[i+1]; + /* initially fill the sets */ + for(s=pres[i]; s 0; ++p) { + if(ch < NTBASE) { + SETBIT(wsets[i].ws.lset, ch); + break; + } + if(!pempty[ch-NTBASE]) + break; + } + } + + /* now, reflect transitivity */ + changes = 1; + while(changes) { + changes = 0; + NTLOOP(i) { + t = pres[i+1]; + for(s = pres[i]; s < t; ++s) + for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { + changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); + if(!pempty[ch]) + break; + } + } + } + + NTLOOP(i) + pfirst[i] = flset(&wsets[i].ws); + if(!indebug) + return; + if(foutput != 0) + NTLOOP(i) { + Bprint(foutput, "\n%s: ", nontrst[i].name); + prlook(pfirst[i]); + Bprint(foutput, " %d\n", pempty[i]); + } +} + +/* + * sorts last state,and sees if it equals earlier ones. returns state number + */ +int +state(int c) +{ + Item *p1, *p2, *k, *l, *q1, *q2; + int size1, size2, i; + + p1 = pstate[nstate]; + p2 = pstate[nstate+1]; + if(p1 == p2) + return 0; /* null state */ + /* sort the items */ + for(k = p2-1; k > p1; k--) /* make k the biggest */ + for(l = k-1; l >= p1; --l) + if(l->pitem > k->pitem) { + int *s; + Lkset *ss; + + s = k->pitem; + k->pitem = l->pitem; + l->pitem = s; + ss = k->look; + k->look = l->look; + l->look = ss; + } + size1 = p2 - p1; /* size of state */ + + for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { + /* get ith state */ + q1 = pstate[i]; + q2 = pstate[i+1]; + size2 = q2 - q1; + if(size1 != size2) + continue; + k = p1; + for(l = q1; l < q2; l++) { + if(l->pitem != k->pitem) + break; + k++; + } + if(l != q2) + continue; + /* found it */ + pstate[nstate+1] = pstate[nstate]; /* delete last state */ + /* fix up lookaheads */ + if(nolook) + return i; + for(l = q1, k = p1; l < q2; ++l, ++k ) { + int s; + + SETLOOP(s) + clset.lset[s] = l->look->lset[s]; + if(setunion(clset.lset, k->look->lset)) { + tystate[i] = MUSTDO; + /* register the new set */ + l->look = flset( &clset ); + } + } + return i; + } + /* state is new */ + if(nolook) + error("yacc state/nolook error"); + pstate[nstate+2] = p2; + if(nstate+1 >= NSTATES) + error("too many states"); + if(c >= NTBASE) { + mstates[nstate] = ntstates[c-NTBASE]; + ntstates[c-NTBASE] = nstate; + } else { + mstates[nstate] = tstates[c]; + tstates[c] = nstate; + } + tystate[nstate] = MUSTDO; + return nstate++; +} + +void +putitem(int *ptr, Lkset *lptr) +{ + Item *j; + + if(pidebug && foutput != 0) + Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); + j = pstate[nstate+1]; + j->pitem = ptr; + if(!nolook) + j->look = flset(lptr); + pstate[nstate+1] = ++j; + if((int*)j > zzmemsz) { + zzmemsz = (int*)j; + if(zzmemsz >= &mem0[MEMSIZE]) + error("out of state space"); + } +} + +/* + * mark nonterminals which derive the empty string + * also, look for nonterminals which don't derive any token strings + */ +void +cempty(void) +{ + + int i, *p; + + /* first, use the array pempty to detect productions that can never be reduced */ + /* set pempty to WHONOWS */ + aryfil(pempty, nnonter+1, WHOKNOWS); + + /* now, look at productions, marking nonterminals which derive something */ +more: + PLOOP(0, i) { + if(pempty[*prdptr[i] - NTBASE]) + continue; + for(p = prdptr[i]+1; *p >= 0; ++p) + if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) + break; + /* production can be derived */ + if(*p < 0) { + pempty[*prdptr[i]-NTBASE] = OK; + goto more; + } + } + + /* now, look at the nonterminals, to see if they are all OK */ + NTLOOP(i) { + /* the added production rises or falls as the start symbol ... */ + if(i == 0) + continue; + if(pempty[i] != OK) { + fatfl = 0; + error("nonterminal %s never derives any token string", nontrst[i].name); + } + } + + if(nerrors) { + summary(); + cleantmp(); + exits("error"); + } + + /* now, compute the pempty array, to see which nonterminals derive the empty string */ + /* set pempty to WHOKNOWS */ + aryfil( pempty, nnonter+1, WHOKNOWS); + + /* loop as long as we keep finding empty nonterminals */ + +again: + PLOOP(1, i) { + /* not known to be empty */ + if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { + for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) + ; + /* we have a nontrivially empty nonterminal */ + if(*p < 0) { + pempty[*prdptr[i]-NTBASE] = EMPTY; + /* got one ... try for another */ + goto again; + } + } + } +} + +/* + * generate the states + */ +void +stagen(void) +{ + + int c, i, j, more; + Wset *p, *q; + + /* initialize */ + nstate = 0; + + /* THIS IS FUNNY from the standpoint of portability + * it represents the magic moment when the mem0 array, which has + * been holding the productions, starts to hold item pointers, of a + * different type... + * someday, alloc should be used to allocate all this stuff... for now, we + * accept that if pointers don't fit in integers, there is a problem... + */ + + pstate[0] = pstate[1] = (Item*)mem; + aryfil(clset.lset, tbitset, 0); + putitem(prdptr[0]+1, &clset); + tystate[0] = MUSTDO; + nstate = 1; + pstate[2] = pstate[1]; + + aryfil(amem, ACTSIZE, 0); + + /* now, the main state generation loop */ + for(more=1; more;) { + more = 0; + SLOOP(i) { + if(tystate[i] != MUSTDO) + continue; + tystate[i] = DONE; + aryfil(temp1, nnonter+1, 0); + /* take state i, close it, and do gotos */ + closure(i); + /* generate goto's */ + WSLOOP(wsets, p) { + if(p->flag) + continue; + p->flag = 1; + c = *(p->pitem); + if(c <= 1) { + if(pstate[i+1]-pstate[i] <= p-wsets) + tystate[i] = MUSTLOOKAHEAD; + continue; + } + /* do a goto on c */ + WSLOOP(p, q) + /* this item contributes to the goto */ + if(c == *(q->pitem)) { + putitem(q->pitem+1, &q->ws); + q->flag = 1; + } + if(c < NTBASE) + state(c); /* register new state */ + else + temp1[c-NTBASE] = state(c); + } + if(gsdebug && foutput != 0) { + Bprint(foutput, "%d: ", i); + NTLOOP(j) + if(temp1[j]) + Bprint(foutput, "%s %d, ", + nontrst[j].name, temp1[j]); + Bprint(foutput, "\n"); + } + indgo[i] = apack(&temp1[1], nnonter-1) - 1; + /* do some more */ + more = 1; + } + } +} + +/* + * generate the closure of state i + */ +void +closure(int i) +{ + + Wset *u, *v; + Item *p, *q; + int c, ch, work, k, *pi, **s, **t; + + zzclose++; + + /* first, copy kernel of state i to wsets */ + cwp = wsets; + ITMLOOP(i, p, q) { + cwp->pitem = p->pitem; + cwp->flag = 1; /* this item must get closed */ + SETLOOP(k) + cwp->ws.lset[k] = p->look->lset[k]; + WSBUMP(cwp); + } + + /* now, go through the loop, closing each item */ + work = 1; + while(work) { + work = 0; + WSLOOP(wsets, u) { + if(u->flag == 0) + continue; + /* dot is before c */ + c = *(u->pitem); + if(c < NTBASE) { + u->flag = 0; + /* only interesting case is where . is before nonterminal */ + continue; + } + + /* compute the lookahead */ + aryfil(clset.lset, tbitset, 0); + + /* find items involving c */ + WSLOOP(u, v) + if(v->flag == 1 && *(pi=v->pitem) == c) { + v->flag = 0; + if(nolook) + continue; + while((ch = *++pi) > 0) { + /* terminal symbol */ + if(ch < NTBASE) { + SETBIT(clset.lset, ch); + break; + } + /* nonterminal symbol */ + setunion(clset.lset, pfirst[ch-NTBASE]->lset); + if(!pempty[ch-NTBASE]) + break; + } + if(ch <= 0) + setunion(clset.lset, v->ws.lset); + } + + /* + * now loop over productions derived from c + * c is now nonterminal number + */ + c -= NTBASE; + t = pres[c+1]; + for(s = pres[c]; s < t; ++s) { + /* + * put these items into the closure + * is the item there + */ + WSLOOP(wsets, v) + /* yes, it is there */ + if(v->pitem == *s) { + if(nolook) + goto nexts; + if(setunion(v->ws.lset, clset.lset)) + v->flag = work = 1; + goto nexts; + } + + /* not there; make a new entry */ + if(cwp-wsets+1 >= WSETSIZE) + error( "working set overflow"); + cwp->pitem = *s; + cwp->flag = 1; + if(!nolook) { + work = 1; + SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; + } + WSBUMP(cwp); + + nexts:; + } + } + } + + /* have computed closure; flags are reset; return */ + if(cwp > zzcwp) + zzcwp = cwp; + if(cldebug && foutput != 0) { + Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); + WSLOOP(wsets, u) { + if(u->flag) + Bprint(foutput, "flag set!\n"); + u->flag = 0; + Bprint(foutput, "\t%s", writem(u->pitem)); + prlook(&u->ws); + Bprint(foutput, "\n"); + } + } +} + +/* + * decide if the lookahead set pointed to by p is known + * return pointer to a perminent location for the set + */ +Lkset* +flset(Lkset *p) +{ + Lkset *q; + int *u, *v, *w, j; + + for(q = &lkst[nlset]; q-- > lkst;) { + u = p->lset; + v = q->lset; + w = &v[tbitset]; + while(v < w) + if(*u++ != *v++) + goto more; + /* we have matched */ + return q; + more:; + } + /* add a new one */ + q = &lkst[nlset++]; + if(nlset >= LSETSIZE) + error("too many lookahead sets"); + SETLOOP(j) + q->lset[j] = p->lset[j]; + return q; +} + +void +cleantmp(void) +{ + ZAPFILE(actname); + ZAPFILE(tempname); +} + +void +intr(void) +{ + cleantmp(); + exits("interrupted"); +} + +void +setup(int argc, char *argv[]) +{ + long c, t; + int i, j, fd, lev, ty, ytab, *p; + int vflag, dflag, stem; + char actnm[8], *stemc, *s, dirbuf[128]; + + ytab = 0; + vflag = 0; + dflag = 0; + stem = 0; + stemc = "y"; + foutput = 0; + fdefine = 0; + fdebug = 0; + ARGBEGIN{ + case 'v': + case 'V': + vflag++; + break; + case 'D': + yydebug = ARGF(); + break; + case 'd': + dflag++; + break; + case 'o': + ytab++; + ytabc = ARGF(); + break; + case 's': + stem++; + stemc = ARGF(); + break; + case 'S': + parser = PARSERS; + break; + default: + error("illegal option: %c", ARGC()); + }ARGEND + openup(stemc, dflag, vflag, ytab, ytabc); + + if((fd = mkstemp(ttempname)) >= 0){ + tempname = ttempname; + ftemp = Bfdopen(fd, OWRITE); + } + if((fd = mkstemp(tactname)) >= 0){ + actname = tactname; + faction = Bfdopen(fd, OWRITE); + } + if(ftemp == 0 || faction == 0) + error("cannot open temp file"); + if(argc < 1) + error("no input file"); + infile = argv[0]; + if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){ + i = strlen(infile)+1+strlen(dirbuf)+1+10; + s = malloc(i); + if(s != nil){ + snprint(s, i, "%s/%s", dirbuf, infile); + cleanname(s); + infile = s; + } + } + finput = Bopen(infile, OREAD); + if(finput == 0) + error("cannot open '%s'", argv[0]); + cnamp = cnames; + + defin(0, "$end"); + extval = PRIVATE; /* tokens start in unicode 'private use' */ + defin(0, "error"); + defin(1, "$accept"); + defin(0, "$unk"); + mem = mem0; + i = 0; + + for(t = gettok(); t != MARK && t != ENDFILE;) + switch(t) { + case ';': + t = gettok(); + break; + + case START: + if(gettok() != IDENTIFIER) + error("bad %%start construction"); + start = chfind(1, tokname); + t = gettok(); + continue; + + case TYPEDEF: + if(gettok() != TYPENAME) + error("bad syntax in %%type"); + ty = numbval; + for(;;) { + t = gettok(); + switch(t) { + case IDENTIFIER: + if((t=chfind(1, tokname)) < NTBASE) { + j = TYPE(toklev[t]); + if(j != 0 && j != ty) + error("type redeclaration of token %s", + tokset[t].name); + else + SETTYPE(toklev[t], ty); + } else { + j = nontrst[t-NTBASE].value; + if(j != 0 && j != ty) + error("type redeclaration of nonterminal %s", + nontrst[t-NTBASE].name ); + else + nontrst[t-NTBASE].value = ty; + } + case ',': + continue; + case ';': + t = gettok(); + default: + break; + } + break; + } + continue; + + case UNION: + /* copy the union declaration to the output */ + cpyunion(); + t = gettok(); + continue; + + case LEFT: + case BINARY: + case RIGHT: + i++; + + case TERM: + /* nonzero means new prec. and assoc. */ + lev = t-TERM; + ty = 0; + + /* get identifiers so defined */ + t = gettok(); + + /* there is a type defined */ + if(t == TYPENAME) { + ty = numbval; + t = gettok(); + } + for(;;) { + switch(t) { + case ',': + t = gettok(); + continue; + + case ';': + break; + + case IDENTIFIER: + j = chfind(0, tokname); + if(j >= NTBASE) + error("%s defined earlier as nonterminal", tokname); + if(lev) { + if(ASSOC(toklev[j])) + error("redeclaration of precedence of %s", tokname); + SETASC(toklev[j], lev); + SETPLEV(toklev[j], i); + } + if(ty) { + if(TYPE(toklev[j])) + error("redeclaration of type of %s", tokname); + SETTYPE(toklev[j],ty); + } + t = gettok(); + if(t == NUMBER) { + tokset[j].value = numbval; + if(j < ndefout && j > 3) + error("please define type number of %s earlier", + tokset[j].name); + t = gettok(); + } + continue; + } + break; + } + continue; + + case LCURLY: + defout(0); + cpycode(); + t = gettok(); + continue; + + default: + error("syntax error"); + } + if(t == ENDFILE) + error("unexpected EOF before %%"); + + /* t is MARK */ + Bprint(ftable, "extern int yyerrflag;\n"); + Bprint(ftable, "#ifndef YYMAXDEPTH\n"); + Bprint(ftable, "#define YYMAXDEPTH 150\n"); + Bprint(ftable, "#endif\n" ); + if(!ntypes) { + Bprint(ftable, "#ifndef YYSTYPE\n"); + Bprint(ftable, "#define YYSTYPE int\n"); + Bprint(ftable, "#endif\n"); + } + Bprint(ftable, "YYSTYPE yylval;\n"); + Bprint(ftable, "YYSTYPE yyval;\n"); + + prdptr[0] = mem; + + /* added production */ + *mem++ = NTBASE; + + /* if start is 0, we will overwrite with the lhs of the first rule */ + *mem++ = start; + *mem++ = 1; + *mem++ = 0; + prdptr[1] = mem; + while((t=gettok()) == LCURLY) + cpycode(); + if(t != IDENTCOLON) + error("bad syntax on first rule"); + + if(!start) + prdptr[0][1] = chfind(1, tokname); + + /* read rules */ + while(t != MARK && t != ENDFILE) { + /* process a rule */ + rlines[nprod] = lineno; + if(t == '|') + *mem++ = *prdptr[nprod-1]; + else + if(t == IDENTCOLON) { + *mem = chfind(1, tokname); + if(*mem < NTBASE) + error("token illegal on LHS of grammar rule"); + mem++; + } else + error("illegal rule: missing semicolon or | ?"); + /* read rule body */ + t = gettok(); + + more_rule: + while(t == IDENTIFIER) { + *mem = chfind(1, tokname); + if(*mem < NTBASE) + levprd[nprod] = toklev[*mem]; + mem++; + t = gettok(); + } + if(t == PREC) { + if(gettok() != IDENTIFIER) + error("illegal %%prec syntax"); + j = chfind(2, tokname); + if(j >= NTBASE) + error("nonterminal %s illegal after %%prec", + nontrst[j-NTBASE].name); + levprd[nprod] = toklev[j]; + t = gettok(); + } + if(t == '=') { + levprd[nprod] |= ACTFLAG; + Bprint(faction, "\ncase %d:", nprod); + cpyact(mem-prdptr[nprod]-1); + Bprint(faction, " break;"); + if((t=gettok()) == IDENTIFIER) { + + /* action within rule... */ + sprint(actnm, "$$%d", nprod); + + /* make it a nonterminal */ + j = chfind(1, actnm); + + /* + * the current rule will become rule number nprod+1 + * move the contents down, and make room for the null + */ + for(p = mem; p >= prdptr[nprod]; --p) + p[2] = *p; + mem += 2; + + /* enter null production for action */ + p = prdptr[nprod]; + *p++ = j; + *p++ = -nprod; + + /* update the production information */ + levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; + levprd[nprod] = ACTFLAG; + if(++nprod >= NPROD) + error("more than %d rules", NPROD); + prdptr[nprod] = p; + + /* make the action appear in the original rule */ + *mem++ = j; + + /* get some more of the rule */ + goto more_rule; + } + } + + while(t == ';') + t = gettok(); + *mem++ = -nprod; + + /* check that default action is reasonable */ + if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) { + + /* no explicit action, LHS has value */ + int tempty; + + tempty = prdptr[nprod][1]; + if(tempty < 0) + error("must return a value, since LHS has a type"); + else + if(tempty >= NTBASE) + tempty = nontrst[tempty-NTBASE].value; + else + tempty = TYPE(toklev[tempty]); + if(tempty != nontrst[*prdptr[nprod]-NTBASE].value) + error("default action causes potential type clash"); + } + nprod++; + if(nprod >= NPROD) + error("more than %d rules", NPROD); + prdptr[nprod] = mem; + levprd[nprod] = 0; + } + + /* end of all rules */ + defout(1); + + finact(); + if(t == MARK) { + Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); + while((c=Bgetrune(finput)) != Beof) + Bputrune(ftable, c); + } + Bterm(finput); +} + +/* + * finish action routine + */ +void +finact(void) +{ + + Bterm(faction); + Bprint(ftable, "#define YYEOFCODE %d\n", 1); + Bprint(ftable, "#define YYERRCODE %d\n", 2); +} + +/* + * define s to be a terminal if t=0 + * or a nonterminal if t=1 + */ +int +defin(int nt, char *s) +{ + int val; + Rune rune; + + val = 0; + if(nt) { + nnonter++; + if(nnonter >= NNONTERM) + error("too many nonterminals, limit %d",NNONTERM); + nontrst[nnonter].name = cstash(s); + return NTBASE + nnonter; + } + + /* must be a token */ + ntokens++; + if(ntokens >= NTERMS) + error("too many terminals, limit %d", NTERMS); + tokset[ntokens].name = cstash(s); + + /* establish value for token */ + /* single character literal */ + if(s[0] == ' ') { + val = chartorune(&rune, &s[1]); + if(s[val+1] == 0) { + val = rune; + goto out; + } + } + + /* escape sequence */ + if(s[0] == ' ' && s[1] == '\\') { + if(s[3] == 0) { + /* single character escape sequence */ + switch(s[2]) { + case 'n': val = '\n'; break; + case 'r': val = '\r'; break; + case 'b': val = '\b'; break; + case 't': val = '\t'; break; + case 'f': val = '\f'; break; + case '\'': val = '\''; break; + case '"': val = '"'; break; + case '\\': val = '\\'; break; + default: error("invalid escape"); + } + goto out; + } + + /* \nnn sequence */ + if(s[2] >= '0' && s[2] <= '7') { + if(s[3] < '0' || + s[3] > '7' || + s[4] < '0' || + s[4] > '7' || + s[5] != 0) + error("illegal \\nnn construction"); + val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; + if(val == 0) + error("'\\000' is illegal"); + goto out; + } + error("unknown escape"); + } + val = extval++; + +out: + tokset[ntokens].value = val; + toklev[ntokens] = 0; + return ntokens; +} + +/* + * write out the defines (at the end of the declaration section) + */ +void +defout(int last) +{ + int i, c; + char sar[NAMESIZE+10]; + + for(i=ndefout; i<=ntokens; i++) { + /* non-literals */ + c = tokset[i].name[0]; + if(c != ' ' && c != '$') { + Bprint(ftable, "#define %s %d\n", + tokset[i].name, tokset[i].value); + if(fdefine) + Bprint(fdefine, "#define\t%s\t%d\n", + tokset[i].name, tokset[i].value); + } + } + ndefout = ntokens+1; + if(last && fdebug) { + Bprint(fdebug, "char* yytoknames[] =\n{\n"); + TLOOP(i) { + if(tokset[i].name) { + chcopy(sar, tokset[i].name); + Bprint(fdebug, "\t\"%s\",\n", sar); + continue; + } + Bprint(fdebug, "\t0,\n"); + } + Bprint(fdebug, "};\n"); + } +} + +char* +cstash(char *s) +{ + char *temp; + + temp = cnamp; + do { + if(cnamp >= &cnames[cnamsz]) + error("too many characters in id's and literals"); + else + *cnamp++ = *s; + } while(*s++); + return temp; +} + +long +gettok(void) +{ + long c; + Rune rune; + int i, base, match, reserve; + static int peekline; + +begin: + reserve = 0; + lineno += peekline; + peekline = 0; + c = Bgetrune(finput); + while(c == ' ' || c == '\n' || c == '\t' || c == '\f') { + if(c == '\n') + lineno++; + c = Bgetrune(finput); + } + + /* skip comment */ + if(c == '/') { + lineno += skipcom(); + goto begin; + } + switch(c) { + case Beof: + return ENDFILE; + + case '{': + Bungetrune(finput); + return '='; + + case '<': + /* get, and look up, a type name (union member name) */ + i = 0; + while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') { + rune = c; + c = runetochar(&tokname[i], &rune); + if(i < NAMESIZE) + i += c; + } + if(c != '>') + error("unterminated < ... > clause"); + tokname[i] = 0; + for(i=1; i<=ntypes; i++) + if(!strcmp(typeset[i], tokname)) { + numbval = i; + return TYPENAME; + } + ntypes++; + numbval = ntypes; + typeset[numbval] = cstash(tokname); + return TYPENAME; + + case '"': + case '\'': + match = c; + tokname[0] = ' '; + i = 1; + for(;;) { + c = Bgetrune(finput); + if(c == '\n' || c <= 0) + error("illegal or missing ' or \"" ); + if(c == '\\') { + tokname[i] = '\\'; + if(i < NAMESIZE) + i++; + c = Bgetrune(finput); + } else + if(c == match) + break; + rune = c; + c = runetochar(&tokname[i], &rune); + if(i < NAMESIZE) + i += c; + } + break; + + case '%': + case '\\': + switch(c = Bgetrune(finput)) { + case '0': return TERM; + case '<': return LEFT; + case '2': return BINARY; + case '>': return RIGHT; + case '%': + case '\\': return MARK; + case '=': return PREC; + case '{': return LCURLY; + default: reserve = 1; + } + + default: + /* number */ + if(isdigit(c)) { + numbval = c-'0'; + base = (c=='0')? 8: 10; + for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput)) + numbval = numbval*base + (c-'0'); + Bungetrune(finput); + return NUMBER; + } + if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') { + i = 0; + while(islower(c) || isupper(c) || isdigit(c) || + c == '-' || c=='_' || c=='.' || c=='$') { + if(reserve && isupper(c)) + c += 'a'-'A'; + rune = c; + c = runetochar(&tokname[i], &rune); + if(i < NAMESIZE) + i += c; + c = Bgetrune(finput); + } + } else + return c; + Bungetrune(finput); + } + tokname[i] = 0; + + /* find a reserved word */ + if(reserve) { + for(c=0; resrv[c].name; c++) + if(strcmp(tokname, resrv[c].name) == 0) + return resrv[c].value; + error("invalid escape, or illegal reserved word: %s", tokname); + } + + /* look ahead to distinguish IDENTIFIER from IDENTCOLON */ + c = Bgetrune(finput); + while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') { + if(c == '\n') + peekline++; + /* look for comments */ + if(c == '/') + peekline += skipcom(); + c = Bgetrune(finput); + } + if(c == ':') + return IDENTCOLON; + Bungetrune(finput); + return IDENTIFIER; +} + +/* + * determine the type of a symbol + */ +int +fdtype(int t) +{ + int v; + + if(t >= NTBASE) + v = nontrst[t-NTBASE].value; + else + v = TYPE(toklev[t]); + if(v <= 0) + error("must specify type for %s", (t>=NTBASE)? + nontrst[t-NTBASE].name: tokset[t].name); + return v; +} + +int +chfind(int t, char *s) +{ + int i; + + if(s[0] == ' ') + t = 0; + TLOOP(i) + if(!strcmp(s, tokset[i].name)) + return i; + NTLOOP(i) + if(!strcmp(s, nontrst[i].name)) + return NTBASE+i; + + /* cannot find name */ + if(t > 1) + error("%s should have been defined earlier", s); + return defin(t, s); +} + +/* + * copy the union declaration to the output, and the define file if present + */ +void +cpyunion(void) +{ + long c; + int level; + + Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); + Bprint(ftable, "typedef union "); + if(fdefine != 0) + Bprint(fdefine, "\ntypedef union "); + + level = 0; + for(;;) { + if((c=Bgetrune(finput)) == Beof) + error("EOF encountered while processing %%union"); + Bputrune(ftable, c); + if(fdefine != 0) + Bputrune(fdefine, c); + switch(c) { + case '\n': + lineno++; + break; + case '{': + level++; + break; + case '}': + level--; + + /* we are finished copying */ + if(level == 0) { + Bprint(ftable, " YYSTYPE;\n"); + if(fdefine != 0) + Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n"); + return; + } + } + } +} + +/* + * copies code between \{ and \} + */ +void +cpycode(void) +{ + + long c; + + c = Bgetrune(finput); + if(c == '\n') { + c = Bgetrune(finput); + lineno++; + } + Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile); + while(c != Beof) { + if(c == '\\') { + if((c=Bgetrune(finput)) == '}') + return; + Bputc(ftable, '\\'); + } + if(c == '%') { + if((c=Bgetrune(finput)) == '}') + return; + Bputc(ftable, '%'); + } + Bputrune(ftable, c); + if(c == '\n') + lineno++; + c = Bgetrune(finput); + } + error("eof before %%}"); +} + +/* + * skip over comments + * skipcom is called after reading a '/' + */ +int +skipcom(void) +{ + long c; + int i; + + /* i is the number of lines skipped */ + i = 0; + if(Bgetrune(finput) != '*') + error("illegal comment"); + c = Bgetrune(finput); + while(c != Beof) { + while(c == '*') + if((c=Bgetrune(finput)) == '/') + return i; + if(c == '\n') + i++; + c = Bgetrune(finput); + } + error("EOF inside comment"); + return 0; +} + +/* + * copy C action to the next ; or closing } + */ +void +cpyact(int offset) +{ + long c; + int brac, match, j, s, fnd, tok; + + Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile); + brac = 0; + +loop: + c = Bgetrune(finput); +swt: + switch(c) { + case ';': + if(brac == 0) { + Bputrune(faction, c); + return; + } + goto lcopy; + + case '{': + brac++; + goto lcopy; + + case '$': + s = 1; + tok = -1; + c = Bgetrune(finput); + + /* type description */ + if(c == '<') { + Bungetrune(finput); + if(gettok() != TYPENAME) + error("bad syntax on $ clause"); + tok = numbval; + c = Bgetrune(finput); + } + if(c == '$') { + Bprint(faction, "yyval"); + + /* put out the proper tag... */ + if(ntypes) { + if(tok < 0) + tok = fdtype(*prdptr[nprod]); + Bprint(faction, ".%s", typeset[tok]); + } + goto loop; + } + if(c == '-') { + s = -s; + c = Bgetrune(finput); + } + if(isdigit(c)) { + j = 0; + while(isdigit(c)) { + j = j*10 + (c-'0'); + c = Bgetrune(finput); + } + Bungetrune(finput); + j = j*s - offset; + if(j > 0) + error("Illegal use of $%d", j+offset); + + dollar: + Bprint(faction, "yypt[-%d].yyv", -j); + + /* put out the proper tag */ + if(ntypes) { + if(j+offset <= 0 && tok < 0) + error("must specify type of $%d", j+offset); + if(tok < 0) + tok = fdtype(prdptr[nprod][j+offset]); + Bprint(faction, ".%s", typeset[tok]); + } + goto loop; + } + if(isupper(c) || islower(c) || c == '_' || c == '.') { + int tok; /* tok used oustide for type info */ + + /* look for $name */ + Bungetrune(finput); + if(gettok() != IDENTIFIER) + error("$ must be followed by an identifier"); + tok = chfind(2, tokname); + if((c = Bgetrune(finput)) != '#') { + Bungetrune(finput); + fnd = -1; + } else + if(gettok() != NUMBER) { + error("# must be followed by number"); + fnd = -1; + } else + fnd = numbval; + for(j=1; j<=offset; ++j) + if(tok == prdptr[nprod][j]) { + if(--fnd <= 0) { + j -= offset; + goto dollar; + } + } + error("$name or $name#number not found"); + } + Bputc(faction, '$'); + if(s < 0 ) + Bputc(faction, '-'); + goto swt; + + case '}': + brac--; + if(brac) + goto lcopy; + Bputrune(faction, c); + return; + + case '/': + /* look for comments */ + Bputrune(faction, c); + c = Bgetrune(finput); + if(c != '*') + goto swt; + + /* it really is a comment */ + Bputrune(faction, c); + c = Bgetrune(finput); + while(c >= 0) { + while(c == '*') { + Bputrune(faction, c); + if((c=Bgetrune(finput)) == '/') + goto lcopy; + } + Bputrune(faction, c); + if(c == '\n') + lineno++; + c = Bgetrune(finput); + } + error("EOF inside comment"); + + case '\'': + /* character constant */ + match = '\''; + goto string; + + case '"': + /* character string */ + match = '"'; + + string: + Bputrune(faction, c); + while(c = Bgetrune(finput)) { + if(c == '\\') { + Bputrune(faction, c); + c = Bgetrune(finput); + if(c == '\n') + lineno++; + } else + if(c == match) + goto lcopy; + if(c == '\n') + error("newline in string or char. const."); + Bputrune(faction, c); + } + error("EOF in string or character constant"); + + case Beof: + error("action does not terminate"); + + case '\n': + lineno++; + goto lcopy; + } + +lcopy: + Bputrune(faction, c); + goto loop; +} + +void +openup(char *stem, int dflag, int vflag, int ytab, char *ytabc) +{ + char buf[256]; + + if(vflag) { + sprint(buf, "%s.%s", stem, FILEU); + foutput = Bopen(buf, OWRITE); + if(foutput == 0) + error("cannot open %s", buf); + } + if(yydebug) { + sprint(buf, "%s.%s", stem, FILEDEBUG); + if((fdebug = Bopen(buf, OWRITE)) == 0) + error("can't open %s", buf); + } + if(dflag) { + sprint(buf, "%s.%s", stem, FILED); + fdefine = Bopen(buf, OWRITE); + if(fdefine == 0) + error("can't create %s", buf); + } + if(ytab == 0) + sprint(buf, "%s.%s", stem, OFILE); + else + strcpy(buf, ytabc); + ftable = Bopen(buf, OWRITE); + if(ftable == 0) + error("cannot open table file %s", buf); +} + +/* + * print the output for the states + */ +void +output(void) +{ + int i, k, c; + Wset *u, *v; + + Bprint(ftable, "short yyexca[] =\n{"); + if(fdebug) + Bprint(fdebug, "char* yystates[] =\n{\n"); + + /* output the stuff for state i */ + SLOOP(i) { + nolook = tystate[i]!=MUSTLOOKAHEAD; + closure(i); + + /* output actions */ + nolook = 1; + aryfil(temp1, ntokens+nnonter+1, 0); + WSLOOP(wsets, u) { + c = *(u->pitem); + if(c > 1 && c < NTBASE && temp1[c] == 0) { + WSLOOP(u, v) + if(c == *(v->pitem)) + putitem(v->pitem+1, (Lkset*)0); + temp1[c] = state(c); + } else + if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) + temp1[c+ntokens] = amem[indgo[i]+c]; + } + if(i == 1) + temp1[1] = ACCEPTCODE; + + /* now, we have the shifts; look at the reductions */ + lastred = 0; + WSLOOP(wsets, u) { + c = *u->pitem; + + /* reduction */ + if(c <= 0) { + lastred = -c; + TLOOP(k) + if(BIT(u->ws.lset, k)) { + if(temp1[k] == 0) + temp1[k] = c; + else + if(temp1[k] < 0) { /* reduce/reduce conflict */ + if(foutput) + Bprint(foutput, + "\n%d: reduce/reduce conflict" + " (red'ns %d and %d ) on %s", + i, -temp1[k], lastred, + symnam(k)); + if(-temp1[k] > lastred) + temp1[k] = -lastred; + zzrrconf++; + } else + /* potential shift/reduce conflict */ + precftn( lastred, k, i ); + } + } + } + wract(i); + } + + if(fdebug) + Bprint(fdebug, "};\n"); + Bprint(ftable, "};\n"); + Bprint(ftable, "#define YYNPROD %d\n", nprod); + Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE); + if(yydebug) + Bprint(ftable, "#define yydebug %s\n", yydebug); +} + +/* + * pack state i from temp1 into amem + */ +int +apack(int *p, int n) +{ + int *pp, *qq, *rr, off, *q, *r; + + /* we don't need to worry about checking because + * we will only look at entries known to be there... + * eliminate leading and trailing 0's + */ + + q = p+n; + for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) + ; + /* no actions */ + if(pp > q) + return 0; + p = pp; + + /* now, find a place for the elements from p to q, inclusive */ + r = &amem[ACTSIZE-1]; + for(rr = amem; rr <= r; rr++, off++) { + for(qq = rr, pp = p; pp <= q; pp++, qq++) + if(*pp != 0) + if(*pp != *qq && *qq != 0) + goto nextk; + + /* we have found an acceptable k */ + if(pkdebug && foutput != 0) + Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem)); + for(qq = rr, pp = p; pp <= q; pp++, qq++) + if(*pp) { + if(qq > r) + error("action table overflow"); + if(qq > memp) + memp = qq; + *qq = *pp; + } + if(pkdebug && foutput != 0) + for(pp = amem; pp <= memp; pp += 10) { + Bprint(foutput, "\t"); + for(qq = pp; qq <= pp+9; qq++) + Bprint(foutput, "%d ", *qq); + Bprint(foutput, "\n"); + } + return(off); + nextk:; + } + error("no space in action table"); + return 0; +} + +/* + * output the gotos for the nontermninals + */ +void +go2out(void) +{ + int i, j, k, best, count, cbest, times; + + /* mark begining of gotos */ + Bprint(ftemp, "$\n"); + for(i = 1; i <= nnonter; i++) { + go2gen(i); + + /* find the best one to make default */ + best = -1; + times = 0; + + /* is j the most frequent */ + for(j = 0; j <= nstate; j++) { + if(tystate[j] == 0) + continue; + if(tystate[j] == best) + continue; + + /* is tystate[j] the most frequent */ + count = 0; + cbest = tystate[j]; + for(k = j; k <= nstate; k++) + if(tystate[k] == cbest) + count++; + if(count > times) { + best = cbest; + times = count; + } + } + + /* best is now the default entry */ + zzgobest += times-1; + for(j = 0; j <= nstate; j++) + if(tystate[j] != 0 && tystate[j] != best) { + Bprint(ftemp, "%d,%d,", j, tystate[j]); + zzgoent++; + } + + /* now, the default */ + if(best == -1) + best = 0; + zzgoent++; + Bprint(ftemp, "%d\n", best); + } +} + +/* + * output the gotos for nonterminal c + */ +void +go2gen(int c) +{ + int i, work, cc; + Item *p, *q; + + + /* first, find nonterminals with gotos on c */ + aryfil(temp1, nnonter+1, 0); + temp1[c] = 1; + work = 1; + while(work) { + work = 0; + PLOOP(0, i) + + /* cc is a nonterminal */ + if((cc=prdptr[i][1]-NTBASE) >= 0) + /* cc has a goto on c */ + if(temp1[cc] != 0) { + + /* thus, the left side of production i does too */ + cc = *prdptr[i]-NTBASE; + if(temp1[cc] == 0) { + work = 1; + temp1[cc] = 1; + } + } + } + + /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ + if(g2debug && foutput != 0) { + Bprint(foutput, "%s: gotos on ", nontrst[c].name); + NTLOOP(i) + if(temp1[i]) + Bprint(foutput, "%s ", nontrst[i].name); + Bprint(foutput, "\n"); + } + + /* now, go through and put gotos into tystate */ + aryfil(tystate, nstate, 0); + SLOOP(i) + ITMLOOP(i, p, q) + if((cc = *p->pitem) >= NTBASE) + /* goto on c is possible */ + if(temp1[cc-NTBASE]) { + tystate[i] = amem[indgo[i]+c]; + break; + } +} + +/* + * decide a shift/reduce conflict by precedence. + * r is a rule number, t a token number + * the conflict is in state s + * temp1[t] is changed to reflect the action + */ +void +precftn(int r, int t, int s) +{ + int lp, lt, action; + + lp = levprd[r]; + lt = toklev[t]; + if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { + + /* conflict */ + if(foutput != 0) + Bprint(foutput, + "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s", + s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t)); + zzsrconf++; + return; + } + if(PLEVEL(lt) == PLEVEL(lp)) + action = ASSOC(lt); + else + if(PLEVEL(lt) > PLEVEL(lp)) + action = RASC; /* shift */ + else + action = LASC; /* reduce */ + switch(action) { + case BASC: /* error action */ + temp1[t] = ERRCODE; + break; + case LASC: /* reduce */ + temp1[t] = -r; + break; + } +} + +/* + * output state i + * temp1 has the actions, lastred the default + */ +void +wract(int i) +{ + int p, p0, p1, ntimes, tred, count, j, flag; + + /* find the best choice for lastred */ + lastred = 0; + ntimes = 0; + TLOOP(j) { + if(temp1[j] >= 0) + continue; + if(temp1[j]+lastred == 0) + continue; + /* count the number of appearances of temp1[j] */ + count = 0; + tred = -temp1[j]; + levprd[tred] |= REDFLAG; + TLOOP(p) + if(temp1[p]+tred == 0) + count++; + if(count > ntimes) { + lastred = tred; + ntimes = count; + } + } + + /* + * for error recovery, arrange that, if there is a shift on the + * error recovery token, `error', that the default be the error action + */ + if(temp1[2] > 0) + lastred = 0; + + /* clear out entries in temp1 which equal lastred */ + TLOOP(p) + if(temp1[p]+lastred == 0) + temp1[p] = 0; + + wrstate(i); + defact[i] = lastred; + flag = 0; + TLOOP(p0) + if((p1=temp1[p0]) != 0) { + if(p1 < 0) { + p1 = -p1; + goto exc; + } + if(p1 == ACCEPTCODE) { + p1 = -1; + goto exc; + } + if(p1 == ERRCODE) { + p1 = 0; + exc: + if(flag++ == 0) + Bprint(ftable, "-1, %d,\n", i); + Bprint(ftable, "\t%d, %d,\n", p0, p1); + zzexcp++; + continue; + } + Bprint(ftemp, "%d,%d,", p0, p1); + zzacent++; + } + if(flag) { + defact[i] = -2; + Bprint(ftable, "\t-2, %d,\n", lastred); + } + Bprint(ftemp, "\n"); +} + +/* + * writes state i + */ +void +wrstate(int i) +{ + int j0, j1; + Item *pp, *qq; + Wset *u; + + if(fdebug) { + if(lastred) { + Bprint(fdebug, " 0, /*%d*/\n", i); + } else { + Bprint(fdebug, " \""); + ITMLOOP(i, pp, qq) + Bprint(fdebug, "%s\\n", writem(pp->pitem)); + if(tystate[i] == MUSTLOOKAHEAD) + WSLOOP(wsets + (pstate[i+1] - pstate[i]), u) + if(*u->pitem < 0) + Bprint(fdebug, "%s\\n", writem(u->pitem)); + Bprint(fdebug, "\", /*%d*/\n", i); + } + } + if(foutput == 0) + return; + Bprint(foutput, "\nstate %d\n", i); + ITMLOOP(i, pp, qq) + Bprint(foutput, "\t%s\n", writem(pp->pitem)); + if(tystate[i] == MUSTLOOKAHEAD) + /* print out empty productions in closure */ + WSLOOP(wsets+(pstate[i+1]-pstate[i]), u) + if(*u->pitem < 0) + Bprint(foutput, "\t%s\n", writem(u->pitem)); + + /* check for state equal to another */ + TLOOP(j0) + if((j1=temp1[j0]) != 0) { + Bprint(foutput, "\n\t%s ", symnam(j0)); + /* shift, error, or accept */ + if(j1 > 0) { + if(j1 == ACCEPTCODE) + Bprint(foutput, "accept"); + else + if(j1 == ERRCODE) + Bprint(foutput, "error"); + else + Bprint(foutput, "shift %d", j1); + } else + Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]); + } + + /* output the final production */ + if(lastred) + Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n", + lastred, rlines[lastred]); + else + Bprint(foutput, "\n\t. error\n\n"); + + /* now, output nonterminal actions */ + j1 = ntokens; + for(j0 = 1; j0 <= nnonter; j0++) { + j1++; + if(temp1[j1]) + Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]); + } +} + +void +warray(char *s, int *v, int n) +{ + int i; + + Bprint(ftable, "short %s[] =\n{", s); + for(i=0;;) { + if(i%10 == 0) + Bprint(ftable, "\n"); + Bprint(ftable, "%4d", v[i]); + i++; + if(i >= n) { + Bprint(ftable, "\n};\n"); + break; + } + Bprint(ftable, ","); + } +} + +/* + * in order to free up the mem and amem arrays for the optimizer, + * and still be able to output yyr1, etc., after the sizes of + * the action array is known, we hide the nonterminals + * derived by productions in levprd. + */ + +void +hideprod(void) +{ + int i, j; + + j = 0; + levprd[0] = 0; + PLOOP(1, i) { + if(!(levprd[i] & REDFLAG)) { + j++; + if(foutput != 0) + Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i])); + } + levprd[i] = *prdptr[i] - NTBASE; + } + if(j) + print("%d rules never reduced\n", j); +} + +void +callopt(void) +{ + int i, *p, j, k, *q; + + /* read the arrays from tempfile and set parameters */ + finput = Bopen(tempname, OREAD); + if(finput == 0) + error("optimizer cannot open tempfile"); + + pgo[0] = 0; + temp1[0] = 0; + nstate = 0; + nnonter = 0; + for(;;) { + switch(gtnm()) { + case '\n': + nstate++; + pmem--; + temp1[nstate] = pmem - mem0; + case ',': + continue; + case '$': + break; + default: + error("bad tempfile %s", tempname); + } + break; + } + + pmem--; + temp1[nstate] = yypgo[0] = pmem - mem0; + for(;;) { + switch(gtnm()) { + case '\n': + nnonter++; + yypgo[nnonter] = pmem-mem0; + case ',': + continue; + case -1: + break; + default: + error("bad tempfile"); + } + break; + } + pmem--; + yypgo[nnonter--] = pmem - mem0; + for(i = 0; i < nstate; i++) { + k = 32000; + j = 0; + q = mem0 + temp1[i+1]; + for(p = mem0 + temp1[i]; p < q ; p += 2) { + if(*p > j) + j = *p; + if(*p < k) + k = *p; + } + /* nontrivial situation */ + if(k <= j) { + /* j is now the range */ +/* j -= k; *//* call scj */ + if(k > maxoff) + maxoff = k; + } + tystate[i] = (temp1[i+1]-temp1[i]) + 2*j; + if(j > maxspr) + maxspr = j; + } + + /* initialize ggreed table */ + for(i = 1; i <= nnonter; i++) { + ggreed[i] = 1; + j = 0; + + /* minimum entry index is always 0 */ + q = mem0 + yypgo[i+1] - 1; + for(p = mem0+yypgo[i]; p < q ; p += 2) { + ggreed[i] += 2; + if(*p > j) + j = *p; + } + ggreed[i] = ggreed[i] + 2*j; + if(j > maxoff) + maxoff = j; + } + + /* now, prepare to put the shift actions into the amem array */ + for(i = 0; i < ACTSIZE; i++) + amem[i] = 0; + maxa = amem; + for(i = 0; i < nstate; i++) { + if(tystate[i] == 0 && adb > 1) + Bprint(ftable, "State %d: null\n", i); + indgo[i] = YYFLAG1; + } + while((i = nxti()) != NOMORE) + if(i >= 0) + stin(i); + else + gin(-i); + + /* print amem array */ + if(adb > 2 ) + for(p = amem; p <= maxa; p += 10) { + Bprint(ftable, "%4d ", (int)(p-amem)); + for(i = 0; i < 10; ++i) + Bprint(ftable, "%4d ", p[i]); + Bprint(ftable, "\n"); + } + + /* write out the output appropriate to the language */ + aoutput(); + osummary(); + ZAPFILE(tempname); +} + +void +gin(int i) +{ + int *p, *r, *s, *q1, *q2; + + /* enter gotos on nonterminal i into array amem */ + ggreed[i] = 0; + + q2 = mem0+ yypgo[i+1] - 1; + q1 = mem0 + yypgo[i]; + + /* now, find amem place for it */ + for(p = amem; p < &amem[ACTSIZE]; p++) { + if(*p) + continue; + for(r = q1; r < q2; r += 2) { + s = p + *r + 1; + if(*s) + goto nextgp; + if(s > maxa) + if((maxa = s) > &amem[ACTSIZE]) + error("a array overflow"); + } + /* we have found amem spot */ + *p = *q2; + if(p > maxa) + if((maxa = p) > &amem[ACTSIZE]) + error("a array overflow"); + for(r = q1; r < q2; r += 2) { + s = p + *r + 1; + *s = r[1]; + } + pgo[i] = p-amem; + if(adb > 1) + Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]); + return; + + nextgp:; + } + error("cannot place goto %d\n", i); +} + +void +stin(int i) +{ + int *r, *s, n, flag, j, *q1, *q2; + + tystate[i] = 0; + + /* enter state i into the amem array */ + q2 = mem0+temp1[i+1]; + q1 = mem0+temp1[i]; + /* find an acceptable place */ + for(n = -maxoff; n < ACTSIZE; n++) { + flag = 0; + for(r = q1; r < q2; r += 2) { + if((s = *r + n + amem) < amem) + goto nextn; + if(*s == 0) + flag++; + else + if(*s != r[1]) + goto nextn; + } + + /* check that the position equals another only if the states are identical */ + for(j=0; j 1) + Bprint(ftable, + "State %d: entry at %d equals state %d\n", + i, n, j); + return; + } + + /* we have some disagreement */ + goto nextn; + } + } + + for(r = q1; r < q2; r += 2) { + if((s = *r+n+amem) >= &amem[ACTSIZE]) + error("out of space in optimizer a array"); + if(s > maxa) + maxa = s; + if(*s != 0 && *s != r[1]) + error("clobber of a array, pos'n %d, by %d", s-amem, r[1]); + *s = r[1]; + } + indgo[i] = n; + if(adb > 1) + Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]); + return; + nextn:; + } + error("Error; failure to place state %d\n", i); +} + +/* + * finds the next i + */ +int +nxti(void) +{ + int i, max, maxi; + + max = 0; + maxi = 0; + for(i = 1; i <= nnonter; i++) + if(ggreed[i] >= max) { + max = ggreed[i]; + maxi = -i; + } + for(i = 0; i < nstate; ++i) + if(tystate[i] >= max) { + max = tystate[i]; + maxi = i; + } + if(nxdb) + Bprint(ftable, "nxti = %d, max = %d\n", maxi, max); + if(max == 0) + return NOMORE; + return maxi; +} + +/* + * write summary + */ +void +osummary(void) +{ + + int i, *p; + + if(foutput == 0) + return; + i = 0; + for(p = maxa; p >= amem; p--) + if(*p == 0) + i++; + + Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", + (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE); + Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i); + Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); +} + +/* + * this version is for C + * write out the optimized parser + */ +void +aoutput(void) +{ + Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1)); + arout("yyact", amem, (maxa-amem)+1); + arout("yypact", indgo, nstate); + arout("yypgo", pgo, nnonter+1); +} + +void +arout(char *s, int *v, int n) +{ + int i; + + Bprint(ftable, "short %s[] =\n{", s); + for(i = 0; i < n;) { + if(i%10 == 0) + Bprint(ftable, "\n"); + Bprint(ftable, "%4d", v[i]); + i++; + if(i == n) + Bprint(ftable, "\n};\n"); + else + Bprint(ftable, ","); + } +} + +/* + * read and convert an integer from the standard input + * return the terminating character + * blanks, tabs, and newlines are ignored + */ +int +gtnm(void) +{ + int sign, val, c; + + sign = 0; + val = 0; + while((c=Bgetrune(finput)) != Beof) { + if(isdigit(c)) { + val = val*10 + c-'0'; + continue; + } + if(c == '-') { + sign = 1; + continue; + } + break; + } + if(sign) + val = -val; + *pmem++ = val; + if(pmem >= &mem0[MEMSIZE]) + error("out of space"); + return c; +} -- cgit v1.2.3