diff --git a/util/cmisc/.distr b/util/cmisc/.distr index 7a0e8a63a..d4fa0ebb5 100644 --- a/util/cmisc/.distr +++ b/util/cmisc/.distr @@ -8,3 +8,5 @@ mkdep.1 mkdep.c prid.1 prid.c +tabgen.1 +tabgen.c diff --git a/util/cmisc/Makefile b/util/cmisc/Makefile index 039a6f60f..eff689334 100644 --- a/util/cmisc/Makefile +++ b/util/cmisc/Makefile @@ -3,33 +3,35 @@ EMBIN = $(EM)/bin EMMAN = $(EM)/man CFLAGS = -O LDFLAGS = -i -TARGETS = mkdep cid cclash prid -all: mkdep cid cclash prid +all: mkdep cid cclash prid tabgen install: all - rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid - cp mkdep cid cclash prid $(EMBIN) - rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 $(EMMAN)/prid.1 - cp mkdep cid cclash prid $(EMBIN) - cp mkdep.1 cid.1 cclash.1 prid.1 $(EMMAN) + rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid \ + $(EMBIN)/tabgen + cp mkdep cid cclash prid tabgen $(EMBIN) + rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 \ + $(EMMAN)/prid.1 $(EMMAN)/tabgen.1 + cp mkdep.1 cid.1 cclash.1 prid.1 tabgen.1 $(EMMAN) cmp: all -cmp mkdep $(EMBIN)/mkdep -cmp cid $(EMBIN)/cid -cmp cclash $(EMBIN)/cclash -cmp prid $(EMBIN)/prid + -cmp tabgen $(EMBIN)/tabgen -cmp mkdep.1 $(EMMAN)/mkdep.1 -cmp cid.1 $(EMMAN)/cid.1 -cmp cclash.1 $(EMMAN)/cclash.1 -cmp prid.1 $(EMMAN)/prid.1 + -cmp tabgen.1 $(EMMAN)/tabgen.1 clean: - rm -f *.o mkdep cid cclash prid + rm -f *.o mkdep cid cclash prid tabgen pr: @pr `pwd`/Makefile `pwd`/mkdep.c `pwd`/cclash.c `pwd`/cid.c \ - `pwd`/prid.c `pwd`/GCIPM.c + `pwd`/prid.c `pwd`/GCIPM.c `pwd`/tabgen.c opr: make pr | opr @@ -37,6 +39,9 @@ opr: mkdep: mkdep.o $(CC) $(LDFLAGS) -o mkdep mkdep.o +tabgen: tabgen.o + $(CC) $(LDFLAGS) -o tabgen tabgen.o + cid: cid.o GCIPM.o $(CC) $(LDFLAGS) -o cid cid.o GCIPM.o diff --git a/util/cmisc/tabgen.1 b/util/cmisc/tabgen.1 new file mode 100644 index 000000000..d709a214b --- /dev/null +++ b/util/cmisc/tabgen.1 @@ -0,0 +1,116 @@ +.TH TABGEN 1ACK +.ad +.SH NAME +tabgen \- table generator 
for C-programs +.SH SYNOPSIS +.B tabgen \fIarguments\fP +.SH DESCRIPTION +.I Tabgen +is a handy tool for generating tables for C-programs from a compact +description. The current version is only suitable for generating character +tables. The output is produced on standard output. +It works by maintaining an internal table of values, printing this table +when this is requested. +.PP +Each argument given to +.I tabgen +is either a command or a description. Descriptions are discussed first. +.PP +A description consists of a value (a string), directly followed by a colon, +directly followed by a list of indices for which the table to be generated +has this value. This list of indices must be in a certain \fBinputformat\fP, +characterized by a character. +Currently, there is only one inputformat, "c". In this format, the indices +are characters. There are two special characters: '\e' and '-'. The +'\e' behaves like in a C-string, and the '-' describes a range, unless +it starts the list of indices. +.PP +Some examples of descriptions: +.nf + STIDF:a-zA-Z_ + STSKIP:\er \et\e013\ef +.fi +.PP +These descriptions have the effect that the internal table values for +'a' through 'z', 'A' through 'Z', and '_' are set to STIDF, and that the +internal table values for carriage-return, space, tab, vertical-tab, and +form-feed are set to STSKIP. +.PP +A command is introduced by a special character. On the command line, +a command is introduced by a '-'. The following commands are +recognized: +.IP I\fIchar\fP +switch to a different input format. This command is only there for future +extensions. +.IP f\fIfile\fP +read input from the file \fIfile\fP. In a file, each line is an argument +to \fItabgen\fP. Each line is either a command or a description. In a file, +commands are introduced by a '%'. +.IP F\fIformat\fP +Values are printed in a printf format. The default value for this format +is \fB"%s,\en"\fP. This can be changed with this command. 
+.IP T\fItext\fP +Print \fItext\fP literally at this point. +.IP p +Print the table as it is built at this point. +.IP C +Clear the table. This sets all internal table values to 0. +.IP i\fIstr\fP +Initialize all internal table values to \fIstr\fP. If \fIstr\fP is not +given, this command is equivalent to the C command. +.IP S\fInum\fP +Set the table size to \fInum\fP entries. The default size is 128. +.IP H\fIfilename\fP +Create tables which can be indexed by the full range of characters, +rather than 0..127. As this depends on the implementation of 'char' +in C (signed or unsigned), this also generates a file \fIfilename\fP, +with a #define for the constant "CharOffset". The generated tables can +be indexed by first adding "CharOffset" to the base of the table. +If \fIfilename\fP is not given, "charoffset.h" is used. +.SH "AN EXAMPLE" +.PP +The next example is a part of the \fItabgen\fP description of the +character tables used by the lexical analyser of the ACK Modula-2 compiler. +This description resides in a file called char.tab. +.I +Tabgen +is called as follows: +.nf + tabgen -fchar.tab > char.c +.fi +.PP +The description as given here generates 2 tables: one indicating a class +according to which token a character can be a start of, and one indicating +whether a character may occur in an identifier. +.nf + +% Comments are introduced with space or tab after the % +%S129 +%F %s, +% CHARACTER CLASSES +%iSTGARB +STSKIP: \et\e013\e014\e015 +STNL:\e012 +STSIMP:-#&()*+,/;=[]^{|}~ +STCOMP:.:<> +STIDF:a-zA-Z +STSTR:"' +STNUM:0-9 +STEOI:\e200 +%T#include "class.h" +% class.h contains #defines for STSKIP, STNL, ... +%Tchar tkclass[] = { +%p +%T}; +% INIDF +%C +1:a-zA-Z0-9 +%Tchar inidf[] = { +%F %s, +%p +%T}; +.fi +.SH BUGS +.PP +.I Tabgen +assumes that characters are 8 bits wide. 
/*
 * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
 * See the copyright notice in the ACK home directory, in the file "Copyright".
 */
/*
	chtabgen - character table generator

	Author: Erik Baalbergen (..tjalk!erikb)
	Many mods by Ceriel Jacobs
*/

/*
	Every argument (from the command line, or one per line from a file
	read with the 'f' command) is either a command, introduced by COMCOM
	on the command line or FILECOM in a file, or a description of the
	form "value:indices".  Descriptions fill the internal Table[]; the
	'p' command prints it on standard output.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef NORCSID
static char *RcsId = "$Header$";
#endif

#define MAXBUF	256	/* size of the output format buffer */
#define MAXTAB	10000	/* maximum number of table entries */
#define COMCOM	'-'	/* introduces a command on the command line */
#define FILECOM	'%'	/* introduces a command in an input file */

int InputForm = 'c';	/* default input format (and, currently, only one) */
char OutputForm[MAXBUF] = "%s,\n";
			/* format for spitting out a string */
char *Table[MAXTAB];	/* the table being built; 0 means "not set" */
char *ProgCall;		/* callname of this program */
int signedch = 0;	/* set if characters are signed */
int TabSize = 128;	/* default size of generated table */
char *InitialValue;	/* initial value of all table entries */
#define CHROFFSETFILE	"charoffset.h"
char *chroffsetfile = 0;	/* set by the 'H' command */

void MkCharIndex(void);
char *Salloc(const char *s);
void option(char *str);
void InitTable(const char *ival);
void PrintTable(void);
int process(char *str, int format);
int c_proc(char *str, char *Name);
int setval(int ch, char *nm);
int quoted(char **pstr);
char *getln(char *s, int n, FILE *fp);
void DoFile(const char *name);

/*	Handle each command line argument in order, then write the
	character offset file if the 'H' command asked for one.
	Exits with status 1 as soon as a description is rejected.
*/
int
main(int argc, char *argv[])
{
	int i;

	if (((char) -1) < 0) signedch = 1;

	/* argv[0] may legally be absent; keep diagnostics printable */
	ProgCall = (argc > 0 && argv[0] != NULL) ? argv[0] : "tabgen";

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == COMCOM) {
			option(argv[i]);
		}
		else if (! process(argv[i], InputForm)) {
			exit(1);
		}
	}
	if (chroffsetfile) MkCharIndex();
	return 0;
}

/*	Write the file named by chroffsetfile, containing a #define for
	CharOffset: 128 when plain char is signed, 0 otherwise.
	Exits with status 1 when the file cannot be opened or written.
*/
void
MkCharIndex(void)
{
	/* Assumption: 8 bit bytes, ASCII character set */
	FILE *fp;
	int status;

	if ((fp = fopen(chroffsetfile, "w")) == NULL) {
		fprintf(stderr, "%s: cannot write file %s\n", ProgCall, chroffsetfile);
		exit(1);
	}
	status = fputs(signedch ? "#define CharOffset 128\n"
				: "#define CharOffset 0\n", fp);
	/* fclose flushes, so a full disk may only show up here */
	if (fclose(fp) == EOF || status == EOF) {
		fprintf(stderr, "%s: cannot write file %s\n", ProgCall, chroffsetfile);
		exit(1);
	}
}

/*	Return a freshly allocated copy of the string s.
	Never returns 0: exits with status 1 when out of memory.
*/
char *
Salloc(const char *s)
{
	char *ns = malloc(strlen(s) + 1);

	if (ns == NULL) {
		fprintf(stderr, "%s: out of memory\n", ProgCall);
		exit(1);
	}
	strcpy(ns, s);
	return ns;
}

/*	Execute one command.  str points at the introducing character;
	the command letter follows it directly, and the rest of the string
	is the command's argument.
*/
void
option(char *str)
{
	/*	note that *str indicates the source of the option:
		either COMCOM (from command line) or FILECOM (from a file).
	*/
	switch (*++str) {

	case ' ':	/* comment */
	case '\t':
	case '\0':
		break;
	case 'I':	/* for future extension */
		InputForm = *++str;
		break;
	case 'f':	/* input from file ... */
		if (*++str == '\0') {
			fprintf(stderr, "%s: -f: name expected\n", ProgCall);
			exit(1);
		}
		DoFile(str);
		break;
	case 'F':	/* new output format string */
		++str;
		/* room is needed for the appended newline and the '\0' */
		if (strlen(str) + 2 > sizeof(OutputForm)) {
			fprintf(stderr, "%s: -F: format too long\n", ProgCall);
			exit(1);
		}
		sprintf(OutputForm, "%s\n", str);
		break;
	case 'T':	/* insert text literally */
		printf("%s\n", ++str);
		break;
	case 'p':	/* print table */
		PrintTable();
		break;
	case 'C':	/* clear table */
		InitTable((char *)0);
		break;
	case 'i':	/* initialize table with given value */
		if (*++str == '\0') {
			InitTable((char *)0);
		}
		else	InitTable(str);
		break;
	case 'H':	/* create include file for character offset,
			   and create tables which can be indexed by the
			   full range of characters, rather than 0..127,
			   by adding "CharOffset" to the base.
			*/
		if (*++str == '\0') {
			chroffsetfile = CHROFFSETFILE;
		}
		else {
			/* str already points at the name.  Copy it: the
			   name is used after all input has been read, and
			   str may live in DoFile's line buffer.
			*/
			chroffsetfile = Salloc(str);
		}
		break;
	case 'S':	/* set the table size */
		{
		long size = strtol(++str, (char **)0, 10);

		if (size <= 0 || size > MAXTAB) {
			fprintf(stderr,
				"%s: -S: table size must be between 1 and %d\n",
				ProgCall, MAXTAB);
		}
		else {
			TabSize = (int) size;
		}
		break;
		}
	default:
		fprintf(stderr, "%s: bad option -%s\n", ProgCall, str);
	}
}

/*	Clear the first TabSize table entries and make ival the value
	printed for entries that are not set.  A null ival means that "0"
	is printed for them.
*/
void
InitTable(const char *ival)
{
	int i;
	char *fresh = ival ? Salloc(ival) : (char *)0;

	for (i = 0; i < TabSize; i++) {
		Table[i] = 0;
	}
	/* InitialValue only ever holds a Salloc'ed string or 0 */
	free(InitialValue);
	InitialValue = fresh;
}

/*	Print the first TabSize table entries on standard output, each one
	through OutputForm.  Entries that were never set are printed as the
	initial value, or as "0" when there is none.
*/
void
PrintTable(void)
{
	int i;
	const char *val;

	for (i = 0; i < TabSize; i++) {
		if (Table[i]) {
			val = Table[i];
		}
		else if (InitialValue) {
			val = InitialValue;
		}
		else {
			val = "0";
		}
		/* NOTE(review): OutputForm comes from the 'F' command and
		   is used as the printf format on purpose; it must contain
		   exactly one %s conversion.
		*/
		printf(OutputForm, val);
	}
}

/*	Process one description "value:indices".  A backslash in the value
	part quotes the next character, so a value may contain a ':'.
	The value is unquoted in place, overwriting the start of str.
	Returns 1 on success, 0 (after a diagnostic) on failure.
*/
int
process(char *str, int format)
{
	char *cstr = str;
	char *Name = cstr;	/* overwrite original string! */
	int found;

	/* strip off the entry name
	*/
	while (*str && *str != ':') {
		/* a backslash at the very end has nothing to quote */
		if (*str == '\\' && str[1] != '\0') {
			++str;
		}
		*cstr++ = *str++;
	}

	/* test before terminating: cstr and str may be the same place */
	found = (*str == ':');
	*cstr = '\0';
	if (! found) {
		fprintf(stderr, "%s: bad specification: \"%s\", ignored\n",
			ProgCall, Name);
		return 0;
	}
	str++;

	switch (format) {

	case 'c':
		return c_proc(str, Name);
	default:
		fprintf(stderr, "%s: bad input format\n", ProgCall);
	}
	return 0;
}

/*	Handle an index list in input format 'c': set the table entry of
	every character in str to a copy of Name.  "x-y" denotes a range,
	unless the '-' starts the list; a backslash starts an escape, see
	quoted().
	Returns 1 on success, 0 (after a diagnostic) on failure.
*/
int
c_proc(char *str, char *Name)
{
	int ch, ch2;
	char *name = Salloc(Name);

	while (*str) {
		if (*str == '\\')	{
			ch = quoted(&str);
		}
		else	{
			/* mask like quoted() does, so that 8-bit characters
			   do not become negative where char is signed
			*/
			ch = *str++ & 0377;
		}
		if (*str == '-') {
			if (*++str == '\\')	{
				ch2 = quoted(&str);
			}
			else if (*str != '\0') {
				ch2 = *str++ & 0377;
			}
			else {
				/* no end of range: leave str at the '\0' */
				ch2 = 0;
			}
			if (ch > ch2) {
				fprintf(stderr, "%s: bad range\n", ProgCall);
				return 0;
			}
			while (ch <= ch2) {
				if (! setval(ch, name)) return 0;
				ch++;
			}
		}
		else	{
			if (! setval(ch, name)) return 0;
		}
	}
	if (chroffsetfile) Table[256] = Table[0];
	return 1;
}

/*	Table index for character code X.  When an offset file is wanted
	and characters are signed, the two halves of the range 0..255 are
	swapped.
*/
#define ind(X)	(chroffsetfile && signedch ? \
			((X) >= 128 ? (X) - 128 : (X) + 128) : (X))

/*	Set the table entry for character code ch to nm, warning when the
	entry was already set.
	Returns 1 on success, 0 when ch is not below the table size.
*/
int
setval(int ch, char *nm)
{
	char **p;

	/* check first: the entry's address may only be formed when ch
	   is known to be in range
	*/
	if (ch < 0 || ch >= TabSize) {
		fprintf(stderr, "Illegal index: %d\n", ch);
		return 0;
	}
	p = &Table[ind(ch)];
	if (*p) {
		fprintf(stderr, "Warning: redefinition of index %d\n", ch);
	}
	*p = nm;
	return 1;
}

/*	Decode the escape sequence at *pstr, which points at a backslash,
	and advance *pstr past it.  Recognized are one to three octal
	digits and the letters n, t, b, r and f; any other character stands
	for itself.  A backslash that ends the string stands for itself.
	Returns the character code, in the range 0..255.
*/
int
quoted(char **pstr)
{
	int ch;
	int i;
	char *str = *pstr + 1;	/* skip the backslash */

	if (*str >= '0' && *str <= '7') {
		ch = 0;
		for (i = 0; i < 3 && *str >= '0' && *str <= '7'; i++) {
			ch = 8 * ch + (*str++ - '0');
		}
	}
	else if (*str == '\0') {
		/* do not step over the terminator */
		ch = '\\';
	}
	else {
		switch (*str) {

		case 'n':
			ch = '\n';
			break;
		case 't':
			ch = '\t';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'r':
			ch = '\r';
			break;
		case 'f':
			ch = '\f';
			break;
		default :
			/* the quoted character itself, e.g. \\ or \- */
			ch = *str;
		}
		str++;
	}
	*pstr = str;
	return ch & 0377;
}

/*	Read one line from fp into s, which has room for n characters, and
	drop the newline.  A line that does not fit is cut off after n - 1
	characters, with a warning; the rest of it is skipped.  A last line
	without newline is returned as well.
	Returns s, or NULL when nothing is left to read.
	(Named getln rather than getline: POSIX <stdio.h> declares a
	different function called getline.)
*/
char *
getln(char *s, int n, FILE *fp)
{
	int c;
	int truncated = 0;
	char *str = s;

	if (n <= 0) {
		return NULL;
	}
	while ((c = getc(fp)) != EOF && c != '\n') {
		if (str < s + n - 1) {
			*str++ = c;
		}
		else {
			truncated = 1;
		}
	}
	if (c == EOF && str == s && ! truncated) {
		return NULL;
	}
	*str = '\0';
	if (truncated) {
		fprintf(stderr, "%s: input line too long, truncated\n",
			ProgCall);
	}
	return s;
}

#define BUFSIZE 1024

/*	Read the file called name, treating each line as one argument:
	lines starting with FILECOM are commands, all other lines are
	descriptions.
	Exits with status 1 when the file cannot be opened or a description
	is rejected.
*/
void
DoFile(const char *name)
{
	char text[BUFSIZE];
	FILE *fp;

	if ((fp = fopen(name, "r")) == NULL) {
		fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name);
		exit(1);
	}
	while (getln(text, BUFSIZE, fp) != NULL) {
		if (text[0] == FILECOM) {
			option(text);
		}
		else {
			if (! process(text, InputForm)) {
				exit(1);
			}
		}
	}
	fclose(fp);
}