Added tabgen + manual

This commit is contained in:
ceriel 1988-11-16 09:37:04 +00:00
parent f1245e2e00
commit fda83cb06d
4 changed files with 501 additions and 9 deletions

View file

@ -8,3 +8,5 @@ mkdep.1
mkdep.c
prid.1
prid.c
tabgen.1
tabgen.c

View file

@ -3,33 +3,35 @@ EMBIN = $(EM)/bin
EMMAN = $(EM)/man
CFLAGS = -O
LDFLAGS = -i
TARGETS = mkdep cid cclash prid
all: mkdep cid cclash prid
all: mkdep cid cclash prid tabgen
install: all
rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid
cp mkdep cid cclash prid $(EMBIN)
rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 $(EMMAN)/prid.1
cp mkdep cid cclash prid $(EMBIN)
cp mkdep.1 cid.1 cclash.1 prid.1 $(EMMAN)
rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid \
$(EMBIN)/tabgen
cp mkdep cid cclash prid tabgen $(EMBIN)
rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 \
$(EMMAN)/prid.1 $(EMMAN)/tabgen.1
cp mkdep.1 cid.1 cclash.1 prid.1 tabgen.1 $(EMMAN)
cmp: all
-cmp mkdep $(EMBIN)/mkdep
-cmp cid $(EMBIN)/cid
-cmp cclash $(EMBIN)/cclash
-cmp prid $(EMBIN)/prid
-cmp tabgen $(EMBIN)/tabgen
-cmp mkdep.1 $(EMMAN)/mkdep.1
-cmp cid.1 $(EMMAN)/cid.1
-cmp cclash.1 $(EMMAN)/cclash.1
-cmp prid.1 $(EMMAN)/prid.1
-cmp tabgen.1 $(EMMAN)/tabgen.1
clean:
rm -f *.o mkdep cid cclash prid
rm -f *.o mkdep cid cclash prid tabgen
pr:
@pr `pwd`/Makefile `pwd`/mkdep.c `pwd`/cclash.c `pwd`/cid.c \
`pwd`/prid.c `pwd`/GCIPM.c
`pwd`/prid.c `pwd`/GCIPM.c `pwd`/tabgen.c
opr:
make pr | opr
@ -37,6 +39,9 @@ opr:
mkdep: mkdep.o
$(CC) $(LDFLAGS) -o mkdep mkdep.o
tabgen: tabgen.o
$(CC) $(LDFLAGS) -o tabgen tabgen.o
cid: cid.o GCIPM.o
$(CC) $(LDFLAGS) -o cid cid.o GCIPM.o

116
util/cmisc/tabgen.1 Normal file
View file

@ -0,0 +1,116 @@
.TH TABGEN 1ACK
.ad
.SH NAME
tabgen \- table generator for C-programs
.SH SYNOPSIS
.B tabgen \fIarguments\fP
.SH DESCRIPTION
.I Tabgen
is a handy tool for generating tables for C-programs from a compact
description. The current version is only suitable for generating character
tables. The output is produced on standard output.
It works by maintaining an internal table of values, printing this table
when this is requested.
.PP
Each argument given to
.I tabgen
is either a command or a description. Descriptions are discussed first.
.PP
A description consists of a value (a string), directly followed by a colon,
directly followed by a list of indices for which the table to be generated
has this value. This list of indices must be in a certain \fBinputformat\fP,
characterized by a character.
Currently, there is only one inputformat, "c". In this format, the indices
are characters. There are two special characters: '\e' and '-'. The
'\e' behaves like in a C-string, and the '-' describes a range, unless
it starts the list of indices.
.PP
Some examples of descriptions:
.nf
STIDF:a-zA-Z_
STSKIP:\er \et\e013\ef
.fi
.PP
These descriptions have the effect that the internal table values for
'a' through 'z', 'A' through 'Z', and '_' are set to STIDF, and that the
internal table values for carriage-return, space, tab, vertical-tab, and
form-feed are set to STSKIP.
.PP
A command is introduced by a special character. On the command line,
a command is introduced by a '-'. The following commands are
recognized:
.IP I\fIchar\fP
switch to a different input format. This command is only there for future
extensions.
.IP f\fIfile\fP
read input from the file \fIfile\fP. In a file, each line is an argument
to \fItabgen\fP. Each line is either a command or a description. In a file,
commands are introduced by a '%'.
.IP F\fIformat\fP
Values are printed in a printf format. The default value for this format
is \fB"%s,\en"\fP. This can be changed with this command.
.IP T\fItext\fP
Print \fItext\fP literally at this point.
.IP p
Print the table as it is built at this point.
.IP C
Clear the table. This sets all internal table values to 0.
.IP i\fIstr\fP
Initialize all internal table values to \fIstr\fP. If \fIstr\fP is not
given, this command is equivalent to the C command.
.IP S\fInum\fP
Set the table size to \fInum\fP entries. The default size is 128.
.IP H\fIfilename\fP
Create tables which can be indexed by the full range of characters,
rather than 0..127. As this depends on the implementation of 'char'
in C (signed or unsigned), this also generates a file \fIfilename\fP,
with a #define for the constant "CharOffset". The generated tables can
be indexed by first adding "CharOffset" to the base of the table.
If \fIfilename\fP is not given, "charoffset.h" is used.
.SH "AN EXAMPLE"
.PP
The next example is a part of the \fItabgen\fP description of the
character tables used by the lexical analyser of the ACK Modula-2 compiler.
This description resides in a file called char.tab.
.I
Tabgen
is called as follows:
.nf
tabgen -fchar.tab > char.c
.fi
.PP
The description as given here generates 2 tables: one indicating a class
according to which token a character can be a start of, and one indicating
whether a character may occur in an identifier.
.nf
% Comments are introduced with space or tab after the %
%S129
%F %s,
% CHARACTER CLASSES
%iSTGARB
STSKIP: \et\e013\e014\e015
STNL:\e012
STSIMP:-#&()*+,/;=[]^{|}~
STCOMP:.:<>
STIDF:a-zA-Z
STSTR:"'
STNUM:0-9
STEOI:\e200
%T#include "class.h"
% class.h contains #defines for STSKIP, STNL, ...
%Tchar tkclass[] = {
%p
%T};
% INIDF
%C
1:a-zA-Z0-9
%Tchar inidf[] = {
%F %s,
%p
%T};
.fi
.SH BUGS
.PP
.I Tabgen
assumes that characters are 8 bits wide.

369
util/cmisc/tabgen.c Normal file
View file

@ -0,0 +1,369 @@
/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*/
/*
chtabgen - character table generator
Author: Erik Baalbergen (..tjalk!erikb)
Many mods by Ceriel Jacobs
*/
#include <stdio.h>
#ifndef NORCSID
static char *RcsId = "$Header$";
#endif
/* capacity of OutputForm, terminator included */
#define MAXBUF 256
/* capacity of Table; also the largest size the S command accepts */
#define MAXTAB 10000
/* character that introduces a command on the command line */
#define COMCOM '-'
/* character that introduces a command inside an input file */
#define FILECOM '%'
int InputForm = 'c'; /* default input format (and, currently, only one) */
char OutputForm[MAXBUF] = "%s,\n";
/* format for spitting out a string */
/* the table being built: one value per index, null while an index is unset */
char *Table[MAXTAB];
char *ProgCall; /* callname of this program */
int signedch = 0; /* set if characters are signed */
int TabSize = 128; /* default size of generated table */
char *InitialValue; /* initial value of all table entries */
/* file name used by the H command when none is given */
#define CHROFFSETFILE "charoffset.h"
/* name of the "CharOffset" include file; stays null until an H command
   is seen, and main() only writes the file when it is non-null
*/
char *chroffsetfile = 0;
extern char *malloc(), *strcpy();
/*
	Program entry. Every argument is handled in order: one that starts
	with COMCOM is passed to option(), anything else is a description
	and is passed to process(). The first description that process()
	rejects ends the run with exit status 1. When an H command was
	seen, the character offset file is written just before a
	successful exit.
*/
main(argc, argv)
	char *argv[];
{
	register char *arg;

	/* determine at run time whether plain chars are signed here */
	if (((char) -1) < 0) {
		signedch = 1;
	}
	ProgCall = argv[0];
	for (argv++, argc--; argc > 0; argc--) {
		arg = *argv++;
		if (*arg == COMCOM) {
			option(arg);
			continue;
		}
		if (! process(arg, InputForm)) {
			exit(1);
		}
	}
	if (chroffsetfile) {
		MkCharIndex();
	}
	exit(0);
}
/*
	Create the include file named by chroffsetfile. The file holds one
	line: a #define of "CharOffset", which is 128 when plain chars are
	signed on this machine and 0 when they are not. If the file cannot
	be opened for writing, a message is printed and the program exits
	with status 1.
*/
MkCharIndex()
{
	/* Assumption: 8 bit bytes, ASCII character set */
	FILE *out = fopen(chroffsetfile, "w");

	if (out == NULL) {
		fprintf(stderr, "%s: cannot write file %s\n", ProgCall, chroffsetfile);
		exit(1);
	}
	fputs(signedch ? "#define CharOffset 128\n" : "#define CharOffset 0\n",
		out);
	fclose(out);
}
/*
	Return a freshly allocated copy of the string "s". Running out of
	memory is fatal: a message is printed and the program exits with
	status 1, so the result is never a null pointer.
*/
char *
Salloc(s)
	char *s;
{
	char *copy;

	copy = malloc((unsigned)strlen(s) + 1);
	if (copy == 0) {
		fprintf(stderr, "%s: out of memory\n", ProgCall);
		exit(1);
	}
	strcpy(copy, s);
	return copy;
}
option(str)
char *str;
{
/* note that *str indicates the source of the option:
either COMCOM (from command line) or FILECOM (from a file).
*/
switch (*++str) {
case ' ': /* command */
case '\t':
case '\0':
break;
case 'I': /* for future extension */
InputForm = *++str;
break;
case 'f': /* input from file ... */
if (*++str == '\0') {
fprintf(stderr, "%s: -f: name expected\n", ProgCall);
exit(1);
}
DoFile(str);
break;
case 'F': /* new output format string */
sprintf(OutputForm, "%s\n", ++str);
break;
case 'T': /* insert text literally */
printf("%s\n", ++str);
break;
case 'p': /* print table */
PrintTable();
break;
case 'C': /* clear table */
InitTable((char *)0);
break;
case 'i': /* initialize table with given value */
if (*++str == '\0') {
InitTable((char *)0);
}
else InitTable(str);
break;
case 'H': /* create include file for character offset,
and create tables which can be indexed by the
full range of characters, rather than 0..127,
by adding "CharOffset" to the base.
*/
if (*++str == '\0') {
chroffsetfile = CHROFFSETFILE;
}
else chroffsetfile = ++str;
break;
case 'S':
{
register i = atoi(++str);
if (i <= 0 || i > MAXTAB) {
fprintf(stderr, "%s: size would exceed maximum\n",
ProgCall);
}
else {
TabSize = i;
}
break;
}
default:
fprintf(stderr, "%s: bad option -%s\n", ProgCall, str);
}
}
/*
	Forget every value stored in the first TabSize table entries and
	install a new initial value: a private copy of "ival", or none at
	all when "ival" is a null pointer.
*/
InitTable(ival)
	char *ival;
{
	register int slot = TabSize;

	while (--slot >= 0) {
		Table[slot] = 0;
	}
	if (ival) {
		InitialValue = Salloc(ival);
	}
	else {
		InitialValue = 0;
	}
}
/*
	Write the first TabSize table entries to standard output, each one
	formatted with OutputForm. An entry that was never set is printed
	as the initial value, or as "0" when there is no initial value.
*/
PrintTable()
{
	register int slot;
	register char *val;

	for (slot = 0; slot < TabSize; slot++) {
		val = Table[slot];
		if (! val) {
			val = InitialValue ? InitialValue : "0";
		}
		printf(OutputForm, val);
	}
}
/*
	Handle one description of the form "value:indices".

	The value is everything up to the first ':' that is not protected
	by a backslash; a backslash is removed and the character after it
	is taken literally. The value is compacted in place, so the
	caller's string is modified. The indices are handed to the
	routine for input format "format".

	Returns 1 on success and 0 on failure (no ':' found, unknown
	input format, or bad indices), after printing a message.
*/
int
process(str, format)
	char *str;
	int format;
{
	char *cstr = str;
	char *Name = cstr;	/* overwrite original string! */

	/* strip off the entry name
	*/
	while (*str && *str != ':') {
		/* Only drop the backslash when there is a character for
		   it to protect; otherwise the copy below would consume
		   the terminator and the scan would run off the string.
		*/
		if (*str == '\\' && str[1] != '\0') {
			++str;
		}
		*cstr++ = *str++;
	}
	if (*str != ':') {
		/* terminate the compacted text, so that the message does
		   not show left-over characters behind it
		*/
		*cstr = '\0';
		fprintf(stderr, "%s: bad specification: \"%s\", ignored\n",
			ProgCall, Name);
		return 0;
	}
	*cstr = '\0';
	str++;

	switch (format) {
	case 'c':
		return c_proc(str, Name);
	default:
		fprintf(stderr, "%s: bad input format\n", ProgCall);
	}
	return 0;
}
/*
	Enter the value "Name" into the table for every index listed in
	"str", which is in input format 'c': each character is an index,
	a backslash starts an escape (see quoted()), and "x-y" stands for
	all indices from x up to and including y.

	Returns 1 on success. Returns 0 after a message when a range is
	reversed or lacks its upper bound, or when setval() rejects an
	index.
*/
int
c_proc(str, Name)
	char *str;
	char *Name;
{
	int ch, ch2;
	int quoted();
	char *name = Salloc(Name);	/* table entries refer to this copy */

	while (*str) {
		if (*str == '\\') {
			ch = quoted(&str);
		}
		else {
			/* mask like quoted() does, so that a literal
			   character above 0177 gives the same index as its
			   octal escape, also where chars are signed
			*/
			ch = *str++ & 0377;
		}
		if (*str == '-') {
			if (*++str == '\\') {
				ch2 = quoted(&str);
			}
			else if (*str) {
				ch2 = *str++ & 0377;
			}
			else {
				/* '-' ends the list: there is no upper
				   bound, the range check below reports it
				   unless ch is 0
				*/
				ch2 = 0;
			}
			if (ch > ch2) {
				fprintf(stderr, "%s: bad range\n", ProgCall);
				return 0;
			}
			while (ch <= ch2) {
				if (! setval(ch, name)) return 0;
				ch++;
			}
		}
		else {
			if (! setval(ch, name)) return 0;
		}
	}
	/* NOTE(review): with an H command active, slot 256 is kept equal
	   to slot 0; the reason is not visible in this file - confirm
	   before changing.
	*/
	if (chroffsetfile) Table[256] = Table[0];
	return 1;
}
/*
	Table slot for character code X. Normally that is X itself; when
	an H command is active and chars are signed, the halves 0..127 and
	128..255 change places. X is evaluated more than once.
*/
#define ind(X)	(chroffsetfile && signedch ? \
			((X) >= 128 ? (X) - 128 : (X) + 128) : (X))

/*
	Store the value "nm" for index "ch".

	Returns 0, after a message, when "ch" is outside 0..TabSize-1;
	returns 1 otherwise. Giving a value to an index that already has
	one is allowed, but is reported with a warning.
*/
int
setval(ch, nm)
	int ch;
	char *nm;
{
	register char **p;

	if (ch < 0 || ch >= TabSize) {
		fprintf(stderr, "Illegal index: %d\n", ch);
		return 0;
	}
	/* only now is it safe to form the address of the slot */
	p = &Table[ind(ch)];
	if (*p) {
		fprintf(stderr, "Warning: redefinition of index %d\n", ch);
	}
	*p = nm;
	return 1;
}
/*
	Decode the escape sequence that starts at the backslash **pstr and
	advance *pstr to the first character behind the sequence.

	Recognized are:
	- one to three octal digits (0-7), giving the character code;
	- n, t, b, r and f, with their meaning in C;
	- any other character, which stands for itself (so "\\" is a
	  backslash and "\-" is a minus sign);
	- a backslash at the very end of the string, which stands for
	  itself; *pstr is then left at the terminator.

	Returns the character code, in the range 0..0377.
*/
int
quoted(pstr)
	char **pstr;
{
	register int ch;
	register int i;
	register char *str = *pstr;

	if ((*++str >= '0') && (*str <= '7')) {
		ch = 0;
		for (i = 0; i < 3; i++) {
			ch = 8 * ch + (*str - '0');
			if (*++str < '0' || *str > '7')
				break;
		}
	}
	else if (*str == '\0') {
		/* nothing follows the backslash: do not step over the
		   terminator, the caller's scan has to stop there
		*/
		ch = '\\';
	}
	else {
		/* remember the character itself before moving past it */
		switch (ch = *str++) {
		case 'n':
			ch = '\n';
			break;
		case 't':
			ch = '\t';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'r':
			ch = '\r';
			break;
		case 'f':
			ch = '\f';
			break;
		default :
			/* stands for itself: ch already holds it */
			break;
		}
	}
	*pstr = str;
	return ch & 0377;
}
/*
	Read one line from "fp" into "s", which has room for "n"
	characters. The newline is not stored and the result is always
	terminated. A line that does not fit is cut off after n-1
	characters; the rest of that line is read and thrown away, so
	that it is not mistaken for a line of its own. A last line that
	is not ended by a newline is returned like any other line.

	Returns "s", or NULL when the end of the file is reached before
	any character could be read (or when "n" is not positive).
*/
char *
getline(s, n, fp)
	char *s;
	int n;
	FILE *fp;
{
	register int c;
	register char *str = s;
	char *lim;

	if (n <= 0) {
		return NULL;
	}
	lim = s + n - 1;	/* the last place is kept for the terminator */
	while ((c = getc(fp)) != EOF && c != '\n') {
		if (str < lim) {
			*str++ = c;
		}
	}
	if (c == EOF && str == s) {
		return NULL;
	}
	*str = '\0';
	return s;
}
/* room for one input line, terminator included */
#define BUFSIZE 1024

/*
	Read the file "name" and handle each of its lines as an argument:
	a line that starts with FILECOM is a command for option(), any
	other line is a description for process(). The file is closed
	again when its end is reached.

	A file that cannot be opened and a description that process()
	rejects are fatal (exit status 1).
*/
DoFile(name)
	char *name;
{
	char text[BUFSIZE];
	FILE *fp;

	if ((fp = fopen(name, "r")) == NULL) {
		fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name);
		exit(1);
	}
	while (getline(text, BUFSIZE, fp) != NULL) {
		if (text[0] == FILECOM) {
			option(text);
		}
		else {
			if (! process(text, InputForm)) {
				exit(1);
			}
		}
	}
	/* files can name further files; do not keep streams open */
	fclose(fp);
}