Added tabgen + manual
This commit is contained in:
parent
f1245e2e00
commit
fda83cb06d
4 changed files with 501 additions and 9 deletions
|
@ -8,3 +8,5 @@ mkdep.1
|
|||
mkdep.c
|
||||
prid.1
|
||||
prid.c
|
||||
tabgen.1
|
||||
tabgen.c
|
||||
|
|
|
@ -3,33 +3,35 @@ EMBIN = $(EM)/bin
|
|||
EMMAN = $(EM)/man
|
||||
CFLAGS = -O
|
||||
LDFLAGS = -i
|
||||
TARGETS = mkdep cid cclash prid
|
||||
|
||||
all: mkdep cid cclash prid
|
||||
all: mkdep cid cclash prid tabgen
|
||||
|
||||
install: all
|
||||
rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid
|
||||
cp mkdep cid cclash prid $(EMBIN)
|
||||
rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 $(EMMAN)/prid.1
|
||||
cp mkdep cid cclash prid $(EMBIN)
|
||||
cp mkdep.1 cid.1 cclash.1 prid.1 $(EMMAN)
|
||||
rm -f $(EMBIN)/mkdep $(EMBIN)/cid $(EMBIN)/cclash $(EMBIN)/prid \
|
||||
$(EMBIN)/tabgen
|
||||
cp mkdep cid cclash prid tabgen $(EMBIN)
|
||||
rm -f $(EMMAN)/mkdep.1 $(EMMAN)/cid.1 $(EMMAN)/cclash.1 \
|
||||
$(EMMAN)/prid.1 $(EMMAN)/tabgen.1
|
||||
cp mkdep.1 cid.1 cclash.1 prid.1 tabgen.1 $(EMMAN)
|
||||
|
||||
cmp: all
|
||||
-cmp mkdep $(EMBIN)/mkdep
|
||||
-cmp cid $(EMBIN)/cid
|
||||
-cmp cclash $(EMBIN)/cclash
|
||||
-cmp prid $(EMBIN)/prid
|
||||
-cmp tabgen $(EMBIN)/tabgen
|
||||
-cmp mkdep.1 $(EMMAN)/mkdep.1
|
||||
-cmp cid.1 $(EMMAN)/cid.1
|
||||
-cmp cclash.1 $(EMMAN)/cclash.1
|
||||
-cmp prid.1 $(EMMAN)/prid.1
|
||||
-cmp tabgen.1 $(EMMAN)/tabgen.1
|
||||
|
||||
clean:
|
||||
rm -f *.o mkdep cid cclash prid
|
||||
rm -f *.o mkdep cid cclash prid tabgen
|
||||
|
||||
pr:
|
||||
@pr `pwd`/Makefile `pwd`/mkdep.c `pwd`/cclash.c `pwd`/cid.c \
|
||||
`pwd`/prid.c `pwd`/GCIPM.c
|
||||
`pwd`/prid.c `pwd`/GCIPM.c `pwd`/tabgen.c
|
||||
|
||||
opr:
|
||||
make pr | opr
|
||||
|
@ -37,6 +39,9 @@ opr:
|
|||
mkdep: mkdep.o
|
||||
$(CC) $(LDFLAGS) -o mkdep mkdep.o
|
||||
|
||||
tabgen: tabgen.o
|
||||
$(CC) $(LDFLAGS) -o tabgen tabgen.o
|
||||
|
||||
cid: cid.o GCIPM.o
|
||||
$(CC) $(LDFLAGS) -o cid cid.o GCIPM.o
|
||||
|
||||
|
|
116
util/cmisc/tabgen.1
Normal file
116
util/cmisc/tabgen.1
Normal file
|
@ -0,0 +1,116 @@
|
|||
.TH TABGEN 1ACK
|
||||
.ad
|
||||
.SH NAME
|
||||
tabgen \- table generator for C-programs
|
||||
.SH SYNOPSIS
|
||||
.B tabgen \fIarguments\fP
|
||||
.SH DESCRIPTION
|
||||
.I Tabgen
|
||||
is a handy tool for generating tables for C-programs from a compact
|
||||
description. The current version is only suitable for generating character
|
||||
tables. The output is produced on standard output.
|
||||
It works by maintaining an internal table of values, printing this table
|
||||
when this is requested.
|
||||
.PP
|
||||
Each argument given to
|
||||
.I tabgen
|
||||
is either a command or a description. Descriptions are discussed first.
|
||||
.PP
|
||||
A description consists of a value (a string), directly followed by a colon,
|
||||
directly followed by a list of indices for which the table to be generated
|
||||
has this value. This list of indices must be in a certain \fBinputformat\fP,
|
||||
characterized by a character.
|
||||
Currently, there is only one inputformat, "c". In this format, the indices
|
||||
are characters. There are two special characters: '\e' and '-'. The
|
||||
'\e' behaves like in a C-string, and the '-' describes a range, unless
|
||||
it starts the list of indices.
|
||||
.PP
|
||||
Some examples of descriptions:
|
||||
.nf
|
||||
STIDF:a-zA-Z_
|
||||
STSKIP:\er \et\e013\ef
|
||||
.fi
|
||||
.PP
|
||||
These descriptions have the effect that the internal table values for
|
||||
'a' through 'z', 'A' through 'Z', and '_' are set to STIDF, and that the
|
||||
internal table values for carriage-return, space, tab, vertical-tab, and
|
||||
form-feed are set to STSKIP.
|
||||
.PP
|
||||
A command is introduced by a special character. On the command line,
|
||||
a command is introduced by a '-'. The following commands are
|
||||
recognized:
|
||||
.IP I\fIchar\fP
|
||||
switch to a different input format. This command is only there for future
|
||||
extensions.
|
||||
.IP f\fIfile\fP
|
||||
read input from the file \fIfile\fP. In a file, each line is an argument
|
||||
to \fItabgen\fP. Each line is either a command or a description. In a file,
|
||||
commands are introduced by a '%'.
|
||||
.IP F\fIformat\fP
|
||||
Values are printed in a printf format. The default value for this format
|
||||
is \fB"%s,\en"\fP. This can be changed with this command.
|
||||
.IP T\fItext\fP
|
||||
Print \fItext\fP literally at this point.
|
||||
.IP p
|
||||
Print the table as it is built at this point.
|
||||
.IP C
|
||||
Clear the table. This sets all internal table values to 0.
|
||||
.IP i\fIstr\fP
|
||||
Initialize all internal table values to \fIstr\fP. If \fIstr\fP is not
|
||||
given, this command is equivalent to the C command.
|
||||
.IP S\fInum\fP
|
||||
Set the table size to \fInum\fP entries. The default size is 128.
|
||||
.IP H\fIfilename\fP
|
||||
Create tables which can be indexed by the full range of characters,
|
||||
rather than 0..127. As this depends on the implementation of 'char'
|
||||
in C (signed or unsigned), this also generates a file \fIfilename\fP,
|
||||
with a #define for the constant "CharOffset". The generated tables can
|
||||
be indexed by first adding "CharOffset" to the base of the table.
|
||||
If \fIfilename\fP is not given, "charoffset.h" is used.
|
||||
.SH "AN EXAMPLE"
|
||||
.PP
|
||||
The next example is a part of the \fItabgen\fP description of the
|
||||
character tables used by the lexical analyser of the ACK Modula-2 compiler.
|
||||
This description resides in a file called char.tab.
|
||||
.I
|
||||
Tabgen
|
||||
is called as follows:
|
||||
.nf
|
||||
tabgen -fchar.tab > char.c
|
||||
.fi
|
||||
.PP
|
||||
The description as given here generates 2 tables: one indicating a class
|
||||
according to which token a character can be a start of, and one indicating
|
||||
whether a character may occur in an identifier.
|
||||
.nf
|
||||
|
||||
% Comments are introduced with space or tab after the %
|
||||
%S129
|
||||
%F %s,
|
||||
% CHARACTER CLASSES
|
||||
%iSTGARB
|
||||
STSKIP: \et\e013\e014\e015
|
||||
STNL:\e012
|
||||
STSIMP:-#&()*+,/;=[]^{|}~
|
||||
STCOMP:.:<>
|
||||
STIDF:a-zA-Z
|
||||
STSTR:"'
|
||||
STNUM:0-9
|
||||
STEOI:\e200
|
||||
%T#include "class.h"
|
||||
% class.h contains #defines for STSKIP, STNL, ...
|
||||
%Tchar tkclass[] = {
|
||||
%p
|
||||
%T};
|
||||
% INIDF
|
||||
%C
|
||||
1:a-zA-Z0-9
|
||||
%Tchar inidf[] = {
|
||||
%F %s,
|
||||
%p
|
||||
%T};
|
||||
.fi
|
||||
.SH BUGS
|
||||
.PP
|
||||
.I Tabgen
|
||||
assumes that characters are 8 bits wide.
|
369
util/cmisc/tabgen.c
Normal file
369
util/cmisc/tabgen.c
Normal file
|
@ -0,0 +1,369 @@
|
|||
/*
|
||||
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
||||
* See the copyright notice in the ACK home directory, in the file "Copyright".
|
||||
*/
|
||||
/*
|
||||
chtabgen - character table generator
|
||||
|
||||
Author: Erik Baalbergen (..tjalk!erikb)
|
||||
Many mods by Ceriel Jacobs
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifndef NORCSID
|
||||
static char *RcsId = "$Header$";
|
||||
#endif
|
||||
|
||||
#define MAXBUF 256
|
||||
#define MAXTAB 10000
|
||||
#define COMCOM '-'
|
||||
#define FILECOM '%'
|
||||
|
||||
int InputForm = 'c'; /* default input format (and, currently, only one) */
|
||||
char OutputForm[MAXBUF] = "%s,\n";
|
||||
/* format for spitting out a string */
|
||||
char *Table[MAXTAB];
|
||||
char *ProgCall; /* callname of this program */
|
||||
int signedch = 0; /* set if characters are signed */
|
||||
int TabSize = 128; /* default size of generated table */
|
||||
char *InitialValue; /* initial value of all table entries */
|
||||
#define CHROFFSETFILE "charoffset.h"
|
||||
char *chroffsetfile = 0;
|
||||
|
||||
extern char *malloc(), *strcpy();
|
||||
|
||||
main(argc, argv)
|
||||
char *argv[];
|
||||
{
|
||||
if (((char) -1) < 0) signedch = 1;
|
||||
|
||||
ProgCall = *argv++;
|
||||
argc--;
|
||||
while (argc-- > 0) {
|
||||
if (**argv == COMCOM) {
|
||||
option(*argv++);
|
||||
}
|
||||
else {
|
||||
if (! process(*argv++, InputForm)) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (chroffsetfile) MkCharIndex();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
MkCharIndex()
|
||||
{
|
||||
/* Assumption: 8 bit bytes, ASCII character set */
|
||||
FILE *fp;
|
||||
|
||||
if ((fp = fopen(chroffsetfile, "w")) == NULL) {
|
||||
fprintf(stderr, "%s: cannot write file %s\n", ProgCall, chroffsetfile);
|
||||
exit(1);
|
||||
}
|
||||
if (signedch) {
|
||||
fputs("#define CharOffset 128\n", fp);
|
||||
}
|
||||
else fputs("#define CharOffset 0\n", fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
char *
|
||||
Salloc(s)
|
||||
char *s;
|
||||
{
|
||||
char *ns = malloc((unsigned)strlen(s) + 1);
|
||||
|
||||
if (ns) {
|
||||
strcpy(ns, s);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "%s: out of memory\n", ProgCall);
|
||||
exit(1);
|
||||
}
|
||||
return ns;
|
||||
}
|
||||
|
||||
option(str)
|
||||
char *str;
|
||||
{
|
||||
/* note that *str indicates the source of the option:
|
||||
either COMCOM (from command line) or FILECOM (from a file).
|
||||
*/
|
||||
switch (*++str) {
|
||||
|
||||
case ' ': /* command */
|
||||
case '\t':
|
||||
case '\0':
|
||||
break;
|
||||
case 'I': /* for future extension */
|
||||
InputForm = *++str;
|
||||
break;
|
||||
case 'f': /* input from file ... */
|
||||
if (*++str == '\0') {
|
||||
fprintf(stderr, "%s: -f: name expected\n", ProgCall);
|
||||
exit(1);
|
||||
}
|
||||
DoFile(str);
|
||||
break;
|
||||
case 'F': /* new output format string */
|
||||
sprintf(OutputForm, "%s\n", ++str);
|
||||
break;
|
||||
case 'T': /* insert text literally */
|
||||
printf("%s\n", ++str);
|
||||
break;
|
||||
case 'p': /* print table */
|
||||
PrintTable();
|
||||
break;
|
||||
case 'C': /* clear table */
|
||||
InitTable((char *)0);
|
||||
break;
|
||||
case 'i': /* initialize table with given value */
|
||||
if (*++str == '\0') {
|
||||
InitTable((char *)0);
|
||||
}
|
||||
else InitTable(str);
|
||||
break;
|
||||
case 'H': /* create include file for character offset,
|
||||
and create tables which can be indexed by the
|
||||
full range of characters, rather than 0..127,
|
||||
by adding "CharOffset" to the base.
|
||||
*/
|
||||
if (*++str == '\0') {
|
||||
chroffsetfile = CHROFFSETFILE;
|
||||
}
|
||||
else chroffsetfile = ++str;
|
||||
break;
|
||||
case 'S':
|
||||
{
|
||||
register i = atoi(++str);
|
||||
|
||||
if (i <= 0 || i > MAXTAB) {
|
||||
fprintf(stderr, "%s: size would exceed maximum\n",
|
||||
ProgCall);
|
||||
}
|
||||
else {
|
||||
TabSize = i;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
fprintf(stderr, "%s: bad option -%s\n", ProgCall, str);
|
||||
}
|
||||
}
|
||||
|
||||
InitTable(ival)
|
||||
char *ival;
|
||||
{
|
||||
register i;
|
||||
|
||||
for (i = 0; i < TabSize; i++) {
|
||||
Table[i] = 0;
|
||||
}
|
||||
InitialValue = 0;
|
||||
if (ival) {
|
||||
InitialValue = Salloc(ival);
|
||||
}
|
||||
}
|
||||
|
||||
PrintTable()
|
||||
{
|
||||
register i;
|
||||
|
||||
for (i = 0; i < TabSize; i++) {
|
||||
if (Table[i]) {
|
||||
printf(OutputForm, Table[i]);
|
||||
}
|
||||
else if (InitialValue) {
|
||||
printf(OutputForm, InitialValue);
|
||||
}
|
||||
else {
|
||||
printf(OutputForm, "0");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
process(str, format)
|
||||
char *str;
|
||||
{
|
||||
char *cstr = str;
|
||||
char *Name = cstr; /* overwrite original string! */
|
||||
|
||||
/* strip of the entry name
|
||||
*/
|
||||
while (*str && *str != ':') {
|
||||
if (*str == '\\') {
|
||||
++str;
|
||||
}
|
||||
*cstr++ = *str++;
|
||||
}
|
||||
|
||||
if (*str != ':') {
|
||||
fprintf(stderr, "%s: bad specification: \"%s\", ignored\n",
|
||||
ProgCall, Name);
|
||||
return 0;
|
||||
}
|
||||
*cstr = '\0';
|
||||
str++;
|
||||
|
||||
switch (format) {
|
||||
|
||||
case 'c':
|
||||
return c_proc(str, Name);
|
||||
default:
|
||||
fprintf(stderr, "%s: bad input format\n", ProgCall);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
c_proc(str, Name)
|
||||
char *str;
|
||||
char *Name;
|
||||
{
|
||||
int ch, ch2;
|
||||
int quoted();
|
||||
char *name = Salloc(Name);
|
||||
|
||||
while (*str) {
|
||||
if (*str == '\\') {
|
||||
ch = quoted(&str);
|
||||
}
|
||||
else {
|
||||
ch = *str++;
|
||||
}
|
||||
if (*str == '-') {
|
||||
if (*++str == '\\') {
|
||||
ch2 = quoted(&str);
|
||||
}
|
||||
else {
|
||||
if (ch2 = *str++);
|
||||
else str--;
|
||||
}
|
||||
if (ch > ch2) {
|
||||
fprintf(stderr, "%s: bad range\n", ProgCall);
|
||||
return 0;
|
||||
}
|
||||
while (ch <= ch2) {
|
||||
if (! setval(ch, name)) return 0;
|
||||
ch++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (! setval(ch, name)) return 0;
|
||||
}
|
||||
}
|
||||
if (chroffsetfile) Table[256] = Table[0];
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define ind(X) (chroffsetfile && signedch?(X>=128?X-128:X+128):X)
|
||||
|
||||
int
|
||||
setval(ch, nm)
|
||||
char *nm;
|
||||
{
|
||||
register char **p = &Table[ind(ch)];
|
||||
|
||||
if (ch < 0 || ch >= TabSize) {
|
||||
fprintf(stderr, "Illegal index: %d\n", ch);
|
||||
return 0;
|
||||
}
|
||||
if (*(p = &Table[ind(ch)])) {
|
||||
fprintf(stderr, "Warning: redefinition of index %d\n", ch);
|
||||
}
|
||||
*p = nm;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*	Decode one backslash escape.
	On entry *pstr points at the backslash; on return it points just
	behind the escape. Recognized are one to three octal digits,
	and the letters n, t, b, r and f with their meaning in C. Any other
	character stands for itself, and a backslash at the very end of the
	string stands for a backslash (the terminating null character is
	not consumed).
	Returns the character value, in the range 0..0377.
*/
int
quoted(pstr)
	char **pstr;
{
	register int ch;
	register int i;
	register char *str = *pstr + 1;		/* skip the backslash */

	if (*str >= '0' && *str <= '7') {
		/* at most three octal digits */
		ch = 0;
		for (i = 0; i < 3 && *str >= '0' && *str <= '7'; i++) {
			ch = 8 * ch + (*str++ - '0');
		}
	}
	else if (*str == '\0') {
		/* lone backslash at end of string; stay on the terminator */
		ch = '\\';
	}
	else {
		/* fetch the escaped character itself before stepping on */
		switch (ch = *str++) {

		case 'n':
			ch = '\n';
			break;
		case 't':
			ch = '\t';
			break;
		case 'b':
			ch = '\b';
			break;
		case 'r':
			ch = '\r';
			break;
		case 'f':
			ch = '\f';
			break;
		default :
			/* the escaped character stands for itself */
			break;
		}
	}
	*pstr = str;
	return ch & 0377;
}
|
||||
|
||||
/*	Read one line from "fp" into the buffer "s" of "n" characters.
	The newline is not stored and the result is always terminated by
	a null character. A line that does not fit is cut off after n-1
	characters; the remainder is delivered by the next call. A last
	line without a newline is delivered as well.
	Returns "s", or NULL when end of file is reached before any
	character could be read (or when "n" is not positive).
	NOTE(review): POSIX <stdio.h> may declare a different function
	with this name - confirm for the target system.
*/
char *
getline(s, n, fp)
	char *s;
	int n;
	FILE *fp;
{
	register int c;
	register char *str = s;
	register char *lim;

	if (n <= 0) {
		return NULL;
	}
	lim = s + n - 1;	/* last position, kept for the terminator */

	for (;;) {
		c = getc(fp);
		if (c == EOF) {
			if (str == s) {
				return NULL;
			}
			break;
		}
		if (c == '\n') {
			break;
		}
		if (str == lim) {
			/* buffer full: keep this character for the next call */
			ungetc(c, fp);
			break;
		}
		*str++ = c;
	}
	*str = '\0';
	return s;
}
|
||||
|
||||
#define BUFSIZE 1024
|
||||
|
||||
DoFile(name)
|
||||
char *name;
|
||||
{
|
||||
char text[BUFSIZE];
|
||||
FILE *fp;
|
||||
|
||||
if ((fp = fopen(name, "r")) == NULL) {
|
||||
fprintf(stderr, "%s: cannot read file %s\n", ProgCall, name);
|
||||
exit(1);
|
||||
}
|
||||
while (getline(text, BUFSIZE, fp) != NULL) {
|
||||
if (text[0] == FILECOM) {
|
||||
option(text);
|
||||
}
|
||||
else {
|
||||
if (! process(text, InputForm)) {
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue