605 lines
12 KiB
Plaintext
605 lines
12 KiB
Plaintext
/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
 * For full copyright and restrictions on use see the file COPYING in the top
 * level of the LLgen tree.
 */
|
|
|
|
/*
 *  L L G E N
 *
 *  An Extended LL(1) Parser Generator
 *
 *  Author : Ceriel J.H. Jacobs
 */
|
|
|
|
/*
 * tokens.g
 * Defines the tokens for the grammar of LLgen.
 * The lexical analyser and LLmessage are also included here.
 */
|
|
|
|
{
# include "types.h"
# include "io.h"
# include "extern.h"
# include "assert.h"
# include "cclass.h"

# ifndef NORCSID
/* Revision-control identification string; omitted when NORCSID is defined */
static string	rcsidc = "$Id$";
# endif

/*
 * Routines defined in this file.
 * The first group has external linkage, the second group is file-local.
 */
extern int	scanner();
extern		LLmessage();
extern int	input();
extern		unput();
extern		skipcomment();

# ifdef LINE_DIRECTIVE
STATIC		linedirective();
# endif
STATIC string	cpy();
STATIC string	vallookup();
STATIC		copyact();

/*
 * Number of parameters counted by the last outermost copyact() call;
 * scanner() hands it on in lextoken.t_num for a C_PARAMS token.
 */
static int	nparams;
}
|
|
/*
 * Token classes.
 * Where a token carries a value, the comment names the lextoken field
 * in which the scanner leaves it.
 */

%token	C_IDENT ;	/* identifier; text in lextoken.t_string */
%token	C_NUMBER ;	/* number; value in lextoken.t_num */
%token	C_LITERAL ;	/* literal; text in lextoken.t_string */
%token	C_EXPR ;	/* a C expression (after %if or %while) */
%token	C_PARAMS ;	/* formal or actual parameters */
%token	C_ACTION ;	/* a C action */

/*
 * Keywords.
 * The scanner returns these for the reserved words listed in resword[].
 */

%token	C_TOKEN ;
%token	C_START ;
%token	C_IF ;
%token	C_WHILE ;
%token	C_PERSISTENT ;
%token	C_FIRST ;
%token	C_LEXICAL ;
%token	C_PREFIX ;
%token	C_ONERROR ;
%token	C_AVOID ;
%token	C_PREFER ;
%token	C_DEFAULT ;
%token	C_SUBSTART ;
%token	C_ERRONEOUS ;
%token	C_ILLEGAL ;

/* The lexical analyser of this grammar is scanner() */
%lexical scanner ;
|
|
|
|
{
|
|
|
|
/*
|
|
* Structure for a keyword
|
|
*/
|
|
|
|
typedef struct keyword {
|
|
string w_word;
|
|
int w_value;
|
|
} t_keyw, *p_keyw;
|
|
|
|
/*
|
|
* The list of keywords, the most often used keywords come first.
|
|
* Linear search is used, as there are not many keywords
|
|
*/
|
|
|
|
static t_keyw resword[] = {
|
|
{ "token", C_TOKEN },
|
|
{ "avoid", C_AVOID },
|
|
{ "prefer", C_PREFER },
|
|
{ "persistent", C_PERSISTENT },
|
|
{ "default", C_DEFAULT },
|
|
{ "if", C_IF },
|
|
{ "while", C_WHILE },
|
|
{ "first", C_FIRST },
|
|
{ "start", C_START },
|
|
{ "lexical", C_LEXICAL },
|
|
{ "onerror", C_ONERROR },
|
|
{ "prefix", C_PREFIX },
|
|
#ifdef NON_CORRECTING
|
|
{ "substart", C_SUBSTART },
|
|
{ "erroneous", C_ERRONEOUS },
|
|
{ "illegal", C_ILLEGAL },
|
|
#endif
|
|
{ 0, 0 }
|
|
};
|
|
|
|
/*
 * Copy of lextoken made by LLmessage() when a token is inserted;
 * scanner() delivers it again while savedtok.t_tokno is non-zero.
 */
static t_token	savedtok;
# ifdef LINE_DIRECTIVE
/* 0 if the character last read from the file was the first of its line */
static int	nostartline;
# endif
|
|
|
|
STATIC
|
|
copyact(ch1,ch2,flag,level) char ch1,ch2; {
|
|
/*
|
|
* Copy an action to file f. Opening bracket is ch1, closing bracket
|
|
* is ch2.
|
|
* If flag & 1, copy opening and closing parameters too.
|
|
* If flag & 2, don't allow ','.
|
|
*/
|
|
static int text_seen = 0;
|
|
register FILE *f;
|
|
register ch; /* Current char */
|
|
register match; /* used to read strings */
|
|
int saved = linecount;
|
|
/* save linecount */
|
|
int sav_strip = strip_grammar;
|
|
|
|
f = fact;
|
|
if (ch1 == '{' || flag != 1) strip_grammar = 0;
|
|
if (!level) {
|
|
text_seen = 0;
|
|
nparams = 0; /* count comma's */
|
|
putc('\0',f);
|
|
fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
|
|
}
|
|
if (level || (flag & 1)) putc(ch1,f);
|
|
for (;;) {
|
|
ch = input();
|
|
if (ch == ch2) {
|
|
if (!level) {
|
|
if (text_seen) nparams++;
|
|
}
|
|
if (level || (flag & 1)) putc(ch,f);
|
|
if (strip_grammar != sav_strip) {
|
|
if (ch1 == '{' || flag != 1) putchar(ch);
|
|
}
|
|
strip_grammar = sav_strip;
|
|
return;
|
|
}
|
|
switch(ch) {
|
|
case ')':
|
|
case '}':
|
|
case ']':
|
|
error(linecount,"Parentheses mismatch");
|
|
break;
|
|
case '(':
|
|
text_seen = 1;
|
|
copyact('(',')',flag,level+1);
|
|
continue;
|
|
case '{':
|
|
text_seen = 1;
|
|
copyact('{','}',flag,level+1);
|
|
continue;
|
|
case '[':
|
|
text_seen = 1;
|
|
copyact('[',']',flag,level+1);
|
|
continue;
|
|
case '/':
|
|
ch = input();
|
|
unput(ch);
|
|
if (ch == '*') {
|
|
putc('/', f);
|
|
skipcomment(1);
|
|
continue;
|
|
}
|
|
ch = '/';
|
|
text_seen = 1;
|
|
break;
|
|
case ';':
|
|
case ',':
|
|
if (! level && text_seen) {
|
|
text_seen = 0;
|
|
nparams++;
|
|
if (ch == ',' && (flag & 2)) {
|
|
warning(linecount, "Parameters may not be separated with a ','");
|
|
ch = ';';
|
|
}
|
|
}
|
|
break;
|
|
case '\'':
|
|
case '"' :
|
|
/*
|
|
* watch out for brackets in strings, they do not
|
|
* count !
|
|
*/
|
|
text_seen = 1;
|
|
match = ch;
|
|
putc(ch,f);
|
|
while((ch = input())) {
|
|
if (ch == match) break;
|
|
if (ch == '\\') {
|
|
putc(ch,f);
|
|
ch = input();
|
|
}
|
|
if (ch == '\n') {
|
|
error(linecount,"Newline in string");
|
|
unput(match);
|
|
}
|
|
putc(ch,f);
|
|
}
|
|
if (ch == match) break;
|
|
/* Fall through */
|
|
case EOF :
|
|
if (!level) error(saved,"Action does not terminate");
|
|
strip_grammar = sav_strip;
|
|
return;
|
|
default:
|
|
if (c_class[ch] != ISSPA) text_seen = 1;
|
|
}
|
|
putc(ch,f);
|
|
}
|
|
}
|
|
|
|
scanner() {
|
|
/*
|
|
* Lexical analyser, what else
|
|
*/
|
|
register int ch; /* Current char */
|
|
register char *p = ltext;
|
|
int reserved = 0; /* reserved word? */
|
|
char *max = <ext[LTEXTSZ - 1];
|
|
static int nextexpr;
|
|
int expect_expr = nextexpr;
|
|
long off;
|
|
|
|
nextexpr = 0;
|
|
if (savedtok.t_tokno) {
|
|
/* A token has been inserted.
|
|
* Now deliver the last lextoken again
|
|
*/
|
|
lextoken = savedtok;
|
|
savedtok.t_tokno = 0;
|
|
return lextoken.t_tokno;
|
|
}
|
|
for (;;) {
|
|
ch = input();
|
|
if (ch == EOF) return ch;
|
|
# ifdef LINE_DIRECTIVE
|
|
if (ch == '#' && !nostartline) {
|
|
linedirective();
|
|
continue;
|
|
}
|
|
# endif
|
|
switch(c_class[ch]) {
|
|
case ISACT :
|
|
if (ch == '{') {
|
|
copyact('{', '}', in_production, 0);
|
|
return C_ACTION;
|
|
}
|
|
assert(ch == '(');
|
|
if (expect_expr) {
|
|
copyact('(', ')', 1, 0);
|
|
return C_EXPR;
|
|
}
|
|
off = ftell(fact);
|
|
copyact('(', ')', in_production != 0 ? 0 : 2, 0);
|
|
if (nparams == 0) fseek(fact, off, 0);
|
|
lextoken.t_num = nparams;
|
|
return C_PARAMS;
|
|
case ISLIT :
|
|
for (;;) {
|
|
ch = input();
|
|
if (ch == '\n' || ch == EOF) {
|
|
error(linecount,"Missing '");
|
|
break;
|
|
}
|
|
if (ch == '\'') break;
|
|
if (ch == '\\') {
|
|
*p++ = ch;
|
|
ch = input();
|
|
}
|
|
*p++ = ch;
|
|
if (p > max) p--;
|
|
}
|
|
*p = '\0';
|
|
lextoken.t_string = ltext;
|
|
return C_LITERAL;
|
|
case ISCOM :
|
|
skipcomment(0);
|
|
/* Fall through */
|
|
case ISSPA :
|
|
continue;
|
|
case ISDIG : {
|
|
register i = 0;
|
|
do {
|
|
i = 10 * i + (ch - '0');
|
|
ch= input();
|
|
} while (c_class[ch] == ISDIG);
|
|
lextoken.t_num = i;
|
|
unput(ch);
|
|
return C_NUMBER; }
|
|
default:
|
|
return ch;
|
|
case ISKEY :
|
|
reserved = 1;
|
|
ch = input();
|
|
/* Fall through */
|
|
case ISLET :
|
|
do {
|
|
if (reserved && ch >= 'A' && ch <= 'Z') {
|
|
ch += 'a' - 'A';
|
|
}
|
|
*p++ = ch;
|
|
if (p > max) p--;
|
|
ch = input();
|
|
} while (c_class[ch] == ISDIG || c_class[ch] == ISLET);
|
|
unput(ch);
|
|
*p = '\0';
|
|
if (reserved) { /*
|
|
* Now search for the keyword
|
|
*/
|
|
register p_keyw w;
|
|
|
|
w = resword;
|
|
while (w->w_word) {
|
|
if (! strcmp(ltext,w->w_word)) {
|
|
/*
|
|
* Return token number.
|
|
*/
|
|
if (w->w_value == C_IF ||
|
|
w->w_value == C_WHILE) {
|
|
nextexpr = 1;
|
|
}
|
|
return w->w_value;
|
|
}
|
|
w++;
|
|
}
|
|
error(linecount,"Illegal reserved word");
|
|
}
|
|
lextoken.t_string = ltext;
|
|
return C_IDENT;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Character pushed back by unput(); 0 when there is none */
static int	backupc;
/*
 * 0 if the last character read from the file was a newline (or nothing
 * has been read yet), 1 if input() is in the middle of a line
 */
static int	nonline;
|
|
|
|
input() {
|
|
/*
|
|
* Low level input routine, used by all other input routines
|
|
*/
|
|
register c;
|
|
|
|
if (c = backupc) {
|
|
/* Last char was "unput()". Deliver it again
|
|
*/
|
|
backupc = 0;
|
|
return c;
|
|
}
|
|
if ((c = getc(finput)) == EOF) {
|
|
nonline = 0;
|
|
return c;
|
|
}
|
|
# ifdef LINE_DIRECTIVE
|
|
nostartline = 1;
|
|
# endif
|
|
if (!nonline) {
|
|
linecount++;
|
|
# ifdef LINE_DIRECTIVE
|
|
nostartline = 0;
|
|
# endif
|
|
nonline = 1;
|
|
}
|
|
if (c == '\n') nonline = 0;
|
|
if (strip_grammar) putchar(c);
|
|
return c;
|
|
}
|
|
|
|
unput(c) {
|
|
/*
|
|
* "unread" c
|
|
*/
|
|
backupc = c;
|
|
}
|
|
|
|
skipcomment(flag) {
|
|
/*
|
|
* Skip comment. If flag != 0, the comment is inside a fragment
|
|
* of C-code, so keep it.
|
|
*/
|
|
register int ch;
|
|
int saved; /* line count on which comment starts */
|
|
|
|
saved = linecount;
|
|
if (input() != '*') error(linecount,"Illegal comment");
|
|
if (flag) putc('*', fact);
|
|
do {
|
|
ch = input();
|
|
if (flag) putc(ch, fact);
|
|
while (ch == '*') {
|
|
ch = input();
|
|
if (flag) putc(ch, fact);
|
|
if (ch == '/') return;
|
|
}
|
|
} while (ch != EOF);
|
|
error(saved,"Comment does not terminate");
|
|
}
|
|
|
|
# ifdef LINE_DIRECTIVE
STATIC
linedirective() {
	/*
	 * Read a line directive. The '#' has been read by the caller.
	 * The directive consists of a line number, optionally followed
	 * by a file name between double quotes. linecount is set to the
	 * number, and f_input to the file name if it differs from the
	 * current one.
	 * A directive without a number, or with a file name that is not
	 * closed on the same line, gives an error message and changes
	 * nothing. End-of-file is handled like the end of the line, and
	 * a file name that does not fit in ltext is cut off.
	 */
	register int	ch;
	register int	i;
	string		s_error = "Illegal line directive";
	string		store();
	register string	c;

	do {	/*
		 * Skip to next digit
		 * Do not skip newlines
		 */
		ch = input();
	} while (ch != '\n' && ch != EOF && c_class[ch] != ISDIG);
	if (ch == '\n' || ch == EOF) {
		error(linecount,s_error);
		return;
	}
	i = 0;
	do {
		i = i*10 + (ch - '0');
		ch = input();
	} while (ch != EOF && c_class[ch] == ISDIG);
	while (ch != '\n' && ch != '"' && ch != EOF) ch = input();
	if (ch == '"') {
		c = ltext;
		for (;;) {
			ch = input();
			if (ch == '"' || ch == '\n' || ch == EOF) break;
			/* keep room for the terminating null byte */
			if (c < &ltext[LTEXTSZ - 1]) *c++ = ch;
		}
		if (ch != '"') {
			error(linecount,s_error);
			return;
		}
		*c = '\0';
		/* skip the rest of the line */
		do {
			ch = input();
		} while (ch != '\n' && ch != EOF);
		/*
		 * Remember the file name
		 */
		if (strcmp(f_input,ltext)) f_input = store(ltext);
	}
	/*
	 * NOTE(review): input() increments linecount again on the first
	 * character of the next line, so that line is counted as i + 1.
	 * Confirm that this is the intended numbering.
	 */
	linecount = i;
}
# endif
|
|
|
|
STATIC string
|
|
vallookup(s) {
|
|
/*
|
|
* Look up the keyword that has token number s
|
|
*/
|
|
register p_keyw p = resword;
|
|
|
|
while (p->w_value) {
|
|
if (p->w_value == s) return p->w_word;
|
|
p++;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
STATIC string
|
|
cpy(s,p,inserted) register string p; {
|
|
/*
|
|
* Create a piece of error message for token s and put it at p.
|
|
* inserted = 0 if the token s was deleted (in which case we have
|
|
* attributes), else it was inserted
|
|
*/
|
|
register string t = 0;
|
|
|
|
switch(s) {
|
|
case C_IDENT :
|
|
if (!inserted) t = lextoken.t_string;
|
|
else t = "identifier";
|
|
break;
|
|
case C_NUMBER :
|
|
t = "number";
|
|
break;
|
|
case C_LITERAL :
|
|
if (!inserted) {
|
|
*p++ = '\'';
|
|
t = lextoken.t_string;
|
|
break;
|
|
}
|
|
t = "literal";
|
|
break;
|
|
case C_ACTION:
|
|
t = "C action";
|
|
break;
|
|
case C_PARAMS:
|
|
t = "C parameter section";
|
|
break;
|
|
case C_EXPR:
|
|
t = "C expression";
|
|
break;
|
|
case EOFILE :
|
|
t = "end-of-file";
|
|
break;
|
|
}
|
|
if (!t && (t = vallookup(s))) {
|
|
*p++ = '%';
|
|
}
|
|
if (t) { /*
|
|
* We have a string for the token. Copy it
|
|
*/
|
|
while (*t) *p++ = *t++;
|
|
if (s == C_LITERAL && !inserted) {
|
|
*p++ = '\'';
|
|
}
|
|
return p;
|
|
}
|
|
/*
|
|
* The token is a literal
|
|
*/
|
|
*p++ = '\'';
|
|
if (s >= 040 && s <= 0176) *p++ = s;
|
|
else {
|
|
*p++ = '\\';
|
|
switch(s) {
|
|
case '\b' : *p++ = 'b'; break;
|
|
case '\f' : *p++ = 'f'; break;
|
|
case '\n' : *p++ = 'n'; break;
|
|
case '\r' : *p++ = 'r'; break;
|
|
case '\t' : *p++ = 't'; break;
|
|
default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07);
|
|
*p++='0'+(s&07);
|
|
}
|
|
}
|
|
*p++ = '\'';
|
|
return p;
|
|
}
|
|
|
|
string strcpy();
|
|
|
|
LLmessage(d) {
|
|
/*
|
|
* d is either 0, in which case the current token has been deleted,
|
|
* or non-zero, in which case it represents a token that is inserted
|
|
* before the current token
|
|
*/
|
|
register string s,t;
|
|
char buf[128];
|
|
|
|
nerrors++;
|
|
s = buf;
|
|
if (d < 0) {
|
|
strcpy(buf, "end-of-file expected");
|
|
}
|
|
else if (d == 0) {
|
|
#ifdef LLNONCORR
|
|
t = " unexpected";
|
|
#else
|
|
t = " deleted";
|
|
#endif
|
|
s = cpy(LLsymb,s,0);
|
|
do *s++ = *t; while (*t++);
|
|
} else {
|
|
s = cpy(d,s,1);
|
|
t = " inserted in front of ";
|
|
do *s++ = *t++; while (*t);
|
|
s = cpy(LLsymb,s,0);
|
|
*s = '\0';
|
|
}
|
|
if (d > 0) { /*
|
|
* Save the current token and make up some
|
|
* attributes for the inserted token
|
|
*/
|
|
savedtok = lextoken;
|
|
savedtok.t_tokno = LLsymb;
|
|
if (d == C_IDENT) lextoken.t_string = "dummy_identifier";
|
|
else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
|
|
else if (d == C_NUMBER) lextoken.t_num = 1;
|
|
}
|
|
#ifdef LLNONCORR
|
|
else
|
|
#endif
|
|
error(linecount, "%s", buf);
|
|
/* Don't change this line to
|
|
* error(linecount, buf).
|
|
* The string in "buf" might contain '%' ...
|
|
*/
|
|
#ifdef LLNONCORR
|
|
in_production = 1;
|
|
/* To prevent warnings from copyact */
|
|
#endif
|
|
}
|
|
}
|