ack/util/LLgen/src/tokens.g

605 lines
12 KiB
Plaintext

/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
* For full copyright and restrictions on use see the file COPYING in the top
* level of the LLgen tree.
*/
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* tokens.g
* Defines the tokens for the grammar of LLgen.
* The lexical analyser and LLmessage are also included here.
*/
{
# include "types.h"
# include "io.h"
# include "extern.h"
# include "assert.h"
# include "cclass.h"
# ifndef NORCSID
static string rcsidc = "$Id$";
# endif
/* Here are defined : */
extern int scanner();
extern LLmessage();
extern int input();
extern unput();
extern skipcomment();
# ifdef LINE_DIRECTIVE
STATIC linedirective();
# endif
STATIC string cpy();
STATIC string vallookup();
STATIC copyact();
static int nparams;
}
/* Classes */
%token C_IDENT ; /* lextoken.t_string contains the identifier read */
%token C_NUMBER ; /* lextoken.t_num contains the number read */
%token C_LITERAL ; /* lextoken.t_string contains the literal read */
%token C_EXPR ; /* A C expression (%if or %while) */
%token C_PARAMS ; /* formal or actual parameters */
%token C_ACTION ; /* a C action */
/* Keywords */
%token C_TOKEN ;
%token C_START ;
%token C_IF ;
%token C_WHILE ;
%token C_PERSISTENT ;
%token C_FIRST ;
%token C_LEXICAL ;
%token C_PREFIX ;
%token C_ONERROR ;
%token C_AVOID ;
%token C_PREFER ;
%token C_DEFAULT ;
%token C_SUBSTART ;
%token C_ERRONEOUS ;
%token C_ILLEGAL ;
%lexical scanner ;
{
/*
* Structure for a keyword
*/
typedef struct keyword {
string w_word;
int w_value;
} t_keyw, *p_keyw;
/*
* The list of keywords, the most often used keywords come first.
* Linear search is used, as there are not many keywords
*/
static t_keyw resword[] = {
{ "token", C_TOKEN },
{ "avoid", C_AVOID },
{ "prefer", C_PREFER },
{ "persistent", C_PERSISTENT },
{ "default", C_DEFAULT },
{ "if", C_IF },
{ "while", C_WHILE },
{ "first", C_FIRST },
{ "start", C_START },
{ "lexical", C_LEXICAL },
{ "onerror", C_ONERROR },
{ "prefix", C_PREFIX },
#ifdef NON_CORRECTING
{ "substart", C_SUBSTART },
{ "erroneous", C_ERRONEOUS },
{ "illegal", C_ILLEGAL },
#endif
{ 0, 0 }
};
static t_token savedtok; /* to save lextoken in case of an insertion */
# ifdef LINE_DIRECTIVE
static int nostartline; /* = 0 if at the start of a line */
# endif
STATIC
copyact(ch1,ch2,flag,level) char ch1,ch2; {
/*
* Copy an action to file f. Opening bracket is ch1, closing bracket
* is ch2.
* If flag & 1, copy opening and closing parameters too.
* If flag & 2, don't allow ','.
*/
static int text_seen = 0;
register FILE *f;
register ch; /* Current char */
register match; /* used to read strings */
int saved = linecount;
/* save linecount */
int sav_strip = strip_grammar;
f = fact;
if (ch1 == '{' || flag != 1) strip_grammar = 0;
if (!level) {
text_seen = 0;
nparams = 0; /* count comma's */
putc('\0',f);
fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
}
if (level || (flag & 1)) putc(ch1,f);
for (;;) {
ch = input();
if (ch == ch2) {
if (!level) {
if (text_seen) nparams++;
}
if (level || (flag & 1)) putc(ch,f);
if (strip_grammar != sav_strip) {
if (ch1 == '{' || flag != 1) putchar(ch);
}
strip_grammar = sav_strip;
return;
}
switch(ch) {
case ')':
case '}':
case ']':
error(linecount,"Parentheses mismatch");
break;
case '(':
text_seen = 1;
copyact('(',')',flag,level+1);
continue;
case '{':
text_seen = 1;
copyact('{','}',flag,level+1);
continue;
case '[':
text_seen = 1;
copyact('[',']',flag,level+1);
continue;
case '/':
ch = input();
unput(ch);
if (ch == '*') {
putc('/', f);
skipcomment(1);
continue;
}
ch = '/';
text_seen = 1;
break;
case ';':
case ',':
if (! level && text_seen) {
text_seen = 0;
nparams++;
if (ch == ',' && (flag & 2)) {
warning(linecount, "Parameters may not be separated with a ','");
ch = ';';
}
}
break;
case '\'':
case '"' :
/*
* watch out for brackets in strings, they do not
* count !
*/
text_seen = 1;
match = ch;
putc(ch,f);
while((ch = input())) {
if (ch == match) break;
if (ch == '\\') {
putc(ch,f);
ch = input();
}
if (ch == '\n') {
error(linecount,"Newline in string");
unput(match);
}
putc(ch,f);
}
if (ch == match) break;
/* Fall through */
case EOF :
if (!level) error(saved,"Action does not terminate");
strip_grammar = sav_strip;
return;
default:
if (c_class[ch] != ISSPA) text_seen = 1;
}
putc(ch,f);
}
}
scanner() {
/*
* Lexical analyser, what else
*/
register int ch; /* Current char */
register char *p = ltext;
int reserved = 0; /* reserved word? */
char *max = &ltext[LTEXTSZ - 1];
static int nextexpr;
int expect_expr = nextexpr;
long off;
nextexpr = 0;
if (savedtok.t_tokno) {
/* A token has been inserted.
* Now deliver the last lextoken again
*/
lextoken = savedtok;
savedtok.t_tokno = 0;
return lextoken.t_tokno;
}
for (;;) {
ch = input();
if (ch == EOF) return ch;
# ifdef LINE_DIRECTIVE
if (ch == '#' && !nostartline) {
linedirective();
continue;
}
# endif
switch(c_class[ch]) {
case ISACT :
if (ch == '{') {
copyact('{', '}', in_production, 0);
return C_ACTION;
}
assert(ch == '(');
if (expect_expr) {
copyact('(', ')', 1, 0);
return C_EXPR;
}
off = ftell(fact);
copyact('(', ')', in_production != 0 ? 0 : 2, 0);
if (nparams == 0) fseek(fact, off, 0);
lextoken.t_num = nparams;
return C_PARAMS;
case ISLIT :
for (;;) {
ch = input();
if (ch == '\n' || ch == EOF) {
error(linecount,"Missing '");
break;
}
if (ch == '\'') break;
if (ch == '\\') {
*p++ = ch;
ch = input();
}
*p++ = ch;
if (p > max) p--;
}
*p = '\0';
lextoken.t_string = ltext;
return C_LITERAL;
case ISCOM :
skipcomment(0);
/* Fall through */
case ISSPA :
continue;
case ISDIG : {
register i = 0;
do {
i = 10 * i + (ch - '0');
ch= input();
} while (c_class[ch] == ISDIG);
lextoken.t_num = i;
unput(ch);
return C_NUMBER; }
default:
return ch;
case ISKEY :
reserved = 1;
ch = input();
/* Fall through */
case ISLET :
do {
if (reserved && ch >= 'A' && ch <= 'Z') {
ch += 'a' - 'A';
}
*p++ = ch;
if (p > max) p--;
ch = input();
} while (c_class[ch] == ISDIG || c_class[ch] == ISLET);
unput(ch);
*p = '\0';
if (reserved) { /*
* Now search for the keyword
*/
register p_keyw w;
w = resword;
while (w->w_word) {
if (! strcmp(ltext,w->w_word)) {
/*
* Return token number.
*/
if (w->w_value == C_IF ||
w->w_value == C_WHILE) {
nextexpr = 1;
}
return w->w_value;
}
w++;
}
error(linecount,"Illegal reserved word");
}
lextoken.t_string = ltext;
return C_IDENT;
}
}
}
static int backupc; /* for unput() */
static int nonline; /* = 1 if last char read was a newline */
input() {
/*
* Low level input routine, used by all other input routines
*/
register c;
if (c = backupc) {
/* Last char was "unput()". Deliver it again
*/
backupc = 0;
return c;
}
if ((c = getc(finput)) == EOF) {
nonline = 0;
return c;
}
# ifdef LINE_DIRECTIVE
nostartline = 1;
# endif
if (!nonline) {
linecount++;
# ifdef LINE_DIRECTIVE
nostartline = 0;
# endif
nonline = 1;
}
if (c == '\n') nonline = 0;
if (strip_grammar) putchar(c);
return c;
}
unput(c) {
/*
* "unread" c
*/
backupc = c;
}
skipcomment(flag) {
/*
* Skip comment. If flag != 0, the comment is inside a fragment
* of C-code, so keep it.
*/
register int ch;
int saved; /* line count on which comment starts */
saved = linecount;
if (input() != '*') error(linecount,"Illegal comment");
if (flag) putc('*', fact);
do {
ch = input();
if (flag) putc(ch, fact);
while (ch == '*') {
ch = input();
if (flag) putc(ch, fact);
if (ch == '/') return;
}
} while (ch != EOF);
error(saved,"Comment does not terminate");
}
# ifdef LINE_DIRECTIVE
STATIC
linedirective() {
/*
* Read a line directive
*/
register int ch;
register int i;
string s_error = "Illegal line directive";
string store();
register string c;
do { /*
* Skip to next digit
* Do not skip newlines
*/
ch = input();
} while (ch != '\n' && c_class[ch] != ISDIG);
if (ch == '\n') {
error(linecount,s_error);
return;
}
i = 0;
do {
i = i*10 + (ch - '0');
ch = input();
} while (c_class[ch] == ISDIG);
while (ch != '\n' && ch != '"') ch = input();
if (ch == '"') {
c = ltext;
do {
*c++ = ch = input();
} while (ch != '"' && ch != '\n');
if (ch == '\n') {
error(linecount,s_error);
return;
}
*--c = '\0';
do {
ch = input();
} while (ch != '\n');
/*
* Remember the file name
*/
if (strcmp(f_input,ltext)) f_input = store(ltext);
}
linecount = i;
}
# endif
STATIC string
vallookup(s) {
/*
* Look up the keyword that has token number s
*/
register p_keyw p = resword;
while (p->w_value) {
if (p->w_value == s) return p->w_word;
p++;
}
return 0;
}
STATIC string
cpy(s,p,inserted) register string p; {
/*
* Create a piece of error message for token s and put it at p.
* inserted = 0 if the token s was deleted (in which case we have
* attributes), else it was inserted
*/
register string t = 0;
switch(s) {
case C_IDENT :
if (!inserted) t = lextoken.t_string;
else t = "identifier";
break;
case C_NUMBER :
t = "number";
break;
case C_LITERAL :
if (!inserted) {
*p++ = '\'';
t = lextoken.t_string;
break;
}
t = "literal";
break;
case C_ACTION:
t = "C action";
break;
case C_PARAMS:
t = "C parameter section";
break;
case C_EXPR:
t = "C expression";
break;
case EOFILE :
t = "end-of-file";
break;
}
if (!t && (t = vallookup(s))) {
*p++ = '%';
}
if (t) { /*
* We have a string for the token. Copy it
*/
while (*t) *p++ = *t++;
if (s == C_LITERAL && !inserted) {
*p++ = '\'';
}
return p;
}
/*
* The token is a literal
*/
*p++ = '\'';
if (s >= 040 && s <= 0176) *p++ = s;
else {
*p++ = '\\';
switch(s) {
case '\b' : *p++ = 'b'; break;
case '\f' : *p++ = 'f'; break;
case '\n' : *p++ = 'n'; break;
case '\r' : *p++ = 'r'; break;
case '\t' : *p++ = 't'; break;
default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07);
*p++='0'+(s&07);
}
}
*p++ = '\'';
return p;
}
string strcpy();
LLmessage(d) {
/*
* d is either 0, in which case the current token has been deleted,
* or non-zero, in which case it represents a token that is inserted
* before the current token
*/
register string s,t;
char buf[128];
nerrors++;
s = buf;
if (d < 0) {
strcpy(buf, "end-of-file expected");
}
else if (d == 0) {
#ifdef LLNONCORR
t = " unexpected";
#else
t = " deleted";
#endif
s = cpy(LLsymb,s,0);
do *s++ = *t; while (*t++);
} else {
s = cpy(d,s,1);
t = " inserted in front of ";
do *s++ = *t++; while (*t);
s = cpy(LLsymb,s,0);
*s = '\0';
}
if (d > 0) { /*
* Save the current token and make up some
* attributes for the inserted token
*/
savedtok = lextoken;
savedtok.t_tokno = LLsymb;
if (d == C_IDENT) lextoken.t_string = "dummy_identifier";
else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
else if (d == C_NUMBER) lextoken.t_num = 1;
}
#ifdef LLNONCORR
else
#endif
error(linecount, "%s", buf);
/* Don't change this line to
* error(linecount, buf).
* The string in "buf" might contain '%' ...
*/
#ifdef LLNONCORR
in_production = 1;
/* To prevent warnings from copyact */
#endif
}
}