ack/util/LLgen/src/tokens.g

438 lines
8.4 KiB
Plaintext
Raw Normal View History

1984-10-08 14:14:53 +00:00
/*
* (c) copyright 1983 by the Vrije Universiteit, Amsterdam, The Netherlands.
*
* This product is part of the Amsterdam Compiler Kit.
*
* Permission to use, sell, duplicate or disclose this software must be
* obtained in writing. Requests for such permissions may be sent to
*
* Dr. Andrew S. Tanenbaum
* Wiskundig Seminarium
* Vrije Universiteit
* Postbox 7161
* 1007 MC Amsterdam
* The Netherlands
*
*/
/*
* L L G E N
*
* An Extended LL(1) Parser Generator
*
* Author : Ceriel J.H. Jacobs
*/
/*
* tokens.g
* Defines the tokens for the grammar of LLgen.
* The lexical analyser and LLmes are also included here.
*/
{
# include "types.h"
# include "io.h"
# include "tunable.h"
# include "extern.h"
# include "assert.h"
1984-10-08 17:11:03 +00:00
# ifndef NORCSID
/*
 * Revision control identification string for this file.
 * It is left out when NORCSID is defined.
 */
static string rcsidc = "$Header$";
# endif
1984-10-08 14:14:53 +00:00
/*
 * Here are defined :
 * (all routines below are implemented further on in this file)
 */
/* the lexical analyser; delivers the next token */
extern int scanner();
/* reports the deletion or insertion of a token */
extern LLmessage();
/* low level input routine; delivers the next character */
extern int input();
/* "unreads" one character, so that input() delivers it again */
extern unput();
/* skips a comment of which the '/' has been read already */
extern skipcomment();
/* handles a "# <number> "<file>"" line directive */
STATIC linedirective();
/* puts the text for a token in an error message */
STATIC string cpy();
/* finds the keyword that belongs to a token number */
STATIC string vallookup();
}
/*
 * Classes.
 * These tokens have an attribute, which the lexical analyser leaves
 * in lextoken.
 */
%token C_IDENT ; /* lextoken.t_string contains the identifier read */
%token C_NUMBER ; /* lextoken.t_num contains the number read */
%token C_LITERAL ; /* lextoken.t_string contains the literal read */
/*
 * Keywords.
 * Each of these tokens stands for one reserved word, written in the
 * input as '%' followed by the word (C_TOKEN for %token, C_START for
 * %start, and so on). The spelling that belongs to each token is listed
 * in the table resword further on. The lexical analyser converts upper
 * case letters of a reserved word to lower case before looking it up.
 */
%token C_TOKEN ;
%token C_START ;
%token C_IF ;
%token C_WHILE ;
%token C_PERSISTENT ;
%token C_FIRST ;
%token C_LEXICAL ;
%token C_AVOID ;
%token C_PREFER ;
%token C_DEFAULT ;
/* The routine scanner, defined below, is the lexical analyser */
%lexical scanner ;
{
/*
* Structure for a keyword
*
* w_word is the spelling of the keyword, without the leading '%'.
* w_value is the token number that scanner() returns for it.
*/
struct keyword {
string w_word;
int w_value;
};
/*
* The list of keywords, the most often used keywords come first.
* Linear search is used, as there are not many keywords.
* The list is closed by an entry that has both fields 0: scanner()
* stops its search on w_word == 0, vallookup() on w_value == 0.
*/
static struct keyword resword[] = {
{ "token", C_TOKEN },
{ "avoid", C_AVOID },
{ "prefer", C_PREFER },
{ "persistent", C_PERSISTENT },
{ "default", C_DEFAULT },
{ "if", C_IF },
{ "while", C_WHILE },
{ "first", C_FIRST },
{ "start", C_START },
{ "lexical", C_LEXICAL },
{ 0, 0 }
};
/*
* savedtok is filled by LLmessage() when a token is inserted. As long as
* savedtok.t_tokno is non-zero, scanner() delivers the saved token again
* instead of reading a new one, and then clears savedtok.t_tokno.
*/
static t_token savedtok; /* to save lextoken in case of an insertion */
/*
* nostartline is maintained by input(): it is set to 0 when the character
* just read is the first one of a line, and to 1 otherwise. scanner()
* uses it to accept a '#' as line directive at the start of a line only.
*/
static int nostartline; /* = 0 if at the start of a line */
scanner() {
	/*
	 * Lexical analyser, what else.
	 * Delivers the number of the next token:
	 * - EOF at the end of the input,
	 * - C_LITERAL for a literal between single quotes; the text
	 *   between the quotes is put in ltext, escape sequences are
	 *   left as they are, and lextoken.t_string points to ltext,
	 * - C_NUMBER for a row of digits; its value is in lextoken.t_num,
	 * - C_IDENT for an identifier; lextoken.t_string points to ltext,
	 * - the token number of a keyword for '%' followed by a word
	 *   that is found in resword,
	 * - the character itself for anything else.
	 * Text that does not fit in ltext is truncated to LTEXTSZ - 1
	 * characters.
	 */
	register int	ch;		/* Current char */
	register int	i;
	register int	reserved = 0;	/* reserved word? */
	int		last;		/* Char before current char */

	if (savedtok.t_tokno) {	/*
				 * A token has been inserted.
				 * Now deliver the last lextoken again
				 */
		lextoken = savedtok;
		savedtok.t_tokno = 0;
		return lextoken.t_tokno;
	}
	for (;;) {	/*
			 * First, skip space, comments, line directives, etc.
			 * A '#' only starts a line directive when it is the
			 * first character of a line.
			 */
		do	ch = input();
		while(isspace(ch));
		if (ch == '/') skipcomment(0);
		else if (ch == '#' && !nostartline) linedirective();
		else break;
	}
	/*
	 * Now we have a first character of a token
	 */
	switch(ch) {
	  case EOF :
		return EOF;
	  case '\'':	/*
			 * Literal, put it in ltext
			 */
		i = 0;
		for (;;) {
			last = ch;
			ch = input();
			if (ch == '\n' || ch == EOF) {
				error(linecount,"missing '");
				break;
			}
			/*
			 * A quote ends the literal, unless it is escaped
			 */
			if (ch == '\'' && last != '\\') break;
			ltext[i] = ch;
			if (i < LTEXTSZ - 1) ++i;
			if (ch == '\\' && last == '\\') {
				/*
				 * An escaped backslash. It must not act as
				 * escape for the next character, otherwise
				 * the literal '\\' would never be closed.
				 */
				ch = 0;
			}
		}
		ltext[i] = '\0';
		lextoken.t_string = ltext;
		return C_LITERAL;
	  case '%' :	/*
			 * Start of a reserved word
			 */
		reserved = 1;
		ch = input();
		/* Fall through */
	  default :
		i = 0;
		if (isdigit(ch)) {
			if (reserved) {
				error(linecount," A reserved number ?");
			}
			while (isdigit(ch)) {
				i = 10 * i + (ch - '0');
				ch= input();
			}
			lextoken.t_num = i;
			unput(ch);
			return C_NUMBER;
		}
		if (isalpha(ch) || ch == '_') {
			do {
				/*
				 * Reserved words are looked up in lower case
				 */
				if (reserved && isupper(ch)) ch += 'a' - 'A';
				ltext[i] = ch;
				if (i < LTEXTSZ - 1) ++i;
				ch = input();
			} while (isalnum(ch) || ch == '_');
		} else return ch;
		unput(ch);
	}
	ltext[i] = '\0';
	if (reserved) {	/*
			 * Now search for the keyword
			 */
		register struct keyword *w;

		w = resword;
		while (w->w_word) {
			if (! strcmp(ltext,w->w_word)) {
				/*
				 * Found it. Return token number.
				 */
				return w->w_value;
			}
			w++;
		}
		/*
		 * Not a keyword. Complain, and deliver it as identifier
		 */
		error(linecount,"illegal reserved word");
	}
	lextoken.t_string = ltext;
	return C_IDENT;
}
/*
* backupc holds the character handed to unput(), or 0 if there is none.
* input() delivers it first and then clears it.
*/
static int backupc; /* for unput() */
/*
* nonline is 0 if the last character that input() read from the file was
* a newline, or if nothing has been read yet; otherwise it is 1. input()
* increments linecount when it reads a character while nonline is 0.
*/
static int nonline; /* = 0 if last char read was a newline */
input() {
	/*
	 * Basic character supplier; all other input routines get their
	 * characters from here.
	 * A character given to unput() is delivered first. Otherwise the
	 * next character of finput is read. A row of spaces and tabs is
	 * delivered as one single space. EOF is delivered as is.
	 * The line administration (linecount, nostartline, nonline) is
	 * kept up to date for every character read from the file.
	 */
	register int	ch;
	register FILE	*fp;

	if (backupc) {
		/*
		 * There is a character pending from unput()
		 */
		ch = backupc;
		backupc = 0;
		return ch;
	}
	fp = finput;
	ch = getc(fp);
	if (ch == EOF) return EOF;
	if (nonline) {
		/*
		 * Somewhere in the middle of a line
		 */
		nostartline = 1;
	}
	else {
		/*
		 * First character of a new line
		 */
		linecount++;
		nostartline = 0;
		nonline = 1;
	}
	switch(ch) {
	  case ' ' :
	  case '\t' :
		/*
		 * Swallow the rest of the white space; the character
		 * that ends it is given back to the file
		 */
		while ((ch = getc(fp)) == ' ' || ch == '\t') {
			/* nothing */
		}
		ungetc(ch,fp);
		return ' ';
	  case '\n' :
		nonline = 0;
		break;
	}
	return ch;
}
unput(c)
	int c;
{
	/*
	 * Give character c back to the input, so that the next call of
	 * input() delivers it again. Only one character is remembered,
	 * and a c with value 0 is not remembered at all, as input()
	 * takes 0 to mean "nothing pending".
	 */
	backupc = c;
}
skipcomment(flag)
	int flag;
{
	/*
	 * Skip a comment, of which the '/' has been read already.
	 * With a non-zero flag the comment is part of a fragment of
	 * C-code; its newlines are then written to fact, so that the
	 * line count of the C-compiler stays correct.
	 * A comment that is still open at end of file is reported on the
	 * line where it started.
	 */
	register int	ch;
	int		startline = linecount;	/* line on which comment starts */

	if (input() != '*') error(linecount,"illegal comment");
	for (ch = input(); ch != EOF; ch = input()) {
		if (ch == '\n' && flag) putc(ch,fact);
		while (ch == '*') {
			/*
			 * Possibly the end of the comment
			 */
			ch = input();
			if (ch == '/') return;
			if (ch == '\n' && flag) putc(ch,fact);
		}
	}
	error(startline,"Comment does not terminate");
}
STATIC
linedirective() {
/*
* Read a line directive
*/
register ch;
register i;
string s_error = "Illegal line directive";
string store();
register string c;
do { /*
* Skip to next digit
* Do not skip newlines
*/
ch = input();
} while (ch != '\n' && ! isdigit(ch));
if (ch == '\n') {
error(linecount,s_error);
return;
}
i = ch - '0';
ch = input();
while (isdigit(ch)) {
i = i*10 + (ch - '0');
ch = input();
}
while (ch != '\n' && ch != '"') ch = input();
if (ch == '"') {
c = ltext;
do {
*c++ = ch = input();
} while (ch != '"' && ch != '\n');
if (ch == '\n') {
error(linecount,s_error);
return;
}
*--c = '\0';
do {
ch = input();
} while (ch != '\n');
/*
* Remember the file name
*/
if (strcmp(f_input,ltext)) f_input = store(ltext);
}
linecount = i;
}
STATIC string
vallookup(s) {
/*
* Look up the keyword that has token number s
*/
register struct keyword *p = resword;
while (p->w_value) {
if (p->w_value == s) return p->w_word;
p++;
}
return 0;
}
STATIC string
cpy(s,p,flag) register s; register string p; {
	/*
	 * Create a piece of error message for token s and put it at p.
	 * flag = 0 if the token s was deleted (in which case we have
	 * attributes), else it was inserted.
	 * The text produced is
	 * - for an identifier : its name if deleted, else "identifier",
	 * - for a number : "number",
	 * - for a literal : "'<text>'" (double quotes included) if
	 *   deleted, else "literal",
	 * - for EOFILE : "endoffile",
	 * - for a keyword : '%' followed by the keyword,
	 * - for any other token : the character between single quotes;
	 *   a character that is not printable is shown as an escape
	 *   sequence.
	 * Delivers a pointer to the position just behind the text. The
	 * text is not closed with a null byte, and there is no check on
	 * the room available at p; this is left to the caller.
	 */
	register string t = 0;

	switch(s) {
	  case C_IDENT :
		if (!flag) t = lextoken.t_string;
		else t = "identifier";
		break;
	  case C_NUMBER :
		t = "number";
		break;
	  case C_LITERAL :
		if (!flag) {
			*p++ = '"';
			*p++ = '\'';
			t = lextoken.t_string;
			break;
		}
		t = "literal";
		break;
	  case EOFILE :
		t = "endoffile";
		break;
	}
	if (!t) {
		/*
		 * Not one of the classes, maybe it is a keyword
		 */
		t = vallookup(s);
		if (t) {
			*p++ = '%';
		}
	}
	if (t) {	/*
			 * We have a string for the token. Copy it
			 */
		while (*t) *p++ = *t++;
		if (s == C_LITERAL && !flag) {
			*p++ = '\'';
			*p++ = '"';
		}
		return p;
	}
	/*
	 * The token is a single character
	 */
	*p++ = '\'';
	if (s >= 040 && s <= 0176) *p++ = s;
	else {
		/*
		 * Not printable, so produce an escape sequence.
		 * All escape sequences start with a backslash
		 */
		*p++ = '\\';
		switch(s) {
		  case '\b' : *p++ = 'b'; break;
		  case '\f' : *p++ = 'f'; break;
		  case '\n' : *p++ = 'n'; break;
		  case '\r' : *p++ = 'r'; break;
		  case '\t' : *p++ = 't'; break;
		  default :	/*
				 * Three octal digits. The original left
				 * out the backslash here, giving '001'
				 * instead of '\001'
				 */
			*p++ = '0' + ((s&0377)>>6);
			*p++ = '0' + ((s>>3)&07);
			*p++ = '0' + (s&07);
		}
	}
	*p++ = '\'';
	return p;
}
LLmessage(d)
	int d;
{
	/*
	 * Report a syntax error.
	 * d is either 0, in which case the current token has been deleted,
	 * or non-zero, in which case it represents a token that is inserted
	 * before the current token.
	 * The message is "<token> deleted", or
	 * "<token> inserted in front of <current token>"; it is built in
	 * buf by means of cpy() and reported on the current line.
	 * After an insertion the current token is kept in savedtok, so
	 * that scanner() delivers it once more.
	 */
	register string	dst,src;
	char		buf[128];

	nerrors++;
	dst = buf;
	if (d != 0) {
		dst = cpy(d,dst,1);
		for (src = " inserted in front of "; *src; ) *dst++ = *src++;
		dst = cpy(LLsymb,dst,0);
		*dst = '\0';
	}
	else {
		dst = cpy(LLsymb,dst,0);
		src = " deleted";
		/*
		 * This copy includes the closing null byte
		 */
		while ((*dst++ = *src++) != '\0') {
			/* nothing */
		}
	}
	error(linecount,buf);
	if (d != 0) {	/*
			 * Save the current token and make up some
			 * attributes for the inserted token
			 */
		savedtok = lextoken;
		switch(d) {
		  case C_IDENT :
			lextoken.t_string = "dummy_identifier";
			break;
		  case C_LITERAL :
			lextoken.t_string = "dummy_literal";
			break;
		  case C_NUMBER :
			lextoken.t_num = 1;
			break;
		}
	}
}
}