ack/util/LLgen/src/tokens.g

/* Copyright (c) 1991 by the Vrije Universiteit, Amsterdam, the Netherlands.
 * For full copyright and restrictions on use see the file COPYING in the top
 * level of the LLgen tree.
 */

/*
 *  L L G E N
 *
 *  An Extended LL(1) Parser Generator
 *
 *  Author : Ceriel J.H. Jacobs
 */

/*
 * tokens.g
 * Defines the tokens for the grammar of LLgen.
 * The lexical analyser and LLmessage are also included here.
 */

{
# include "types.h"
# include "io.h"
# include "extern.h"
# include "assert.h"
# include "cclass.h"

# ifndef NORCSID
static string	rcsidc = "$Id$";
# endif

/* Here are defined : */
extern int	scanner();	/* the lexical analyser, named in %lexical below */
extern		LLmessage();	/* reports an inserted or deleted token */
extern int	input();	/* low level character input */
extern		unput();	/* push back one character for input() */
extern		skipcomment();	/* skip a comment, or copy it to the action file */
# ifdef LINE_DIRECTIVE
STATIC		linedirective();	/* handle a '#' line directive in the input */
# endif
STATIC string	cpy();		/* put the description of a token in a message */
STATIC string	vallookup();	/* spelling of the keyword with a given token number */
STATIC		copyact();	/* copy a bracketed piece of C text to the action file */

static int	nparams;	/* set by copyact(): number of parameters it counted */
}
/* Classes: tokens that carry an attribute in lextoken */

%token	C_IDENT ;	/* lextoken.t_string contains the identifier read */
%token	C_NUMBER ;	/* lextoken.t_num contains the number read */
%token	C_LITERAL ;	/* lextoken.t_string contains the literal read,
			 * without the surrounding quotes */
%token	C_EXPR ;	/* A C expression (%if or %while) */
%token	C_PARAMS ;	/* formal or actual parameters; lextoken.t_num
			 * contains the number of parameters counted */
%token	C_ACTION ;	/* a C action */

/* Keywords; their spellings are listed in the table resword below */

%token	C_TOKEN ;
%token	C_START ;
%token	C_IF ;
%token	C_WHILE ;
%token	C_PERSISTENT ;
%token	C_FIRST ;
%token	C_LEXICAL ;
%token	C_PREFIX ;
%token	C_ONERROR ;
%token	C_AVOID ;
%token	C_PREFER ;
%token	C_DEFAULT ;
/* The next three are only recognized when NON_CORRECTING is defined */
%token	C_SUBSTART ;
%token	C_ERRONEOUS ;
%token	C_ILLEGAL ;

/* The routine scanner(), defined below, is the lexical analyser */
%lexical scanner ;

{

/*
 * Structure for a keyword: couples the spelling of a reserved word
 * to the token number that the scanner returns for it.
 */

typedef struct keyword {
	string	w_word;		/* spelling in lower case; 0 in the entry that ends the table */
	int	w_value;	/* token number; 0 in the entry that ends the table */
} t_keyw, *p_keyw;

/*
 * The list of keywords, the most often used keywords come first.
 * Linear search is used, as there are not many keywords.
 * scanner() searches the list by spelling (after mapping upper case
 * letters to lower case), vallookup() searches it by token number.
 * The list ends with an entry containing only zeros.
 */

static t_keyw resword[] = {
	{ "token",	C_TOKEN		},
	{ "avoid",	C_AVOID		},
	{ "prefer",	C_PREFER	},
	{ "persistent", C_PERSISTENT	},
	{ "default",	C_DEFAULT	},
	{ "if",		C_IF		},
	{ "while",	C_WHILE		},
	{ "first",	C_FIRST		},
	{ "start",	C_START		},
	{ "lexical",	C_LEXICAL	},
	{ "onerror",	C_ONERROR	},
	{ "prefix",	C_PREFIX	},
#ifdef NON_CORRECTING
	/* Only known as keywords when NON_CORRECTING is defined */
	{ "substart",	C_SUBSTART	},
	{ "erroneous",	C_ERRONEOUS	},
	{ "illegal",	C_ILLEGAL	},
#endif
	{ 0,		0		}	/* end of list */
};

static t_token	savedtok;	/* to save lextoken in case of an insertion;
				 * a non-zero t_tokno means that scanner()
				 * must deliver this token on its next call */
# ifdef LINE_DIRECTIVE
static	int	nostartline;	/* = 0 if at the start of a line; maintained by
				 * input() for each character read from the file */
# endif

STATIC
copyact(ch1,ch2,flag,level) char ch1,ch2; {
	/*
	 * Copy an action (a piece of C text) from the input to the file
	 * "fact". The opening bracket ch1 has already been read by the
	 * caller; copying stops after the matching closing bracket ch2.
	 * Nested (), {} and [] are copied by a recursive call with
	 * level + 1. Strings, character constants and comments are copied
	 * without looking at the brackets in them.
	 *
	 * If flag & 1, copy opening and closing parameters too (nested
	 * brackets are always copied).
	 * If flag & 2, don't allow ',': a warning is given and a ';' is
	 * written instead.
	 * level is the nesting depth, 0 for the outermost call.
	 *
	 * At level 0 a '\0' and a "# line" directive are written in front
	 * of the text, and "nparams" is set to the number of non-empty
	 * pieces that are separated by ',' or ';' at that level.
	 * If the end of the input is reached first, the error
	 * "Action does not terminate" is given (at level 0 only).
	 */
	static int	text_seen = 0;	/* non-space text seen since the last
					 * separator at level 0 ?
					 */
	register	FILE *f;
	register	ch;		/* Current char */
	register	match;		/* used to read strings */
	int		saved = linecount;
					/* save linecount */
	int		sav_strip = strip_grammar;

	f = fact;
	/*
	 * While strip_grammar is set, input() echoes what it reads on
	 * standard output. That is switched off here, except for a
	 * parenthesized text that is copied with flag == 1.
	 */
	if (ch1 == '{' || flag != 1) strip_grammar = 0;
	if (!level) {
		text_seen = 0;
		nparams = 0;			/* count comma's */
		putc('\0',f);
		fprintf(f,"# line %d \"%s\"\n", linecount,f_input);
	}
	if (level || (flag & 1)) putc(ch1,f);
	for (;;) {
		ch = input();
		if (ch == ch2) {
			if (!level) {
				/* the last piece counts too, if not empty */
				if (text_seen) nparams++;
			}
			if (level || (flag & 1)) putc(ch,f);
			if (strip_grammar != sav_strip) {
				/*
				 * The echo was switched off above, so the
				 * closing bracket is echoed here
				 */
				if (ch1 == '{' || flag != 1) putchar(ch);
			}
			strip_grammar = sav_strip;
			return;
		}
		switch(ch) {
		  case ')':
		  case '}':
		  case ']':
			/* a closing bracket, but not the one expected */
			error(linecount,"Parentheses mismatch");
			break;
		  case '(':
			text_seen = 1;
			copyact('(',')',flag,level+1);
			continue;
		  case '{':
			text_seen = 1;
			copyact('{','}',flag,level+1);
			continue;
		  case '[':
			text_seen = 1;
			copyact('[',']',flag,level+1);
			continue;
		  case '/':
			/* look one character ahead for a comment */
			ch = input();
			unput(ch);
			if (ch == '*') {
				putc('/', f);
				skipcomment(1);
				continue;
			}
			ch = '/';
			text_seen = 1;
			break;
		  case ';':
		  case ',':
			if (! level && text_seen) {
				text_seen = 0;
				nparams++;
				if (ch == ',' && (flag & 2)) {
					warning(linecount, "Parameters may not be separated with a ','");
					ch = ';';
				}
			}
			break;
		  case '\'':
		  case '"' :
			/*
			 * watch out for brackets in strings, they do not
			 * count !
			 */
			text_seen = 1;
			match = ch;
			putc(ch,f);
			/*
			 * input() keeps returning EOF at the end of the
			 * file, so EOF must end this loop, or it would
			 * never stop writing. A null character ends it too.
			 */
			while((ch = input()) && ch != EOF) {
				if (ch == match) break;
				if (ch == '\\') {
					putc(ch,f);
					ch = input();
					if (ch == EOF) break;
				}
				if (ch == '\n') {
					error(linecount,"Newline in string");
					/* act as if the string ended here */
					unput(match);
				}
				putc(ch,f);
			}
			if (ch == match) break;
			/* Fall through */
		    case EOF :
			if (!level) error(saved,"Action does not terminate");
			strip_grammar = sav_strip;
			return;
		    default:
			if (c_class[ch] != ISSPA) text_seen = 1;
		}
		putc(ch,f);
	}
}

scanner() {
	/*
	 * Lexical analyser, what else.
	 * Delivers the number of the next token and puts its attribute,
	 * if any, in "lextoken":
	 *   C_IDENT, C_LITERAL	t_string points to the text in ltext
	 *   C_NUMBER		t_num is the value
	 *   C_PARAMS		t_num is the number of parameters
	 * C actions, parameters and expressions are copied to the action
	 * file by copyact(). A character that does not start any of the
	 * tokens above is delivered as itself, and EOF is delivered at the
	 * end of the input.
	 * Text that does not fit in ltext is truncated.
	 */
	register int	ch;		/* Current char */
	register char *p = ltext;
	int		reserved = 0;	/* reserved word? */
	char		*max = &ltext[LTEXTSZ - 1];
					/* last position in ltext, kept
					 * free for the terminating null
					 */
	static int	nextexpr;	/* set after %if and %while */
	int		expect_expr = nextexpr;
	long		off;

	nextexpr = 0;
	if (savedtok.t_tokno) {
				/* A token has been inserted.
				 * Now deliver the last lextoken again
				 */
		lextoken = savedtok;
		savedtok.t_tokno = 0;
		return lextoken.t_tokno;
	}
	for (;;) {
		ch = input();
		if (ch == EOF) return ch;
# ifdef LINE_DIRECTIVE
		if (ch == '#' && !nostartline) {
			linedirective();
			continue;
		}
# endif
		switch(c_class[ch]) {
		  case ISACT :
			if (ch == '{') {
				copyact('{', '}', in_production, 0);
				return C_ACTION;
			}
			assert(ch == '(');
			if (expect_expr) {
				/* the condition of a %if or %while */
				copyact('(', ')', 1, 0);
				return C_EXPR;
			}
			off = ftell(fact);
			copyact('(', ')', in_production != 0 ? 0 : 2, 0);
			/*
			 * An empty parameter list leaves nothing behind
			 * in the action file
			 */
			if (nparams == 0) fseek(fact, off, 0);
			lextoken.t_num = nparams;
			return C_PARAMS;
		  case ISLIT :
			for (;;) {
				ch = input();
				if (ch == '\n' || ch == EOF) {
					error(linecount,"Missing '");
					break;
				}
				if (ch == '\'') break;
				if (ch == '\\') {
					/*
					 * Keep the backslash and take the
					 * next character as it is. Every
					 * store is followed by its own
					 * check, so that p stays inside
					 * ltext.
					 */
					*p++ = ch;
					if (p > max) p--;
					ch = input();
				}
				*p++ = ch;
				if (p > max) p--;
			}
			*p = '\0';
			lextoken.t_string = ltext;
			return C_LITERAL;
		  case ISCOM :
			skipcomment(0);
			/* Fall through */
		  case ISSPA :
			continue;
		  case ISDIG : {
			register i = 0;
			do {
				i = 10 * i + (ch - '0');
				ch= input();
				/* EOF is not an index in c_class */
			} while (ch != EOF && c_class[ch] == ISDIG);
			lextoken.t_num = i;
			unput(ch);
			return C_NUMBER; }
		  default:
			return ch;
		  case ISKEY :
			reserved = 1;
			ch = input();
			/* Fall through */
		  case ISLET :
			do {
				if (reserved && ch >= 'A' && ch <= 'Z') {
					/* keywords are looked up in lower case */
					ch += 'a' - 'A';
				}
				*p++ = ch;
				if (p > max) p--;
				ch = input();
				/* EOF is not an index in c_class */
			} while (ch != EOF &&
				 (c_class[ch] == ISDIG || c_class[ch] == ISLET));
			unput(ch);
			*p = '\0';
			if (reserved) {	/*
					 * Now search for the keyword
					 */
				register p_keyw w;

				w = resword;
				while (w->w_word) {
					if (! strcmp(ltext,w->w_word)) {
						/*
						 * Return token number.
						 * After %if and %while a
						 * C expression follows.
						 */
						if (w->w_value == C_IF ||
						    w->w_value == C_WHILE) {
							nextexpr = 1;
						}
						return w->w_value;
					}
					w++;
				}
				error(linecount,"Illegal reserved word");
				/* and deliver it as an identifier */
			}
			lextoken.t_string = ltext;
			return C_IDENT;
		}
	}
}

static int	backupc;	/* for unput(): the character pushed back, 0 if none */
static int	nonline;	/* = 0 at the start of a line (nothing read yet, or
				 * the last char read was a newline or EOF),
				 * = 1 after a char of the current line was read */

input() {
	/*
	 * Basic character source; all other input routines get their
	 * characters from here. Delivers the character pushed back by
	 * unput() if there is one, else the next character of "finput",
	 * or EOF. Counts lines, and echoes the characters read from the
	 * file on standard output while strip_grammar is set.
	 */
	register	c;

	c = backupc;
	if (c != 0) {
			/* Pushed back by unput(); hand it out once more
			 */
		backupc = 0;
		return c;
	}
	c = getc(finput);
	if (c == EOF) {
		nonline = 0;
		return c;
	}
	if (nonline) {
			/* Somewhere in the middle of a line
			 */
# ifdef LINE_DIRECTIVE
		nostartline = 1;
# endif
	}
	else {
			/* This is the first character of a new line
			 */
		linecount++;
# ifdef LINE_DIRECTIVE
		nostartline = 0;
# endif
		nonline = 1;
	}
	if (c == '\n') nonline = 0;
	if (strip_grammar) putchar(c);
	return c;
}

unput(c) {
	/*
	 * "unread" c: the next call of input() delivers c again.
	 * There is room for one character only, and input() takes a
	 * saved 0 to mean that nothing was pushed back.
	 */
	backupc = c;
}

skipcomment(flag) {
	/*
	 * Skip comment. If flag != 0, the comment is inside a fragment
	 * of C-code, so keep it: it is copied to the file "fact".
	 * The '/' that opens the comment has been read (and, if needed,
	 * written) by the caller; the '*' is expected next on the input.
	 * If the input ends inside the comment, an error is given for
	 * the line on which the comment started.
	 */
	register int	ch;
	int		saved;	/* line count on which comment starts */

	saved = linecount;
	if (input() != '*') error(linecount,"Illegal comment");
	if (flag) putc('*', fact);
	do {
		ch = input();
		/* EOF is not a character, so it is never written */
		if (flag && ch != EOF) putc(ch, fact);
		while (ch == '*') {
			/* a '/' directly after a '*' closes the comment */
			ch = input();
			if (flag && ch != EOF) putc(ch, fact);
			if (ch == '/') return;
		}
	} while (ch != EOF);
	error(saved,"Comment does not terminate");
}

# ifdef LINE_DIRECTIVE
STATIC
linedirective() {
	/*
	 * Read a line directive. The '#' has been read by the caller.
	 * The directive contains a line number, optionally followed by a
	 * file name between double quotes. linecount is set to the
	 * number, and f_input to the file name if one is given that
	 * differs from the current one.
	 * A directive without a number, or with a file name that is not
	 * closed on the same line, is reported as an error and changes
	 * nothing. The end of the input ends the directive like a
	 * newline does.
	 */
	register int	ch;
	register int	i;
	string		s_error = "Illegal line directive";
	string		store();
	register string	c;

	do {	/*
		 * Skip to next digit
		 * Do not skip newlines, and stop at the end of the input
		 * (EOF is not an index in c_class either)
		 */
		ch = input();
	} while (ch != '\n' && ch != EOF && c_class[ch] != ISDIG);
	if (ch == '\n' || ch == EOF) {
		error(linecount,s_error);
		return;
	}
	i = 0;
	do  {
		i = i*10 + (ch - '0');
		ch = input();
	} while (ch != EOF && c_class[ch] == ISDIG);
	while (ch != '\n' && ch != '"' && ch != EOF) ch = input();
	if (ch == '"') {
		c = ltext;
		for (;;) {
			ch = input();
			if (ch == '"' || ch == '\n' || ch == EOF) break;
			/*
			 * Keep room for the terminating null; a name
			 * that does not fit in ltext is truncated
			 */
			if (c < &ltext[LTEXTSZ - 1]) *c++ = ch;
		}
		if (ch != '"') {
			error(linecount,s_error);
			return;
		}
		*c = '\0';
		do {	/*
			 * Skip the rest of the line
			 */
			ch = input();
		} while (ch != '\n' && ch != EOF);
		/*
		 * Remember the file name
		 */
		if (strcmp(f_input,ltext)) f_input = store(ltext);
	}
	/*
	 * NOTE(review): input() increments linecount again when it reads
	 * the first character of the next line - confirm that i, and not
	 * i - 1, is what is wanted here.
	 */
	linecount = i;
}
# endif

STATIC string
vallookup(s) {
	/*
	 * Find the spelling of the keyword whose token number is s.
	 * Delivers 0 if resword has no entry for s.
	 */
	register p_keyw kw;

	for (kw = resword; kw->w_value != 0; kw++) {
		if (kw->w_value == s) {
			return kw->w_word;
		}
	}
	return 0;
}

/*
 * Store character c at p and advance p, but only as long as p has not
 * reached lim. Used by cpy() to stay inside the message buffer.
 */
# define CPY_PUT(p, lim, c)	do { if ((p) < (lim)) *(p)++ = (c); } while (0)

STATIC string
cpy(s,p,inserted,lim) register string p; string lim; {
	/*
	 * Create a piece of error message for token s and put it at p.
	 * inserted = 0 if the token s was deleted (in which case we have
	 * attributes), else it was inserted.
	 * Nothing is stored at or beyond lim; text that does not fit is
	 * dropped. p may not be beyond lim on entry.
	 * Delivers the position just after the text; no null character
	 * is added.
	 *
	 * Token classes are described by a name, or by their text in
	 * lextoken when that is available, keywords by their spelling
	 * preceded by '%', and all other tokens as a character constant.
	 */
	register string t = 0;

	switch(s) {
	  case C_IDENT :
		if (!inserted) t = lextoken.t_string;
		else t = "identifier";
		break;
	  case C_NUMBER :
		t = "number";
		break;
	  case C_LITERAL :
		if (!inserted) {
			/* the closing quote is added after the copy below */
			CPY_PUT(p, lim, '\'');
			t = lextoken.t_string;
			break;
		}
		t = "literal";
		break;
	  case C_ACTION:
		t = "C action";
		break;
	  case C_PARAMS:
		t = "C parameter section";
		break;
	  case C_EXPR:
		t = "C expression";
		break;
	  case EOFILE :
		t = "end-of-file";
		break;
	}
	if (!t && (t = vallookup(s))) {
		/* a keyword */
		CPY_PUT(p, lim, '%');
	}
	if (t) {	/*
			 * We have a string for the token. Copy it, as far
			 * as it fits. lextoken.t_string can be much longer
			 * than the room that is left.
			 */
		while (*t && p < lim) *p++ = *t++;
		if (s == C_LITERAL && !inserted) {
			CPY_PUT(p, lim, '\'');
		}
		return p;
	}
	/*
	 * The token is a literal
	 */
	CPY_PUT(p, lim, '\'');
	if (s >= 040 && s <= 0176) CPY_PUT(p, lim, s);
	else {
	    /* not printable: use an escape sequence */
	    CPY_PUT(p, lim, '\\');
	    switch(s) {
	      case '\b' : CPY_PUT(p, lim, 'b'); break;
	      case '\f' : CPY_PUT(p, lim, 'f'); break;
	      case '\n' : CPY_PUT(p, lim, 'n'); break;
	      case '\r' : CPY_PUT(p, lim, 'r'); break;
	      case '\t' : CPY_PUT(p, lim, 't'); break;
	      default :
			/* three octal digits */
			CPY_PUT(p, lim, '0'+((s&0377)>>6));
			CPY_PUT(p, lim, '0'+((s>>3)&07));
			CPY_PUT(p, lim, '0'+(s&07));
	    }
	}
	CPY_PUT(p, lim, '\'');
	return p;
}

string strcpy();

LLmessage(d) {
	/*
	 * d is either 0, in which case the current token has been deleted,
	 * or non-zero, in which case it represents a token that is inserted
	 * before the current token.
	 * A negative d gives the message "end-of-file expected".
	 * The current token is LLsymb, its attributes are in lextoken.
	 * The message is built in a buffer of fixed size; a token text
	 * that does not fit in it is truncated.
	 */
	register string	s,t;
	char		buf[128];
	string		lim = &buf[sizeof(buf) - 1];
					/* last position in buf, kept
					 * free for the terminating null
					 */

	nerrors++;
	s = buf;
	if (d < 0) {
		strcpy(buf, "end-of-file expected");
	}
	else if (d == 0) {
		register string	e;

#ifdef LLNONCORR
		t = " unexpected";
#else
		t = " deleted";
#endif
		/*
		 * Keep room for the text in t, so that a long token
		 * is truncated instead of the text behind it
		 */
		for (e = t; *e; e++) ;
		s = cpy(LLsymb,s,0,lim - (e - t));
		while (*t && s < lim) *s++ = *t++;
		*s = '\0';
	} else {
		s = cpy(d,s,1,lim);
		t = " inserted in front of ";
		while (*t && s < lim) *s++ = *t++;
		s = cpy(LLsymb,s,0,lim);
		*s = '\0';
	}
	if (d > 0) {	/*
			 * Save the current token and make up some
			 * attributes for the inserted token.
			 * scanner() delivers the saved token again on
			 * its next call.
			 */
		savedtok = lextoken;
		savedtok.t_tokno = LLsymb;
		if (d == C_IDENT) lextoken.t_string = "dummy_identifier";
		else if (d == C_LITERAL) lextoken.t_string = "dummy_literal";
		else if (d == C_NUMBER) lextoken.t_num = 1;
	}
#ifdef LLNONCORR
	else
#endif
	error(linecount, "%s", buf);
			/* Don't change this line to
			 * error(linecount, buf).
			 * The string in "buf" might contain '%' ...
			 */
#ifdef LLNONCORR
	in_production = 1;
			/* To prevent warnings from copyact */
#endif
}
}