/* * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands. * See the copyright notice in the ACK home directory, in the file "Copyright". * */ /* * L L G E N * * An Extended LL(1) Parser Generator * * Author : Ceriel J.H. Jacobs */ /* * tokens.g * Defines the tokens for the grammar of LLgen. * The lexical analyser and LLmessage are also included here. */ { # include "types.h" # include "io.h" # include "extern.h" # include "assert.h" # include "cclass.h" # ifndef NORCSID static string rcsidc = "$Header$"; # endif /* Here are defined : */ extern int scanner(); extern LLmessage(); extern int input(); extern unput(); extern skipcomment(); # ifdef LINE_DIRECTIVE STATIC linedirective(); # endif STATIC string cpy(); STATIC string vallookup(); } /* Classes */ %token C_IDENT ; /* lextoken.t_string contains the identifier read */ %token C_NUMBER ; /* lextoken.t_num contains the number read */ %token C_LITERAL ; /* lextoken.t_string contains the literal read */ /* Keywords */ %token C_TOKEN ; %token C_START ; %token C_IF ; %token C_WHILE ; %token C_PERSISTENT ; %token C_FIRST ; %token C_LEXICAL ; %token C_ONERROR ; %token C_AVOID ; %token C_PREFER ; %token C_DEFAULT ; %lexical scanner ; { /* * Structure for a keyword */ typedef struct keyword { string w_word; int w_value; } t_keyw, *p_keyw; /* * The list of keywords, the most often used keywords come first. * Linear search is used, as there are not many keywords */ static t_keyw resword[] = { { "token", C_TOKEN }, { "avoid", C_AVOID }, { "prefer", C_PREFER }, { "persistent", C_PERSISTENT }, { "default", C_DEFAULT }, { "if", C_IF }, { "while", C_WHILE }, { "first", C_FIRST }, { "start", C_START }, { "lexical", C_LEXICAL }, { "onerror", C_ONERROR }, { 0, 0 } }; static t_token savedtok; /* to save lextoken in case of an insertion */ # ifdef LINE_DIRECTIVE static int nostartline; /* = 0 if at the start of a line */ # endif scanner() { /* * Lexical analyser, what else */ register int ch; /* Current char */ register char *p = ltext; int reserved = 0; /* reserved word? */ char *max = <ext[LTEXTSZ - 1]; if (savedtok.t_tokno) { /* A token has been inserted. * Now deliver the last lextoken again */ lextoken = savedtok; savedtok.t_tokno = 0; return lextoken.t_tokno; } for (;;) { ch = input(); if (ch == EOF) return ch; # ifdef LINE_DIRECTIVE if (ch == '#' && !nostartline) { linedirective(); continue; } # endif switch(c_class[ch]) { case ISLIT : for (;;) { ch = input(); if (ch == '\n' || ch == EOF) { error(linecount,"missing '"); break; } if (ch == '\'') break; if (ch == '\\') { *p++ = ch; ch = input(); } *p++ = ch; if (p > max) p--; } *p = '\0'; lextoken.t_string = ltext; return C_LITERAL; case ISCOM : skipcomment(0); /* Fall through */ case ISSPA : continue; case ISDIG : { register i = 0; do { i = 10 * i + (ch - '0'); ch= input(); } while (c_class[ch] == ISDIG); lextoken.t_num = i; unput(ch); return C_NUMBER; } default: return ch; case ISKEY : reserved = 1; ch = input(); /* Fall through */ case ISLET : do { if (reserved && ch >= 'A' && ch <= 'Z') { ch += 'a' - 'A'; } *p++ = ch; if (p > max) p--; ch = input(); } while (c_class[ch] == ISDIG || c_class[ch] == ISLET); unput(ch); *p = '\0'; if (reserved) { /* * Now search for the keyword */ register p_keyw w; w = resword; while (w->w_word) { if (! strcmp(ltext,w->w_word)) { /* * Return token number. */ return w->w_value; } w++; } error(linecount,"illegal reserved word"); } lextoken.t_string = ltext; return C_IDENT; } } } static int backupc; /* for unput() */ static int nonline; /* = 1 if last char read was a newline */ input() { /* * Low level input routine, used by all other input routines */ register c; if (c = backupc) { /* Last char was "unput()". Deliver it again */ backupc = 0; return c; } if ((c = getc(finput)) == EOF) return c; # ifdef LINE_DIRECTIVE nostartline = 1; # endif if (!nonline) { linecount++; # ifdef LINE_DIRECTIVE nostartline = 0; # endif nonline = 1; } if (c == '\n') nonline = 0; return c; } unput(c) { /* * "unread" c */ backupc = c; } skipcomment(flag) { /* * Skip comment. If flag != 0, the comment is inside a fragment * of C-code, so the newlines in it must be copied to enable the * C-compiler to keep a correct line count */ register int ch; int saved; /* line count on which comment starts */ saved = linecount; if (input() != '*') error(linecount,"illegal comment"); do { ch = input(); while (ch == '*') { if ((ch = input()) == '/') return; } if (flag && ch == '\n') putc(ch,fact); } while (ch != EOF); error(saved,"Comment does not terminate"); } # ifdef LINE_DIRECTIVE STATIC linedirective() { /* * Read a line directive */ register int ch; register int i; string s_error = "Illegal line directive"; string store(); register string c; do { /* * Skip to next digit * Do not skip newlines */ ch = input(); } while (ch != '\n' && c_class[ch] != ISDIG); if (ch == '\n') { error(linecount,s_error); return; } i = 0; do { i = i*10 + (ch - '0'); ch = input(); } while (c_class[ch] == ISDIG); while (ch != '\n' && ch != '"') ch = input(); if (ch == '"') { c = ltext; do { *c++ = ch = input(); } while (ch != '"' && ch != '\n'); if (ch == '\n') { error(linecount,s_error); return; } *--c = '\0'; do { ch = input(); } while (ch != '\n'); /* * Remember the file name */ if (strcmp(f_input,ltext)) f_input = store(ltext); } linecount = i; } # endif STATIC string vallookup(s) { /* * Look up the keyword that has token number s */ register p_keyw p = resword; while (p->w_value) { if (p->w_value == s) return p->w_word; p++; } return 0; } STATIC string cpy(s,p,inserted) register string p; { /* * Create a piece of error message for token s and put it at p. * inserted = 0 if the token s was deleted (in which case we have * attributes), else it was inserted */ register string t = 0; switch(s) { case C_IDENT : if (!inserted) t = lextoken.t_string; else t = "identifier"; break; case C_NUMBER : t = "number"; break; case C_LITERAL : if (!inserted) { *p++ = '\''; t = lextoken.t_string; break; } t = "literal"; break; case EOFILE : t = "endoffile"; break; } if (!t && (t = vallookup(s))) { *p++ = '%'; } if (t) { /* * We have a string for the token. Copy it */ while (*t) *p++ = *t++; if (s == C_LITERAL && !inserted) { *p++ = '\''; } return p; } /* * The token is a literal */ *p++ = '\''; if (s >= 040 && s <= 0176) *p++ = s; else { *p++ = '\\'; switch(s) { case '\b' : *p++ = 'b'; break; case '\f' : *p++ = 'f'; break; case '\n' : *p++ = 'n'; break; case '\r' : *p++ = 'r'; break; case '\t' : *p++ = 't'; break; default : *p++='0'+((s&0377)>>6); *p++='0'+((s>>3)&07); *p++='0'+(s&07); } } *p++ = '\''; return p; } LLmessage(d) { /* * d is either 0, in which case the current token has been deleted, * or non-zero, in which case it represents a token that is inserted * before the current token */ register string s,t; char buf[128]; nerrors++; s = buf; if (d == 0) { s = cpy(LLsymb,s,0); t = " deleted"; do *s++ = *t; while (*t++); } else { s = cpy(d,s,1); t = " inserted in front of "; do *s++ = *t++; while (*t); s = cpy(LLsymb,s,0); *s = '\0'; } error(linecount, "%s", buf); /* Don't change this line to * error(linecount, buf). * The string in "buf" might contain '%' ... */ if (d) { /* * Save the current token and make up some * attributes for the inserted token */ savedtok = lextoken; savedtok.t_tokno = LLsymb; if (d == C_IDENT) lextoken.t_string = "dummy_identifier"; else if (d == C_LITERAL) lextoken.t_string = "dummy_literal"; else if (d == C_NUMBER) lextoken.t_num = 1; } } }