677 lines
16 KiB
C
677 lines
16 KiB
C
/*
|
|
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
|
|
* See the copyright notice in the ACK home directory, in the file "Copyright".
|
|
*/
|
|
/* $Header$ */
|
|
/* M A C R O R E P L A C E M E N T */
|
|
#include "nopp.h"
|
|
|
|
#ifndef NOPP
|
|
|
|
#include "debug.h"
|
|
#include "pathlength.h"
|
|
#include "strsize.h"
|
|
#include "nparams.h"
|
|
#include "idfsize.h"
|
|
#include "numsize.h"
|
|
#include <alloc.h>
|
|
#include "idf.h"
|
|
#include "input.h"
|
|
#include "macro.h"
|
|
#include "arith.h"
|
|
#include "LLlex.h"
|
|
#include "class.h"
|
|
#include "assert.h"
|
|
#include "static.h"
|
|
#include "lapbuf.h"
|
|
#include "argbuf.h"
|
|
#include "replace.h"
|
|
|
|
struct repl *ReplaceList;	/* list of currently active macros, linked through ->next; replace() adds a buffer at the head, unstackrepl() removes the head again */
|
|
|
|
int
|
|
replace(idf)
|
|
register struct idf *idf;
|
|
{
|
|
/* replace is called by the lexical analyzer to perform
|
|
macro replacement. The routine actualy functions as a
|
|
higher interface to the real thing: expand_macro().
|
|
*/
|
|
struct repl *repl;
|
|
int size;
|
|
|
|
repl = new_repl();
|
|
repl->r_ptr = repl->r_text;
|
|
repl->r_args = new_args();
|
|
if (!expand_macro(repl, idf, (struct idf *)0))
|
|
return 0;
|
|
free_args(repl->r_args);
|
|
InsertText(repl->r_text, repl->r_ptr - repl->r_text);
|
|
repl->next = ReplaceList;
|
|
ReplaceList = repl;
|
|
return 1;
|
|
}
|
|
|
|
unstackrepl()
|
|
{
|
|
struct repl *repl = ReplaceList;
|
|
|
|
#ifdef PERSONAL_TOUCH
|
|
if (repl == NO_REPL) {
|
|
print("Leendert, you don't understand the principle yet\n");
|
|
return;
|
|
}
|
|
#else
|
|
ASSERT(repl != NO_REPL);
|
|
#endif
|
|
ReplaceList = repl->next;
|
|
free_repl(repl);
|
|
}
|
|
|
|
expand_macro(repl, idf, previdf)
|
|
register struct repl *repl;
|
|
register struct idf *idf;
|
|
struct idf *previdf;
|
|
{
|
|
/* expand_macro() does the actual macro replacement.
|
|
"idf" is a description of the identifier which
|
|
caused the replacement.
|
|
If the identifier represents a function-like macro
|
|
call, the number of actual parameters is checked
|
|
against the number of formal parameters. Note that
|
|
in ANSI C the parameters are expanded first;
|
|
this is done by calling getactuals().
|
|
When the possible parameters are expanded, the replace-
|
|
ment list associated with "idf" is expanded.
|
|
expand_macro() returns 1 if the replacement succeeded
|
|
and 0 if some error occurred.
|
|
*/
|
|
register struct macro *mac = idf->id_macro;
|
|
struct args *args = repl->r_args;
|
|
register int ch;
|
|
|
|
if (mac->mc_nps != -1) { /* with parameter list */
|
|
if (mac->mc_flag & FUNC) {
|
|
/* the following assertion won't compile:
|
|
ASSERT(!strcmp("defined", idf->id_text));
|
|
*/
|
|
if (!AccDefined) return 0;
|
|
}
|
|
|
|
ch = GetChar();
|
|
ch = skipspaces(ch,1);
|
|
if (ch != '(') { /* no replacement if no () */
|
|
/* This is obscure. See the examples for the replace
|
|
algorithm in section 3`.8.3.5.
|
|
lexwarning("macro %s needs arguments", idf->id_text);
|
|
*/
|
|
UnGetChar();
|
|
return 0;
|
|
} else
|
|
getactuals(args, idf);
|
|
|
|
if (mac->mc_flag & FUNC) {
|
|
struct idf *param = str2idf(args->a_rawbuf);
|
|
|
|
*repl->r_ptr++ = param->id_macro ? '1' : '0';
|
|
*repl->r_ptr = '\0';
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (mac->mc_flag & FUNC) /* this macro leads to special action */
|
|
macro_func(idf);
|
|
|
|
if (mac->mc_nps == -1) {
|
|
register int size = mac->mc_length;
|
|
register char *text = mac->mc_text;
|
|
|
|
ASSERT((repl->r_ptr+size) < &(repl->r_text[LAPBUF]));
|
|
while (size-- > 0)
|
|
*repl->r_ptr++ = *text++;
|
|
*repl->r_ptr = '\0';
|
|
} else
|
|
macro2buffer(repl, idf, args);
|
|
|
|
/* According to the ANSI definition:
|
|
|
|
#define a +
|
|
a+b; --> + + b ;
|
|
|
|
'a' must be substituded, but the result should be
|
|
three tokens: + + ID. Because this preprocessor is
|
|
character based, we have a problem.
|
|
For now: just insert a space after all tokens,
|
|
until ANSI fixes this flaw.
|
|
*/
|
|
*repl->r_ptr++ = ' ';
|
|
*repl->r_ptr = '\0';
|
|
|
|
if (idf != previdf)
|
|
maccount(repl, idf);
|
|
return 1;
|
|
}
|
|
|
|
getactuals(args, idf)
|
|
register struct args *args;
|
|
register struct idf *idf;
|
|
{
|
|
/* Get the actual parameters from the input stream.
|
|
The hard part is done by actual(), only comma's and
|
|
other syntactic trivialities are checked here.
|
|
*/
|
|
register int nps = idf->id_macro->mc_nps;
|
|
register int argcnt;
|
|
register int ch;
|
|
|
|
argcnt = 0;
|
|
args->a_expvec[0] = args->a_expptr = &args->a_expbuf[0];
|
|
args->a_rawvec[0] = args->a_rawptr = &args->a_rawbuf[0];
|
|
if ((ch = GetChar()) != ')') {
|
|
PushBack();
|
|
while ((ch = actual(args, idf)) != ')' ) {
|
|
if (ch != ',') {
|
|
lexerror("illegal macro call");
|
|
return;
|
|
}
|
|
stash(args, '\0');
|
|
++argcnt;
|
|
args->a_expvec[argcnt] = args->a_expptr;
|
|
args->a_rawvec[argcnt] = args->a_rawptr;
|
|
if (argcnt == STDC_NPARAMS)
|
|
strict("number of parameters exceeds ANSI standard");
|
|
if (argcnt >= NPARAMS)
|
|
fatal("argument vector overflow");
|
|
}
|
|
stash(args, '\0');
|
|
++argcnt;
|
|
}
|
|
if (argcnt < nps)
|
|
lexerror("too few macro arguments");
|
|
if (argcnt > nps)
|
|
lexerror("too many macro arguments");
|
|
}
|
|
|
|
int
|
|
actual(args, idf)
|
|
register struct args *args;
|
|
register struct idf *idf;
|
|
{
|
|
/* This routine deals with the scanning of an actual parameter.
|
|
It keeps in account the openning and clossing brackets,
|
|
preprocessor numbers, strings and character constants.
|
|
*/
|
|
register int ch;
|
|
register int level = 0;
|
|
|
|
while (1) {
|
|
ch = GetChar();
|
|
|
|
if (class(ch) == STIDF || class(ch) == STELL) {
|
|
/* Scan a preprocessor identifier token. If the
|
|
token is a macro, it is expanded first.
|
|
*/
|
|
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
|
|
register char *p = buf;
|
|
register struct idf *idef;
|
|
register int pos = -1;
|
|
register int hash;
|
|
extern int idfsize;
|
|
int size;
|
|
|
|
hash = STARTHASH();
|
|
do {
|
|
if (++pos < idfsize) {
|
|
*p++ = ch;
|
|
hash = ENHASH(hash, ch, pos);
|
|
}
|
|
ch = GetChar();
|
|
} while (in_idf(ch));
|
|
hash = STOPHASH(hash);
|
|
*p++ = '\0';
|
|
UnGetChar();
|
|
|
|
/* When the identifier has an associated macro
|
|
replacement list, it's expanded.
|
|
*/
|
|
idef = idf_hashed(buf, p - buf, hash);
|
|
if (idef->id_macro) /* expand macro identifier */
|
|
expand_actual(args, idef, idf);
|
|
else
|
|
for (p = buf; *p != '\0'; p++)
|
|
stash(args, *p);
|
|
} else if (class(ch) == STNUM || class(ch) == '.') {
|
|
/* preprocessor number token. No this is no joke,
|
|
the commitee decided (in all it's wisdom) that
|
|
a preprocessing number has the following regular
|
|
expression:
|
|
[0-9"."]{[0-9"."a-zA-Z_]|{[Ee][+-]}}*
|
|
*/
|
|
do {
|
|
stash(args, ch);
|
|
if ((ch = GetChar()) == 'e' || ch == 'E') {
|
|
ch = GetChar();
|
|
if (ch == '+' || ch == '-') {
|
|
stash(args, ch);
|
|
ch = GetChar();
|
|
}
|
|
}
|
|
} while (class(ch) == STNUM || class(ch) == STIDF ||
|
|
class(ch) == STELL || ch == '.');
|
|
UnGetChar();
|
|
} else if (ch == '(' || ch == '[' || ch == '{') {
|
|
/* a comma may occur within these constructions */
|
|
level++;
|
|
stash(args, ch);
|
|
} else if (ch == ')' || ch == ']' || ch == '}') {
|
|
level--;
|
|
/* clossing parenthesis of macro call */
|
|
if (ch == ')' && level < 0)
|
|
return ')';
|
|
stash(args, ch);
|
|
} else if (ch == ',') {
|
|
if (level <= 0) { /* comma separator for next argument */
|
|
if (level)
|
|
lexerror("unbalanced parenthesis");
|
|
return ',';
|
|
}
|
|
stash(args, ch);
|
|
} else if (ch == '\n') {
|
|
/* newlines are accepted as white spaces */
|
|
LineNumber++;
|
|
while ((ch = GetChar()), class(ch) == STSKIP)
|
|
/* VOID */;
|
|
|
|
/* This piece of code needs some explanation:
|
|
consider the call of a macro defined as:
|
|
#define sum(a,b) (a+b)
|
|
in the following form:
|
|
sum(
|
|
#include phone_number
|
|
,2);
|
|
in which case the include must be handled
|
|
interpreted as such.
|
|
*/
|
|
if (ch == '#')
|
|
domacro();
|
|
UnGetChar();
|
|
stash(args, ' ');
|
|
} else if (ch == '/') {
|
|
/* comments are treated as one white space token */
|
|
if ((ch = GetChar()) == '*') {
|
|
skipcomment();
|
|
stash(args, ' ');
|
|
} else {
|
|
UnGetChar();
|
|
stash(args, '/');
|
|
}
|
|
} else if (ch == '\'' || ch == '"') {
|
|
/* Strings are considered as ONE token, thus no
|
|
replacement within strings.
|
|
*/
|
|
register int match = ch;
|
|
|
|
stash(args, ch);
|
|
while ((ch = GetChar()) != EOI) {
|
|
if (ch == match)
|
|
break;
|
|
if (ch == '\\') {
|
|
stash(args, ch);
|
|
ch = GetChar();
|
|
} else if (ch == '\n') {
|
|
lexerror("newline in string");
|
|
LineNumber++;
|
|
stash(args, match);
|
|
break;
|
|
}
|
|
stash(args, ch);
|
|
}
|
|
if (ch != match) {
|
|
lexerror("unterminated macro call");
|
|
return ')';
|
|
}
|
|
stash(args, ch);
|
|
} else
|
|
stash(args, ch);
|
|
}
|
|
}
|
|
|
|
expand_actual(args, idef, idf)
|
|
register struct args *args;
|
|
register struct idf *idf, *idef;
|
|
{
|
|
struct repl *nrepl = new_repl();
|
|
register char *p;
|
|
|
|
nrepl->r_args = new_args();
|
|
nrepl->r_ptr = nrepl->r_text;
|
|
if (expand_macro(nrepl, idef, idf)) {
|
|
register struct args *ap = nrepl->r_args;
|
|
|
|
for (p = nrepl->r_text; p < nrepl->r_ptr; p++)
|
|
*args->a_expptr++ = *p;
|
|
|
|
/* stash idef name */
|
|
for (p = idef->id_text; *p != '\0'; p++)
|
|
*args->a_rawptr++ = *p;
|
|
|
|
/* The following code deals with expanded function
|
|
like macro calls. It makes the following code
|
|
work:
|
|
|
|
#define def(a,b) x(a,b)
|
|
#define glue(a,b) a ## b
|
|
|
|
glue(abc,def(a,b))
|
|
|
|
Results in:
|
|
|
|
abcdef(a,b);
|
|
*/
|
|
if (ap->a_rawvec[0]) {
|
|
/* stash arguments */
|
|
register int i;
|
|
|
|
*args->a_rawptr++ = '(';
|
|
for (i = 0; ap->a_rawvec[i] != (char *)0; i++) {
|
|
for (p = ap->a_rawvec[i]; *p != '\0'; p++)
|
|
*args->a_rawptr++ = *p;
|
|
*args->a_rawptr++ = ',';
|
|
}
|
|
*--args->a_rawptr = ')';
|
|
++args->a_rawptr; /* one too far */
|
|
}
|
|
} else /* something happened during the macro expansion */
|
|
for (p = idef->id_text; *p != '\0'; p++)
|
|
stash(args, *p);
|
|
free_args(nrepl->r_args);
|
|
free_repl(nrepl);
|
|
}
|
|
|
|
maccount(repl, idf)
|
|
register struct repl *repl;
|
|
register struct idf *idf;
|
|
{
|
|
/* To prevent re-expansion of already expanded macro's we count
|
|
the occurrences of the currently expanded macro name in the
|
|
replacement list. This is mainly to prevent recursion as in:
|
|
|
|
#define f(a) f(2 * (a))
|
|
f(y+1);
|
|
|
|
This results in:
|
|
|
|
f(2*(y+1));
|
|
|
|
When reading the inserted text we decrement the count of a
|
|
macro name until it's zero. Then we start expanding it again.
|
|
*/
|
|
register char *text = repl->r_text;
|
|
register int pos = -1;
|
|
extern int idfsize;
|
|
|
|
while (*text != '\0') {
|
|
if (*text == '\'' || *text == '"') {
|
|
register int delim;
|
|
|
|
for (delim = *text++; *text != delim; text++)
|
|
if (*text == '\\')
|
|
text++;
|
|
text++;
|
|
} else
|
|
if (class(*text) == STIDF || class(*text) == STELL) {
|
|
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
|
|
register char *p = buf;
|
|
|
|
do {
|
|
if (++pos < idfsize)
|
|
*p++ = *text;
|
|
text++;
|
|
} while (in_idf(*text));
|
|
*p++ = '\0';
|
|
|
|
if (!strcmp(idf->id_text, buf))
|
|
idf->id_macro->mc_count++;
|
|
} else
|
|
text++;
|
|
}
|
|
}
|
|
|
|
macro_func(idef)
|
|
register struct idf *idef;
|
|
{
|
|
/* macro_func() performs the special actions needed with some
|
|
macros. These macros are __FILE__ and __LINE__ which
|
|
replacement texts must be evaluated at the time they are
|
|
used.
|
|
*/
|
|
register struct macro *mac = idef->id_macro;
|
|
static char FilNamBuf[PATHLENGTH];
|
|
char *long2str();
|
|
|
|
switch (idef->id_text[2]) {
|
|
case 'F': /* __FILE__ */
|
|
FilNamBuf[0] = '"';
|
|
strcpy(&FilNamBuf[1], FileName);
|
|
strcat(FilNamBuf, "\"");
|
|
mac->mc_text = FilNamBuf;
|
|
mac->mc_length = strlen(FilNamBuf);
|
|
break;
|
|
case 'L': /* __LINE__ */
|
|
mac->mc_text = long2str((long)LineNumber, 10);
|
|
mac->mc_length = 1;
|
|
break;
|
|
default:
|
|
crash("(macro_func)");
|
|
/*NOTREACHED*/
|
|
}
|
|
}
|
|
|
|
macro2buffer(repl, idf, args)
|
|
register struct repl *repl;
|
|
register struct idf *idf;
|
|
register struct args *args;
|
|
{
|
|
/* macro2buffer expands the replacement list and places the
|
|
result onto the replacement buffer. It deals with the #
|
|
and ## operators, and inserts the actual parameters.
|
|
The argument buffer contains the raw argument (needed
|
|
for the ## operator), and the expanded argument (for
|
|
all other parameter substitutions).
|
|
|
|
The grammar of the replacement list is:
|
|
|
|
repl_list: TOKEN repl_list
|
|
| PARAMETER repl_list
|
|
| '#' PARAMETER
|
|
| TOKEN '##' TOKEN
|
|
| PARAMETER '##' TOKEN
|
|
| TOKEN '##' PARAMETER
|
|
| PARAMETER '##' PARAMETER
|
|
;
|
|
|
|
As the grammar indicates, we could make a DFA and
|
|
use this finite state machine for the replacement
|
|
list parsing (inserting the arguments, etc.).
|
|
|
|
Currently we go through the replacement list in a
|
|
linear fashion. This is VERY expensive, something
|
|
smarter should be done (but even a DFA is O(|s|)).
|
|
*/
|
|
register char *ptr = idf->id_macro->mc_text;
|
|
char *stringify();
|
|
|
|
while (*ptr) {
|
|
ASSERT(repl->r_ptr < &(repl->r_text[LAPBUF]));
|
|
if (*ptr == '\'' || *ptr == '"') {
|
|
register int delim = *ptr;
|
|
|
|
do {
|
|
*repl->r_ptr++ = *ptr;
|
|
if (*ptr == '\\')
|
|
*repl->r_ptr++ = *++ptr;
|
|
if (*ptr == '\0') {
|
|
lexerror("unterminated string");
|
|
*repl->r_ptr = '\0';
|
|
return;
|
|
}
|
|
ptr++;
|
|
} while (*ptr != delim || *ptr == '\0');
|
|
*repl->r_ptr++ = *ptr++;
|
|
} else if (*ptr == '#') {
|
|
if (*++ptr == '#') {
|
|
/* ## - paste operator */
|
|
ptr++;
|
|
|
|
/* trim the actual replacement list */
|
|
--repl->r_ptr;
|
|
while (is_wsp(*repl->r_ptr) &&
|
|
repl->r_ptr >= repl->r_text)
|
|
--repl->r_ptr;
|
|
|
|
/* ## occurred at the beginning of the
|
|
replacement list.
|
|
*/
|
|
if (repl->r_ptr == repl->r_text)
|
|
goto paste;
|
|
++repl->r_ptr;
|
|
|
|
/* skip space in macro replacement list */
|
|
while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
|
|
ptr++;
|
|
|
|
/* ## occurred at the end of the
|
|
replacement list.
|
|
*/
|
|
if (*ptr & FORMALP) {
|
|
register int n = *ptr++ & 0177;
|
|
register char *p;
|
|
|
|
ASSERT(n != 0);
|
|
p = args->a_rawvec[n-1];
|
|
while (is_wsp(*p))
|
|
p++;
|
|
while (*p)
|
|
*repl->r_ptr++ = *p++;
|
|
} else if (*ptr == '\0')
|
|
goto paste;
|
|
} else
|
|
ptr = stringify(repl, ptr, args);
|
|
} else if (*ptr & FORMALP) {
|
|
/* insert actual parameter */
|
|
register int n = *ptr++ & 0177;
|
|
register char *p, *q;
|
|
|
|
ASSERT(n != 0);
|
|
|
|
/* This is VERY dirty, we look ahead for the
|
|
## operater. If it's found we use the raw
|
|
argument buffer instead of the expanded
|
|
one.
|
|
*/
|
|
for (p = ptr; (*p & FORMALP) == 0 && is_wsp(*p); p++)
|
|
/* VOID */;
|
|
if (*p == '#' && p[1] == '#')
|
|
q = args->a_rawvec[n-1];
|
|
else
|
|
q = args->a_expvec[n-1];
|
|
|
|
while (*q)
|
|
*repl->r_ptr++ = *q++;
|
|
|
|
*repl->r_ptr++ = ' ';
|
|
} else
|
|
*repl->r_ptr++ = *ptr++;
|
|
}
|
|
*repl->r_ptr = '\0';
|
|
return;
|
|
|
|
paste:
|
|
/* Sorry, i know this looks a bit like
|
|
a unix device driver code.
|
|
*/
|
|
lexerror("illegal use of the ## operator");
|
|
return;
|
|
}
|
|
|
|
char *
|
|
stringify(repl, ptr, args)
|
|
register struct repl *repl;
|
|
register char *ptr;
|
|
register struct args *args;
|
|
{
|
|
/* If a parameter is immediately preceded by a # token
|
|
both are replaced by a single string literal that
|
|
contains the spelling of the token sequence for the
|
|
corresponding argument.
|
|
Each occurrence of white space between the argument's
|
|
tokens become a single space character in the string
|
|
literal. White spaces before the first token and after
|
|
the last token comprising the argument are deleted.
|
|
To retain the original spelling we insert backslashes
|
|
as appropriate. We only escape backslashes if they
|
|
occure within string tokens.
|
|
*/
|
|
register int space = 1; /* skip leading spaces */
|
|
register int delim = 0; /* string or character constant delim */
|
|
register int backslash = 0; /* last character was a \ */
|
|
|
|
/* skip spaces macro replacement list */
|
|
while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
|
|
ptr++;
|
|
|
|
if (*ptr & FORMALP) {
|
|
register int n = *ptr++ & 0177;
|
|
register char *p;
|
|
|
|
ASSERT(n != 0);
|
|
p = args->a_expvec[n-1];
|
|
*repl->r_ptr++ = '"';
|
|
while (*p) {
|
|
if (is_wsp(*p)) {
|
|
if (!space) {
|
|
space = 1;
|
|
*repl->r_ptr++ = ' ';
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
space = 0;
|
|
|
|
if (!delim && (*p == '"' || *p == '\''))
|
|
delim = *p;
|
|
else if (*p == delim && !backslash)
|
|
delim = 0;
|
|
backslash = *p == '\\';
|
|
if (*p == '"' || (delim && *p == '\\'))
|
|
*repl->r_ptr++ = '\\';
|
|
*repl->r_ptr++ = *p++;
|
|
}
|
|
|
|
/* trim spaces in the replacement list */
|
|
for (--repl->r_ptr; is_wsp(*repl->r_ptr); repl->r_ptr--)
|
|
/* VOID */;
|
|
*++repl->r_ptr = '"';
|
|
++repl->r_ptr; /* oops, one to far */
|
|
} else
|
|
error("illegal use of # operator");
|
|
return ptr;
|
|
}
|
|
|
|
stash(args, ch)
|
|
register struct args *args;
|
|
register int ch;
|
|
{
|
|
/* Stash characters into the macro expansion buffer.
|
|
*/
|
|
if (args->a_expptr >= &(args->a_expbuf[ARGBUF]))
|
|
fatal("macro argument buffer overflow");
|
|
*args->a_expptr++ = ch;
|
|
|
|
if (args->a_rawptr >= &(args->a_rawbuf[ARGBUF]))
|
|
fatal("raw macro argument buffer overflow");
|
|
*args->a_rawptr++ = ch;
|
|
|
|
|
|
}
|
|
#endif NOPP
|