ack/lang/cem/cemcom.ansi/replace.c
1989-02-07 11:04:05 +00:00

677 lines
16 KiB
C

/*
* (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
* See the copyright notice in the ACK home directory, in the file "Copyright".
*/
/* $Header$ */
/* M A C R O R E P L A C E M E N T */
#include "nopp.h"
#ifndef NOPP
#include "debug.h"
#include "pathlength.h"
#include "strsize.h"
#include "nparams.h"
#include "idfsize.h"
#include "numsize.h"
#include <alloc.h>
#include "idf.h"
#include "input.h"
#include "macro.h"
#include "arith.h"
#include "LLlex.h"
#include "class.h"
#include "assert.h"
#include "static.h"
#include "lapbuf.h"
#include "argbuf.h"
#include "replace.h"
struct repl *ReplaceList; /* list of currently active macros */
int
replace(idf)
register struct idf *idf;
{
/* replace is called by the lexical analyzer to perform
macro replacement. The routine actualy functions as a
higher interface to the real thing: expand_macro().
*/
struct repl *repl;
int size;
repl = new_repl();
repl->r_ptr = repl->r_text;
repl->r_args = new_args();
if (!expand_macro(repl, idf, (struct idf *)0))
return 0;
free_args(repl->r_args);
InsertText(repl->r_text, repl->r_ptr - repl->r_text);
repl->next = ReplaceList;
ReplaceList = repl;
return 1;
}
unstackrepl()
{
struct repl *repl = ReplaceList;
#ifdef PERSONAL_TOUCH
if (repl == NO_REPL) {
print("Leendert, you don't understand the principle yet\n");
return;
}
#else
ASSERT(repl != NO_REPL);
#endif
ReplaceList = repl->next;
free_repl(repl);
}
expand_macro(repl, idf, previdf)
register struct repl *repl;
register struct idf *idf;
struct idf *previdf;
{
/* expand_macro() does the actual macro replacement.
"idf" is a description of the identifier which
caused the replacement.
If the identifier represents a function-like macro
call, the number of actual parameters is checked
against the number of formal parameters. Note that
in ANSI C the parameters are expanded first;
this is done by calling getactuals().
When the possible parameters are expanded, the replace-
ment list associated with "idf" is expanded.
expand_macro() returns 1 if the replacement succeeded
and 0 if some error occurred.
*/
register struct macro *mac = idf->id_macro;
struct args *args = repl->r_args;
register int ch;
if (mac->mc_nps != -1) { /* with parameter list */
if (mac->mc_flag & FUNC) {
/* the following assertion won't compile:
ASSERT(!strcmp("defined", idf->id_text));
*/
if (!AccDefined) return 0;
}
ch = GetChar();
ch = skipspaces(ch,1);
if (ch != '(') { /* no replacement if no () */
/* This is obscure. See the examples for the replace
algorithm in section 3`.8.3.5.
lexwarning("macro %s needs arguments", idf->id_text);
*/
UnGetChar();
return 0;
} else
getactuals(args, idf);
if (mac->mc_flag & FUNC) {
struct idf *param = str2idf(args->a_rawbuf);
*repl->r_ptr++ = param->id_macro ? '1' : '0';
*repl->r_ptr = '\0';
return 1;
}
}
if (mac->mc_flag & FUNC) /* this macro leads to special action */
macro_func(idf);
if (mac->mc_nps == -1) {
register int size = mac->mc_length;
register char *text = mac->mc_text;
ASSERT((repl->r_ptr+size) < &(repl->r_text[LAPBUF]));
while (size-- > 0)
*repl->r_ptr++ = *text++;
*repl->r_ptr = '\0';
} else
macro2buffer(repl, idf, args);
/* According to the ANSI definition:
#define a +
a+b; --> + + b ;
'a' must be substituded, but the result should be
three tokens: + + ID. Because this preprocessor is
character based, we have a problem.
For now: just insert a space after all tokens,
until ANSI fixes this flaw.
*/
*repl->r_ptr++ = ' ';
*repl->r_ptr = '\0';
if (idf != previdf)
maccount(repl, idf);
return 1;
}
getactuals(args, idf)
register struct args *args;
register struct idf *idf;
{
/* Get the actual parameters from the input stream.
The hard part is done by actual(), only comma's and
other syntactic trivialities are checked here.
*/
register int nps = idf->id_macro->mc_nps;
register int argcnt;
register int ch;
argcnt = 0;
args->a_expvec[0] = args->a_expptr = &args->a_expbuf[0];
args->a_rawvec[0] = args->a_rawptr = &args->a_rawbuf[0];
if ((ch = GetChar()) != ')') {
PushBack();
while ((ch = actual(args, idf)) != ')' ) {
if (ch != ',') {
lexerror("illegal macro call");
return;
}
stash(args, '\0');
++argcnt;
args->a_expvec[argcnt] = args->a_expptr;
args->a_rawvec[argcnt] = args->a_rawptr;
if (argcnt == STDC_NPARAMS)
strict("number of parameters exceeds ANSI standard");
if (argcnt >= NPARAMS)
fatal("argument vector overflow");
}
stash(args, '\0');
++argcnt;
}
if (argcnt < nps)
lexerror("too few macro arguments");
if (argcnt > nps)
lexerror("too many macro arguments");
}
int
actual(args, idf)
register struct args *args;
register struct idf *idf;
{
/* This routine deals with the scanning of an actual parameter.
It keeps in account the openning and clossing brackets,
preprocessor numbers, strings and character constants.
*/
register int ch;
register int level = 0;
while (1) {
ch = GetChar();
if (class(ch) == STIDF || class(ch) == STELL) {
/* Scan a preprocessor identifier token. If the
token is a macro, it is expanded first.
*/
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
register char *p = buf;
register struct idf *idef;
register int pos = -1;
register int hash;
extern int idfsize;
int size;
hash = STARTHASH();
do {
if (++pos < idfsize) {
*p++ = ch;
hash = ENHASH(hash, ch, pos);
}
ch = GetChar();
} while (in_idf(ch));
hash = STOPHASH(hash);
*p++ = '\0';
UnGetChar();
/* When the identifier has an associated macro
replacement list, it's expanded.
*/
idef = idf_hashed(buf, p - buf, hash);
if (idef->id_macro) /* expand macro identifier */
expand_actual(args, idef, idf);
else
for (p = buf; *p != '\0'; p++)
stash(args, *p);
} else if (class(ch) == STNUM || class(ch) == '.') {
/* preprocessor number token. No this is no joke,
the commitee decided (in all it's wisdom) that
a preprocessing number has the following regular
expression:
[0-9"."]{[0-9"."a-zA-Z_]|{[Ee][+-]}}*
*/
do {
stash(args, ch);
if ((ch = GetChar()) == 'e' || ch == 'E') {
ch = GetChar();
if (ch == '+' || ch == '-') {
stash(args, ch);
ch = GetChar();
}
}
} while (class(ch) == STNUM || class(ch) == STIDF ||
class(ch) == STELL || ch == '.');
UnGetChar();
} else if (ch == '(' || ch == '[' || ch == '{') {
/* a comma may occur within these constructions */
level++;
stash(args, ch);
} else if (ch == ')' || ch == ']' || ch == '}') {
level--;
/* clossing parenthesis of macro call */
if (ch == ')' && level < 0)
return ')';
stash(args, ch);
} else if (ch == ',') {
if (level <= 0) { /* comma separator for next argument */
if (level)
lexerror("unbalanced parenthesis");
return ',';
}
stash(args, ch);
} else if (ch == '\n') {
/* newlines are accepted as white spaces */
LineNumber++;
while ((ch = GetChar()), class(ch) == STSKIP)
/* VOID */;
/* This piece of code needs some explanation:
consider the call of a macro defined as:
#define sum(a,b) (a+b)
in the following form:
sum(
#include phone_number
,2);
in which case the include must be handled
interpreted as such.
*/
if (ch == '#')
domacro();
UnGetChar();
stash(args, ' ');
} else if (ch == '/') {
/* comments are treated as one white space token */
if ((ch = GetChar()) == '*') {
skipcomment();
stash(args, ' ');
} else {
UnGetChar();
stash(args, '/');
}
} else if (ch == '\'' || ch == '"') {
/* Strings are considered as ONE token, thus no
replacement within strings.
*/
register int match = ch;
stash(args, ch);
while ((ch = GetChar()) != EOI) {
if (ch == match)
break;
if (ch == '\\') {
stash(args, ch);
ch = GetChar();
} else if (ch == '\n') {
lexerror("newline in string");
LineNumber++;
stash(args, match);
break;
}
stash(args, ch);
}
if (ch != match) {
lexerror("unterminated macro call");
return ')';
}
stash(args, ch);
} else
stash(args, ch);
}
}
expand_actual(args, idef, idf)
register struct args *args;
register struct idf *idf, *idef;
{
struct repl *nrepl = new_repl();
register char *p;
nrepl->r_args = new_args();
nrepl->r_ptr = nrepl->r_text;
if (expand_macro(nrepl, idef, idf)) {
register struct args *ap = nrepl->r_args;
for (p = nrepl->r_text; p < nrepl->r_ptr; p++)
*args->a_expptr++ = *p;
/* stash idef name */
for (p = idef->id_text; *p != '\0'; p++)
*args->a_rawptr++ = *p;
/* The following code deals with expanded function
like macro calls. It makes the following code
work:
#define def(a,b) x(a,b)
#define glue(a,b) a ## b
glue(abc,def(a,b))
Results in:
abcdef(a,b);
*/
if (ap->a_rawvec[0]) {
/* stash arguments */
register int i;
*args->a_rawptr++ = '(';
for (i = 0; ap->a_rawvec[i] != (char *)0; i++) {
for (p = ap->a_rawvec[i]; *p != '\0'; p++)
*args->a_rawptr++ = *p;
*args->a_rawptr++ = ',';
}
*--args->a_rawptr = ')';
++args->a_rawptr; /* one too far */
}
} else /* something happened during the macro expansion */
for (p = idef->id_text; *p != '\0'; p++)
stash(args, *p);
free_args(nrepl->r_args);
free_repl(nrepl);
}
maccount(repl, idf)
register struct repl *repl;
register struct idf *idf;
{
/* To prevent re-expansion of already expanded macro's we count
the occurrences of the currently expanded macro name in the
replacement list. This is mainly to prevent recursion as in:
#define f(a) f(2 * (a))
f(y+1);
This results in:
f(2*(y+1));
When reading the inserted text we decrement the count of a
macro name until it's zero. Then we start expanding it again.
*/
register char *text = repl->r_text;
register int pos = -1;
extern int idfsize;
while (*text != '\0') {
if (*text == '\'' || *text == '"') {
register int delim;
for (delim = *text++; *text != delim; text++)
if (*text == '\\')
text++;
text++;
} else
if (class(*text) == STIDF || class(*text) == STELL) {
char buf[(IDFSIZE > NUMSIZE ? IDFSIZE : NUMSIZE) + 1];
register char *p = buf;
do {
if (++pos < idfsize)
*p++ = *text;
text++;
} while (in_idf(*text));
*p++ = '\0';
if (!strcmp(idf->id_text, buf))
idf->id_macro->mc_count++;
} else
text++;
}
}
macro_func(idef)
register struct idf *idef;
{
/* macro_func() performs the special actions needed with some
macros. These macros are __FILE__ and __LINE__ which
replacement texts must be evaluated at the time they are
used.
*/
register struct macro *mac = idef->id_macro;
static char FilNamBuf[PATHLENGTH];
char *long2str();
switch (idef->id_text[2]) {
case 'F': /* __FILE__ */
FilNamBuf[0] = '"';
strcpy(&FilNamBuf[1], FileName);
strcat(FilNamBuf, "\"");
mac->mc_text = FilNamBuf;
mac->mc_length = strlen(FilNamBuf);
break;
case 'L': /* __LINE__ */
mac->mc_text = long2str((long)LineNumber, 10);
mac->mc_length = 1;
break;
default:
crash("(macro_func)");
/*NOTREACHED*/
}
}
macro2buffer(repl, idf, args)
register struct repl *repl;
register struct idf *idf;
register struct args *args;
{
/* macro2buffer expands the replacement list and places the
result onto the replacement buffer. It deals with the #
and ## operators, and inserts the actual parameters.
The argument buffer contains the raw argument (needed
for the ## operator), and the expanded argument (for
all other parameter substitutions).
The grammar of the replacement list is:
repl_list: TOKEN repl_list
| PARAMETER repl_list
| '#' PARAMETER
| TOKEN '##' TOKEN
| PARAMETER '##' TOKEN
| TOKEN '##' PARAMETER
| PARAMETER '##' PARAMETER
;
As the grammar indicates, we could make a DFA and
use this finite state machine for the replacement
list parsing (inserting the arguments, etc.).
Currently we go through the replacement list in a
linear fashion. This is VERY expensive, something
smarter should be done (but even a DFA is O(|s|)).
*/
register char *ptr = idf->id_macro->mc_text;
char *stringify();
while (*ptr) {
ASSERT(repl->r_ptr < &(repl->r_text[LAPBUF]));
if (*ptr == '\'' || *ptr == '"') {
register int delim = *ptr;
do {
*repl->r_ptr++ = *ptr;
if (*ptr == '\\')
*repl->r_ptr++ = *++ptr;
if (*ptr == '\0') {
lexerror("unterminated string");
*repl->r_ptr = '\0';
return;
}
ptr++;
} while (*ptr != delim || *ptr == '\0');
*repl->r_ptr++ = *ptr++;
} else if (*ptr == '#') {
if (*++ptr == '#') {
/* ## - paste operator */
ptr++;
/* trim the actual replacement list */
--repl->r_ptr;
while (is_wsp(*repl->r_ptr) &&
repl->r_ptr >= repl->r_text)
--repl->r_ptr;
/* ## occurred at the beginning of the
replacement list.
*/
if (repl->r_ptr == repl->r_text)
goto paste;
++repl->r_ptr;
/* skip space in macro replacement list */
while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
ptr++;
/* ## occurred at the end of the
replacement list.
*/
if (*ptr & FORMALP) {
register int n = *ptr++ & 0177;
register char *p;
ASSERT(n != 0);
p = args->a_rawvec[n-1];
while (is_wsp(*p))
p++;
while (*p)
*repl->r_ptr++ = *p++;
} else if (*ptr == '\0')
goto paste;
} else
ptr = stringify(repl, ptr, args);
} else if (*ptr & FORMALP) {
/* insert actual parameter */
register int n = *ptr++ & 0177;
register char *p, *q;
ASSERT(n != 0);
/* This is VERY dirty, we look ahead for the
## operater. If it's found we use the raw
argument buffer instead of the expanded
one.
*/
for (p = ptr; (*p & FORMALP) == 0 && is_wsp(*p); p++)
/* VOID */;
if (*p == '#' && p[1] == '#')
q = args->a_rawvec[n-1];
else
q = args->a_expvec[n-1];
while (*q)
*repl->r_ptr++ = *q++;
*repl->r_ptr++ = ' ';
} else
*repl->r_ptr++ = *ptr++;
}
*repl->r_ptr = '\0';
return;
paste:
/* Sorry, i know this looks a bit like
a unix device driver code.
*/
lexerror("illegal use of the ## operator");
return;
}
char *
stringify(repl, ptr, args)
register struct repl *repl;
register char *ptr;
register struct args *args;
{
/* If a parameter is immediately preceded by a # token
both are replaced by a single string literal that
contains the spelling of the token sequence for the
corresponding argument.
Each occurrence of white space between the argument's
tokens become a single space character in the string
literal. White spaces before the first token and after
the last token comprising the argument are deleted.
To retain the original spelling we insert backslashes
as appropriate. We only escape backslashes if they
occure within string tokens.
*/
register int space = 1; /* skip leading spaces */
register int delim = 0; /* string or character constant delim */
register int backslash = 0; /* last character was a \ */
/* skip spaces macro replacement list */
while ((*ptr & FORMALP) == 0 && is_wsp(*ptr))
ptr++;
if (*ptr & FORMALP) {
register int n = *ptr++ & 0177;
register char *p;
ASSERT(n != 0);
p = args->a_expvec[n-1];
*repl->r_ptr++ = '"';
while (*p) {
if (is_wsp(*p)) {
if (!space) {
space = 1;
*repl->r_ptr++ = ' ';
}
p++;
continue;
}
space = 0;
if (!delim && (*p == '"' || *p == '\''))
delim = *p;
else if (*p == delim && !backslash)
delim = 0;
backslash = *p == '\\';
if (*p == '"' || (delim && *p == '\\'))
*repl->r_ptr++ = '\\';
*repl->r_ptr++ = *p++;
}
/* trim spaces in the replacement list */
for (--repl->r_ptr; is_wsp(*repl->r_ptr); repl->r_ptr--)
/* VOID */;
*++repl->r_ptr = '"';
++repl->r_ptr; /* oops, one to far */
} else
error("illegal use of # operator");
return ptr;
}
stash(args, ch)
register struct args *args;
register int ch;
{
/* Stash characters into the macro expansion buffer.
*/
if (args->a_expptr >= &(args->a_expbuf[ARGBUF]))
fatal("macro argument buffer overflow");
*args->a_expptr++ = ch;
if (args->a_rawptr >= &(args->a_rawbuf[ARGBUF]))
fatal("raw macro argument buffer overflow");
*args->a_rawptr++ = ch;
}
#endif NOPP