286 lines
7 KiB
Groff
286 lines
7 KiB
Groff
.TH IBURG 1 "local \- 1/26/93"
|
|
.\" $Id$
|
|
.SH NAME
|
|
iburg \- code generator generator
|
|
.SH SYNOPSIS
|
|
.B iburg
|
|
[
|
|
.I option
|
|
]...
|
|
[ [
|
|
.I input
|
|
]
|
|
.I output
|
|
]
|
|
.br
|
|
.SH DESCRIPTION
|
|
.PP
|
|
.I iburg
|
|
reads BURG specification from
|
|
.I input
|
|
and writes a pattern-matching code generator to
|
|
.IR output .
|
|
If
|
|
.I input
|
|
is `\-' or is omitted,
|
|
.I iburg
|
|
reads the standard input;
|
|
If
|
|
.I output
|
|
is `\-' or is omitted,
|
|
.I iburg
|
|
writes to the standard output.
|
|
.PP
|
|
.I iburg
|
|
accepts BURG specifications that conform to the following EBNF grammar.
|
|
Terminals are enclosed in single quotes, all other symbols are nonterminals,
|
|
{X} denotes zero or more instances of X, and [X] denotes an optional X.
|
|
.PP
|
|
.nf
|
|
.RS
|
|
.ft CW
|
|
spec: { dcl } `%%' { rule } [ `%%' ]
|
|
|
|
dcl: `%start' nonterm
|
|
`%term' { identifier `=' integer }
|
|
|
|
rule: nonterm `:' tree `=' integer [ cost ] `;'
|
|
|
|
cost: `(' integer ')'
|
|
|
|
tree: term `(' tree `,' tree `)'
|
|
term `(' tree `)'
|
|
term
|
|
nonterm
|
|
.RE
|
|
.fi
|
|
.PP
|
|
Specifications are structurally similar to
|
|
.IR yacc 's.
|
|
Text between
|
|
`\f(CW%{\fP'
|
|
and
|
|
`\f(CW%}\fP'
|
|
is called the configuration section; there may be several such segments.
|
|
All are concatenated and copied verbatim into the head of the generated
|
|
parser, which is called burm.
|
|
Text after the second
|
|
`\f(CW%%\fP',
|
|
if any, is also copied verbatim into
|
|
.IR burm ,
|
|
at the end.
|
|
.PP
|
|
Specifications consist of declarations, a
|
|
`\f(CW%%\fP'
|
|
separator, and rules.
|
|
Input is line-oriented; each declaration and rule must appear on a separate line,
|
|
and declarations must begin in column 1.
|
|
Declarations declare terminals \(em the operators in subject
|
|
trees \(em and associate a unique, positive external symbol
|
|
number with each one.
|
|
Non-terminals are declared by their presence
|
|
on the left side of rules. The
|
|
\f(CW%start\fP
|
|
declaration optionally declares a non-terminal as the start symbol.
|
|
In the grammar above,
|
|
\f(CWterm\fP
|
|
and
|
|
\f(CWnonterm\fP
|
|
denote identifiers that are terminals and non-terminals, respectively.
|
|
.PP
|
|
Rules define tree patterns in a fully parenthesized prefix
|
|
form. Every non-terminal denotes a tree.
|
|
Each operator has a fixed
|
|
arity, which is inferred from the rules in which it is used.
|
|
A chain rule is a rule whose pattern is another non-terminal.
|
|
If no start symbol is declared, the non-terminal defined by the first rule is used.
|
|
.PP
|
|
Each rule has a unique, positive external rule number, which
|
|
comes after the pattern and is preceded by a
|
|
`\f(CW=\fP'.
|
|
External rule numbers are used to report the
|
|
matching rule to a user-supplied semantic action routine.
|
|
Rules end with an optional non-negative, integer cost; omitted costs
|
|
default to zero.
|
|
.PP
|
|
The display below shows a fragment of a BURG specification.
|
|
This example uses upper-case for terminals and lower-case for non-terminals.
|
|
.PP
|
|
.nf
|
|
.ft CW
|
|
%{
|
|
enum { ADDI=309, ADDRLP=295, ASGNI=53,
|
|
CNSTI=21, CVCI=85, I0I=661, INDIRC=67 };
|
|
|
|
typedef struct tree {
|
|
int op;
|
|
struct tree *kids[2];
|
|
int val;
|
|
struct { int state; } x;
|
|
} *NODEPTR_TYPE, *Tree;
|
|
#define LEFT_CHILD(p) ((p)->kids[0])
|
|
#define RIGHT_CHILD(p) ((p)->kids[1])
|
|
#define PANIC printf
|
|
#define STATE_LABEL(p) ((p)->x.state)
|
|
|
|
int OP_LABEL(NODEPTR_TYPE p) {
|
|
switch (p->op) {
|
|
case CNSTI: if (p->val == 0) return 661 /* I0I */;
|
|
default: return p->op;
|
|
}
|
|
}
|
|
%}
|
|
%term ADDI=309 ADDRLP=295 ASGNI=53
|
|
%term CNSTI=21 CVCI=85 I0I=661 INDIRC=67
|
|
%%
|
|
stmt: ASGNI(disp,reg) = 4 (1);
|
|
stmt: reg = 5;
|
|
reg: ADDI(reg,rc) = 6 (1);
|
|
reg: CVCI(INDIRC(disp)) = 7 (1);
|
|
reg: I0I = 8;
|
|
reg: disp = 9 (1);
|
|
disp: ADDI(reg,con) = 10;
|
|
disp: ADDRLP = 11;
|
|
rc: con = 12;
|
|
rc: reg = 13;
|
|
con: CNSTI = 14;
|
|
con: I0I = 15;
|
|
%%
|
|
.fi
|
|
.PP
|
|
The configuration section configures
|
|
\f(CWburm\fP
|
|
for the trees being parsed and the client's environment.
|
|
As shown, this section must define
|
|
\f(CWNODEPTR_TYPE\fP
|
|
to be a visible typedef symbol for a pointer to a
|
|
node in the subject tree.
|
|
\f(CWburm\fP
|
|
invokes
|
|
\f(CWOP_LABEL(p)\fP,
|
|
\f(CWLEFT\_CHILD(p)\fP, and
|
|
\f(CWRIGHT\_CHILD(p)\fP
|
|
to read the operator and children from the node pointed to by \f(CWp\fP.
|
|
It invokes
|
|
\f(CWPANIC\fP
|
|
when it detects an error.
|
|
If the configuration section defines these operations as macros, they are implemented in-line;
|
|
otherwise, they must be implemented as functions.
|
|
.PP
|
|
By default,
|
|
\f(CWburm\fP
|
|
computes and stores a single integral state in each node of the subject tree.
|
|
The configuration section must define a macro
|
|
\f(CWSTATE_LABEL(p)\fP
|
|
to access the state field of the node pointed to
|
|
by \f(CWp\fP. It must be large enough to hold a pointer, and
|
|
a macro is required because it is used as an lvalue.
|
|
The configuration section may define the macro
|
|
\f(CWSTATE_TYPE\fP
|
|
to specify a different type for state values; if
|
|
\f(CWSTATE_TYPE\fP
|
|
is not defined, int is used.
|
|
.PP
|
|
The configuration section may also define
|
|
\f(CWALLOC(n)\fP
|
|
for allocating
|
|
\f(CWn\fP
|
|
bytes.
|
|
If
|
|
\f(CWALLOC\fP
|
|
is not defined,
|
|
.IR malloc (3)
|
|
is used.
|
|
.SH OPTIONS
|
|
.TP
|
|
.BI \-p \ prefix
|
|
.br
|
|
.ns
|
|
.TP
|
|
.BI \-p prefix
|
|
Use
|
|
.I prefix
|
|
as the disambiquating prefix for exported names and visible fields.
|
|
The default is `\f(CWburm\fP'.
|
|
.TP
|
|
.BI \-maxcost= ddd
|
|
Use the integral value
|
|
.I ddd
|
|
as the maximum cost.
|
|
The default is \f(CWSHRT_MAX\fR
|
|
when ints are bigger than shorts, and
|
|
\f(CWSHRT_MAX\fP/2 when ints and shorts are the same size.
|
|
.TP
|
|
.B \-I
|
|
Emit code for the following data values and functions.
|
|
.sp
|
|
.nf
|
|
.ft CW
|
|
char burm_arity[];
|
|
char *burm_opname[];
|
|
char *burm_ntname[];
|
|
char *burm_string[];
|
|
short burm_cost[][4];
|
|
int burm_op_label(NODEPTR_TYPE p);
|
|
NODEPTR_TYPE burm_child(NODEPTR_TYPE p, int index);
|
|
STATE_TYPE burm_state_label(NODEPTR_TYPE p);
|
|
.sp
|
|
.fi
|
|
.ft R
|
|
\f(CWburm_arity\fP and
|
|
\f(CWburm_opname\fP
|
|
are indexed by external symbol numbers and gives their arities.
|
|
\f(CWburm_ntname\fP
|
|
is indexed by a non-terminal number and gives its string name.
|
|
\f(CWburm_string\fP
|
|
and
|
|
\f(CWburm_cost\fP
|
|
are indexed by an external rule number and give the string
|
|
representation and cost of each rule.
|
|
The functions encapsulate the similarly named macros.
|
|
.TP
|
|
.B \-T
|
|
Arrange for
|
|
.sp
|
|
.nf
|
|
.ft CW
|
|
void burm_trace(NODEPTR_TYPE p, int eruleno,
|
|
int cost, int bestcost);
|
|
.sp
|
|
.fi
|
|
.ft R
|
|
to be called at each successful match.
|
|
\f(CWp\fP
|
|
identifies the node and
|
|
\f(CWeruleno\fP
|
|
identifies the matching rule;
|
|
\f(CWeruleno\fP
|
|
is an index into \f(CWburm_string\fP.
|
|
\f(CWcost\fP
|
|
is the cost of the match and
|
|
\f(CWbestcost\fP
|
|
is the cost of the best previous match. The current match
|
|
wins only if
|
|
\f(CWcost\fP
|
|
is less than \f(CWbestcost\fP.
|
|
SHRT_MAX represents the infinite cost of no previous match.
|
|
\f(CWburm_trace\fP must be declared in the configuration section.
|
|
.SH "SEE ALSO"
|
|
C. W. Fraser, R. R. Henry and T. A. Proebsting,
|
|
`BURG \(em Fast optimal instruction selection and tree parsing,'
|
|
.I
|
|
SIGPLAN Notices
|
|
.BR 27 ,
|
|
4 (Apr. 1992), 68-76.
|
|
.PP
|
|
C. W. Fraser, D. R. Hanson and T. A. Proebsting,
|
|
`Engineering a simple, efficient code generator generator,'
|
|
.I
|
|
ACM Letters on Programming Languages and Systems
|
|
.BR 1 ,
|
|
3 (Sep. 1992), 213-226.
|
|
.br
|
|
.SH BUGS
|
|
Mail bug reports along with the shortest input
|
|
that exposes them to drh@drhanson.net.
|